gitee.com/zhaochuninhefei/gmgo@v0.0.31-0.20240209061119-069254a02979/grpc/clientconn.go (about) 1 /* 2 * 3 * Copyright 2014 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 package grpc 20 21 import ( 22 "context" 23 "errors" 24 "fmt" 25 "math" 26 "net/url" 27 "reflect" 28 "strings" 29 "sync" 30 "sync/atomic" 31 "time" 32 33 "gitee.com/zhaochuninhefei/gmgo/grpc/balancer" 34 "gitee.com/zhaochuninhefei/gmgo/grpc/balancer/base" 35 "gitee.com/zhaochuninhefei/gmgo/grpc/codes" 36 "gitee.com/zhaochuninhefei/gmgo/grpc/connectivity" 37 "gitee.com/zhaochuninhefei/gmgo/grpc/credentials" 38 "gitee.com/zhaochuninhefei/gmgo/grpc/internal/backoff" 39 "gitee.com/zhaochuninhefei/gmgo/grpc/internal/channelz" 40 "gitee.com/zhaochuninhefei/gmgo/grpc/internal/grpcsync" 41 iresolver "gitee.com/zhaochuninhefei/gmgo/grpc/internal/resolver" 42 "gitee.com/zhaochuninhefei/gmgo/grpc/internal/transport" 43 "gitee.com/zhaochuninhefei/gmgo/grpc/keepalive" 44 "gitee.com/zhaochuninhefei/gmgo/grpc/resolver" 45 "gitee.com/zhaochuninhefei/gmgo/grpc/serviceconfig" 46 "gitee.com/zhaochuninhefei/gmgo/grpc/status" 47 48 _ "gitee.com/zhaochuninhefei/gmgo/grpc/balancer/roundrobin" // To register roundrobin. 49 _ "gitee.com/zhaochuninhefei/gmgo/grpc/internal/resolver/dns" // To register dns resolver. 50 _ "gitee.com/zhaochuninhefei/gmgo/grpc/internal/resolver/passthrough" // To register passthrough resolver. 51 _ "gitee.com/zhaochuninhefei/gmgo/grpc/internal/resolver/unix" // To register unix resolver. 52 ) 53 54 const ( 55 // minimum time to give a connection to complete 56 minConnectTimeout = 20 * time.Second 57 // must match grpclbName in grpclb/grpclb.go 58 grpclbName = "grpclb" 59 ) 60 61 var ( 62 // ErrClientConnClosing indicates that the operation is illegal because 63 // the ClientConn is closing. 64 // 65 // ToDeprecated: this error should not be relied upon by users; use the status 66 // code of Canceled instead. 67 ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing") 68 // errConnDrain indicates that the connection starts to be drained and does not accept any new RPCs. 69 errConnDrain = errors.New("grpc: the connection is drained") 70 // errConnClosing indicates that the connection is closing. 71 errConnClosing = errors.New("grpc: the connection is closing") 72 // invalidDefaultServiceConfigErrPrefix is used to prefix the json parsing error for the default 73 // service config. 74 invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid" 75 ) 76 77 // The following errors are returned from Dial and DialContext 78 var ( 79 // errNoTransportSecurity indicates that there is no transport security 80 // being set for ClientConn. Users should either set one or explicitly 81 // call WithInsecure DialOption to disable security. 82 errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)") 83 // errTransportCredsAndBundle indicates that creds bundle is used together 84 // with other individual Transport Credentials. 85 errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials") 86 // errNoTransportCredsInBundle indicated that the configured creds bundle 87 // returned a transport credentials which was nil. 88 errNoTransportCredsInBundle = errors.New("grpc: credentials.Bundle must return non-nil transport credentials") 89 // errTransportCredentialsMissing indicates that users want to transmit 90 // security information (e.g., OAuth2 token) which requires secure 91 // connection on an insecure connection. 92 errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)") 93 ) 94 95 const ( 96 defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4 97 defaultClientMaxSendMessageSize = math.MaxInt32 98 // http2IOBufSize specifies the buffer size for sending frames. 99 defaultWriteBufSize = 32 * 1024 100 defaultReadBufSize = 32 * 1024 101 ) 102 103 // Dial creates a client connection to the given target. 104 func Dial(target string, opts ...DialOption) (*ClientConn, error) { 105 return DialContext(context.Background(), target, opts...) 106 } 107 108 type defaultConfigSelector struct { 109 sc *ServiceConfig 110 } 111 112 func (dcs *defaultConfigSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*iresolver.RPCConfig, error) { 113 return &iresolver.RPCConfig{ 114 Context: rpcInfo.Context, 115 MethodConfig: getMethodConfig(dcs.sc, rpcInfo.Method), 116 }, nil 117 } 118 119 // DialContext creates a client connection to the given target. By default, it's 120 // a non-blocking dial (the function won't wait for connections to be 121 // established, and connecting happens in the background). To make it a blocking 122 // dial, use WithBlock() dial option. 123 // 124 // In the non-blocking case, the ctx does not act against the connection. It 125 // only controls the setup steps. 126 // 127 // In the blocking case, ctx can be used to cancel or expire the pending 128 // connection. Once this function returns, the cancellation and expiration of 129 // ctx will be noop. Users should call ClientConn.Close to terminate all the 130 // pending operations after this function returns. 131 // 132 // The target name syntax is defined in 133 // https://github.com/grpc/grpc/blob/master/doc/naming.md. 134 // e.g. to use dns resolver, a "dns:///" prefix should be applied to the target. 135 func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) { 136 cc := &ClientConn{ 137 target: target, 138 csMgr: &connectivityStateManager{}, 139 conns: make(map[*addrConn]struct{}), 140 dopts: defaultDialOptions(), 141 blockingpicker: newPickerWrapper(), 142 czData: new(channelzData), 143 firstResolveEvent: grpcsync.NewEvent(), 144 } 145 cc.retryThrottler.Store((*retryThrottler)(nil)) 146 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil}) 147 cc.ctx, cc.cancel = context.WithCancel(context.Background()) 148 149 for _, opt := range opts { 150 opt.apply(&cc.dopts) 151 } 152 153 chainUnaryClientInterceptors(cc) 154 chainStreamClientInterceptors(cc) 155 156 defer func() { 157 if err != nil { 158 _ = cc.Close() 159 } 160 }() 161 162 if channelz.IsOn() { 163 if cc.dopts.channelzParentID != 0 { 164 cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target) 165 channelz.AddTraceEvent(logger, cc.channelzID, 0, &channelz.TraceEventDesc{ 166 Desc: "Channel Created", 167 Severity: channelz.CtInfo, 168 Parent: &channelz.TraceEventDesc{ 169 Desc: fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID), 170 Severity: channelz.CtInfo, 171 }, 172 }) 173 } else { 174 cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target) 175 channelz.Info(logger, cc.channelzID, "Channel Created") 176 } 177 cc.csMgr.channelzID = cc.channelzID 178 } 179 180 if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil { 181 return nil, errNoTransportSecurity 182 } 183 if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil { 184 return nil, errTransportCredsAndBundle 185 } 186 if cc.dopts.copts.CredsBundle != nil && cc.dopts.copts.CredsBundle.TransportCredentials() == nil { 187 return nil, errNoTransportCredsInBundle 188 } 189 transportCreds := cc.dopts.copts.TransportCredentials 190 if transportCreds == nil { 191 transportCreds = cc.dopts.copts.CredsBundle.TransportCredentials() 192 } 193 if transportCreds.Info().SecurityProtocol == "insecure" { 194 for _, cd := range cc.dopts.copts.PerRPCCredentials { 195 if cd.RequireTransportSecurity() { 196 return nil, errTransportCredentialsMissing 197 } 198 } 199 } 200 201 if cc.dopts.defaultServiceConfigRawJSON != nil { 202 scpr := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON) 203 if scpr.Err != nil { 204 return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, scpr.Err) 205 } 206 cc.dopts.defaultServiceConfig, _ = scpr.Config.(*ServiceConfig) 207 } 208 cc.mkp = cc.dopts.copts.KeepaliveParams 209 210 if cc.dopts.copts.UserAgent != "" { 211 cc.dopts.copts.UserAgent += " " + grpcUA 212 } else { 213 cc.dopts.copts.UserAgent = grpcUA 214 } 215 216 if cc.dopts.timeout > 0 { 217 var cancel context.CancelFunc 218 ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout) 219 defer cancel() 220 } 221 defer func() { 222 select { 223 case <-ctx.Done(): 224 switch { 225 case errors.Is(ctx.Err(), err): 226 conn = nil 227 case err == nil || !cc.dopts.returnLastError: 228 conn, err = nil, ctx.Err() 229 default: 230 conn, err = nil, fmt.Errorf("%v: %v", ctx.Err(), err) 231 } 232 default: 233 } 234 }() 235 236 scSet := false 237 if cc.dopts.scChan != nil { 238 // Try to get an initial service config. 239 select { 240 case sc, ok := <-cc.dopts.scChan: 241 if ok { 242 cc.sc = &sc 243 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc}) 244 scSet = true 245 } 246 default: 247 } 248 } 249 if cc.dopts.bs == nil { 250 cc.dopts.bs = backoff.DefaultExponential 251 } 252 253 // Determine the resolver to use. 254 resolverBuilder, err := cc.parseTargetAndFindResolver() 255 if err != nil { 256 return nil, err 257 } 258 // Endpoint is deprecated, use GetEndpoint() instead. 259 //cc.authority, err = determineAuthority(cc.parsedTarget.Endpoint, cc.target, cc.dopts) 260 cc.authority, err = determineAuthority(cc.parsedTarget.GetEndpoint(), cc.target, cc.dopts) 261 if err != nil { 262 return nil, err 263 } 264 channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority) 265 266 if cc.dopts.scChan != nil && !scSet { 267 // Blocking wait for the initial service config. 268 select { 269 case sc, ok := <-cc.dopts.scChan: 270 if ok { 271 cc.sc = &sc 272 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc}) 273 } 274 case <-ctx.Done(): 275 return nil, ctx.Err() 276 } 277 } 278 if cc.dopts.scChan != nil { 279 go cc.scWatcher() 280 } 281 282 var credsClone credentials.TransportCredentials 283 if creds := cc.dopts.copts.TransportCredentials; creds != nil { 284 credsClone = creds.Clone() 285 } 286 cc.balancerBuildOpts = balancer.BuildOptions{ 287 DialCreds: credsClone, 288 CredsBundle: cc.dopts.copts.CredsBundle, 289 Dialer: cc.dopts.copts.Dialer, 290 Authority: cc.authority, 291 CustomUserAgent: cc.dopts.copts.UserAgent, 292 ChannelzParentID: cc.channelzID, 293 Target: cc.parsedTarget, 294 } 295 296 // Build the resolver. 297 rWrapper, err := newCCResolverWrapper(cc, resolverBuilder) 298 if err != nil { 299 return nil, fmt.Errorf("failed to build resolver: %v", err) 300 } 301 cc.mu.Lock() 302 cc.resolverWrapper = rWrapper 303 cc.mu.Unlock() 304 305 // A blocking dial blocks until the clientConn is ready. 306 if cc.dopts.block { 307 for { 308 cc.Connect() 309 s := cc.GetState() 310 if s == connectivity.Ready { 311 break 312 } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure { 313 if err = cc.connectionError(); err != nil { 314 terr, ok := err.(interface { 315 Temporary() bool 316 }) 317 if ok && !terr.Temporary() { 318 return nil, err 319 } 320 } 321 } 322 if !cc.WaitForStateChange(ctx, s) { 323 // ctx got timeout or canceled. 324 if err = cc.connectionError(); err != nil && cc.dopts.returnLastError { 325 return nil, err 326 } 327 return nil, ctx.Err() 328 } 329 } 330 } 331 332 return cc, nil 333 } 334 335 // chainUnaryClientInterceptors chains all unary client interceptors into one. 336 func chainUnaryClientInterceptors(cc *ClientConn) { 337 interceptors := cc.dopts.chainUnaryInts 338 // Prepend dopts.unaryInt to the chaining interceptors if it exists, since unaryInt will 339 // be executed before any other chained interceptors. 340 if cc.dopts.unaryInt != nil { 341 interceptors = append([]UnaryClientInterceptor{cc.dopts.unaryInt}, interceptors...) 342 } 343 var chainedInt UnaryClientInterceptor 344 if len(interceptors) == 0 { 345 chainedInt = nil 346 } else if len(interceptors) == 1 { 347 chainedInt = interceptors[0] 348 } else { 349 chainedInt = func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error { 350 return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...) 351 } 352 } 353 cc.dopts.unaryInt = chainedInt 354 } 355 356 // getChainUnaryInvoker recursively generate the chained unary invoker. 357 func getChainUnaryInvoker(interceptors []UnaryClientInterceptor, curr int, finalInvoker UnaryInvoker) UnaryInvoker { 358 if curr == len(interceptors)-1 { 359 return finalInvoker 360 } 361 return func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error { 362 return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...) 363 } 364 } 365 366 // chainStreamClientInterceptors chains all stream client interceptors into one. 367 func chainStreamClientInterceptors(cc *ClientConn) { 368 interceptors := cc.dopts.chainStreamInts 369 // Prepend dopts.streamInt to the chaining interceptors if it exists, since streamInt will 370 // be executed before any other chained interceptors. 371 if cc.dopts.streamInt != nil { 372 interceptors = append([]StreamClientInterceptor{cc.dopts.streamInt}, interceptors...) 373 } 374 var chainedInt StreamClientInterceptor 375 if len(interceptors) == 0 { 376 chainedInt = nil 377 } else if len(interceptors) == 1 { 378 chainedInt = interceptors[0] 379 } else { 380 chainedInt = func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, streamer Streamer, opts ...CallOption) (ClientStream, error) { 381 return interceptors[0](ctx, desc, cc, method, getChainStreamer(interceptors, 0, streamer), opts...) 382 } 383 } 384 cc.dopts.streamInt = chainedInt 385 } 386 387 // getChainStreamer recursively generate the chained client stream constructor. 388 func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStreamer Streamer) Streamer { 389 if curr == len(interceptors)-1 { 390 return finalStreamer 391 } 392 return func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (ClientStream, error) { 393 return interceptors[curr+1](ctx, desc, cc, method, getChainStreamer(interceptors, curr+1, finalStreamer), opts...) 394 } 395 } 396 397 // connectivityStateManager keeps the connectivity.State of ClientConn. 398 // This struct will eventually be exported so the balancers can access it. 399 type connectivityStateManager struct { 400 mu sync.Mutex 401 state connectivity.State 402 notifyChan chan struct{} 403 channelzID int64 404 } 405 406 // updateState updates the connectivity.State of ClientConn. 407 // If there's a change it notifies goroutines waiting on state change to 408 // happen. 409 func (csm *connectivityStateManager) updateState(state connectivity.State) { 410 csm.mu.Lock() 411 defer csm.mu.Unlock() 412 if csm.state == connectivity.Shutdown { 413 return 414 } 415 if csm.state == state { 416 return 417 } 418 csm.state = state 419 channelz.Infof(logger, csm.channelzID, "Channel Connectivity change to %v", state) 420 if csm.notifyChan != nil { 421 // There are other goroutines waiting on this channel. 422 close(csm.notifyChan) 423 csm.notifyChan = nil 424 } 425 } 426 427 func (csm *connectivityStateManager) getState() connectivity.State { 428 csm.mu.Lock() 429 defer csm.mu.Unlock() 430 return csm.state 431 } 432 433 func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} { 434 csm.mu.Lock() 435 defer csm.mu.Unlock() 436 if csm.notifyChan == nil { 437 csm.notifyChan = make(chan struct{}) 438 } 439 return csm.notifyChan 440 } 441 442 // ClientConnInterface defines the functions clients need to perform unary and 443 // streaming RPCs. It is implemented by *ClientConn, and is only intended to 444 // be referenced by generated code. 445 type ClientConnInterface interface { 446 // Invoke performs a unary RPC and returns after the response is received 447 // into reply. 448 Invoke(ctx context.Context, method string, args interface{}, reply interface{}, opts ...CallOption) error 449 // NewStream begins a streaming RPC. 450 NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error) 451 } 452 453 // Assert *ClientConn implements ClientConnInterface. 454 var _ ClientConnInterface = (*ClientConn)(nil) 455 456 // ClientConn represents a virtual connection to a conceptual endpoint, to 457 // perform RPCs. 458 // 459 // A ClientConn is free to have zero or more actual connections to the endpoint 460 // based on configuration, load, etc. It is also free to determine which actual 461 // endpoints to use and may change it every RPC, permitting client-side load 462 // balancing. 463 // 464 // A ClientConn encapsulates a range of functionality including name 465 // resolution, TCP connection establishment (with retries and backoff) and TLS 466 // handshakes. It also handles errors on established connections by 467 // re-resolving the name and reconnecting. 468 type ClientConn struct { 469 ctx context.Context 470 cancel context.CancelFunc 471 472 target string 473 parsedTarget resolver.Target 474 authority string 475 dopts dialOptions 476 csMgr *connectivityStateManager 477 478 balancerBuildOpts balancer.BuildOptions 479 blockingpicker *pickerWrapper 480 481 safeConfigSelector iresolver.SafeConfigSelector 482 483 mu sync.RWMutex 484 resolverWrapper *ccResolverWrapper 485 sc *ServiceConfig 486 conns map[*addrConn]struct{} 487 // Keepalive parameter can be updated if a GoAway is received. 488 mkp keepalive.ClientParameters 489 curBalancerName string 490 balancerWrapper *ccBalancerWrapper 491 retryThrottler atomic.Value 492 493 firstResolveEvent *grpcsync.Event 494 495 channelzID int64 // channelz unique identification number 496 czData *channelzData 497 498 lceMu sync.Mutex // protects lastConnectionError 499 lastConnectionError error 500 } 501 502 // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or 503 // ctx expires. A true value is returned in former case and false in latter. 504 // 505 // Experimental 506 // 507 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 508 // later release. 509 func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool { 510 ch := cc.csMgr.getNotifyChan() 511 if cc.csMgr.getState() != sourceState { 512 return true 513 } 514 select { 515 case <-ctx.Done(): 516 return false 517 case <-ch: 518 return true 519 } 520 } 521 522 // GetState returns the connectivity.State of ClientConn. 523 // 524 // Experimental 525 // 526 // Notice: This API is EXPERIMENTAL and may be changed or removed in a later 527 // release. 528 func (cc *ClientConn) GetState() connectivity.State { 529 return cc.csMgr.getState() 530 } 531 532 // Connect causes all subchannels in the ClientConn to attempt to connect if 533 // the channel is idle. Does not wait for the connection attempts to begin 534 // before returning. 535 // 536 // Experimental 537 // 538 // Notice: This API is EXPERIMENTAL and may be changed or removed in a later 539 // release. 540 func (cc *ClientConn) Connect() { 541 cc.mu.Lock() 542 defer cc.mu.Unlock() 543 if cc.balancerWrapper != nil && cc.balancerWrapper.exitIdle() { 544 return 545 } 546 for ac := range cc.conns { 547 acNew := ac 548 go func() { 549 _ = acNew.connect() 550 }() 551 } 552 } 553 554 func (cc *ClientConn) scWatcher() { 555 for { 556 select { 557 case sc, ok := <-cc.dopts.scChan: 558 if !ok { 559 return 560 } 561 cc.mu.Lock() 562 // TODO: load balance policy runtime change is ignored. 563 // We may revisit this decision in the future. 564 cc.sc = &sc 565 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc}) 566 cc.mu.Unlock() 567 case <-cc.ctx.Done(): 568 return 569 } 570 } 571 } 572 573 // waitForResolvedAddrs blocks until the resolver has provided addresses or the 574 // context expires. Returns nil unless the context expires first; otherwise 575 // returns a status error based on the context. 576 func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error { 577 // This is on the RPC path, so we use a fast path to avoid the 578 // more-expensive "select" below after the resolver has returned once. 579 if cc.firstResolveEvent.HasFired() { 580 return nil 581 } 582 select { 583 case <-cc.firstResolveEvent.Done(): 584 return nil 585 case <-ctx.Done(): 586 return status.FromContextError(ctx.Err()).Err() 587 case <-cc.ctx.Done(): 588 return ErrClientConnClosing 589 } 590 } 591 592 var emptyServiceConfig *ServiceConfig 593 594 func init() { 595 cfg := parseServiceConfig("{}") 596 if cfg.Err != nil { 597 panic(fmt.Sprintf("impossible error parsing empty service config: %v", cfg.Err)) 598 } 599 emptyServiceConfig = cfg.Config.(*ServiceConfig) 600 } 601 602 func (cc *ClientConn) maybeApplyDefaultServiceConfig(addrs []resolver.Address) { 603 if cc.sc != nil { 604 cc.applyServiceConfigAndBalancer(cc.sc, nil, addrs) 605 return 606 } 607 if cc.dopts.defaultServiceConfig != nil { 608 cc.applyServiceConfigAndBalancer(cc.dopts.defaultServiceConfig, &defaultConfigSelector{cc.dopts.defaultServiceConfig}, addrs) 609 } else { 610 cc.applyServiceConfigAndBalancer(emptyServiceConfig, &defaultConfigSelector{emptyServiceConfig}, addrs) 611 } 612 } 613 614 func (cc *ClientConn) updateResolverState(s resolver.State, err error) error { 615 defer cc.firstResolveEvent.Fire() 616 cc.mu.Lock() 617 // Check if the ClientConn is already closed. Some fields (e.g. 618 // balancerWrapper) are set to nil when closing the ClientConn, and could 619 // cause nil pointer panic if we don't have this check. 620 if cc.conns == nil { 621 cc.mu.Unlock() 622 return nil 623 } 624 625 if err != nil { 626 // May need to apply the initial service config in case the resolver 627 // doesn't support service configs, or doesn't provide a service config 628 // with the new addresses. 629 cc.maybeApplyDefaultServiceConfig(nil) 630 631 if cc.balancerWrapper != nil { 632 cc.balancerWrapper.resolverError(err) 633 } 634 635 // No addresses are valid with err set; return early. 636 cc.mu.Unlock() 637 return balancer.ErrBadResolverState 638 } 639 640 var ret error 641 if cc.dopts.disableServiceConfig { 642 channelz.Infof(logger, cc.channelzID, "ignoring service config from resolver (%v) and applying the default because service config is disabled", s.ServiceConfig) 643 cc.maybeApplyDefaultServiceConfig(s.Addresses) 644 } else if s.ServiceConfig == nil { 645 cc.maybeApplyDefaultServiceConfig(s.Addresses) 646 // TODO: do we need to apply a failing LB policy if there is no 647 // default, per the error handling design? 648 } else { 649 if sc, ok := s.ServiceConfig.Config.(*ServiceConfig); s.ServiceConfig.Err == nil && ok { 650 configSelector := iresolver.GetConfigSelector(s) 651 if configSelector != nil { 652 if len(s.ServiceConfig.Config.(*ServiceConfig).Methods) != 0 { 653 channelz.Infof(logger, cc.channelzID, "method configs in service config will be ignored due to presence of config selector") 654 } 655 } else { 656 configSelector = &defaultConfigSelector{sc} 657 } 658 cc.applyServiceConfigAndBalancer(sc, configSelector, s.Addresses) 659 } else { 660 ret = balancer.ErrBadResolverState 661 if cc.balancerWrapper == nil { 662 var err error 663 if s.ServiceConfig.Err != nil { 664 err = status.Errorf(codes.Unavailable, "error parsing service config: %v", s.ServiceConfig.Err) 665 } else { 666 err = status.Errorf(codes.Unavailable, "illegal service config type: %T", s.ServiceConfig.Config) 667 } 668 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{cc.sc}) 669 cc.blockingpicker.updatePicker(base.NewErrPicker(err)) 670 cc.csMgr.updateState(connectivity.TransientFailure) 671 cc.mu.Unlock() 672 return ret 673 } 674 } 675 } 676 677 var balCfg serviceconfig.LoadBalancingConfig 678 if cc.dopts.balancerBuilder == nil && cc.sc != nil && cc.sc.lbConfig != nil { 679 balCfg = cc.sc.lbConfig.cfg 680 } 681 682 cbn := cc.curBalancerName 683 bw := cc.balancerWrapper 684 cc.mu.Unlock() 685 if cbn != grpclbName { 686 // Filter any grpclb addresses since we don't have the grpclb balancer. 687 for i := 0; i < len(s.Addresses); { 688 if s.Addresses[i].Type == resolver.GRPCLB { 689 copy(s.Addresses[i:], s.Addresses[i+1:]) 690 s.Addresses = s.Addresses[:len(s.Addresses)-1] 691 continue 692 } 693 i++ 694 } 695 } 696 uccsErr := bw.updateClientConnState(&balancer.ClientConnState{ResolverState: s, BalancerConfig: balCfg}) 697 if ret == nil { 698 ret = uccsErr // prefer ErrBadResolver state since any other error is 699 // currently meaningless to the caller. 700 } 701 return ret 702 } 703 704 // switchBalancer starts the switching from current balancer to the balancer 705 // with the given name. 706 // 707 // It will NOT send the current address list to the new balancer. If needed, 708 // caller of this function should send address list to the new balancer after 709 // this function returns. 710 // 711 // Caller must hold cc.mu. 712 func (cc *ClientConn) switchBalancer(name string) { 713 if strings.EqualFold(cc.curBalancerName, name) { 714 return 715 } 716 717 channelz.Infof(logger, cc.channelzID, "ClientConn switching balancer to %q", name) 718 if cc.dopts.balancerBuilder != nil { 719 channelz.Info(logger, cc.channelzID, "ignoring balancer switching: Balancer DialOption used instead") 720 return 721 } 722 if cc.balancerWrapper != nil { 723 // Don't hold cc.mu while closing the balancers. The balancers may call 724 // methods that require cc.mu (e.g. cc.NewSubConn()). Holding the mutex 725 // would cause a deadlock in that case. 726 cc.mu.Unlock() 727 cc.balancerWrapper.close() 728 cc.mu.Lock() 729 } 730 731 builder := balancer.Get(name) 732 if builder == nil { 733 channelz.Warningf(logger, cc.channelzID, "Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName) 734 channelz.Infof(logger, cc.channelzID, "failed to get balancer builder for: %v, using pick_first instead", name) 735 builder = newPickfirstBuilder() 736 } else { 737 channelz.Infof(logger, cc.channelzID, "Channel switches to new LB policy %q", name) 738 } 739 740 cc.curBalancerName = builder.Name() 741 cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts) 742 } 743 744 func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State, err error) { 745 cc.mu.Lock() 746 if cc.conns == nil { 747 cc.mu.Unlock() 748 return 749 } 750 // TODO(bar switching) send updates to all balancer wrappers when balancer 751 // gracefully switching is supported. 752 cc.balancerWrapper.handleSubConnStateChange(sc, s, err) 753 cc.mu.Unlock() 754 } 755 756 // newAddrConn creates an addrConn for addrs and adds it to cc.conns. 757 // 758 // Caller needs to make sure len(addrs) > 0. 759 func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) { 760 ac := &addrConn{ 761 state: connectivity.Idle, 762 cc: cc, 763 addrs: addrs, 764 scopts: opts, 765 dopts: cc.dopts, 766 czData: new(channelzData), 767 resetBackoff: make(chan struct{}), 768 } 769 ac.ctx, ac.cancel = context.WithCancel(cc.ctx) 770 // Track ac in cc. This needs to be done before any getTransport(...) is called. 771 cc.mu.Lock() 772 if cc.conns == nil { 773 cc.mu.Unlock() 774 return nil, ErrClientConnClosing 775 } 776 if channelz.IsOn() { 777 ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "") 778 channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{ 779 Desc: "Subchannel Created", 780 Severity: channelz.CtInfo, 781 Parent: &channelz.TraceEventDesc{ 782 Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID), 783 Severity: channelz.CtInfo, 784 }, 785 }) 786 } 787 cc.conns[ac] = struct{}{} 788 cc.mu.Unlock() 789 return ac, nil 790 } 791 792 // removeAddrConn removes the addrConn in the subConn from clientConn. 793 // It also tears down the ac with the given error. 794 func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) { 795 cc.mu.Lock() 796 if cc.conns == nil { 797 cc.mu.Unlock() 798 return 799 } 800 delete(cc.conns, ac) 801 cc.mu.Unlock() 802 ac.tearDown(err) 803 } 804 805 func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric { 806 return &channelz.ChannelInternalMetric{ 807 State: cc.GetState(), 808 Target: cc.target, 809 CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted), 810 CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded), 811 CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed), 812 LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)), 813 } 814 } 815 816 // Target returns the target string of the ClientConn. 817 // 818 // Experimental 819 // 820 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 821 // later release. 822 func (cc *ClientConn) Target() string { 823 return cc.target 824 } 825 826 func (cc *ClientConn) incrCallsStarted() { 827 atomic.AddInt64(&cc.czData.callsStarted, 1) 828 atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano()) 829 } 830 831 func (cc *ClientConn) incrCallsSucceeded() { 832 atomic.AddInt64(&cc.czData.callsSucceeded, 1) 833 } 834 835 func (cc *ClientConn) incrCallsFailed() { 836 atomic.AddInt64(&cc.czData.callsFailed, 1) 837 } 838 839 // connect starts creating a transport. 840 // It does nothing if the ac is not IDLE. 841 // TODO(bar) Move this to the addrConn section. 842 func (ac *addrConn) connect() error { 843 ac.mu.Lock() 844 if ac.state == connectivity.Shutdown { 845 ac.mu.Unlock() 846 return errConnClosing 847 } 848 if ac.state != connectivity.Idle { 849 ac.mu.Unlock() 850 return nil 851 } 852 // Update connectivity state within the lock to prevent subsequent or 853 // concurrent calls from resetting the transport more than once. 854 ac.updateConnectivityState(connectivity.Connecting, nil) 855 ac.mu.Unlock() 856 857 ac.resetTransport() 858 return nil 859 } 860 861 // tryUpdateAddrs tries to update ac.addrs with the new addresses list. 862 // 863 // If ac is Connecting, it returns false. The caller should tear down the ac and 864 // create a new one. Note that the backoff will be reset when this happens. 865 // 866 // If ac is TransientFailure, it updates ac.addrs and returns true. The updated 867 // addresses will be picked up by retry in the next iteration after backoff. 868 // 869 // If ac is Shutdown or Idle, it updates ac.addrs and returns true. 870 // 871 // If ac is Ready, it checks whether current connected address of ac is in the 872 // new addrs list. 873 // - If true, it updates ac.addrs and returns true. The ac will keep using 874 // the existing connection. 875 // - If false, it does nothing and returns false. 876 func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool { 877 ac.mu.Lock() 878 defer ac.mu.Unlock() 879 channelz.Infof(logger, ac.channelzID, "addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs) 880 if ac.state == connectivity.Shutdown || 881 ac.state == connectivity.TransientFailure || 882 ac.state == connectivity.Idle { 883 ac.addrs = addrs 884 return true 885 } 886 887 if ac.state == connectivity.Connecting { 888 return false 889 } 890 891 // ac.state is Ready, try to find the connected address. 892 var curAddrFound bool 893 for _, a := range addrs { 894 a.ServerName = ac.cc.getServerName(a) 895 if reflect.DeepEqual(ac.curAddr, a) { 896 curAddrFound = true 897 break 898 } 899 } 900 channelz.Infof(logger, ac.channelzID, "addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound) 901 if curAddrFound { 902 ac.addrs = addrs 903 } 904 905 return curAddrFound 906 } 907 908 // getServerName determines the serverName to be used in the connection 909 // handshake. The default value for the serverName is the authority on the 910 // ClientConn, which either comes from the user's dial target or through an 911 // authority override specified using the WithAuthority dial option. Name 912 // resolvers can specify a per-address override for the serverName through the 913 // resolver.Address.ServerName field which is used only if the WithAuthority 914 // dial option was not used. The rationale is that per-address authority 915 // overrides specified by the name resolver can represent a security risk, while 916 // an override specified by the user is more dependable since they probably know 917 // what they are doing. 918 func (cc *ClientConn) getServerName(addr resolver.Address) string { 919 if cc.dopts.authority != "" { 920 return cc.dopts.authority 921 } 922 if addr.ServerName != "" { 923 return addr.ServerName 924 } 925 return cc.authority 926 } 927 928 func getMethodConfig(sc *ServiceConfig, method string) MethodConfig { 929 if sc == nil { 930 return MethodConfig{} 931 } 932 if m, ok := sc.Methods[method]; ok { 933 return m 934 } 935 i := strings.LastIndex(method, "/") 936 if m, ok := sc.Methods[method[:i+1]]; ok { 937 return m 938 } 939 return sc.Methods[""] 940 } 941 942 // GetMethodConfig gets the method config of the input method. 943 // If there's an exact match for input method (i.e. /service/method), we return 944 // the corresponding MethodConfig. 945 // If there isn't an exact match for the input method, we look for the service's default 946 // config under the service (i.e /service/) and then for the default for all services (empty string). 947 // 948 // If there is a default MethodConfig for the service, we return it. 949 // Otherwise, we return an empty MethodConfig. 950 func (cc *ClientConn) GetMethodConfig(method string) MethodConfig { 951 // TODO: Avoid the locking here. 952 cc.mu.RLock() 953 defer cc.mu.RUnlock() 954 return getMethodConfig(cc.sc, method) 955 } 956 957 func (cc *ClientConn) healthCheckConfig() *healthCheckConfig { 958 cc.mu.RLock() 959 defer cc.mu.RUnlock() 960 if cc.sc == nil { 961 return nil 962 } 963 return cc.sc.healthCheckConfig 964 } 965 966 func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) { 967 t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickInfo{ 968 Ctx: ctx, 969 FullMethodName: method, 970 }) 971 if err != nil { 972 return nil, nil, toRPCErr(err) 973 } 974 return t, done, nil 975 } 976 977 func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, configSelector iresolver.ConfigSelector, addrs []resolver.Address) { 978 if sc == nil { 979 // should never reach here. 980 return 981 } 982 cc.sc = sc 983 if configSelector != nil { 984 cc.safeConfigSelector.UpdateConfigSelector(configSelector) 985 } 986 987 if cc.sc.retryThrottling != nil { 988 newThrottler := &retryThrottler{ 989 tokens: cc.sc.retryThrottling.MaxTokens, 990 max: cc.sc.retryThrottling.MaxTokens, 991 thresh: cc.sc.retryThrottling.MaxTokens / 2, 992 ratio: cc.sc.retryThrottling.TokenRatio, 993 } 994 cc.retryThrottler.Store(newThrottler) 995 } else { 996 cc.retryThrottler.Store((*retryThrottler)(nil)) 997 } 998 999 if cc.dopts.balancerBuilder == nil { 1000 // Only look at balancer types and switch balancer if balancer dial 1001 // option is not set. 1002 var newBalancerName string 1003 if cc.sc != nil && cc.sc.lbConfig != nil { 1004 newBalancerName = cc.sc.lbConfig.name 1005 } else { 1006 var isGRPCLB bool 1007 for _, a := range addrs { 1008 if a.Type == resolver.GRPCLB { 1009 isGRPCLB = true 1010 break 1011 } 1012 } 1013 if isGRPCLB { 1014 newBalancerName = grpclbName 1015 } else if cc.sc != nil && cc.sc.LB != nil { 1016 newBalancerName = *cc.sc.LB 1017 } else { 1018 newBalancerName = PickFirstBalancerName 1019 } 1020 } 1021 cc.switchBalancer(newBalancerName) 1022 } else if cc.balancerWrapper == nil { 1023 // Balancer dial option was set, and this is the first time handling 1024 // resolved addresses. Build a balancer with dopts.balancerBuilder. 1025 cc.curBalancerName = cc.dopts.balancerBuilder.Name() 1026 cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts) 1027 } 1028 } 1029 1030 func (cc *ClientConn) resolveNow(o resolver.ResolveNowOptions) { 1031 cc.mu.RLock() 1032 r := cc.resolverWrapper 1033 cc.mu.RUnlock() 1034 if r == nil { 1035 return 1036 } 1037 go r.resolveNow(o) 1038 } 1039 1040 // ResetConnectBackoff wakes up all subchannels in transient failure and causes 1041 // them to attempt another connection immediately. It also resets the backoff 1042 // times used for subsequent attempts regardless of the current state. 1043 // 1044 // In general, this function should not be used. Typical service or network 1045 // outages result in a reasonable client reconnection strategy by default. 1046 // However, if a previously unavailable network becomes available, this may be 1047 // used to trigger an immediate reconnect. 1048 // 1049 // Experimental 1050 // 1051 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 1052 // later release. 1053 func (cc *ClientConn) ResetConnectBackoff() { 1054 cc.mu.Lock() 1055 conns := cc.conns 1056 cc.mu.Unlock() 1057 for ac := range conns { 1058 ac.resetConnectBackoff() 1059 } 1060 } 1061 1062 // Close tears down the ClientConn and all underlying connections. 1063 func (cc *ClientConn) Close() error { 1064 defer cc.cancel() 1065 1066 cc.mu.Lock() 1067 if cc.conns == nil { 1068 cc.mu.Unlock() 1069 return ErrClientConnClosing 1070 } 1071 conns := cc.conns 1072 cc.conns = nil 1073 cc.csMgr.updateState(connectivity.Shutdown) 1074 1075 rWrapper := cc.resolverWrapper 1076 cc.resolverWrapper = nil 1077 bWrapper := cc.balancerWrapper 1078 cc.balancerWrapper = nil 1079 cc.mu.Unlock() 1080 1081 cc.blockingpicker.close() 1082 1083 if bWrapper != nil { 1084 bWrapper.close() 1085 } 1086 if rWrapper != nil { 1087 rWrapper.close() 1088 } 1089 1090 for ac := range conns { 1091 ac.tearDown(ErrClientConnClosing) 1092 } 1093 if channelz.IsOn() { 1094 ted := &channelz.TraceEventDesc{ 1095 Desc: "Channel Deleted", 1096 Severity: channelz.CtInfo, 1097 } 1098 if cc.dopts.channelzParentID != 0 { 1099 ted.Parent = &channelz.TraceEventDesc{ 1100 Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID), 1101 Severity: channelz.CtInfo, 1102 } 1103 } 1104 channelz.AddTraceEvent(logger, cc.channelzID, 0, ted) 1105 // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to 1106 // the entity being deleted, and thus prevent it from being deleted right away. 1107 channelz.RemoveEntry(cc.channelzID) 1108 } 1109 return nil 1110 } 1111 1112 // addrConn is a network connection to a given address. 1113 type addrConn struct { 1114 ctx context.Context 1115 cancel context.CancelFunc 1116 1117 cc *ClientConn 1118 dopts dialOptions 1119 acbw balancer.SubConn 1120 scopts balancer.NewSubConnOptions 1121 1122 // transport is set when there's a viable transport (note: ac state may not be READY as LB channel 1123 // health checking may require server to report healthy to set ac to READY), and is reset 1124 // to nil when the current transport should no longer be used to create a stream (e.g. after GoAway 1125 // is received, transport is closed, ac has been torn down). 1126 transport transport.ClientTransport // The current transport. 1127 1128 mu sync.Mutex 1129 curAddr resolver.Address // The current address. 1130 addrs []resolver.Address // All addresses that the resolver resolved to. 1131 1132 // Use updateConnectivityState for updating addrConn's connectivity state. 1133 state connectivity.State 1134 1135 backoffIdx int // Needs to be stateful for resetConnectBackoff. 1136 resetBackoff chan struct{} 1137 1138 channelzID int64 // channelz unique identification number. 1139 czData *channelzData 1140 } 1141 1142 // Note: this requires a lock on ac.mu. 1143 func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) { 1144 if ac.state == s { 1145 return 1146 } 1147 ac.state = s 1148 channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v", s) 1149 ac.cc.handleSubConnStateChange(ac.acbw, s, lastErr) 1150 } 1151 1152 // adjustParams updates parameters used to create transports upon 1153 // receiving a GoAway. 1154 func (ac *addrConn) adjustParams(r transport.GoAwayReason) { 1155 switch r { 1156 case transport.GoAwayTooManyPings: 1157 v := 2 * ac.dopts.copts.KeepaliveParams.Time 1158 ac.cc.mu.Lock() 1159 if v > ac.cc.mkp.Time { 1160 ac.cc.mkp.Time = v 1161 } 1162 ac.cc.mu.Unlock() 1163 } 1164 } 1165 1166 func (ac *addrConn) resetTransport() { 1167 ac.mu.Lock() 1168 if ac.state == connectivity.Shutdown { 1169 ac.mu.Unlock() 1170 return 1171 } 1172 1173 addrs := ac.addrs 1174 backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx) 1175 // This will be the duration that dial gets to finish. 1176 dialDuration := minConnectTimeout 1177 if ac.dopts.minConnectTimeout != nil { 1178 dialDuration = ac.dopts.minConnectTimeout() 1179 } 1180 1181 if dialDuration < backoffFor { 1182 // Give dial more time as we keep failing to connect. 1183 dialDuration = backoffFor 1184 } 1185 // We can potentially spend all the time trying the first address, and 1186 // if the server accepts the connection and then hangs, the following 1187 // addresses will never be tried. 1188 // 1189 // The spec doesn't mention what should be done for multiple addresses. 1190 // https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm 1191 connectDeadline := time.Now().Add(dialDuration) 1192 1193 ac.updateConnectivityState(connectivity.Connecting, nil) 1194 ac.mu.Unlock() 1195 1196 if err := ac.tryAllAddrs(addrs, connectDeadline); err != nil { 1197 ac.cc.resolveNow(resolver.ResolveNowOptions{}) 1198 // After exhausting all addresses, the addrConn enters 1199 // TRANSIENT_FAILURE. 1200 ac.mu.Lock() 1201 if ac.state == connectivity.Shutdown { 1202 ac.mu.Unlock() 1203 return 1204 } 1205 ac.updateConnectivityState(connectivity.TransientFailure, err) 1206 1207 // Backoff. 1208 b := ac.resetBackoff 1209 ac.mu.Unlock() 1210 1211 timer := time.NewTimer(backoffFor) 1212 select { 1213 case <-timer.C: 1214 ac.mu.Lock() 1215 ac.backoffIdx++ 1216 ac.mu.Unlock() 1217 case <-b: 1218 timer.Stop() 1219 case <-ac.ctx.Done(): 1220 timer.Stop() 1221 return 1222 } 1223 1224 ac.mu.Lock() 1225 if ac.state != connectivity.Shutdown { 1226 ac.updateConnectivityState(connectivity.Idle, err) 1227 } 1228 ac.mu.Unlock() 1229 return 1230 } 1231 // Success; reset backoff. 1232 ac.mu.Lock() 1233 ac.backoffIdx = 0 1234 ac.mu.Unlock() 1235 } 1236 1237 // tryAllAddrs tries to creates a connection to the addresses, and stop when at 1238 // the first successful one. It returns an error if no address was successfully 1239 // connected, or updates ac appropriately with the new transport. 1240 func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.Time) error { 1241 var firstConnErr error 1242 for _, addr := range addrs { 1243 ac.mu.Lock() 1244 if ac.state == connectivity.Shutdown { 1245 ac.mu.Unlock() 1246 return errConnClosing 1247 } 1248 1249 ac.cc.mu.RLock() 1250 ac.dopts.copts.KeepaliveParams = ac.cc.mkp 1251 ac.cc.mu.RUnlock() 1252 1253 copts := ac.dopts.copts 1254 if ac.scopts.CredsBundle != nil { 1255 copts.CredsBundle = ac.scopts.CredsBundle 1256 } 1257 ac.mu.Unlock() 1258 1259 channelz.Infof(logger, ac.channelzID, "Subchannel picks a new address %q to connect", addr.Addr) 1260 1261 err := ac.createTransport(addr, copts, connectDeadline) 1262 if err == nil { 1263 return nil 1264 } 1265 if firstConnErr == nil { 1266 firstConnErr = err 1267 } 1268 ac.cc.updateConnectionError(err) 1269 } 1270 1271 // Couldn't connect to any address. 1272 return firstConnErr 1273 } 1274 1275 // createTransport creates a connection to addr. It returns an error if the 1276 // address was not successfully connected, or updates ac appropriately with the 1277 // new transport. 1278 func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error { 1279 // TODO: Delete prefaceReceived and move the logic to wait for it into the 1280 // transport. 1281 prefaceReceived := grpcsync.NewEvent() 1282 connClosed := grpcsync.NewEvent() 1283 1284 addr.ServerName = ac.cc.getServerName(addr) 1285 hctx, hcancel := context.WithCancel(ac.ctx) 1286 hcStarted := false // protected by ac.mu 1287 1288 onClose := func() { 1289 ac.mu.Lock() 1290 defer ac.mu.Unlock() 1291 defer connClosed.Fire() 1292 if !hcStarted || hctx.Err() != nil { 1293 // We didn't start the health check or set the state to READY, so 1294 // no need to do anything else here. 1295 // 1296 // OR, we have already cancelled the health check context, meaning 1297 // we have already called onClose once for this transport. In this 1298 // case it would be dangerous to clear the transport and update the 1299 // state, since there may be a new transport in this addrConn. 1300 return 1301 } 1302 hcancel() 1303 ac.transport = nil 1304 // Refresh the name resolver 1305 ac.cc.resolveNow(resolver.ResolveNowOptions{}) 1306 if ac.state != connectivity.Shutdown { 1307 ac.updateConnectivityState(connectivity.Idle, nil) 1308 } 1309 } 1310 1311 onGoAway := func(r transport.GoAwayReason) { 1312 ac.mu.Lock() 1313 ac.adjustParams(r) 1314 ac.mu.Unlock() 1315 onClose() 1316 } 1317 1318 connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline) 1319 defer cancel() 1320 if channelz.IsOn() { 1321 copts.ChannelzParentID = ac.channelzID 1322 } 1323 1324 newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, addr, copts, func() { prefaceReceived.Fire() }, onGoAway, onClose) 1325 if err != nil { 1326 // newTr is either nil, or closed. 1327 channelz.Warningf(logger, ac.channelzID, "grpc: addrConn.createTransport failed to connect to %v. Err: %v", addr, err) 1328 return err 1329 } 1330 1331 select { 1332 case <-connectCtx.Done(): 1333 // We didn't get the preface in time. 1334 // The error we pass to Close() is immaterial since there are no open 1335 // streams at this point, so no trailers with error details will be sent 1336 // out. We just need to pass a non-nil error. 1337 newTr.Close(transport.ErrConnClosing) 1338 if errors.Is(connectCtx.Err(), context.DeadlineExceeded) { 1339 err := errors.New("failed to receive server preface within timeout") 1340 channelz.Warningf(logger, ac.channelzID, "grpc: addrConn.createTransport failed to connect to %v: %v", addr, err) 1341 return err 1342 } 1343 return nil 1344 case <-prefaceReceived.Done(): 1345 // We got the preface - huzzah! things are good. 1346 ac.mu.Lock() 1347 defer ac.mu.Unlock() 1348 if connClosed.HasFired() { 1349 // onClose called first; go idle but do nothing else. 1350 if ac.state != connectivity.Shutdown { 1351 ac.updateConnectivityState(connectivity.Idle, nil) 1352 } 1353 return nil 1354 } 1355 if ac.state == connectivity.Shutdown { 1356 // This can happen if the subConn was removed while in `Connecting` 1357 // state. tearDown() would have set the state to `Shutdown`, but 1358 // would not have closed the transport since ac.transport would not 1359 // been set at that point. 1360 // 1361 // We run this in a goroutine because newTr.Close() calls onClose() 1362 // inline, which requires locking ac.mu. 1363 // 1364 // The error we pass to Close() is immaterial since there are no open 1365 // streams at this point, so no trailers with error details will be sent 1366 // out. We just need to pass a non-nil error. 1367 go newTr.Close(transport.ErrConnClosing) 1368 return nil 1369 } 1370 ac.curAddr = addr 1371 ac.transport = newTr 1372 hcStarted = true 1373 ac.startHealthCheck(hctx) // Will set state to READY if appropriate. 1374 return nil 1375 case <-connClosed.Done(): 1376 // The transport has already closed. If we received the preface, too, 1377 // this is not an error. 1378 select { 1379 case <-prefaceReceived.Done(): 1380 return nil 1381 default: 1382 return errors.New("connection closed before server preface received") 1383 } 1384 } 1385 } 1386 1387 // startHealthCheck starts the health checking stream (RPC) to watch the health 1388 // stats of this connection if health checking is requested and configured. 1389 // 1390 // LB channel health checking is enabled when all requirements below are met: 1391 // 1. it is not disabled by the user with the WithDisableHealthCheck DialOption 1392 // 2. internal.HealthCheckFunc is set by importing the grpc/health package 1393 // 3. a service config with non-empty healthCheckConfig field is provided 1394 // 4. the load balancer requests it 1395 // 1396 // It sets addrConn to READY if the health checking stream is not started. 1397 // 1398 // Caller must hold ac.mu. 1399 func (ac *addrConn) startHealthCheck(ctx context.Context) { 1400 var healthcheckManagingState bool 1401 defer func() { 1402 if !healthcheckManagingState { 1403 ac.updateConnectivityState(connectivity.Ready, nil) 1404 } 1405 }() 1406 1407 if ac.cc.dopts.disableHealthCheck { 1408 return 1409 } 1410 healthCheckConfig := ac.cc.healthCheckConfig() 1411 if healthCheckConfig == nil { 1412 return 1413 } 1414 if !ac.scopts.HealthCheckEnabled { 1415 return 1416 } 1417 healthCheckFunc := ac.cc.dopts.healthCheckFunc 1418 if healthCheckFunc == nil { 1419 // The health package is not imported to set health check function. 1420 // 1421 // TODO: add a link to the health check doc in the error message. 1422 channelz.Error(logger, ac.channelzID, "Health check is requested but health check function is not set.") 1423 return 1424 } 1425 1426 healthcheckManagingState = true 1427 1428 // Set up the health check helper functions. 1429 currentTr := ac.transport 1430 newStream := func(method string) (interface{}, error) { 1431 ac.mu.Lock() 1432 if ac.transport != currentTr { 1433 ac.mu.Unlock() 1434 return nil, status.Error(codes.Canceled, "the provided transport is no longer valid to use") 1435 } 1436 ac.mu.Unlock() 1437 return newNonRetryClientStream(ctx, &StreamDesc{ServerStreams: true}, method, currentTr, ac) 1438 } 1439 setConnectivityState := func(s connectivity.State, lastErr error) { 1440 ac.mu.Lock() 1441 defer ac.mu.Unlock() 1442 if ac.transport != currentTr { 1443 return 1444 } 1445 ac.updateConnectivityState(s, lastErr) 1446 } 1447 // Start the health checking stream. 1448 go func() { 1449 err := ac.cc.dopts.healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName) 1450 if err != nil { 1451 if status.Code(err) == codes.Unimplemented { 1452 channelz.Error(logger, ac.channelzID, "Subchannel health check is unimplemented at server side, thus health check is disabled") 1453 } else { 1454 channelz.Errorf(logger, ac.channelzID, "HealthCheckFunc exits with unexpected error %v", err) 1455 } 1456 } 1457 }() 1458 } 1459 1460 func (ac *addrConn) resetConnectBackoff() { 1461 ac.mu.Lock() 1462 close(ac.resetBackoff) 1463 ac.backoffIdx = 0 1464 ac.resetBackoff = make(chan struct{}) 1465 ac.mu.Unlock() 1466 } 1467 1468 // getReadyTransport returns the transport if ac's state is READY or nil if not. 1469 func (ac *addrConn) getReadyTransport() transport.ClientTransport { 1470 ac.mu.Lock() 1471 defer ac.mu.Unlock() 1472 if ac.state == connectivity.Ready { 1473 return ac.transport 1474 } 1475 return nil 1476 } 1477 1478 // tearDown starts to tear down the addrConn. 1479 // 1480 // Note that tearDown doesn't remove ac from ac.cc.conns, so the addrConn struct 1481 // will leak. In most cases, call cc.removeAddrConn() instead. 1482 func (ac *addrConn) tearDown(err error) { 1483 ac.mu.Lock() 1484 if ac.state == connectivity.Shutdown { 1485 ac.mu.Unlock() 1486 return 1487 } 1488 curTr := ac.transport 1489 ac.transport = nil 1490 // We have to set the state to Shutdown before anything else to prevent races 1491 // between setting the state and logic that waits on context cancellation / etc. 1492 ac.updateConnectivityState(connectivity.Shutdown, nil) 1493 ac.cancel() 1494 ac.curAddr = resolver.Address{} 1495 if errors.Is(err, errConnDrain) && curTr != nil { 1496 // GracefulClose(...) may be executed multiple times when 1497 // i) receiving multiple GoAway frames from the server; or 1498 // ii) there are concurrent name resolver/Balancer triggered 1499 // address removal and GoAway. 1500 // We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu. 1501 ac.mu.Unlock() 1502 curTr.GracefulClose() 1503 ac.mu.Lock() 1504 } 1505 if channelz.IsOn() { 1506 channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{ 1507 Desc: "Subchannel Deleted", 1508 Severity: channelz.CtInfo, 1509 Parent: &channelz.TraceEventDesc{ 1510 Desc: fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID), 1511 Severity: channelz.CtInfo, 1512 }, 1513 }) 1514 // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to 1515 // the entity being deleted, and thus prevent it from being deleted right away. 1516 channelz.RemoveEntry(ac.channelzID) 1517 } 1518 ac.mu.Unlock() 1519 } 1520 1521 func (ac *addrConn) getState() connectivity.State { 1522 ac.mu.Lock() 1523 defer ac.mu.Unlock() 1524 return ac.state 1525 } 1526 1527 func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric { 1528 ac.mu.Lock() 1529 addr := ac.curAddr.Addr 1530 ac.mu.Unlock() 1531 return &channelz.ChannelInternalMetric{ 1532 State: ac.getState(), 1533 Target: addr, 1534 CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted), 1535 CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded), 1536 CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed), 1537 LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)), 1538 } 1539 } 1540 1541 func (ac *addrConn) incrCallsStarted() { 1542 atomic.AddInt64(&ac.czData.callsStarted, 1) 1543 atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano()) 1544 } 1545 1546 func (ac *addrConn) incrCallsSucceeded() { 1547 atomic.AddInt64(&ac.czData.callsSucceeded, 1) 1548 } 1549 1550 func (ac *addrConn) incrCallsFailed() { 1551 atomic.AddInt64(&ac.czData.callsFailed, 1) 1552 } 1553 1554 type retryThrottler struct { 1555 max float64 1556 thresh float64 1557 ratio float64 1558 1559 mu sync.Mutex 1560 tokens float64 // TODO(dfawley): replace with atomic and remove lock. 1561 } 1562 1563 // throttle subtracts a retry token from the pool and returns whether a retry 1564 // should be throttled (disallowed) based upon the retry throttling policy in 1565 // the service config. 1566 func (rt *retryThrottler) throttle() bool { 1567 if rt == nil { 1568 return false 1569 } 1570 rt.mu.Lock() 1571 defer rt.mu.Unlock() 1572 rt.tokens-- 1573 if rt.tokens < 0 { 1574 rt.tokens = 0 1575 } 1576 return rt.tokens <= rt.thresh 1577 } 1578 1579 func (rt *retryThrottler) successfulRPC() { 1580 if rt == nil { 1581 return 1582 } 1583 rt.mu.Lock() 1584 defer rt.mu.Unlock() 1585 rt.tokens += rt.ratio 1586 if rt.tokens > rt.max { 1587 rt.tokens = rt.max 1588 } 1589 } 1590 1591 type channelzChannel struct { 1592 cc *ClientConn 1593 } 1594 1595 func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric { 1596 return c.cc.channelzMetric() 1597 } 1598 1599 // ErrClientConnTimeout indicates that the ClientConn cannot establish the 1600 // underlying connections within the specified timeout. 1601 // 1602 // ToDeprecated: This error is never returned by grpc and should not be 1603 // referenced by users. 1604 //goland:noinspection GoUnusedGlobalVariable 1605 var ErrClientConnTimeout = errors.New("grpc: timed out when dialing") 1606 1607 func (cc *ClientConn) getResolver(scheme string) resolver.Builder { 1608 for _, rb := range cc.dopts.resolvers { 1609 if scheme == rb.Scheme() { 1610 return rb 1611 } 1612 } 1613 return resolver.Get(scheme) 1614 } 1615 1616 func (cc *ClientConn) updateConnectionError(err error) { 1617 cc.lceMu.Lock() 1618 cc.lastConnectionError = err 1619 cc.lceMu.Unlock() 1620 } 1621 1622 func (cc *ClientConn) connectionError() error { 1623 cc.lceMu.Lock() 1624 defer cc.lceMu.Unlock() 1625 return cc.lastConnectionError 1626 } 1627 1628 func (cc *ClientConn) parseTargetAndFindResolver() (resolver.Builder, error) { 1629 channelz.Infof(logger, cc.channelzID, "original dial target is: %q", cc.target) 1630 1631 var rb resolver.Builder 1632 parsedTarget, err := parseTarget(cc.target) 1633 if err != nil { 1634 channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", cc.target, err) 1635 } else { 1636 channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget) 1637 // Target.Scheme is deprecated, use Target.GetScheme() instead. 1638 //rb = cc.getResolver(parsedTarget.Scheme) 1639 rb = cc.getResolver(parsedTarget.GetScheme()) 1640 if rb != nil { 1641 cc.parsedTarget = parsedTarget 1642 return rb, nil 1643 } 1644 } 1645 1646 // We are here because the user's dial target did not contain a scheme or 1647 // specified an unregistered scheme. We should fallback to the default 1648 // scheme, except when a custom dialer is specified in which case, we should 1649 // always use passthrough scheme. 1650 defScheme := resolver.GetDefaultScheme() 1651 channelz.Infof(logger, cc.channelzID, "fallback to scheme %q", defScheme) 1652 canonicalTarget := defScheme + ":///" + cc.target 1653 1654 parsedTarget, err = parseTarget(canonicalTarget) 1655 if err != nil { 1656 channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", canonicalTarget, err) 1657 return nil, err 1658 } 1659 channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget) 1660 // Target.Scheme is deprecated, use Target.GetScheme() instead. 1661 //rb = cc.getResolver(parsedTarget.Scheme) 1662 rb = cc.getResolver(parsedTarget.GetScheme()) 1663 if rb == nil { 1664 // Target.Scheme is deprecated, use Target.GetScheme() instead. 1665 //return nil, fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.Scheme) 1666 return nil, fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.GetScheme()) 1667 } 1668 cc.parsedTarget = parsedTarget 1669 return rb, nil 1670 } 1671 1672 // parseTarget uses RFC 3986 semantics to parse the given target into a 1673 // resolver.Target struct containing scheme, authority and endpoint. Query 1674 // params are stripped from the endpoint. 1675 func parseTarget(target string) (resolver.Target, error) { 1676 u, err := url.Parse(target) 1677 if err != nil { 1678 return resolver.Target{}, err 1679 } 1680 // For targets of the form "[scheme]://[authority]/endpoint, the endpoint 1681 // value returned from url.Parse() contains a leading "/". Although this is 1682 // in accordance with RFC 3986, we do not want to break existing resolver 1683 // implementations which expect the endpoint without the leading "/". So, we 1684 // end up stripping the leading "/" here. But this will result in an 1685 // incorrect parsing for something like "unix:///path/to/socket". Since we 1686 // own the "unix" resolver, we can workaround in the unix resolver by using 1687 // the `URL` field instead of the `Endpoint` field. 1688 1689 // Target.Endpoint已弃用,这里无需事先计算,获取endpoint的逻辑已移动到Target.GetEndpoint()方法中 1690 //endpoint := u.Path 1691 //if endpoint == "" { 1692 // endpoint = u.Opaque 1693 //} 1694 //endpoint = strings.TrimPrefix(endpoint, "/") 1695 return resolver.Target{ 1696 // Target.Scheme、Target.Authority、Target.Endpoint are deprecated. 1697 //Scheme: u.Scheme, 1698 //Authority: u.Host, 1699 //Endpoint: endpoint, 1700 URL: *u, 1701 }, nil 1702 } 1703 1704 // Determine channel authority. The order of precedence is as follows: 1705 // - user specified authority override using `WithAuthority` dial option 1706 // - creds' notion of server name for the authentication handshake 1707 // - endpoint from dial target of the form "scheme://[authority]/endpoint" 1708 func determineAuthority(endpoint, target string, dopts dialOptions) (string, error) { 1709 // Historically, we had two options for users to specify the serverName or 1710 // authority for a channel. One was through the transport credentials 1711 // (either in its constructor, or through the OverrideServerName() method). 1712 // The other option (for cases where WithInsecure() dial option was used) 1713 // was to use the WithAuthority() dial option. 1714 // 1715 // A few things have changed since: 1716 // - `insecure` package with an implementation of the `TransportCredentials` 1717 // interface for the insecure case 1718 // - WithAuthority() dial option support for secure credentials 1719 authorityFromCreds := "" 1720 if creds := dopts.copts.TransportCredentials; creds != nil && creds.Info().ServerName != "" { 1721 authorityFromCreds = creds.Info().ServerName 1722 } 1723 authorityFromDialOption := dopts.authority 1724 if (authorityFromCreds != "" && authorityFromDialOption != "") && authorityFromCreds != authorityFromDialOption { 1725 return "", fmt.Errorf("ClientConn's authority from transport creds %q and dial option %q don't match", authorityFromCreds, authorityFromDialOption) 1726 } 1727 1728 switch { 1729 case authorityFromDialOption != "": 1730 return authorityFromDialOption, nil 1731 case authorityFromCreds != "": 1732 return authorityFromCreds, nil 1733 case strings.HasPrefix(target, "unix:") || strings.HasPrefix(target, "unix-abstract:"): 1734 // TODO: remove when the unix resolver implements optional interface to 1735 // return channel authority. 1736 return "localhost", nil 1737 case strings.HasPrefix(endpoint, ":"): 1738 return "localhost" + endpoint, nil 1739 default: 1740 // TODO: Define an optional interface on the resolver builder to return 1741 // the channel authority given the user's dial target. For resolvers 1742 // which don't implement this interface, we will use the endpoint from 1743 // "scheme://authority/endpoint" as the default authority. 1744 return endpoint, nil 1745 } 1746 }