// Source: google.golang.org/grpc@v1.62.1/clientconn.go

/*
 *
 * Copyright 2014 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package grpc

import (
	"context"
	"errors"
	"fmt"
	"math"
	"net/url"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/balancer/base"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/internal"
	"google.golang.org/grpc/internal/channelz"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/internal/idle"
	"google.golang.org/grpc/internal/pretty"
	iresolver "google.golang.org/grpc/internal/resolver"
	"google.golang.org/grpc/internal/transport"
	"google.golang.org/grpc/keepalive"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/serviceconfig"
	"google.golang.org/grpc/status"

	_ "google.golang.org/grpc/balancer/roundrobin"           // To register roundrobin.
	_ "google.golang.org/grpc/internal/resolver/passthrough" // To register passthrough resolver.
	_ "google.golang.org/grpc/internal/resolver/unix"        // To register unix resolver.
	_ "google.golang.org/grpc/resolver/dns"                  // To register dns resolver.
)

const (
	// minConnectTimeout is the minimum time to give a connection to complete.
	minConnectTimeout = 20 * time.Second
)

var (
	// ErrClientConnClosing indicates that the operation is illegal because
	// the ClientConn is closing.
	//
	// Deprecated: this error should not be relied upon by users; use the status
	// code of Canceled instead.
	ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
	// errConnDrain indicates that the connection starts to be drained and does
	// not accept any new RPCs.
	errConnDrain = errors.New("grpc: the connection is drained")
	// errConnClosing indicates that the connection is closing.
	errConnClosing = errors.New("grpc: the connection is closing")
	// errConnIdling indicates that the connection is being closed as the
	// channel is moving to idle mode due to inactivity.
	errConnIdling = errors.New("grpc: the connection is closing due to channel idleness")
	// invalidDefaultServiceConfigErrPrefix is used to prefix the JSON parsing
	// error for the default service config.
	invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
)

// The following errors are returned from Dial and DialContext.
var (
	// errNoTransportSecurity indicates that there is no transport security
	// being set for ClientConn. Users should either set credentials or
	// explicitly pass WithTransportCredentials(insecure.NewCredentials()) to
	// disable security, as the error message spells out.
	errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithTransportCredentials(insecure.NewCredentials()) explicitly or set credentials)")
	// errTransportCredsAndBundle indicates that a creds bundle is used together
	// with other individual Transport Credentials.
	errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
	// errNoTransportCredsInBundle indicates that the configured creds bundle
	// returned nil transport credentials.
	errNoTransportCredsInBundle = errors.New("grpc: credentials.Bundle must return non-nil transport credentials")
	// errTransportCredentialsMissing indicates that users want to transmit
	// security information (e.g., OAuth2 token) which requires a secure
	// connection, but the connection is insecure.
	errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
)

const (
	// Default limits on the size of messages the client receives (4 * 1024 *
	// 1024) and sends (math.MaxInt32).
	defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4
	defaultClientMaxSendMessageSize    = math.MaxInt32
	// defaultWriteBufSize specifies the buffer size for sending frames;
	// defaultReadBufSize is the read-side counterpart.
	defaultWriteBufSize = 32 * 1024
	defaultReadBufSize  = 32 * 1024
)

// Dial creates a client connection to the given target. It calls DialContext
// with context.Background().
func Dial(target string, opts ...DialOption) (*ClientConn, error) {
	return DialContext(context.Background(), target, opts...)
}

// defaultConfigSelector is a config selector backed by a single ServiceConfig.
// sc may be nil; it is passed as-is to getMethodConfig.
type defaultConfigSelector struct {
	sc *ServiceConfig
}

// SelectConfig returns an RPCConfig carrying the RPC's own context and the
// method config that getMethodConfig yields for rpcInfo.Method in dcs.sc. The
// returned error is always nil.
func (dcs *defaultConfigSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*iresolver.RPCConfig, error) {
	return &iresolver.RPCConfig{
		Context:      rpcInfo.Context,
		MethodConfig: getMethodConfig(dcs.sc, rpcInfo.Method),
	}, nil
}

// newClient returns a new client in idle mode.
func newClient(target string, opts ...DialOption) (conn *ClientConn, err error) {
	cc := &ClientConn{
		target: target,
		conns:  make(map[*addrConn]struct{}),
		dopts:  defaultDialOptions(),
		czData: new(channelzData),
	}

	// Seed the atomically accessed fields with typed nil values and give the
	// channel its own cancellable root context.
	cc.retryThrottler.Store((*retryThrottler)(nil))
	cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil})
	cc.ctx, cc.cancel = context.WithCancel(context.Background())

	// Apply dial options. Global options are skipped when the caller passed a
	// *disableGlobalDialOptions among opts.
	disableGlobalOpts := false
	for _, opt := range opts {
		if _, ok := opt.(*disableGlobalDialOptions); ok {
			disableGlobalOpts = true
			break
		}
	}

	if !disableGlobalOpts {
		for _, opt := range globalDialOptions {
			opt.apply(&cc.dopts)
		}
	}

	// Per-call options are applied after the global ones, so they are the last
	// writers of cc.dopts.
	for _, opt := range opts {
		opt.apply(&cc.dopts)
	}
	chainUnaryClientInterceptors(cc)
	chainStreamClientInterceptors(cc)

	// This check happens before channelz registration, so failing here needs
	// no channelz cleanup.
	if err := cc.validateTransportCredentials(); err != nil {
		return nil, err
	}

	if cc.dopts.defaultServiceConfigRawJSON != nil {
		scpr := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON)
		if scpr.Err != nil {
			return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, scpr.Err)
		}
		cc.dopts.defaultServiceConfig, _ = scpr.Config.(*ServiceConfig)
	}
	cc.mkp = cc.dopts.copts.KeepaliveParams

	// Register ClientConn with channelz.
	cc.channelzRegistration(target)

	// TODO: Ideally it should be impossible to error from this function after
	// channelz registration. This will require removing some channelz logs
	// from the following functions that can error. Errors can be returned to
	// the user, and successful logs can be emitted here, after the checks have
	// passed and channelz is subsequently registered.

	// Determine the resolver to use. From here on, every error path must
	// remove the channelz entry registered above.
	if err := cc.parseTargetAndFindResolver(); err != nil {
		channelz.RemoveEntry(cc.channelzID)
		return nil, err
	}
	if err = cc.determineAuthority(); err != nil {
		channelz.RemoveEntry(cc.channelzID)
		return nil, err
	}

	cc.csMgr = newConnectivityStateManager(cc.ctx, cc.channelzID)
	cc.pickerWrapper = newPickerWrapper(cc.dopts.copts.StatsHandlers)

	cc.initIdleStateLocked() // Safe to call without the lock, since nothing else has a reference to cc.
	cc.idlenessMgr = idle.NewManager((*idler)(cc), cc.dopts.idleTimeout)
	return cc, nil
}

// DialContext creates a client connection to the given target. By default, it's
// a non-blocking dial (the function won't wait for connections to be
// established, and connecting happens in the background). To make it a blocking
// dial, use WithBlock() dial option.
//
// In the non-blocking case, the ctx does not act against the connection. It
// only controls the setup steps.
//
// In the blocking case, ctx can be used to cancel or expire the pending
// connection. Once this function returns, the cancellation and expiration of
// ctx will be noop. Users should call ClientConn.Close to terminate all the
// pending operations after this function returns.
//
// If DialContext returns a non-nil error, the ClientConn created internally is
// closed before returning and the returned *ClientConn is nil.
//
// The target name syntax is defined in
// https://github.com/grpc/grpc/blob/master/doc/naming.md.
// e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
	// err here is the named result (same scope), not a new variable.
	cc, err := newClient(target, opts...)
	if err != nil {
		return nil, err
	}

	// We start the channel off in idle mode, but kick it out of idle now,
	// instead of waiting for the first RPC. Other gRPC implementations do wait
	// for the first RPC to kick the channel out of idle. But doing so would be
	// a major behavior change for our users who are used to seeing the channel
	// active after Dial.
	//
	// Taking this approach of kicking it out of idle at the end of this method
	// allows us to share the code between channel creation and exiting idle
	// mode. This will also make it easy for us to switch to starting the
	// channel off in idle, i.e. by making newClient exported.

	// Deferred calls run last-in-first-out, so this cleanup runs after the
	// context check deferred further below has finalized the named result err.
	defer func() {
		if err != nil {
			cc.Close()
		}
	}()

	// This creates the name resolver, load balancer, etc.
	// The err declared here shadows the named result; the return statement
	// still assigns it to the named result before the deferred calls run.
	if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
		return nil, err
	}

	// Return now for non-blocking dials.
	if !cc.dopts.block {
		return cc, nil
	}

	if cc.dopts.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
		defer cancel()
	}
	// If ctx is done by the time we return, the dial is reported as failed
	// regardless of what the loop below returned: conn is cleared and err
	// becomes ctx.Err(), optionally combined with the last connection error.
	defer func() {
		select {
		case <-ctx.Done():
			switch {
			case ctx.Err() == err:
				conn = nil
			case err == nil || !cc.dopts.returnLastError:
				conn, err = nil, ctx.Err()
			default:
				conn, err = nil, fmt.Errorf("%v: %v", ctx.Err(), err)
			}
		default:
		}
	}()

	// A blocking dial blocks until the clientConn is ready.
	for {
		s := cc.GetState()
		if s == connectivity.Idle {
			cc.Connect()
		}
		if s == connectivity.Ready {
			return cc, nil
		} else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
			// Fail fast only for errors that implement Temporary() and report
			// themselves as non-temporary.
			if err = cc.connectionError(); err != nil {
				terr, ok := err.(interface {
					Temporary() bool
				})
				if ok && !terr.Temporary() {
					return nil, err
				}
			}
		}
		if !cc.WaitForStateChange(ctx, s) {
			// ctx got timeout or canceled.
			if err = cc.connectionError(); err != nil && cc.dopts.returnLastError {
				return nil, err
			}
			return nil, ctx.Err()
		}
	}
}

// addTraceEvent is a helper method to add a trace event on the channel.
// If the channel is a nested one, the same event is also added on the parent
// channel.
func (cc *ClientConn) addTraceEvent(msg string) {
	ted := &channelz.TraceEventDesc{
		Desc:     fmt.Sprintf("Channel %s", msg),
		Severity: channelz.CtInfo,
	}
	// A non-nil parent ID marks this channel as nested.
	if cc.dopts.channelzParentID != nil {
		ted.Parent = &channelz.TraceEventDesc{
			Desc:     fmt.Sprintf("Nested channel(id:%d) %s", cc.channelzID.Int(), msg),
			Severity: channelz.CtInfo,
		}
	}
	channelz.AddTraceEvent(logger, cc.channelzID, 0, ted)
}

// idler is ClientConn under a distinct named type. Its exported methods
// forward to the unexported enterIdleMode and exitIdleMode of ClientConn.
type idler ClientConn

// EnterIdleMode forwards to (*ClientConn).enterIdleMode.
func (i *idler) EnterIdleMode() {
	(*ClientConn)(i).enterIdleMode()
}

// ExitIdleMode forwards to (*ClientConn).exitIdleMode and returns its error.
func (i *idler) ExitIdleMode() error {
	return (*ClientConn)(i).exitIdleMode()
}

// exitIdleMode moves the channel out of idle mode by recreating the name
// resolver and load balancer. This should never be called directly; use
// cc.idlenessMgr.ExitIdleMode instead.
//
// It returns errConnClosing if the ClientConn has been closed (cc.conns is
// nil), or the error from starting the resolver wrapper.
func (cc *ClientConn) exitIdleMode() (err error) {
	cc.mu.Lock()
	if cc.conns == nil {
		cc.mu.Unlock()
		return errConnClosing
	}
	cc.mu.Unlock()

	// This needs to be called without cc.mu because this builds a new resolver
	// which might update state or report error inline, which would then need to
	// acquire cc.mu.
	if err := cc.resolverWrapper.start(); err != nil {
		return err
	}

	cc.addTraceEvent("exiting idle mode")
	return nil
}

// initIdleStateLocked initializes common state to how it should be while idle.
// It replaces the resolver wrapper, balancer wrapper, first-resolve event and
// the conns map with fresh values.
func (cc *ClientConn) initIdleStateLocked() {
	cc.resolverWrapper = newCCResolverWrapper(cc)
	cc.balancerWrapper = newCCBalancerWrapper(cc)
	cc.firstResolveEvent = grpcsync.NewEvent()
	// cc.conns == nil is a proxy for the ClientConn being closed. So, instead
	// of setting it to nil here, we recreate the map. This also means that we
	// don't have to do this when exiting idle mode.
	cc.conns = make(map[*addrConn]struct{})
}

// enterIdleMode puts the channel in idle mode, and as part of it shuts down the
// name resolver, load balancer, and any subchannels. This should never be
// called directly; use cc.idlenessMgr.EnterIdleMode instead.
func (cc *ClientConn) enterIdleMode() {
	cc.mu.Lock()

	// Nothing to do if the ClientConn is already closed.
	if cc.conns == nil {
		cc.mu.Unlock()
		return
	}

	// Keep references to the current subchannels and wrappers: the call to
	// initIdleStateLocked below replaces the fields they were read from.
	conns := cc.conns

	rWrapper := cc.resolverWrapper
	rWrapper.close()
	cc.pickerWrapper.reset()
	bWrapper := cc.balancerWrapper
	bWrapper.close()
	cc.csMgr.updateState(connectivity.Idle)
	cc.addTraceEvent("entering idle mode")

	cc.initIdleStateLocked()

	cc.mu.Unlock()

	// Block until the name resolver and LB policy are closed.
	<-rWrapper.serializer.Done()
	<-bWrapper.serializer.Done()

	// Close all subchannels after the LB policy is closed.
	for ac := range conns {
		ac.tearDown(errConnIdling)
	}
}

// validateTransportCredentials performs a series of checks on the configured
// transport credentials. It returns a non-nil error if any of these conditions
// are met:
//   - no transport creds and no creds bundle is configured
//   - both transport creds and creds bundle are configured
//   - creds bundle is configured, but it lacks a transport credentials
//   - insecure transport creds configured alongside call creds that require
//     transport level security
//
// If none of the above conditions are met, the configured credentials are
// deemed valid and a nil error is returned.
396 func (cc *ClientConn) validateTransportCredentials() error { 397 if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil { 398 return errNoTransportSecurity 399 } 400 if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil { 401 return errTransportCredsAndBundle 402 } 403 if cc.dopts.copts.CredsBundle != nil && cc.dopts.copts.CredsBundle.TransportCredentials() == nil { 404 return errNoTransportCredsInBundle 405 } 406 transportCreds := cc.dopts.copts.TransportCredentials 407 if transportCreds == nil { 408 transportCreds = cc.dopts.copts.CredsBundle.TransportCredentials() 409 } 410 if transportCreds.Info().SecurityProtocol == "insecure" { 411 for _, cd := range cc.dopts.copts.PerRPCCredentials { 412 if cd.RequireTransportSecurity() { 413 return errTransportCredentialsMissing 414 } 415 } 416 } 417 return nil 418 } 419 420 // channelzRegistration registers the newly created ClientConn with channelz and 421 // stores the returned identifier in `cc.channelzID` and `cc.csMgr.channelzID`. 422 // A channelz trace event is emitted for ClientConn creation. If the newly 423 // created ClientConn is a nested one, i.e a valid parent ClientConn ID is 424 // specified via a dial option, the trace event is also added to the parent. 425 // 426 // Doesn't grab cc.mu as this method is expected to be called only at Dial time. 427 func (cc *ClientConn) channelzRegistration(target string) { 428 cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target) 429 cc.addTraceEvent("created") 430 } 431 432 // chainUnaryClientInterceptors chains all unary client interceptors into one. 433 func chainUnaryClientInterceptors(cc *ClientConn) { 434 interceptors := cc.dopts.chainUnaryInts 435 // Prepend dopts.unaryInt to the chaining interceptors if it exists, since unaryInt will 436 // be executed before any other chained interceptors. 
437 if cc.dopts.unaryInt != nil { 438 interceptors = append([]UnaryClientInterceptor{cc.dopts.unaryInt}, interceptors...) 439 } 440 var chainedInt UnaryClientInterceptor 441 if len(interceptors) == 0 { 442 chainedInt = nil 443 } else if len(interceptors) == 1 { 444 chainedInt = interceptors[0] 445 } else { 446 chainedInt = func(ctx context.Context, method string, req, reply any, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error { 447 return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...) 448 } 449 } 450 cc.dopts.unaryInt = chainedInt 451 } 452 453 // getChainUnaryInvoker recursively generate the chained unary invoker. 454 func getChainUnaryInvoker(interceptors []UnaryClientInterceptor, curr int, finalInvoker UnaryInvoker) UnaryInvoker { 455 if curr == len(interceptors)-1 { 456 return finalInvoker 457 } 458 return func(ctx context.Context, method string, req, reply any, cc *ClientConn, opts ...CallOption) error { 459 return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...) 460 } 461 } 462 463 // chainStreamClientInterceptors chains all stream client interceptors into one. 464 func chainStreamClientInterceptors(cc *ClientConn) { 465 interceptors := cc.dopts.chainStreamInts 466 // Prepend dopts.streamInt to the chaining interceptors if it exists, since streamInt will 467 // be executed before any other chained interceptors. 468 if cc.dopts.streamInt != nil { 469 interceptors = append([]StreamClientInterceptor{cc.dopts.streamInt}, interceptors...) 
470 } 471 var chainedInt StreamClientInterceptor 472 if len(interceptors) == 0 { 473 chainedInt = nil 474 } else if len(interceptors) == 1 { 475 chainedInt = interceptors[0] 476 } else { 477 chainedInt = func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, streamer Streamer, opts ...CallOption) (ClientStream, error) { 478 return interceptors[0](ctx, desc, cc, method, getChainStreamer(interceptors, 0, streamer), opts...) 479 } 480 } 481 cc.dopts.streamInt = chainedInt 482 } 483 484 // getChainStreamer recursively generate the chained client stream constructor. 485 func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStreamer Streamer) Streamer { 486 if curr == len(interceptors)-1 { 487 return finalStreamer 488 } 489 return func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (ClientStream, error) { 490 return interceptors[curr+1](ctx, desc, cc, method, getChainStreamer(interceptors, curr+1, finalStreamer), opts...) 491 } 492 } 493 494 // newConnectivityStateManager creates an connectivityStateManager with 495 // the specified id. 496 func newConnectivityStateManager(ctx context.Context, id *channelz.Identifier) *connectivityStateManager { 497 return &connectivityStateManager{ 498 channelzID: id, 499 pubSub: grpcsync.NewPubSub(ctx), 500 } 501 } 502 503 // connectivityStateManager keeps the connectivity.State of ClientConn. 504 // This struct will eventually be exported so the balancers can access it. 505 // 506 // TODO: If possible, get rid of the `connectivityStateManager` type, and 507 // provide this functionality using the `PubSub`, to avoid keeping track of 508 // the connectivity state at two places. 509 type connectivityStateManager struct { 510 mu sync.Mutex 511 state connectivity.State 512 notifyChan chan struct{} 513 channelzID *channelz.Identifier 514 pubSub *grpcsync.PubSub 515 } 516 517 // updateState updates the connectivity.State of ClientConn. 
518 // If there's a change it notifies goroutines waiting on state change to 519 // happen. 520 func (csm *connectivityStateManager) updateState(state connectivity.State) { 521 csm.mu.Lock() 522 defer csm.mu.Unlock() 523 if csm.state == connectivity.Shutdown { 524 return 525 } 526 if csm.state == state { 527 return 528 } 529 csm.state = state 530 csm.pubSub.Publish(state) 531 532 channelz.Infof(logger, csm.channelzID, "Channel Connectivity change to %v", state) 533 if csm.notifyChan != nil { 534 // There are other goroutines waiting on this channel. 535 close(csm.notifyChan) 536 csm.notifyChan = nil 537 } 538 } 539 540 func (csm *connectivityStateManager) getState() connectivity.State { 541 csm.mu.Lock() 542 defer csm.mu.Unlock() 543 return csm.state 544 } 545 546 func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} { 547 csm.mu.Lock() 548 defer csm.mu.Unlock() 549 if csm.notifyChan == nil { 550 csm.notifyChan = make(chan struct{}) 551 } 552 return csm.notifyChan 553 } 554 555 // ClientConnInterface defines the functions clients need to perform unary and 556 // streaming RPCs. It is implemented by *ClientConn, and is only intended to 557 // be referenced by generated code. 558 type ClientConnInterface interface { 559 // Invoke performs a unary RPC and returns after the response is received 560 // into reply. 561 Invoke(ctx context.Context, method string, args any, reply any, opts ...CallOption) error 562 // NewStream begins a streaming RPC. 563 NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error) 564 } 565 566 // Assert *ClientConn implements ClientConnInterface. 567 var _ ClientConnInterface = (*ClientConn)(nil) 568 569 // ClientConn represents a virtual connection to a conceptual endpoint, to 570 // perform RPCs. 571 // 572 // A ClientConn is free to have zero or more actual connections to the endpoint 573 // based on configuration, load, etc. 
// It is also free to determine which actual endpoints to use and may change it
// every RPC, permitting client-side load balancing.
//
// A ClientConn encapsulates a range of functionality including name
// resolution, TCP connection establishment (with retries and backoff) and TLS
// handshakes. It also handles errors on established connections by
// re-resolving the name and reconnecting.
type ClientConn struct {
	ctx    context.Context    // Initialized using the background context at dial time.
	cancel context.CancelFunc // Cancelled on close.

	// The following are initialized at dial time, and are read-only after that.
	target          string               // User's dial target.
	parsedTarget    resolver.Target      // See parseTargetAndFindResolver().
	authority       string               // See determineAuthority().
	dopts           dialOptions          // Default and user specified dial options.
	channelzID      *channelz.Identifier // Channelz identifier for the channel.
	resolverBuilder resolver.Builder     // See parseTargetAndFindResolver().
	idlenessMgr     *idle.Manager        // Entry point for entering and exiting idle mode.

	// The following provide their own synchronization, and therefore don't
	// require cc.mu to be held to access them.
	csMgr              *connectivityStateManager
	pickerWrapper      *pickerWrapper
	safeConfigSelector iresolver.SafeConfigSelector
	czData             *channelzData
	retryThrottler     atomic.Value // Updated from service config.

	// mu protects the following fields.
	// TODO: split mu so the same mutex isn't used for everything.
	mu              sync.RWMutex
	resolverWrapper *ccResolverWrapper         // Always recreated whenever entering idle to simplify Close.
	balancerWrapper *ccBalancerWrapper         // Always recreated whenever entering idle to simplify Close.
	sc              *ServiceConfig             // Latest service config received from the resolver.
	conns           map[*addrConn]struct{}     // Set to nil on close.
	mkp             keepalive.ClientParameters // May be updated upon receipt of a GoAway.
	// firstResolveEvent is used to track whether the name resolver sent us at
	// least one update. RPCs block on this event. May be accessed without mu
	// if we know we cannot be asked to enter idle mode while accessing it (e.g.
	// when the idle manager has already been closed, or if we are already
	// entering idle mode).
	firstResolveEvent *grpcsync.Event

	lceMu               sync.Mutex // protects lastConnectionError
	lastConnectionError error
}

// WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
// ctx expires. A true value is returned in former case and false in latter.
//
// # Experimental
//
// Notice: This API is EXPERIMENTAL and may be changed or removed in a
// later release.
func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
	// Obtain the notification channel before reading the state, so a change
	// that happens between the two steps still closes the channel we wait on.
	ch := cc.csMgr.getNotifyChan()
	if cc.csMgr.getState() != sourceState {
		return true
	}
	select {
	case <-ctx.Done():
		return false
	case <-ch:
		return true
	}
}

// GetState returns the connectivity.State of ClientConn.
//
// # Experimental
//
// Notice: This API is EXPERIMENTAL and may be changed or removed in a later
// release.
func (cc *ClientConn) GetState() connectivity.State {
	return cc.csMgr.getState()
}

// Connect causes all subchannels in the ClientConn to attempt to connect if
// the channel is idle. Does not wait for the connection attempts to begin
// before returning.
//
// # Experimental
//
// Notice: This API is EXPERIMENTAL and may be changed or removed in a later
// release.
func (cc *ClientConn) Connect() {
	// A failure to exit idle mode is only recorded as a trace event; Connect
	// has no error result to report it through.
	if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
		cc.addTraceEvent(err.Error())
		return
	}
	// If the ClientConn was not in idle mode, we need to call ExitIdle on the
	// LB policy so that connections can be created.
	cc.mu.Lock()
	cc.balancerWrapper.exitIdle()
	cc.mu.Unlock()
}

// waitForResolvedAddrs blocks until the resolver has provided addresses, ctx
// expires, or the ClientConn's own context is done. It returns nil once the
// first resolver update has been seen, a status error derived from ctx.Err()
// if ctx finishes first, and ErrClientConnClosing if cc.ctx finishes first.
func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error {
	// This is on the RPC path, so we use a fast path to avoid the
	// more-expensive "select" below after the resolver has returned once.
	if cc.firstResolveEvent.HasFired() {
		return nil
	}
	select {
	case <-cc.firstResolveEvent.Done():
		return nil
	case <-ctx.Done():
		return status.FromContextError(ctx.Err()).Err()
	case <-cc.ctx.Done():
		return ErrClientConnClosing
	}
}

// emptyServiceConfig is the parsed form of the "{}" service config. It is set
// once in init and used when neither the resolver nor the dial options supply
// a service config.
var emptyServiceConfig *ServiceConfig

// init parses the empty service config (panicking if that fails) and installs
// the ClientConn-backed hooks on the internal package.
func init() {
	cfg := parseServiceConfig("{}")
	if cfg.Err != nil {
		panic(fmt.Sprintf("impossible error parsing empty service config: %v", cfg.Err))
	}
	emptyServiceConfig = cfg.Config.(*ServiceConfig)

	internal.SubscribeToConnectivityStateChanges = func(cc *ClientConn, s grpcsync.Subscriber) func() {
		return cc.csMgr.pubSub.Subscribe(s)
	}
	internal.EnterIdleModeForTesting = func(cc *ClientConn) {
		cc.idlenessMgr.EnterIdleModeForTesting()
	}
	internal.ExitIdleModeForTesting = func(cc *ClientConn) error {
		return cc.idlenessMgr.ExitIdleMode()
	}
}

// maybeApplyDefaultServiceConfig applies a service config for addrs when the
// resolver did not supply a usable one. In order of preference it re-applies
// the previously received config cc.sc (with a nil config selector), then the
// default service config from the dial options, then emptyServiceConfig.
func (cc *ClientConn) maybeApplyDefaultServiceConfig(addrs []resolver.Address) {
	if cc.sc != nil {
		cc.applyServiceConfigAndBalancer(cc.sc, nil, addrs)
		return
	}
	if cc.dopts.defaultServiceConfig != nil {
		cc.applyServiceConfigAndBalancer(cc.dopts.defaultServiceConfig, &defaultConfigSelector{cc.dopts.defaultServiceConfig}, addrs)
	} else {
		cc.applyServiceConfigAndBalancer(emptyServiceConfig, &defaultConfigSelector{emptyServiceConfig}, addrs)
	}
}

// updateResolverStateAndUnlock handles an update from the name resolver:
// either a new state s, or an error err. It releases cc.mu on every return
// path and never acquires it, so the caller is expected to hold cc.mu on
// entry.
//
// It returns nil if the ClientConn is already closed, and
// balancer.ErrBadResolverState if err is non-nil or the service config in s is
// unusable. Otherwise it returns the result of forwarding the state to the
// balancer wrapper.
func (cc *ClientConn) updateResolverStateAndUnlock(s resolver.State, err error) error {
	// The event to fire is read from cc here, at the defer statement; Fire
	// itself runs when the function returns, whichever path is taken.
	defer cc.firstResolveEvent.Fire()
	// Check if the ClientConn is already closed. Some fields (e.g.
	// balancerWrapper) are set to nil when closing the ClientConn, and could
	// cause nil pointer panic if we don't have this check.
	if cc.conns == nil {
		cc.mu.Unlock()
		return nil
	}

	if err != nil {
		// May need to apply the initial service config in case the resolver
		// doesn't support service configs, or doesn't provide a service config
		// with the new addresses.
		cc.maybeApplyDefaultServiceConfig(nil)

		cc.balancerWrapper.resolverError(err)

		// No addresses are valid with err set; return early.
		cc.mu.Unlock()
		return balancer.ErrBadResolverState
	}

	var ret error
	if cc.dopts.disableServiceConfig {
		channelz.Infof(logger, cc.channelzID, "ignoring service config from resolver (%v) and applying the default because service config is disabled", s.ServiceConfig)
		cc.maybeApplyDefaultServiceConfig(s.Addresses)
	} else if s.ServiceConfig == nil {
		cc.maybeApplyDefaultServiceConfig(s.Addresses)
		// TODO: do we need to apply a failing LB policy if there is no
		// default, per the error handling design?
	} else {
		if sc, ok := s.ServiceConfig.Config.(*ServiceConfig); s.ServiceConfig.Err == nil && ok {
			// A config selector attached to the resolver state takes
			// precedence over the method configs in the service config.
			configSelector := iresolver.GetConfigSelector(s)
			if configSelector != nil {
				if len(s.ServiceConfig.Config.(*ServiceConfig).Methods) != 0 {
					channelz.Infof(logger, cc.channelzID, "method configs in service config will be ignored due to presence of config selector")
				}
			} else {
				configSelector = &defaultConfigSelector{sc}
			}
			cc.applyServiceConfigAndBalancer(sc, configSelector, s.Addresses)
		} else {
			ret = balancer.ErrBadResolverState
			if cc.sc == nil {
				// Apply the failing LB only if we haven't received valid service config
				// from the name resolver in the past.
				cc.applyFailingLBLocked(s.ServiceConfig)
				cc.mu.Unlock()
				return ret
			}
		}
	}

	var balCfg serviceconfig.LoadBalancingConfig
	if cc.sc != nil && cc.sc.lbConfig != nil {
		balCfg = cc.sc.lbConfig.cfg
	}
	// Read the balancer wrapper while still holding cc.mu; it is called below
	// after the lock has been released.
	bw := cc.balancerWrapper
	cc.mu.Unlock()

	uccsErr := bw.updateClientConnState(&balancer.ClientConnState{ResolverState: s, BalancerConfig: balCfg})
	if ret == nil {
		ret = uccsErr // prefer ErrBadResolver state since any other error is
		// currently meaningless to the caller.
	}
	return ret
}

// applyFailingLBLocked is akin to configuring an LB policy on the channel which
// always fails RPCs. Here, an actual LB policy is not configured, but an always
// erroring picker is configured, which returns errors with information about
// what was invalid in the received service config. A config selector with no
// service config is configured, and the connectivity state of the channel is
// set to TransientFailure.
func (cc *ClientConn) applyFailingLBLocked(sc *serviceconfig.ParseResult) {
	// Both flavors of failure are reported to RPCs as codes.Unavailable.
	var err error
	if sc.Err != nil {
		err = status.Errorf(codes.Unavailable, "error parsing service config: %v", sc.Err)
	} else {
		err = status.Errorf(codes.Unavailable, "illegal service config type: %T", sc.Config)
	}
	cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil})
	cc.pickerWrapper.updatePicker(base.NewErrPicker(err))
	cc.csMgr.updateState(connectivity.TransientFailure)
}

// Makes a copy of the input addresses slice and clears out the balancer
// attributes field. Addresses are passed during subconn creation and address
// update operations. In both cases, we will clear the balancer attributes by
// calling this function, and therefore we will be able to use the Equal method
// provided by the resolver.Address type for comparison.
814 func copyAddressesWithoutBalancerAttributes(in []resolver.Address) []resolver.Address { 815 out := make([]resolver.Address, len(in)) 816 for i := range in { 817 out[i] = in[i] 818 out[i].BalancerAttributes = nil 819 } 820 return out 821 } 822 823 // newAddrConnLocked creates an addrConn for addrs and adds it to cc.conns. 824 // 825 // Caller needs to make sure len(addrs) > 0. 826 func (cc *ClientConn) newAddrConnLocked(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) { 827 if cc.conns == nil { 828 return nil, ErrClientConnClosing 829 } 830 831 ac := &addrConn{ 832 state: connectivity.Idle, 833 cc: cc, 834 addrs: copyAddressesWithoutBalancerAttributes(addrs), 835 scopts: opts, 836 dopts: cc.dopts, 837 czData: new(channelzData), 838 resetBackoff: make(chan struct{}), 839 stateChan: make(chan struct{}), 840 } 841 ac.ctx, ac.cancel = context.WithCancel(cc.ctx) 842 843 var err error 844 ac.channelzID, err = channelz.RegisterSubChannel(ac, cc.channelzID, "") 845 if err != nil { 846 return nil, err 847 } 848 channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{ 849 Desc: "Subchannel created", 850 Severity: channelz.CtInfo, 851 Parent: &channelz.TraceEventDesc{ 852 Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID.Int()), 853 Severity: channelz.CtInfo, 854 }, 855 }) 856 857 // Track ac in cc. This needs to be done before any getTransport(...) is called. 858 cc.conns[ac] = struct{}{} 859 return ac, nil 860 } 861 862 // removeAddrConn removes the addrConn in the subConn from clientConn. 863 // It also tears down the ac with the given error. 
864 func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) { 865 cc.mu.Lock() 866 if cc.conns == nil { 867 cc.mu.Unlock() 868 return 869 } 870 delete(cc.conns, ac) 871 cc.mu.Unlock() 872 ac.tearDown(err) 873 } 874 875 func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric { 876 return &channelz.ChannelInternalMetric{ 877 State: cc.GetState(), 878 Target: cc.target, 879 CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted), 880 CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded), 881 CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed), 882 LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)), 883 } 884 } 885 886 // Target returns the target string of the ClientConn. 887 // 888 // # Experimental 889 // 890 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 891 // later release. 892 func (cc *ClientConn) Target() string { 893 return cc.target 894 } 895 896 func (cc *ClientConn) incrCallsStarted() { 897 atomic.AddInt64(&cc.czData.callsStarted, 1) 898 atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano()) 899 } 900 901 func (cc *ClientConn) incrCallsSucceeded() { 902 atomic.AddInt64(&cc.czData.callsSucceeded, 1) 903 } 904 905 func (cc *ClientConn) incrCallsFailed() { 906 atomic.AddInt64(&cc.czData.callsFailed, 1) 907 } 908 909 // connect starts creating a transport. 910 // It does nothing if the ac is not IDLE. 911 // TODO(bar) Move this to the addrConn section. 
func (ac *addrConn) connect() error {
	ac.mu.Lock()
	if ac.state == connectivity.Shutdown {
		if logger.V(2) {
			logger.Infof("connect called on shutdown addrConn; ignoring.")
		}
		ac.mu.Unlock()
		return errConnClosing
	}
	if ac.state != connectivity.Idle {
		if logger.V(2) {
			logger.Infof("connect called on addrConn in non-idle state (%v); ignoring.", ac.state)
		}
		ac.mu.Unlock()
		return nil
	}
	ac.mu.Unlock()

	// resetTransport is invoked synchronously, after ac.mu has been released.
	ac.resetTransport()
	return nil
}

// equalAddresses reports whether a and b have the same length and every pair
// of elements at the same index compares equal via resolver.Address.Equal.
// The comparison is order-sensitive.
func equalAddresses(a, b []resolver.Address) bool {
	if len(a) != len(b) {
		return false
	}
	for i, v := range a {
		if !v.Equal(b[i]) {
			return false
		}
	}
	return true
}

// updateAddrs updates ac.addrs with the new addresses list and handles active
// connections or connection attempts.
//
// The incoming list is first copied with balancer attributes cleared. If the
// result equals the current list, nothing changes. Otherwise the list is
// stored, and the current connection (attempt) is only restarted when the
// addrConn is connecting, or is READY on an address that is no longer listed.
func (ac *addrConn) updateAddrs(addrs []resolver.Address) {
	ac.mu.Lock()
	channelz.Infof(logger, ac.channelzID, "addrConn: updateAddrs curAddr: %v, addrs: %v", pretty.ToJSON(ac.curAddr), pretty.ToJSON(addrs))

	addrs = copyAddressesWithoutBalancerAttributes(addrs)
	if equalAddresses(ac.addrs, addrs) {
		ac.mu.Unlock()
		return
	}

	ac.addrs = addrs

	if ac.state == connectivity.Shutdown ||
		ac.state == connectivity.TransientFailure ||
		ac.state == connectivity.Idle {
		// We were not connecting, so do nothing but update the addresses.
		ac.mu.Unlock()
		return
	}

	if ac.state == connectivity.Ready {
		// Try to find the connected address.
		for _, a := range addrs {
			// a is a per-iteration copy; the server name is filled in the
			// same way createTransport does before storing ac.curAddr, so
			// the Equal comparison below is like-for-like.
			a.ServerName = ac.cc.getServerName(a)
			if a.Equal(ac.curAddr) {
				// We are connected to a valid address, so do nothing but
				// update the addresses.
				ac.mu.Unlock()
				return
			}
		}
	}

	// We are either connected to the wrong address or currently connecting.
	// Stop the current iteration and restart.

	// Cancel the context used by the in-flight attempt and install a fresh
	// one derived from the channel's context.
	ac.cancel()
	ac.ctx, ac.cancel = context.WithCancel(ac.cc.ctx)

	// We have to defer here because GracefulClose => onClose, which requires
	// locking ac.mu.
	if ac.transport != nil {
		defer ac.transport.GracefulClose()
		ac.transport = nil
	}

	if len(addrs) == 0 {
		ac.updateConnectivityState(connectivity.Idle, nil)
	}

	ac.mu.Unlock()

	// Since we were connecting/connected, we should start a new connection
	// attempt.
	go ac.resetTransport()
}

// getServerName determines the serverName to be used in the connection
// handshake. The default value for the serverName is the authority on the
// ClientConn, which either comes from the user's dial target or through an
// authority override specified using the WithAuthority dial option. Name
// resolvers can specify a per-address override for the serverName through the
// resolver.Address.ServerName field which is used only if the WithAuthority
// dial option was not used. The rationale is that per-address authority
// overrides specified by the name resolver can represent a security risk, while
// an override specified by the user is more dependable since they probably know
// what they are doing.
func (cc *ClientConn) getServerName(addr resolver.Address) string {
	if cc.dopts.authority != "" {
		return cc.dopts.authority
	}
	if addr.ServerName != "" {
		return addr.ServerName
	}
	return cc.authority
}

// getMethodConfig looks up the MethodConfig for method in sc. It tries, in
// order: the exact method name, the prefix of method up to and including its
// last "/", and finally the empty-string key. A nil sc yields an empty
// MethodConfig.
func getMethodConfig(sc *ServiceConfig, method string) MethodConfig {
	if sc == nil {
		return MethodConfig{}
	}
	if m, ok := sc.Methods[method]; ok {
		return m
	}
	// If method contains no "/", LastIndex returns -1 and the prefix below is
	// the empty string.
	i := strings.LastIndex(method, "/")
	if m, ok := sc.Methods[method[:i+1]]; ok {
		return m
	}
	return sc.Methods[""]
}

// GetMethodConfig gets the method config of the input method.
1040 // If there's an exact match for input method (i.e. /service/method), we return 1041 // the corresponding MethodConfig. 1042 // If there isn't an exact match for the input method, we look for the service's default 1043 // config under the service (i.e /service/) and then for the default for all services (empty string). 1044 // 1045 // If there is a default MethodConfig for the service, we return it. 1046 // Otherwise, we return an empty MethodConfig. 1047 func (cc *ClientConn) GetMethodConfig(method string) MethodConfig { 1048 // TODO: Avoid the locking here. 1049 cc.mu.RLock() 1050 defer cc.mu.RUnlock() 1051 return getMethodConfig(cc.sc, method) 1052 } 1053 1054 func (cc *ClientConn) healthCheckConfig() *healthCheckConfig { 1055 cc.mu.RLock() 1056 defer cc.mu.RUnlock() 1057 if cc.sc == nil { 1058 return nil 1059 } 1060 return cc.sc.healthCheckConfig 1061 } 1062 1063 func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, balancer.PickResult, error) { 1064 return cc.pickerWrapper.pick(ctx, failfast, balancer.PickInfo{ 1065 Ctx: ctx, 1066 FullMethodName: method, 1067 }) 1068 } 1069 1070 func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, configSelector iresolver.ConfigSelector, addrs []resolver.Address) { 1071 if sc == nil { 1072 // should never reach here. 
1073 return 1074 } 1075 cc.sc = sc 1076 if configSelector != nil { 1077 cc.safeConfigSelector.UpdateConfigSelector(configSelector) 1078 } 1079 1080 if cc.sc.retryThrottling != nil { 1081 newThrottler := &retryThrottler{ 1082 tokens: cc.sc.retryThrottling.MaxTokens, 1083 max: cc.sc.retryThrottling.MaxTokens, 1084 thresh: cc.sc.retryThrottling.MaxTokens / 2, 1085 ratio: cc.sc.retryThrottling.TokenRatio, 1086 } 1087 cc.retryThrottler.Store(newThrottler) 1088 } else { 1089 cc.retryThrottler.Store((*retryThrottler)(nil)) 1090 } 1091 1092 var newBalancerName string 1093 if cc.sc == nil || (cc.sc.lbConfig == nil && cc.sc.LB == nil) { 1094 // No service config or no LB policy specified in config. 1095 newBalancerName = PickFirstBalancerName 1096 } else if cc.sc.lbConfig != nil { 1097 newBalancerName = cc.sc.lbConfig.name 1098 } else { // cc.sc.LB != nil 1099 newBalancerName = *cc.sc.LB 1100 } 1101 cc.balancerWrapper.switchTo(newBalancerName) 1102 } 1103 1104 func (cc *ClientConn) resolveNow(o resolver.ResolveNowOptions) { 1105 cc.mu.RLock() 1106 cc.resolverWrapper.resolveNow(o) 1107 cc.mu.RUnlock() 1108 } 1109 1110 func (cc *ClientConn) resolveNowLocked(o resolver.ResolveNowOptions) { 1111 cc.resolverWrapper.resolveNow(o) 1112 } 1113 1114 // ResetConnectBackoff wakes up all subchannels in transient failure and causes 1115 // them to attempt another connection immediately. It also resets the backoff 1116 // times used for subsequent attempts regardless of the current state. 1117 // 1118 // In general, this function should not be used. Typical service or network 1119 // outages result in a reasonable client reconnection strategy by default. 1120 // However, if a previously unavailable network becomes available, this may be 1121 // used to trigger an immediate reconnect. 1122 // 1123 // # Experimental 1124 // 1125 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 1126 // later release. 
1127 func (cc *ClientConn) ResetConnectBackoff() { 1128 cc.mu.Lock() 1129 conns := cc.conns 1130 cc.mu.Unlock() 1131 for ac := range conns { 1132 ac.resetConnectBackoff() 1133 } 1134 } 1135 1136 // Close tears down the ClientConn and all underlying connections. 1137 func (cc *ClientConn) Close() error { 1138 defer func() { 1139 cc.cancel() 1140 <-cc.csMgr.pubSub.Done() 1141 }() 1142 1143 // Prevent calls to enter/exit idle immediately, and ensure we are not 1144 // currently entering/exiting idle mode. 1145 cc.idlenessMgr.Close() 1146 1147 cc.mu.Lock() 1148 if cc.conns == nil { 1149 cc.mu.Unlock() 1150 return ErrClientConnClosing 1151 } 1152 1153 conns := cc.conns 1154 cc.conns = nil 1155 cc.csMgr.updateState(connectivity.Shutdown) 1156 1157 // We can safely unlock and continue to access all fields now as 1158 // cc.conns==nil, preventing any further operations on cc. 1159 cc.mu.Unlock() 1160 1161 cc.resolverWrapper.close() 1162 // The order of closing matters here since the balancer wrapper assumes the 1163 // picker is closed before it is closed. 1164 cc.pickerWrapper.close() 1165 cc.balancerWrapper.close() 1166 1167 <-cc.resolverWrapper.serializer.Done() 1168 <-cc.balancerWrapper.serializer.Done() 1169 1170 for ac := range conns { 1171 ac.tearDown(ErrClientConnClosing) 1172 } 1173 cc.addTraceEvent("deleted") 1174 // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add 1175 // trace reference to the entity being deleted, and thus prevent it from being 1176 // deleted right away. 1177 channelz.RemoveEntry(cc.channelzID) 1178 1179 return nil 1180 } 1181 1182 // addrConn is a network connection to a given address. 
1183 type addrConn struct { 1184 ctx context.Context 1185 cancel context.CancelFunc 1186 1187 cc *ClientConn 1188 dopts dialOptions 1189 acbw *acBalancerWrapper 1190 scopts balancer.NewSubConnOptions 1191 1192 // transport is set when there's a viable transport (note: ac state may not be READY as LB channel 1193 // health checking may require server to report healthy to set ac to READY), and is reset 1194 // to nil when the current transport should no longer be used to create a stream (e.g. after GoAway 1195 // is received, transport is closed, ac has been torn down). 1196 transport transport.ClientTransport // The current transport. 1197 1198 mu sync.Mutex 1199 curAddr resolver.Address // The current address. 1200 addrs []resolver.Address // All addresses that the resolver resolved to. 1201 1202 // Use updateConnectivityState for updating addrConn's connectivity state. 1203 state connectivity.State 1204 stateChan chan struct{} // closed and recreated on every state change. 1205 1206 backoffIdx int // Needs to be stateful for resetConnectBackoff. 1207 resetBackoff chan struct{} 1208 1209 channelzID *channelz.Identifier 1210 czData *channelzData 1211 } 1212 1213 // Note: this requires a lock on ac.mu. 1214 func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) { 1215 if ac.state == s { 1216 return 1217 } 1218 // When changing states, reset the state change channel. 1219 close(ac.stateChan) 1220 ac.stateChan = make(chan struct{}) 1221 ac.state = s 1222 if lastErr == nil { 1223 channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v", s) 1224 } else { 1225 channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v, last error: %s", s, lastErr) 1226 } 1227 ac.acbw.updateState(s, lastErr) 1228 } 1229 1230 // adjustParams updates parameters used to create transports upon 1231 // receiving a GoAway. 
func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
	switch r {
	case transport.GoAwayTooManyPings:
		// Double this addrConn's keepalive time and raise the channel-wide
		// value (cc.mkp.Time) to it if it is larger. Other GoAway reasons
		// leave the parameters untouched.
		v := 2 * ac.dopts.copts.KeepaliveParams.Time
		ac.cc.mu.Lock()
		if v > ac.cc.mkp.Time {
			ac.cc.mkp.Time = v
		}
		ac.cc.mu.Unlock()
	}
}

// resetTransport runs one connection cycle: it moves the addrConn to
// CONNECTING, tries every address in ac.addrs, and on failure moves to
// TRANSIENT_FAILURE, waits out the backoff (or a backoff reset / context
// cancellation) and then goes IDLE. It returns immediately if ac.ctx is
// already done.
func (ac *addrConn) resetTransport() {
	ac.mu.Lock()
	// Capture the context under the lock; updateAddrs may replace ac.ctx
	// later, and this cycle must keep observing the one it started with.
	acCtx := ac.ctx
	if acCtx.Err() != nil {
		ac.mu.Unlock()
		return
	}

	addrs := ac.addrs
	backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
	// This will be the duration that dial gets to finish.
	dialDuration := minConnectTimeout
	if ac.dopts.minConnectTimeout != nil {
		dialDuration = ac.dopts.minConnectTimeout()
	}

	if dialDuration < backoffFor {
		// Give dial more time as we keep failing to connect.
		dialDuration = backoffFor
	}
	// We can potentially spend all the time trying the first address, and
	// if the server accepts the connection and then hangs, the following
	// addresses will never be tried.
	//
	// The spec doesn't mention what should be done for multiple addresses.
	// https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
	connectDeadline := time.Now().Add(dialDuration)

	ac.updateConnectivityState(connectivity.Connecting, nil)
	ac.mu.Unlock()

	if err := ac.tryAllAddrs(acCtx, addrs, connectDeadline); err != nil {
		ac.cc.resolveNow(resolver.ResolveNowOptions{})
		ac.mu.Lock()
		if acCtx.Err() != nil {
			// addrConn was torn down.
			ac.mu.Unlock()
			return
		}
		// After exhausting all addresses, the addrConn enters
		// TRANSIENT_FAILURE.
		ac.updateConnectivityState(connectivity.TransientFailure, err)

		// Backoff.
		// Capture the reset channel under the lock; resetConnectBackoff
		// closes it and installs a new one.
		b := ac.resetBackoff
		ac.mu.Unlock()

		timer := time.NewTimer(backoffFor)
		select {
		case <-timer.C:
			// The backoff ran to completion: use the next backoff step for
			// the following failure.
			ac.mu.Lock()
			ac.backoffIdx++
			ac.mu.Unlock()
		case <-b:
			timer.Stop()
		case <-acCtx.Done():
			timer.Stop()
			return
		}

		ac.mu.Lock()
		if acCtx.Err() == nil {
			ac.updateConnectivityState(connectivity.Idle, err)
		}
		ac.mu.Unlock()
		return
	}
	// Success; reset backoff.
	ac.mu.Lock()
	ac.backoffIdx = 0
	ac.mu.Unlock()
}

// tryAllAddrs tries to create a connection to each of the addresses in order,
// and stops at the first successful one. It returns an error if no address was
// successfully connected, or updates ac appropriately with the new transport.
//
// The returned error is the first connection error encountered, or
// errConnClosing if ctx is done before an address is tried.
func (ac *addrConn) tryAllAddrs(ctx context.Context, addrs []resolver.Address, connectDeadline time.Time) error {
	var firstConnErr error
	for _, addr := range addrs {
		if ctx.Err() != nil {
			return errConnClosing
		}
		ac.mu.Lock()

		// Refresh the keepalive parameters from the channel before every
		// attempt; adjustParams may have changed cc.mkp.
		ac.cc.mu.RLock()
		ac.dopts.copts.KeepaliveParams = ac.cc.mkp
		ac.cc.mu.RUnlock()

		copts := ac.dopts.copts
		if ac.scopts.CredsBundle != nil {
			copts.CredsBundle = ac.scopts.CredsBundle
		}
		ac.mu.Unlock()

		channelz.Infof(logger, ac.channelzID, "Subchannel picks a new address %q to connect", addr.Addr)

		err := ac.createTransport(ctx, addr, copts, connectDeadline)
		if err == nil {
			return nil
		}
		if firstConnErr == nil {
			firstConnErr = err
		}
		ac.cc.updateConnectionError(err)
	}

	// Couldn't connect to any address.
	return firstConnErr
}

// createTransport creates a connection to addr. It returns an error if the
// address was not successfully connected, or updates ac appropriately with the
// new transport.
func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error {
	addr.ServerName = ac.cc.getServerName(addr)
	// hctx is handed to the health check; it is cancelled when the transport
	// closes (onClose) or when creating the transport fails.
	hctx, hcancel := context.WithCancel(ctx)

	// onClose is passed to transport.NewClientTransport as its close
	// callback.
	onClose := func(r transport.GoAwayReason) {
		ac.mu.Lock()
		defer ac.mu.Unlock()
		// adjust params based on GoAwayReason
		ac.adjustParams(r)
		if ctx.Err() != nil {
			// Already shut down or connection attempt canceled.  tearDown() or
			// updateAddrs() already cleared the transport and canceled hctx
			// via ac.ctx, and we expected this connection to be closed, so do
			// nothing here.
			return
		}
		hcancel()
		if ac.transport == nil {
			// We're still connecting to this address, which could error.  Do
			// not update the connectivity state or resolve; these will happen
			// at the end of the tryAllAddrs connection loop in the event of an
			// error.
			return
		}
		ac.transport = nil
		// Refresh the name resolver on any connection loss.
		ac.cc.resolveNow(resolver.ResolveNowOptions{})
		// Always go idle and wait for the LB policy to initiate a new
		// connection attempt.
		ac.updateConnectivityState(connectivity.Idle, nil)
	}

	// The connect context carries the deadline; it is released when this
	// function returns.
	connectCtx, cancel := context.WithDeadline(ctx, connectDeadline)
	defer cancel()
	copts.ChannelzParentID = ac.channelzID

	newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, addr, copts, onClose)
	if err != nil {
		if logger.V(2) {
			logger.Infof("Creating new client transport to %q: %v", addr, err)
		}
		// newTr is either nil, or closed.
		hcancel()
		channelz.Warningf(logger, ac.channelzID, "grpc: addrConn.createTransport failed to connect to %s. Err: %v", addr, err)
		return err
	}

	ac.mu.Lock()
	defer ac.mu.Unlock()
	if ctx.Err() != nil {
		// This can happen if the subConn was removed while in `Connecting`
		// state. tearDown() would have set the state to `Shutdown`, but
		// would not have closed the transport since ac.transport would not
		// have been set at that point.
		//
		// We run this in a goroutine because newTr.Close() calls onClose()
		// inline, which requires locking ac.mu.
		//
		// The error we pass to Close() is immaterial since there are no open
		// streams at this point, so no trailers with error details will be sent
		// out. We just need to pass a non-nil error.
		//
		// This can also happen when updateAddrs is called during a connection
		// attempt.
		go newTr.Close(transport.ErrConnClosing)
		return nil
	}
	if hctx.Err() != nil {
		// onClose was already called for this connection, but the connection
		// was successfully established first.  Consider it a success and set
		// the new state to Idle.
		ac.updateConnectivityState(connectivity.Idle, nil)
		return nil
	}
	ac.curAddr = addr
	ac.transport = newTr
	ac.startHealthCheck(hctx) // Will set state to READY if appropriate.
	return nil
}

// startHealthCheck starts the health checking stream (RPC) to watch the health
// stats of this connection if health checking is requested and configured.
//
// LB channel health checking is enabled when all requirements below are met:
// 1. it is not disabled by the user with the WithDisableHealthCheck DialOption
// 2. internal.HealthCheckFunc is set by importing the grpc/health package
// 3. a service config with non-empty healthCheckConfig field is provided
// 4. the load balancer requests it
//
// It sets addrConn to READY if the health checking stream is not started.
//
// Caller must hold ac.mu.
1449 func (ac *addrConn) startHealthCheck(ctx context.Context) { 1450 var healthcheckManagingState bool 1451 defer func() { 1452 if !healthcheckManagingState { 1453 ac.updateConnectivityState(connectivity.Ready, nil) 1454 } 1455 }() 1456 1457 if ac.cc.dopts.disableHealthCheck { 1458 return 1459 } 1460 healthCheckConfig := ac.cc.healthCheckConfig() 1461 if healthCheckConfig == nil { 1462 return 1463 } 1464 if !ac.scopts.HealthCheckEnabled { 1465 return 1466 } 1467 healthCheckFunc := ac.cc.dopts.healthCheckFunc 1468 if healthCheckFunc == nil { 1469 // The health package is not imported to set health check function. 1470 // 1471 // TODO: add a link to the health check doc in the error message. 1472 channelz.Error(logger, ac.channelzID, "Health check is requested but health check function is not set.") 1473 return 1474 } 1475 1476 healthcheckManagingState = true 1477 1478 // Set up the health check helper functions. 1479 currentTr := ac.transport 1480 newStream := func(method string) (any, error) { 1481 ac.mu.Lock() 1482 if ac.transport != currentTr { 1483 ac.mu.Unlock() 1484 return nil, status.Error(codes.Canceled, "the provided transport is no longer valid to use") 1485 } 1486 ac.mu.Unlock() 1487 return newNonRetryClientStream(ctx, &StreamDesc{ServerStreams: true}, method, currentTr, ac) 1488 } 1489 setConnectivityState := func(s connectivity.State, lastErr error) { 1490 ac.mu.Lock() 1491 defer ac.mu.Unlock() 1492 if ac.transport != currentTr { 1493 return 1494 } 1495 ac.updateConnectivityState(s, lastErr) 1496 } 1497 // Start the health checking stream. 
1498 go func() { 1499 err := ac.cc.dopts.healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName) 1500 if err != nil { 1501 if status.Code(err) == codes.Unimplemented { 1502 channelz.Error(logger, ac.channelzID, "Subchannel health check is unimplemented at server side, thus health check is disabled") 1503 } else { 1504 channelz.Errorf(logger, ac.channelzID, "Health checking failed: %v", err) 1505 } 1506 } 1507 }() 1508 } 1509 1510 func (ac *addrConn) resetConnectBackoff() { 1511 ac.mu.Lock() 1512 close(ac.resetBackoff) 1513 ac.backoffIdx = 0 1514 ac.resetBackoff = make(chan struct{}) 1515 ac.mu.Unlock() 1516 } 1517 1518 // getReadyTransport returns the transport if ac's state is READY or nil if not. 1519 func (ac *addrConn) getReadyTransport() transport.ClientTransport { 1520 ac.mu.Lock() 1521 defer ac.mu.Unlock() 1522 if ac.state == connectivity.Ready { 1523 return ac.transport 1524 } 1525 return nil 1526 } 1527 1528 // getTransport waits until the addrconn is ready and returns the transport. 1529 // If the context expires first, returns an appropriate status. If the 1530 // addrConn is stopped first, returns an Unavailable status error. 1531 func (ac *addrConn) getTransport(ctx context.Context) (transport.ClientTransport, error) { 1532 for ctx.Err() == nil { 1533 ac.mu.Lock() 1534 t, state, sc := ac.transport, ac.state, ac.stateChan 1535 ac.mu.Unlock() 1536 if state == connectivity.Ready { 1537 return t, nil 1538 } 1539 if state == connectivity.Shutdown { 1540 return nil, status.Errorf(codes.Unavailable, "SubConn shutting down") 1541 } 1542 1543 select { 1544 case <-ctx.Done(): 1545 case <-sc: 1546 } 1547 } 1548 return nil, status.FromContextError(ctx.Err()).Err() 1549 } 1550 1551 // tearDown starts to tear down the addrConn. 1552 // 1553 // Note that tearDown doesn't remove ac from ac.cc.conns, so the addrConn struct 1554 // will leak. In most cases, call cc.removeAddrConn() instead. 
1555 func (ac *addrConn) tearDown(err error) { 1556 ac.mu.Lock() 1557 if ac.state == connectivity.Shutdown { 1558 ac.mu.Unlock() 1559 return 1560 } 1561 curTr := ac.transport 1562 ac.transport = nil 1563 // We have to set the state to Shutdown before anything else to prevent races 1564 // between setting the state and logic that waits on context cancellation / etc. 1565 ac.updateConnectivityState(connectivity.Shutdown, nil) 1566 ac.cancel() 1567 ac.curAddr = resolver.Address{} 1568 1569 channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{ 1570 Desc: "Subchannel deleted", 1571 Severity: channelz.CtInfo, 1572 Parent: &channelz.TraceEventDesc{ 1573 Desc: fmt.Sprintf("Subchannel(id:%d) deleted", ac.channelzID.Int()), 1574 Severity: channelz.CtInfo, 1575 }, 1576 }) 1577 // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add 1578 // trace reference to the entity being deleted, and thus prevent it from 1579 // being deleted right away. 1580 channelz.RemoveEntry(ac.channelzID) 1581 ac.mu.Unlock() 1582 1583 // We have to release the lock before the call to GracefulClose/Close here 1584 // because both of them call onClose(), which requires locking ac.mu. 1585 if curTr != nil { 1586 if err == errConnDrain { 1587 // Close the transport gracefully when the subConn is being shutdown. 1588 // 1589 // GracefulClose() may be executed multiple times if: 1590 // - multiple GoAway frames are received from the server 1591 // - there are concurrent name resolver or balancer triggered 1592 // address removal and GoAway 1593 curTr.GracefulClose() 1594 } else { 1595 // Hard close the transport when the channel is entering idle or is 1596 // being shutdown. In the case where the channel is being shutdown, 1597 // closing of transports is also taken care of by cancelation of cc.ctx. 1598 // But in the case where the channel is entering idle, we need to 1599 // explicitly close the transports here. 
Instead of distinguishing 1600 // between these two cases, it is simpler to close the transport 1601 // unconditionally here. 1602 curTr.Close(err) 1603 } 1604 } 1605 } 1606 1607 func (ac *addrConn) getState() connectivity.State { 1608 ac.mu.Lock() 1609 defer ac.mu.Unlock() 1610 return ac.state 1611 } 1612 1613 func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric { 1614 ac.mu.Lock() 1615 addr := ac.curAddr.Addr 1616 ac.mu.Unlock() 1617 return &channelz.ChannelInternalMetric{ 1618 State: ac.getState(), 1619 Target: addr, 1620 CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted), 1621 CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded), 1622 CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed), 1623 LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)), 1624 } 1625 } 1626 1627 func (ac *addrConn) incrCallsStarted() { 1628 atomic.AddInt64(&ac.czData.callsStarted, 1) 1629 atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano()) 1630 } 1631 1632 func (ac *addrConn) incrCallsSucceeded() { 1633 atomic.AddInt64(&ac.czData.callsSucceeded, 1) 1634 } 1635 1636 func (ac *addrConn) incrCallsFailed() { 1637 atomic.AddInt64(&ac.czData.callsFailed, 1) 1638 } 1639 1640 type retryThrottler struct { 1641 max float64 1642 thresh float64 1643 ratio float64 1644 1645 mu sync.Mutex 1646 tokens float64 // TODO(dfawley): replace with atomic and remove lock. 1647 } 1648 1649 // throttle subtracts a retry token from the pool and returns whether a retry 1650 // should be throttled (disallowed) based upon the retry throttling policy in 1651 // the service config. 
1652 func (rt *retryThrottler) throttle() bool { 1653 if rt == nil { 1654 return false 1655 } 1656 rt.mu.Lock() 1657 defer rt.mu.Unlock() 1658 rt.tokens-- 1659 if rt.tokens < 0 { 1660 rt.tokens = 0 1661 } 1662 return rt.tokens <= rt.thresh 1663 } 1664 1665 func (rt *retryThrottler) successfulRPC() { 1666 if rt == nil { 1667 return 1668 } 1669 rt.mu.Lock() 1670 defer rt.mu.Unlock() 1671 rt.tokens += rt.ratio 1672 if rt.tokens > rt.max { 1673 rt.tokens = rt.max 1674 } 1675 } 1676 1677 type channelzChannel struct { 1678 cc *ClientConn 1679 } 1680 1681 func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric { 1682 return c.cc.channelzMetric() 1683 } 1684 1685 // ErrClientConnTimeout indicates that the ClientConn cannot establish the 1686 // underlying connections within the specified timeout. 1687 // 1688 // Deprecated: This error is never returned by grpc and should not be 1689 // referenced by users. 1690 var ErrClientConnTimeout = errors.New("grpc: timed out when dialing") 1691 1692 // getResolver finds the scheme in the cc's resolvers or the global registry. 1693 // scheme should always be lowercase (typically by virtue of url.Parse() 1694 // performing proper RFC3986 behavior). 1695 func (cc *ClientConn) getResolver(scheme string) resolver.Builder { 1696 for _, rb := range cc.dopts.resolvers { 1697 if scheme == rb.Scheme() { 1698 return rb 1699 } 1700 } 1701 return resolver.Get(scheme) 1702 } 1703 1704 func (cc *ClientConn) updateConnectionError(err error) { 1705 cc.lceMu.Lock() 1706 cc.lastConnectionError = err 1707 cc.lceMu.Unlock() 1708 } 1709 1710 func (cc *ClientConn) connectionError() error { 1711 cc.lceMu.Lock() 1712 defer cc.lceMu.Unlock() 1713 return cc.lastConnectionError 1714 } 1715 1716 // parseTargetAndFindResolver parses the user's dial target and stores the 1717 // parsed target in `cc.parsedTarget`. 
1718 // 1719 // The resolver to use is determined based on the scheme in the parsed target 1720 // and the same is stored in `cc.resolverBuilder`. 1721 // 1722 // Doesn't grab cc.mu as this method is expected to be called only at Dial time. 1723 func (cc *ClientConn) parseTargetAndFindResolver() error { 1724 channelz.Infof(logger, cc.channelzID, "original dial target is: %q", cc.target) 1725 1726 var rb resolver.Builder 1727 parsedTarget, err := parseTarget(cc.target) 1728 if err != nil { 1729 channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", cc.target, err) 1730 } else { 1731 channelz.Infof(logger, cc.channelzID, "parsed dial target is: %#v", parsedTarget) 1732 rb = cc.getResolver(parsedTarget.URL.Scheme) 1733 if rb != nil { 1734 cc.parsedTarget = parsedTarget 1735 cc.resolverBuilder = rb 1736 return nil 1737 } 1738 } 1739 1740 // We are here because the user's dial target did not contain a scheme or 1741 // specified an unregistered scheme. We should fallback to the default 1742 // scheme, except when a custom dialer is specified in which case, we should 1743 // always use passthrough scheme. 1744 defScheme := resolver.GetDefaultScheme() 1745 channelz.Infof(logger, cc.channelzID, "fallback to scheme %q", defScheme) 1746 canonicalTarget := defScheme + ":///" + cc.target 1747 1748 parsedTarget, err = parseTarget(canonicalTarget) 1749 if err != nil { 1750 channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", canonicalTarget, err) 1751 return err 1752 } 1753 channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget) 1754 rb = cc.getResolver(parsedTarget.URL.Scheme) 1755 if rb == nil { 1756 return fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.URL.Scheme) 1757 } 1758 cc.parsedTarget = parsedTarget 1759 cc.resolverBuilder = rb 1760 return nil 1761 } 1762 1763 // parseTarget uses RFC 3986 semantics to parse the given target into a 1764 // resolver.Target struct containing url. 
Query params are stripped from the 1765 // endpoint. 1766 func parseTarget(target string) (resolver.Target, error) { 1767 u, err := url.Parse(target) 1768 if err != nil { 1769 return resolver.Target{}, err 1770 } 1771 1772 return resolver.Target{URL: *u}, nil 1773 } 1774 1775 // encodeAuthority escapes the authority string based on valid chars defined in 1776 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2. 1777 func encodeAuthority(authority string) string { 1778 const upperhex = "0123456789ABCDEF" 1779 1780 // Return for characters that must be escaped as per 1781 // Valid chars are mentioned here: 1782 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 1783 shouldEscape := func(c byte) bool { 1784 // Alphanum are always allowed. 1785 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { 1786 return false 1787 } 1788 switch c { 1789 case '-', '_', '.', '~': // Unreserved characters 1790 return false 1791 case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // Subdelim characters 1792 return false 1793 case ':', '[', ']', '@': // Authority related delimeters 1794 return false 1795 } 1796 // Everything else must be escaped. 1797 return true 1798 } 1799 1800 hexCount := 0 1801 for i := 0; i < len(authority); i++ { 1802 c := authority[i] 1803 if shouldEscape(c) { 1804 hexCount++ 1805 } 1806 } 1807 1808 if hexCount == 0 { 1809 return authority 1810 } 1811 1812 required := len(authority) + 2*hexCount 1813 t := make([]byte, required) 1814 1815 j := 0 1816 // This logic is a barebones version of escape in the go net/url library. 1817 for i := 0; i < len(authority); i++ { 1818 switch c := authority[i]; { 1819 case shouldEscape(c): 1820 t[j] = '%' 1821 t[j+1] = upperhex[c>>4] 1822 t[j+2] = upperhex[c&15] 1823 j += 3 1824 default: 1825 t[j] = authority[i] 1826 j++ 1827 } 1828 } 1829 return string(t) 1830 } 1831 1832 // Determine channel authority. 
The order of precedence is as follows: 1833 // - user specified authority override using `WithAuthority` dial option 1834 // - creds' notion of server name for the authentication handshake 1835 // - endpoint from dial target of the form "scheme://[authority]/endpoint" 1836 // 1837 // Stores the determined authority in `cc.authority`. 1838 // 1839 // Returns a non-nil error if the authority returned by the transport 1840 // credentials do not match the authority configured through the dial option. 1841 // 1842 // Doesn't grab cc.mu as this method is expected to be called only at Dial time. 1843 func (cc *ClientConn) determineAuthority() error { 1844 dopts := cc.dopts 1845 // Historically, we had two options for users to specify the serverName or 1846 // authority for a channel. One was through the transport credentials 1847 // (either in its constructor, or through the OverrideServerName() method). 1848 // The other option (for cases where WithInsecure() dial option was used) 1849 // was to use the WithAuthority() dial option. 
1850 // 1851 // A few things have changed since: 1852 // - `insecure` package with an implementation of the `TransportCredentials` 1853 // interface for the insecure case 1854 // - WithAuthority() dial option support for secure credentials 1855 authorityFromCreds := "" 1856 if creds := dopts.copts.TransportCredentials; creds != nil && creds.Info().ServerName != "" { 1857 authorityFromCreds = creds.Info().ServerName 1858 } 1859 authorityFromDialOption := dopts.authority 1860 if (authorityFromCreds != "" && authorityFromDialOption != "") && authorityFromCreds != authorityFromDialOption { 1861 return fmt.Errorf("ClientConn's authority from transport creds %q and dial option %q don't match", authorityFromCreds, authorityFromDialOption) 1862 } 1863 1864 endpoint := cc.parsedTarget.Endpoint() 1865 if authorityFromDialOption != "" { 1866 cc.authority = authorityFromDialOption 1867 } else if authorityFromCreds != "" { 1868 cc.authority = authorityFromCreds 1869 } else if auth, ok := cc.resolverBuilder.(resolver.AuthorityOverrider); ok { 1870 cc.authority = auth.OverrideAuthority(cc.parsedTarget) 1871 } else if strings.HasPrefix(endpoint, ":") { 1872 cc.authority = "localhost" + endpoint 1873 } else { 1874 cc.authority = encodeAuthority(endpoint) 1875 } 1876 channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority) 1877 return nil 1878 }