/*
 *
 * Copyright 2014 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package grpc

import (
	"context"
	"errors"
	"fmt"
	"math"
	"net/url"
	"slices"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"google.golang.org/grpc/balancer"
	"google.golang.org/grpc/balancer/base"
	"google.golang.org/grpc/balancer/pickfirst"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/connectivity"
	"google.golang.org/grpc/internal"
	"google.golang.org/grpc/internal/channelz"
	"google.golang.org/grpc/internal/grpcsync"
	"google.golang.org/grpc/internal/idle"
	iresolver "google.golang.org/grpc/internal/resolver"
	"google.golang.org/grpc/internal/stats"
	"google.golang.org/grpc/internal/transport"
	"google.golang.org/grpc/keepalive"
	"google.golang.org/grpc/resolver"
	"google.golang.org/grpc/serviceconfig"
	"google.golang.org/grpc/status"

	_ "google.golang.org/grpc/balancer/roundrobin"           // To register roundrobin.
	_ "google.golang.org/grpc/internal/resolver/passthrough" // To register passthrough resolver.
	_ "google.golang.org/grpc/internal/resolver/unix"        // To register unix resolver.
	_ "google.golang.org/grpc/resolver/dns"                  // To register dns resolver.
)

const (
	// minConnectTimeout is the minimum time to give a connection to complete.
	minConnectTimeout = 20 * time.Second
)

var (
	// ErrClientConnClosing indicates that the operation is illegal because
	// the ClientConn is closing.
	//
	// Deprecated: this error should not be relied upon by users; use the status
	// code of Canceled instead.
	ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
	// errConnDrain indicates that the connection starts to be drained and does not accept any new RPCs.
	errConnDrain = errors.New("grpc: the connection is drained")
	// errConnClosing indicates that the connection is closing.
	errConnClosing = errors.New("grpc: the connection is closing")
	// errConnIdling indicates the connection is being closed as the channel
	// is moving to an idle mode due to inactivity.
	errConnIdling = errors.New("grpc: the connection is closing due to channel idleness")
	// invalidDefaultServiceConfigErrPrefix is used to prefix the json parsing error for the default
	// service config.
	invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
	// PickFirstBalancerName is the name of the pick_first balancer.
	PickFirstBalancerName = pickfirst.Name
)

// The following errors are returned from Dial and DialContext
var (
	// errNoTransportSecurity indicates that there is no transport security
	// being set for ClientConn. Users should either set credentials or
	// explicitly pass WithTransportCredentials(insecure.NewCredentials()) to
	// disable security.
	errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithTransportCredentials(insecure.NewCredentials()) explicitly or set credentials)")
	// errTransportCredsAndBundle indicates that creds bundle is used together
	// with other individual Transport Credentials.
	errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
	// errNoTransportCredsInBundle indicates that the configured creds bundle
	// returned nil transport credentials.
	errNoTransportCredsInBundle = errors.New("grpc: credentials.Bundle must return non-nil transport credentials")
	// errTransportCredentialsMissing indicates that users want to transmit
	// security information (e.g., OAuth2 token), which requires a secure
	// connection, over an insecure connection.
	errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
)

const (
	defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4
	defaultClientMaxSendMessageSize    = math.MaxInt32
	// defaultWriteBufSize specifies the default buffer size for sending frames.
	defaultWriteBufSize = 32 * 1024
	// defaultReadBufSize is the read-side counterpart of defaultWriteBufSize
	// (presumably the buffer used when receiving frames; confirm at the use
	// site).
	defaultReadBufSize = 32 * 1024
)

// defaultConfigSelector is a config selector backed by a single, fixed
// ServiceConfig. sc may be nil (NewClient installs one with a nil config
// before any service config is known).
type defaultConfigSelector struct {
	sc *ServiceConfig
}

// SelectConfig returns an RPCConfig that carries the RPC's own context
// unchanged together with the method config that getMethodConfig finds in
// dcs.sc for rpcInfo.Method. It never returns a non-nil error.
func (dcs *defaultConfigSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*iresolver.RPCConfig, error) {
	return &iresolver.RPCConfig{
		Context:      rpcInfo.Context,
		MethodConfig: getMethodConfig(dcs.sc, rpcInfo.Method),
	}, nil
}

// NewClient creates a new gRPC "channel" for the target URI provided.  No I/O
// is performed.  Use of the ClientConn for RPCs will automatically cause it to
// connect.  The Connect method may be called to manually create a connection,
// but for most users this should be unnecessary.
//
// The target name syntax is defined in
// https://github.com/grpc/grpc/blob/master/doc/naming.md.  E.g. to use the dns
// name resolver, a "dns:///" prefix may be applied to the target.  The default
// name resolver will be used if no scheme is detected, or if the parsed scheme
// is not a registered name resolver.
The default resolver is "dns" but can be 129 // overridden using the resolver package's SetDefaultScheme. 130 // 131 // Examples: 132 // 133 // - "foo.googleapis.com:8080" 134 // - "dns:///foo.googleapis.com:8080" 135 // - "dns:///foo.googleapis.com" 136 // - "dns:///10.0.0.213:8080" 137 // - "dns:///%5B2001:db8:85a3:8d3:1319:8a2e:370:7348%5D:443" 138 // - "dns://8.8.8.8/foo.googleapis.com:8080" 139 // - "dns://8.8.8.8/foo.googleapis.com" 140 // - "zookeeper://zk.example.com:9900/example_service" 141 // 142 // The DialOptions returned by WithBlock, WithTimeout, 143 // WithReturnConnectionError, and FailOnNonTempDialError are ignored by this 144 // function. 145 func NewClient(target string, opts ...DialOption) (conn *ClientConn, err error) { 146 cc := &ClientConn{ 147 target: target, 148 conns: make(map[*addrConn]struct{}), 149 dopts: defaultDialOptions(), 150 } 151 152 cc.retryThrottler.Store((*retryThrottler)(nil)) 153 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil}) 154 cc.ctx, cc.cancel = context.WithCancel(context.Background()) 155 156 // Apply dial options. 157 disableGlobalOpts := false 158 for _, opt := range opts { 159 if _, ok := opt.(*disableGlobalDialOptions); ok { 160 disableGlobalOpts = true 161 break 162 } 163 } 164 165 if !disableGlobalOpts { 166 for _, opt := range globalDialOptions { 167 opt.apply(&cc.dopts) 168 } 169 } 170 171 for _, opt := range opts { 172 opt.apply(&cc.dopts) 173 } 174 175 // Determine the resolver to use. 
176 if err := cc.initParsedTargetAndResolverBuilder(); err != nil { 177 return nil, err 178 } 179 180 for _, opt := range globalPerTargetDialOptions { 181 opt.DialOptionForTarget(cc.parsedTarget.URL).apply(&cc.dopts) 182 } 183 184 chainUnaryClientInterceptors(cc) 185 chainStreamClientInterceptors(cc) 186 187 if err := cc.validateTransportCredentials(); err != nil { 188 return nil, err 189 } 190 191 if cc.dopts.defaultServiceConfigRawJSON != nil { 192 scpr := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON, cc.dopts.maxCallAttempts) 193 if scpr.Err != nil { 194 return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, scpr.Err) 195 } 196 cc.dopts.defaultServiceConfig, _ = scpr.Config.(*ServiceConfig) 197 } 198 cc.keepaliveParams = cc.dopts.copts.KeepaliveParams 199 200 if err = cc.initAuthority(); err != nil { 201 return nil, err 202 } 203 204 // Register ClientConn with channelz. Note that this is only done after 205 // channel creation cannot fail. 206 cc.channelzRegistration(target) 207 channelz.Infof(logger, cc.channelz, "parsed dial target is: %#v", cc.parsedTarget) 208 channelz.Infof(logger, cc.channelz, "Channel authority set to %q", cc.authority) 209 210 cc.csMgr = newConnectivityStateManager(cc.ctx, cc.channelz) 211 cc.pickerWrapper = newPickerWrapper(cc.dopts.copts.StatsHandlers) 212 213 cc.metricsRecorderList = stats.NewMetricsRecorderList(cc.dopts.copts.StatsHandlers) 214 215 cc.initIdleStateLocked() // Safe to call without the lock, since nothing else has a reference to cc. 216 cc.idlenessMgr = idle.NewManager((*idler)(cc), cc.dopts.idleTimeout) 217 218 return cc, nil 219 } 220 221 // Dial calls DialContext(context.Background(), target, opts...). 222 // 223 // Deprecated: use NewClient instead. Will be supported throughout 1.x. 224 func Dial(target string, opts ...DialOption) (*ClientConn, error) { 225 return DialContext(context.Background(), target, opts...) 226 } 227 228 // DialContext calls NewClient and then exits idle mode. 
// If WithBlock(true) is used, it calls Connect and WaitForStateChange until
// either the context expires or the state of the ClientConn is Ready.
//
// One subtle difference between NewClient and Dial and DialContext is that the
// former uses "dns" as the default name resolver, while the latter use
// "passthrough" for backward compatibility.  This distinction should not matter
// to most users, but could matter to legacy users that specify a custom dialer
// and expect it to receive the target string directly.
//
// Deprecated: use NewClient instead.  Will be supported throughout 1.x.
func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
	// At the end of this method, we kick the channel out of idle, rather than
	// waiting for the first rpc.
	//
	// WithLocalDNSResolution dial option in `grpc.Dial` ensures that it
	// preserves behavior: when default scheme passthrough is used, skip
	// hostname resolution, when "dns" is used for resolution, perform
	// resolution on the client.
	//
	// The two legacy defaults are prepended so that options supplied by the
	// caller are applied after them.
	opts = append([]DialOption{withDefaultScheme("passthrough"), WithLocalDNSResolution()}, opts...)
	cc, err := NewClient(target, opts...)
	if err != nil {
		return nil, err
	}

	// We start the channel off in idle mode, but kick it out of idle now,
	// instead of waiting for the first RPC.  This is the legacy behavior of
	// Dial.
	//
	// The deferred function below closes the channel whenever this function
	// returns a non-nil error; it inspects the named result err, so it also
	// sees errors assigned by the later-registered defer further down.
	defer func() {
		if err != nil {
			cc.Close()
		}
	}()

	// This creates the name resolver, load balancer, etc.
	if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
		return nil, err
	}

	// Return now for non-blocking dials.
	if !cc.dopts.block {
		return cc, nil
	}

	// For blocking dials, a positive timeout dial option bounds the wait by
	// deriving a deadline context from the caller's ctx.
	if cc.dopts.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
		defer cancel()
	}
	// If ctx is done by the time we return, normalize the results: conn is
	// always nil, and err becomes ctx.Err(), optionally combined with the last
	// error when returnLastError is set. Deferred functions run in LIFO order,
	// so this runs before the Close defer above.
	defer func() {
		select {
		case <-ctx.Done():
			switch {
			case ctx.Err() == err:
				// Already returning the context error; only clear conn.
				conn = nil
			case err == nil || !cc.dopts.returnLastError:
				conn, err = nil, ctx.Err()
			default:
				conn, err = nil, fmt.Errorf("%v: %v", ctx.Err(), err)
			}
		default:
		}
	}()

	// A blocking dial blocks until the clientConn is ready.
	for {
		s := cc.GetState()
		if s == connectivity.Idle {
			cc.Connect()
		}
		if s == connectivity.Ready {
			return cc, nil
		} else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
			// Fail fast only when the last connection error reports itself as
			// non-temporary via a Temporary() bool method.
			if err = cc.connectionError(); err != nil {
				terr, ok := err.(interface {
					Temporary() bool
				})
				if ok && !terr.Temporary() {
					return nil, err
				}
			}
		}
		if !cc.WaitForStateChange(ctx, s) {
			// ctx got timeout or canceled.
			if err = cc.connectionError(); err != nil && cc.dopts.returnLastError {
				return nil, err
			}
			return nil, ctx.Err()
		}
	}
}

// addTraceEvent is a helper method to add a trace event on the channel.  If the
// channel is a nested one, the same event is also added on the parent channel.
322 func (cc *ClientConn) addTraceEvent(msg string) { 323 ted := &channelz.TraceEvent{ 324 Desc: fmt.Sprintf("Channel %s", msg), 325 Severity: channelz.CtInfo, 326 } 327 if cc.dopts.channelzParent != nil { 328 ted.Parent = &channelz.TraceEvent{ 329 Desc: fmt.Sprintf("Nested channel(id:%d) %s", cc.channelz.ID, msg), 330 Severity: channelz.CtInfo, 331 } 332 } 333 channelz.AddTraceEvent(logger, cc.channelz, 0, ted) 334 } 335 336 type idler ClientConn 337 338 func (i *idler) EnterIdleMode() { 339 (*ClientConn)(i).enterIdleMode() 340 } 341 342 func (i *idler) ExitIdleMode() error { 343 return (*ClientConn)(i).exitIdleMode() 344 } 345 346 // exitIdleMode moves the channel out of idle mode by recreating the name 347 // resolver and load balancer. This should never be called directly; use 348 // cc.idlenessMgr.ExitIdleMode instead. 349 func (cc *ClientConn) exitIdleMode() (err error) { 350 cc.mu.Lock() 351 if cc.conns == nil { 352 cc.mu.Unlock() 353 return errConnClosing 354 } 355 cc.mu.Unlock() 356 357 // This needs to be called without cc.mu because this builds a new resolver 358 // which might update state or report error inline, which would then need to 359 // acquire cc.mu. 360 if err := cc.resolverWrapper.start(); err != nil { 361 return err 362 } 363 364 cc.addTraceEvent("exiting idle mode") 365 return nil 366 } 367 368 // initIdleStateLocked initializes common state to how it should be while idle. 369 func (cc *ClientConn) initIdleStateLocked() { 370 cc.resolverWrapper = newCCResolverWrapper(cc) 371 cc.balancerWrapper = newCCBalancerWrapper(cc) 372 cc.firstResolveEvent = grpcsync.NewEvent() 373 // cc.conns == nil is a proxy for the ClientConn being closed. So, instead 374 // of setting it to nil here, we recreate the map. This also means that we 375 // don't have to do this when exiting idle mode. 
376 cc.conns = make(map[*addrConn]struct{}) 377 } 378 379 // enterIdleMode puts the channel in idle mode, and as part of it shuts down the 380 // name resolver, load balancer, and any subchannels. This should never be 381 // called directly; use cc.idlenessMgr.EnterIdleMode instead. 382 func (cc *ClientConn) enterIdleMode() { 383 cc.mu.Lock() 384 385 if cc.conns == nil { 386 cc.mu.Unlock() 387 return 388 } 389 390 conns := cc.conns 391 392 rWrapper := cc.resolverWrapper 393 rWrapper.close() 394 cc.pickerWrapper.reset() 395 bWrapper := cc.balancerWrapper 396 bWrapper.close() 397 cc.csMgr.updateState(connectivity.Idle) 398 cc.addTraceEvent("entering idle mode") 399 400 cc.initIdleStateLocked() 401 402 cc.mu.Unlock() 403 404 // Block until the name resolver and LB policy are closed. 405 <-rWrapper.serializer.Done() 406 <-bWrapper.serializer.Done() 407 408 // Close all subchannels after the LB policy is closed. 409 for ac := range conns { 410 ac.tearDown(errConnIdling) 411 } 412 } 413 414 // validateTransportCredentials performs a series of checks on the configured 415 // transport credentials. It returns a non-nil error if any of these conditions 416 // are met: 417 // - no transport creds and no creds bundle is configured 418 // - both transport creds and creds bundle are configured 419 // - creds bundle is configured, but it lacks a transport credentials 420 // - insecure transport creds configured alongside call creds that require 421 // transport level security 422 // 423 // If none of the above conditions are met, the configured credentials are 424 // deemed valid and a nil error is returned. 
425 func (cc *ClientConn) validateTransportCredentials() error { 426 if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil { 427 return errNoTransportSecurity 428 } 429 if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil { 430 return errTransportCredsAndBundle 431 } 432 if cc.dopts.copts.CredsBundle != nil && cc.dopts.copts.CredsBundle.TransportCredentials() == nil { 433 return errNoTransportCredsInBundle 434 } 435 transportCreds := cc.dopts.copts.TransportCredentials 436 if transportCreds == nil { 437 transportCreds = cc.dopts.copts.CredsBundle.TransportCredentials() 438 } 439 if transportCreds.Info().SecurityProtocol == "insecure" { 440 for _, cd := range cc.dopts.copts.PerRPCCredentials { 441 if cd.RequireTransportSecurity() { 442 return errTransportCredentialsMissing 443 } 444 } 445 } 446 return nil 447 } 448 449 // channelzRegistration registers the newly created ClientConn with channelz and 450 // stores the returned identifier in `cc.channelz`. A channelz trace event is 451 // emitted for ClientConn creation. If the newly created ClientConn is a nested 452 // one, i.e a valid parent ClientConn ID is specified via a dial option, the 453 // trace event is also added to the parent. 454 // 455 // Doesn't grab cc.mu as this method is expected to be called only at Dial time. 456 func (cc *ClientConn) channelzRegistration(target string) { 457 parentChannel, _ := cc.dopts.channelzParent.(*channelz.Channel) 458 cc.channelz = channelz.RegisterChannel(parentChannel, target) 459 cc.addTraceEvent("created") 460 } 461 462 // chainUnaryClientInterceptors chains all unary client interceptors into one. 463 func chainUnaryClientInterceptors(cc *ClientConn) { 464 interceptors := cc.dopts.chainUnaryInts 465 // Prepend dopts.unaryInt to the chaining interceptors if it exists, since unaryInt will 466 // be executed before any other chained interceptors. 
467 if cc.dopts.unaryInt != nil { 468 interceptors = append([]UnaryClientInterceptor{cc.dopts.unaryInt}, interceptors...) 469 } 470 var chainedInt UnaryClientInterceptor 471 if len(interceptors) == 0 { 472 chainedInt = nil 473 } else if len(interceptors) == 1 { 474 chainedInt = interceptors[0] 475 } else { 476 chainedInt = func(ctx context.Context, method string, req, reply any, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error { 477 return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...) 478 } 479 } 480 cc.dopts.unaryInt = chainedInt 481 } 482 483 // getChainUnaryInvoker recursively generate the chained unary invoker. 484 func getChainUnaryInvoker(interceptors []UnaryClientInterceptor, curr int, finalInvoker UnaryInvoker) UnaryInvoker { 485 if curr == len(interceptors)-1 { 486 return finalInvoker 487 } 488 return func(ctx context.Context, method string, req, reply any, cc *ClientConn, opts ...CallOption) error { 489 return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...) 490 } 491 } 492 493 // chainStreamClientInterceptors chains all stream client interceptors into one. 494 func chainStreamClientInterceptors(cc *ClientConn) { 495 interceptors := cc.dopts.chainStreamInts 496 // Prepend dopts.streamInt to the chaining interceptors if it exists, since streamInt will 497 // be executed before any other chained interceptors. 498 if cc.dopts.streamInt != nil { 499 interceptors = append([]StreamClientInterceptor{cc.dopts.streamInt}, interceptors...) 
500 } 501 var chainedInt StreamClientInterceptor 502 if len(interceptors) == 0 { 503 chainedInt = nil 504 } else if len(interceptors) == 1 { 505 chainedInt = interceptors[0] 506 } else { 507 chainedInt = func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, streamer Streamer, opts ...CallOption) (ClientStream, error) { 508 return interceptors[0](ctx, desc, cc, method, getChainStreamer(interceptors, 0, streamer), opts...) 509 } 510 } 511 cc.dopts.streamInt = chainedInt 512 } 513 514 // getChainStreamer recursively generate the chained client stream constructor. 515 func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStreamer Streamer) Streamer { 516 if curr == len(interceptors)-1 { 517 return finalStreamer 518 } 519 return func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (ClientStream, error) { 520 return interceptors[curr+1](ctx, desc, cc, method, getChainStreamer(interceptors, curr+1, finalStreamer), opts...) 521 } 522 } 523 524 // newConnectivityStateManager creates an connectivityStateManager with 525 // the specified channel. 526 func newConnectivityStateManager(ctx context.Context, channel *channelz.Channel) *connectivityStateManager { 527 return &connectivityStateManager{ 528 channelz: channel, 529 pubSub: grpcsync.NewPubSub(ctx), 530 } 531 } 532 533 // connectivityStateManager keeps the connectivity.State of ClientConn. 534 // This struct will eventually be exported so the balancers can access it. 535 // 536 // TODO: If possible, get rid of the `connectivityStateManager` type, and 537 // provide this functionality using the `PubSub`, to avoid keeping track of 538 // the connectivity state at two places. 539 type connectivityStateManager struct { 540 mu sync.Mutex 541 state connectivity.State 542 notifyChan chan struct{} 543 channelz *channelz.Channel 544 pubSub *grpcsync.PubSub 545 } 546 547 // updateState updates the connectivity.State of ClientConn. 
548 // If there's a change it notifies goroutines waiting on state change to 549 // happen. 550 func (csm *connectivityStateManager) updateState(state connectivity.State) { 551 csm.mu.Lock() 552 defer csm.mu.Unlock() 553 if csm.state == connectivity.Shutdown { 554 return 555 } 556 if csm.state == state { 557 return 558 } 559 csm.state = state 560 csm.channelz.ChannelMetrics.State.Store(&state) 561 csm.pubSub.Publish(state) 562 563 channelz.Infof(logger, csm.channelz, "Channel Connectivity change to %v", state) 564 if csm.notifyChan != nil { 565 // There are other goroutines waiting on this channel. 566 close(csm.notifyChan) 567 csm.notifyChan = nil 568 } 569 } 570 571 func (csm *connectivityStateManager) getState() connectivity.State { 572 csm.mu.Lock() 573 defer csm.mu.Unlock() 574 return csm.state 575 } 576 577 func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} { 578 csm.mu.Lock() 579 defer csm.mu.Unlock() 580 if csm.notifyChan == nil { 581 csm.notifyChan = make(chan struct{}) 582 } 583 return csm.notifyChan 584 } 585 586 // ClientConnInterface defines the functions clients need to perform unary and 587 // streaming RPCs. It is implemented by *ClientConn, and is only intended to 588 // be referenced by generated code. 589 type ClientConnInterface interface { 590 // Invoke performs a unary RPC and returns after the response is received 591 // into reply. 592 Invoke(ctx context.Context, method string, args any, reply any, opts ...CallOption) error 593 // NewStream begins a streaming RPC. 594 NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error) 595 } 596 597 // Assert *ClientConn implements ClientConnInterface. 598 var _ ClientConnInterface = (*ClientConn)(nil) 599 600 // ClientConn represents a virtual connection to a conceptual endpoint, to 601 // perform RPCs. 602 // 603 // A ClientConn is free to have zero or more actual connections to the endpoint 604 // based on configuration, load, etc. 
// It is also free to determine which actual endpoints to use and may change
// it every RPC, permitting client-side load balancing.
//
// A ClientConn encapsulates a range of functionality including name
// resolution, TCP connection establishment (with retries and backoff) and TLS
// handshakes. It also handles errors on established connections by
// re-resolving the name and reconnecting.
type ClientConn struct {
	ctx    context.Context    // Initialized using the background context at dial time.
	cancel context.CancelFunc // Cancelled on close.

	// The following are initialized at dial time, and are read-only after that.
	target              string            // User's dial target.
	parsedTarget        resolver.Target   // See initParsedTargetAndResolverBuilder().
	authority           string            // See initAuthority().
	dopts               dialOptions       // Default and user specified dial options.
	channelz            *channelz.Channel // Channelz object.
	resolverBuilder     resolver.Builder  // See initParsedTargetAndResolverBuilder().
	idlenessMgr         *idle.Manager     // Created in NewClient from the idle timeout dial option.
	metricsRecorderList *stats.MetricsRecorderList // Built in NewClient from the configured stats handlers.

	// The following provide their own synchronization, and therefore don't
	// require cc.mu to be held to access them.
	csMgr              *connectivityStateManager
	pickerWrapper      *pickerWrapper
	safeConfigSelector iresolver.SafeConfigSelector
	retryThrottler     atomic.Value // Updated from service config.

	// mu protects the following fields.
	// TODO: split mu so the same mutex isn't used for everything.
	mu              sync.RWMutex
	resolverWrapper *ccResolverWrapper         // Always recreated whenever entering idle to simplify Close.
	balancerWrapper *ccBalancerWrapper         // Always recreated whenever entering idle to simplify Close.
	sc              *ServiceConfig             // Latest service config received from the resolver.
	conns           map[*addrConn]struct{}     // Set to nil on close.
	keepaliveParams keepalive.ClientParameters // May be updated upon receipt of a GoAway.
	// firstResolveEvent is used to track whether the name resolver sent us at
	// least one update. RPCs block on this event.  May be accessed without mu
	// if we know we cannot be asked to enter idle mode while accessing it (e.g.
	// when the idle manager has already been closed, or if we are already
	// entering idle mode).
	firstResolveEvent *grpcsync.Event

	lceMu               sync.Mutex // protects lastConnectionError
	lastConnectionError error
}

// WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
// ctx expires. A true value is returned in former case and false in latter.
func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
	// The notification channel is obtained before the state is read: a state
	// change that happens after this point closes ch, so it is observed by
	// the select below even if the state check still sees sourceState.
	ch := cc.csMgr.getNotifyChan()
	if cc.csMgr.getState() != sourceState {
		return true
	}
	select {
	case <-ctx.Done():
		return false
	case <-ch:
		return true
	}
}

// GetState returns the connectivity.State of ClientConn.
func (cc *ClientConn) GetState() connectivity.State {
	return cc.csMgr.getState()
}

// Connect causes all subchannels in the ClientConn to attempt to connect if
// the channel is idle.  Does not wait for the connection attempts to begin
// before returning.
//
// # Experimental
//
// Notice: This API is EXPERIMENTAL and may be changed or removed in a later
// release.
func (cc *ClientConn) Connect() {
	// A failure to leave idle mode is only recorded as a trace event; Connect
	// has no error return.
	if err := cc.idlenessMgr.ExitIdleMode(); err != nil {
		cc.addTraceEvent(err.Error())
		return
	}
	// If the ClientConn was not in idle mode, we need to call ExitIdle on the
	// LB policy so that connections can be created.
	cc.mu.Lock()
	cc.balancerWrapper.exitIdle()
	cc.mu.Unlock()
}

// waitForResolvedAddrs blocks until the resolver provides addresses or the
// context expires, whichever happens first.
//
// Error is nil unless the context expires first, in which case a status error
// based on the context is returned, or the ClientConn's own context is done
// first, in which case ErrClientConnClosing is returned.
//
// The returned boolean indicates whether it did block or not. If the
// resolution has already happened once before, it returns false without
// blocking. Otherwise, it waits for the resolution and returns true if
// resolution has succeeded, or returns false along with an error if
// resolution has failed.
func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) (bool, error) {
	// This is on the RPC path, so we use a fast path to avoid the
	// more-expensive "select" below after the resolver has returned once.
	if cc.firstResolveEvent.HasFired() {
		return false, nil
	}
	internal.NewStreamWaitingForResolver()
	select {
	case <-cc.firstResolveEvent.Done():
		return true, nil
	case <-ctx.Done():
		return false, status.FromContextError(ctx.Err()).Err()
	case <-cc.ctx.Done():
		return false, ErrClientConnClosing
	}
}

// emptyServiceConfig is the parsed form of the empty service config "{}". It
// is populated in init and applied when neither the resolver nor the dial
// options supply a service config.
var emptyServiceConfig *ServiceConfig

// init parses the empty service config once (panicking if that ever fails)
// and installs hooks in the internal package that reach into ClientConn.
func init() {
	cfg := parseServiceConfig("{}", defaultMaxCallAttempts)
	if cfg.Err != nil {
		panic(fmt.Sprintf("impossible error parsing empty service config: %v", cfg.Err))
	}
	emptyServiceConfig = cfg.Config.(*ServiceConfig)

	internal.SubscribeToConnectivityStateChanges = func(cc *ClientConn, s grpcsync.Subscriber) func() {
		return cc.csMgr.pubSub.Subscribe(s)
	}
	internal.EnterIdleModeForTesting = func(cc *ClientConn) {
		cc.idlenessMgr.EnterIdleModeForTesting()
	}
	internal.ExitIdleModeForTesting = func(cc *ClientConn) error {
		return cc.idlenessMgr.ExitIdleMode()
	}
}

// maybeApplyDefaultServiceConfig re-applies the service config already stored
// in cc.sc if there is one (passing a nil config selector). Otherwise it
// applies the default service config from the dial options, or the empty
// service config when no default was configured, each with a
// defaultConfigSelector wrapping that same config.
func (cc *ClientConn) maybeApplyDefaultServiceConfig() {
	if cc.sc != nil {
		cc.applyServiceConfigAndBalancer(cc.sc, nil)
		return
	}
	if cc.dopts.defaultServiceConfig != nil {
		cc.applyServiceConfigAndBalancer(cc.dopts.defaultServiceConfig, &defaultConfigSelector{cc.dopts.defaultServiceConfig})
	} else {
		cc.applyServiceConfigAndBalancer(emptyServiceConfig, &defaultConfigSelector{emptyServiceConfig})
	}
}

// updateResolverStateAndUnlock handles a state update s, or an error err,
// reported by the name resolver. As the name says, it releases cc.mu on every
// return path (the body never locks it, so the caller is presumably expected
// to hold it on entry). It fires firstResolveEvent when it returns.
//
// It returns balancer.ErrBadResolverState when err is non-nil or when the
// service config in s is unusable; otherwise it returns the result of
// forwarding the state to the balancer. It returns nil without doing anything
// if the ClientConn is already closed.
func (cc *ClientConn) updateResolverStateAndUnlock(s resolver.State, err error) error {
	defer cc.firstResolveEvent.Fire()
	// Check if the ClientConn is already closed. Some fields (e.g.
	// balancerWrapper) are set to nil when closing the ClientConn, and could
	// cause nil pointer panic if we don't have this check.
	if cc.conns == nil {
		cc.mu.Unlock()
		return nil
	}

	if err != nil {
		// May need to apply the initial service config in case the resolver
		// doesn't support service configs, or doesn't provide a service config
		// with the new addresses.
		cc.maybeApplyDefaultServiceConfig()

		cc.balancerWrapper.resolverError(err)

		// No addresses are valid with err set; return early.
		cc.mu.Unlock()
		return balancer.ErrBadResolverState
	}

	var ret error
	if cc.dopts.disableServiceConfig {
		channelz.Infof(logger, cc.channelz, "ignoring service config from resolver (%v) and applying the default because service config is disabled", s.ServiceConfig)
		cc.maybeApplyDefaultServiceConfig()
	} else if s.ServiceConfig == nil {
		cc.maybeApplyDefaultServiceConfig()
		// TODO: do we need to apply a failing LB policy if there is no
		// default, per the error handling design?
	} else {
		if sc, ok := s.ServiceConfig.Config.(*ServiceConfig); s.ServiceConfig.Err == nil && ok {
			// A config selector supplied by the resolver takes precedence
			// over the per-method configs in the service config.
			configSelector := iresolver.GetConfigSelector(s)
			if configSelector != nil {
				if len(s.ServiceConfig.Config.(*ServiceConfig).Methods) != 0 {
					channelz.Infof(logger, cc.channelz, "method configs in service config will be ignored due to presence of config selector")
				}
			} else {
				configSelector = &defaultConfigSelector{sc}
			}
			cc.applyServiceConfigAndBalancer(sc, configSelector)
		} else {
			ret = balancer.ErrBadResolverState
			if cc.sc == nil {
				// Apply the failing LB only if we haven't received valid service config
				// from the name resolver in the past.
				cc.applyFailingLBLocked(s.ServiceConfig)
				cc.mu.Unlock()
				return ret
			}
		}
	}

	// Capture what the balancer update needs while still holding the lock,
	// then release it before calling into the balancer wrapper.
	balCfg := cc.sc.lbConfig
	bw := cc.balancerWrapper
	cc.mu.Unlock()

	uccsErr := bw.updateClientConnState(&balancer.ClientConnState{ResolverState: s, BalancerConfig: balCfg})
	if ret == nil {
		ret = uccsErr // prefer ErrBadResolver state since any other error is
		// currently meaningless to the caller.
	}
	return ret
}

// applyFailingLBLocked is akin to configuring an LB policy on the channel which
// always fails RPCs. Here, an actual LB policy is not configured, but an always
// erroring picker is configured, which returns errors with information about
// what was invalid in the received service config. A config selector with no
// service config is configured, and the connectivity state of the channel is
// set to TransientFailure.
824 func (cc *ClientConn) applyFailingLBLocked(sc *serviceconfig.ParseResult) { 825 var err error 826 if sc.Err != nil { 827 err = status.Errorf(codes.Unavailable, "error parsing service config: %v", sc.Err) 828 } else { 829 err = status.Errorf(codes.Unavailable, "illegal service config type: %T", sc.Config) 830 } 831 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil}) 832 cc.pickerWrapper.updatePicker(base.NewErrPicker(err)) 833 cc.csMgr.updateState(connectivity.TransientFailure) 834 } 835 836 // Makes a copy of the input addresses slice. Addresses are passed during 837 // subconn creation and address update operations. 838 func copyAddresses(in []resolver.Address) []resolver.Address { 839 out := make([]resolver.Address, len(in)) 840 copy(out, in) 841 return out 842 } 843 844 // newAddrConnLocked creates an addrConn for addrs and adds it to cc.conns. 845 // 846 // Caller needs to make sure len(addrs) > 0. 847 func (cc *ClientConn) newAddrConnLocked(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) { 848 if cc.conns == nil { 849 return nil, ErrClientConnClosing 850 } 851 852 ac := &addrConn{ 853 state: connectivity.Idle, 854 cc: cc, 855 addrs: copyAddresses(addrs), 856 scopts: opts, 857 dopts: cc.dopts, 858 channelz: channelz.RegisterSubChannel(cc.channelz, ""), 859 resetBackoff: make(chan struct{}), 860 } 861 ac.ctx, ac.cancel = context.WithCancel(cc.ctx) 862 // Start with our address set to the first address; this may be updated if 863 // we connect to different addresses. 864 ac.channelz.ChannelMetrics.Target.Store(&addrs[0].Addr) 865 866 channelz.AddTraceEvent(logger, ac.channelz, 0, &channelz.TraceEvent{ 867 Desc: "Subchannel created", 868 Severity: channelz.CtInfo, 869 Parent: &channelz.TraceEvent{ 870 Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelz.ID), 871 Severity: channelz.CtInfo, 872 }, 873 }) 874 875 // Track ac in cc. This needs to be done before any getTransport(...) is called. 
876 cc.conns[ac] = struct{}{} 877 return ac, nil 878 } 879 880 // removeAddrConn removes the addrConn in the subConn from clientConn. 881 // It also tears down the ac with the given error. 882 func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) { 883 cc.mu.Lock() 884 if cc.conns == nil { 885 cc.mu.Unlock() 886 return 887 } 888 delete(cc.conns, ac) 889 cc.mu.Unlock() 890 ac.tearDown(err) 891 } 892 893 // Target returns the target string of the ClientConn. 894 func (cc *ClientConn) Target() string { 895 return cc.target 896 } 897 898 // CanonicalTarget returns the canonical target string used when creating cc. 899 // 900 // This always has the form "<scheme>://[authority]/<endpoint>". For example: 901 // 902 // - "dns:///example.com:42" 903 // - "dns://8.8.8.8/example.com:42" 904 // - "unix:///path/to/socket" 905 func (cc *ClientConn) CanonicalTarget() string { 906 return cc.parsedTarget.String() 907 } 908 909 func (cc *ClientConn) incrCallsStarted() { 910 cc.channelz.ChannelMetrics.CallsStarted.Add(1) 911 cc.channelz.ChannelMetrics.LastCallStartedTimestamp.Store(time.Now().UnixNano()) 912 } 913 914 func (cc *ClientConn) incrCallsSucceeded() { 915 cc.channelz.ChannelMetrics.CallsSucceeded.Add(1) 916 } 917 918 func (cc *ClientConn) incrCallsFailed() { 919 cc.channelz.ChannelMetrics.CallsFailed.Add(1) 920 } 921 922 // connect starts creating a transport. 923 // It does nothing if the ac is not IDLE. 924 // TODO(bar) Move this to the addrConn section. 
925 func (ac *addrConn) connect() error { 926 ac.mu.Lock() 927 if ac.state == connectivity.Shutdown { 928 if logger.V(2) { 929 logger.Infof("connect called on shutdown addrConn; ignoring.") 930 } 931 ac.mu.Unlock() 932 return errConnClosing 933 } 934 if ac.state != connectivity.Idle { 935 if logger.V(2) { 936 logger.Infof("connect called on addrConn in non-idle state (%v); ignoring.", ac.state) 937 } 938 ac.mu.Unlock() 939 return nil 940 } 941 942 ac.resetTransportAndUnlock() 943 return nil 944 } 945 946 // equalAddressIgnoringBalAttributes returns true is a and b are considered equal. 947 // This is different from the Equal method on the resolver.Address type which 948 // considers all fields to determine equality. Here, we only consider fields 949 // that are meaningful to the subConn. 950 func equalAddressIgnoringBalAttributes(a, b *resolver.Address) bool { 951 return a.Addr == b.Addr && a.ServerName == b.ServerName && 952 a.Attributes.Equal(b.Attributes) && 953 a.Metadata == b.Metadata 954 } 955 956 func equalAddressesIgnoringBalAttributes(a, b []resolver.Address) bool { 957 return slices.EqualFunc(a, b, func(a, b resolver.Address) bool { return equalAddressIgnoringBalAttributes(&a, &b) }) 958 } 959 960 // updateAddrs updates ac.addrs with the new addresses list and handles active 961 // connections or connection attempts. 962 func (ac *addrConn) updateAddrs(addrs []resolver.Address) { 963 addrs = copyAddresses(addrs) 964 limit := len(addrs) 965 if limit > 5 { 966 limit = 5 967 } 968 channelz.Infof(logger, ac.channelz, "addrConn: updateAddrs addrs (%d of %d): %v", limit, len(addrs), addrs[:limit]) 969 970 ac.mu.Lock() 971 if equalAddressesIgnoringBalAttributes(ac.addrs, addrs) { 972 ac.mu.Unlock() 973 return 974 } 975 976 ac.addrs = addrs 977 978 if ac.state == connectivity.Shutdown || 979 ac.state == connectivity.TransientFailure || 980 ac.state == connectivity.Idle { 981 // We were not connecting, so do nothing but update the addresses. 
982 ac.mu.Unlock() 983 return 984 } 985 986 if ac.state == connectivity.Ready { 987 // Try to find the connected address. 988 for _, a := range addrs { 989 a.ServerName = ac.cc.getServerName(a) 990 if equalAddressIgnoringBalAttributes(&a, &ac.curAddr) { 991 // We are connected to a valid address, so do nothing but 992 // update the addresses. 993 ac.mu.Unlock() 994 return 995 } 996 } 997 } 998 999 // We are either connected to the wrong address or currently connecting. 1000 // Stop the current iteration and restart. 1001 1002 ac.cancel() 1003 ac.ctx, ac.cancel = context.WithCancel(ac.cc.ctx) 1004 1005 // We have to defer here because GracefulClose => onClose, which requires 1006 // locking ac.mu. 1007 if ac.transport != nil { 1008 defer ac.transport.GracefulClose() 1009 ac.transport = nil 1010 } 1011 1012 if len(addrs) == 0 { 1013 ac.updateConnectivityState(connectivity.Idle, nil) 1014 } 1015 1016 // Since we were connecting/connected, we should start a new connection 1017 // attempt. 1018 go ac.resetTransportAndUnlock() 1019 } 1020 1021 // getServerName determines the serverName to be used in the connection 1022 // handshake. The default value for the serverName is the authority on the 1023 // ClientConn, which either comes from the user's dial target or through an 1024 // authority override specified using the WithAuthority dial option. Name 1025 // resolvers can specify a per-address override for the serverName through the 1026 // resolver.Address.ServerName field which is used only if the WithAuthority 1027 // dial option was not used. The rationale is that per-address authority 1028 // overrides specified by the name resolver can represent a security risk, while 1029 // an override specified by the user is more dependable since they probably know 1030 // what they are doing. 
1031 func (cc *ClientConn) getServerName(addr resolver.Address) string { 1032 if cc.dopts.authority != "" { 1033 return cc.dopts.authority 1034 } 1035 if addr.ServerName != "" { 1036 return addr.ServerName 1037 } 1038 return cc.authority 1039 } 1040 1041 func getMethodConfig(sc *ServiceConfig, method string) MethodConfig { 1042 if sc == nil { 1043 return MethodConfig{} 1044 } 1045 if m, ok := sc.Methods[method]; ok { 1046 return m 1047 } 1048 i := strings.LastIndex(method, "/") 1049 if m, ok := sc.Methods[method[:i+1]]; ok { 1050 return m 1051 } 1052 return sc.Methods[""] 1053 } 1054 1055 // GetMethodConfig gets the method config of the input method. 1056 // If there's an exact match for input method (i.e. /service/method), we return 1057 // the corresponding MethodConfig. 1058 // If there isn't an exact match for the input method, we look for the service's default 1059 // config under the service (i.e /service/) and then for the default for all services (empty string). 1060 // 1061 // If there is a default MethodConfig for the service, we return it. 1062 // Otherwise, we return an empty MethodConfig. 1063 func (cc *ClientConn) GetMethodConfig(method string) MethodConfig { 1064 // TODO: Avoid the locking here. 1065 cc.mu.RLock() 1066 defer cc.mu.RUnlock() 1067 return getMethodConfig(cc.sc, method) 1068 } 1069 1070 func (cc *ClientConn) healthCheckConfig() *healthCheckConfig { 1071 cc.mu.RLock() 1072 defer cc.mu.RUnlock() 1073 if cc.sc == nil { 1074 return nil 1075 } 1076 return cc.sc.healthCheckConfig 1077 } 1078 1079 func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, balancer.PickResult, error) { 1080 return cc.pickerWrapper.pick(ctx, failfast, balancer.PickInfo{ 1081 Ctx: ctx, 1082 FullMethodName: method, 1083 }) 1084 } 1085 1086 func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, configSelector iresolver.ConfigSelector) { 1087 if sc == nil { 1088 // should never reach here. 
1089 return 1090 } 1091 cc.sc = sc 1092 if configSelector != nil { 1093 cc.safeConfigSelector.UpdateConfigSelector(configSelector) 1094 } 1095 1096 if cc.sc.retryThrottling != nil { 1097 newThrottler := &retryThrottler{ 1098 tokens: cc.sc.retryThrottling.MaxTokens, 1099 max: cc.sc.retryThrottling.MaxTokens, 1100 thresh: cc.sc.retryThrottling.MaxTokens / 2, 1101 ratio: cc.sc.retryThrottling.TokenRatio, 1102 } 1103 cc.retryThrottler.Store(newThrottler) 1104 } else { 1105 cc.retryThrottler.Store((*retryThrottler)(nil)) 1106 } 1107 } 1108 1109 func (cc *ClientConn) resolveNow(o resolver.ResolveNowOptions) { 1110 cc.mu.RLock() 1111 cc.resolverWrapper.resolveNow(o) 1112 cc.mu.RUnlock() 1113 } 1114 1115 func (cc *ClientConn) resolveNowLocked(o resolver.ResolveNowOptions) { 1116 cc.resolverWrapper.resolveNow(o) 1117 } 1118 1119 // ResetConnectBackoff wakes up all subchannels in transient failure and causes 1120 // them to attempt another connection immediately. It also resets the backoff 1121 // times used for subsequent attempts regardless of the current state. 1122 // 1123 // In general, this function should not be used. Typical service or network 1124 // outages result in a reasonable client reconnection strategy by default. 1125 // However, if a previously unavailable network becomes available, this may be 1126 // used to trigger an immediate reconnect. 1127 // 1128 // # Experimental 1129 // 1130 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 1131 // later release. 1132 func (cc *ClientConn) ResetConnectBackoff() { 1133 cc.mu.Lock() 1134 conns := cc.conns 1135 cc.mu.Unlock() 1136 for ac := range conns { 1137 ac.resetConnectBackoff() 1138 } 1139 } 1140 1141 // Close tears down the ClientConn and all underlying connections. 
func (cc *ClientConn) Close() error {
	// Runs last, on both return paths: cancel cc's context and then block
	// until csMgr.pubSub reports Done.
	defer func() {
		cc.cancel()
		<-cc.csMgr.pubSub.Done()
	}()

	// Prevent calls to enter/exit idle immediately, and ensure we are not
	// currently entering/exiting idle mode.
	cc.idlenessMgr.Close()

	cc.mu.Lock()
	// A nil cc.conns marks an already-closed ClientConn, so a second Close
	// reports ErrClientConnClosing.
	if cc.conns == nil {
		cc.mu.Unlock()
		return ErrClientConnClosing
	}

	// Take ownership of the subchannel set and mark the channel closed in the
	// same critical section.
	conns := cc.conns
	cc.conns = nil
	cc.csMgr.updateState(connectivity.Shutdown)

	// We can safely unlock and continue to access all fields now as
	// cc.conns==nil, preventing any further operations on cc.
	cc.mu.Unlock()

	cc.resolverWrapper.close()
	// The order of closing matters here since the balancer wrapper assumes the
	// picker is closed before it is closed.
	cc.pickerWrapper.close()
	cc.balancerWrapper.close()

	// Wait for both wrappers' serializers to finish before tearing down the
	// subchannels.
	<-cc.resolverWrapper.serializer.Done()
	<-cc.balancerWrapper.serializer.Done()
	// Tear down all subchannels concurrently and wait for every tearDown to
	// return.
	var wg sync.WaitGroup
	for ac := range conns {
		wg.Add(1)
		go func(ac *addrConn) {
			defer wg.Done()
			ac.tearDown(ErrClientConnClosing)
		}(ac)
	}
	wg.Wait()
	cc.addTraceEvent("deleted")
	// TraceEvent needs to be called before RemoveEntry, as TraceEvent may add
	// trace reference to the entity being deleted, and thus prevent it from being
	// deleted right away.
	channelz.RemoveEntry(cc.channelz.ID)

	return nil
}

// addrConn is a network connection to a given address.
type addrConn struct {
	// ctx is derived from the ClientConn's context; cancel cancels it. Both
	// are replaced by updateAddrs when a connection attempt is restarted, and
	// cancel is invoked by tearDown.
	ctx    context.Context
	cancel context.CancelFunc

	cc     *ClientConn                // The owning ClientConn.
	dopts  dialOptions                // Copy of the ClientConn's dial options taken at creation.
	acbw   *acBalancerWrapper         // Receives state changes from updateConnectivityState.
	scopts balancer.NewSubConnOptions // Options supplied when the subconn was created.

	// transport is set when there's a viable transport (note: ac state may not be READY as LB channel
	// health checking may require server to report healthy to set ac to READY), and is reset
	// to nil when the current transport should no longer be used to create a stream (e.g. after GoAway
	// is received, transport is closed, ac has been torn down).
	transport transport.ClientTransport // The current transport.

	// This mutex is used on the RPC path, so its usage should be minimized as
	// much as possible.
	// TODO: Find a lock-free way to retrieve the transport and state from the
	// addrConn.
	mu      sync.Mutex
	curAddr resolver.Address   // The current address.
	addrs   []resolver.Address // All addresses that the resolver resolved to.

	// Use updateConnectivityState for updating addrConn's connectivity state.
	state connectivity.State

	backoffIdx int // Needs to be stateful for resetConnectBackoff.
	// resetBackoff is closed and replaced by resetConnectBackoff; the backoff
	// wait in resetTransportAndUnlock selects on it to end the wait early.
	resetBackoff chan struct{}

	channelz *channelz.SubChannel
}

// updateConnectivityState moves ac to state s, publishes the new state to
// channelz, logs the change (including lastErr when non-nil) and notifies the
// balancer wrapper. It does nothing when ac is already in state s.
//
// Note: this requires a lock on ac.mu.
func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) {
	if ac.state == s {
		return
	}
	ac.state = s
	ac.channelz.ChannelMetrics.State.Store(&s)
	if lastErr == nil {
		channelz.Infof(logger, ac.channelz, "Subchannel Connectivity change to %v", s)
	} else {
		channelz.Infof(logger, ac.channelz, "Subchannel Connectivity change to %v, last error: %s", s, lastErr)
	}
	ac.acbw.updateState(s, ac.curAddr, lastErr)
}

// adjustParams updates parameters used to create transports upon
// receiving a GoAway.
func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
	// Only a "too many pings" GoAway changes anything: the keepalive interval
	// last used by this addrConn is doubled, and the ClientConn-wide value
	// (guarded by cc.mu) is raised to it if that is larger.
	if r == transport.GoAwayTooManyPings {
		v := 2 * ac.dopts.copts.KeepaliveParams.Time
		ac.cc.mu.Lock()
		if v > ac.cc.keepaliveParams.Time {
			ac.cc.keepaliveParams.Time = v
		}
		ac.cc.mu.Unlock()
	}
}

// resetTransportAndUnlock unconditionally connects the addrConn.
//
// ac.mu must be held by the caller, and this function will guarantee it is released.
//
// On failure to connect to every address it moves ac to TRANSIENT_FAILURE,
// waits out the backoff (or a backoff reset, or cancellation of the attempt's
// context) and then moves ac to IDLE unless the context was cancelled.
func (ac *addrConn) resetTransportAndUnlock() {
	// Capture the context for this attempt; ac.ctx may be replaced by
	// updateAddrs while the attempt is in flight.
	acCtx := ac.ctx
	if acCtx.Err() != nil {
		ac.mu.Unlock()
		return
	}

	addrs := ac.addrs
	backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
	// This will be the duration that dial gets to finish.
	dialDuration := minConnectTimeout
	if ac.dopts.minConnectTimeout != nil {
		dialDuration = ac.dopts.minConnectTimeout()
	}

	if dialDuration < backoffFor {
		// Give dial more time as we keep failing to connect.
		dialDuration = backoffFor
	}
	// We can potentially spend all the time trying the first address, and
	// if the server accepts the connection and then hangs, the following
	// addresses will never be tried.
	//
	// The spec doesn't mention what should be done for multiple addresses.
	// https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
	connectDeadline := time.Now().Add(dialDuration)

	ac.updateConnectivityState(connectivity.Connecting, nil)
	ac.mu.Unlock()

	if err := ac.tryAllAddrs(acCtx, addrs, connectDeadline); err != nil {
		// TODO: #7534 - Move re-resolution requests into the pick_first LB policy
		// to ensure one resolution request per pass instead of per subconn failure.
		ac.cc.resolveNow(resolver.ResolveNowOptions{})
		ac.mu.Lock()
		if acCtx.Err() != nil {
			// addrConn was torn down.
			ac.mu.Unlock()
			return
		}
		// After exhausting all addresses, the addrConn enters
		// TRANSIENT_FAILURE.
		ac.updateConnectivityState(connectivity.TransientFailure, err)

		// Backoff.
		// Read the reset channel under the lock; resetConnectBackoff closes
		// it and installs a new one.
		b := ac.resetBackoff
		ac.mu.Unlock()

		timer := time.NewTimer(backoffFor)
		select {
		case <-timer.C:
			// The full backoff elapsed: use the next backoff step next time.
			ac.mu.Lock()
			ac.backoffIdx++
			ac.mu.Unlock()
		case <-b:
			// Backoff was reset; stop waiting without advancing backoffIdx.
			timer.Stop()
		case <-acCtx.Done():
			timer.Stop()
			return
		}

		ac.mu.Lock()
		if acCtx.Err() == nil {
			ac.updateConnectivityState(connectivity.Idle, err)
		}
		ac.mu.Unlock()
		return
	}
	// Success; reset backoff.
	ac.mu.Lock()
	ac.backoffIdx = 0
	ac.mu.Unlock()
}

// tryAllAddrs tries to create a connection to the addresses, and stop when at
// the first successful one. It returns an error if no address was successfully
// connected, or updates ac appropriately with the new transport.
//
// The returned error is the one from the first address that failed, or
// errConnClosing if ctx is done before an address is tried.
func (ac *addrConn) tryAllAddrs(ctx context.Context, addrs []resolver.Address, connectDeadline time.Time) error {
	var firstConnErr error
	for _, addr := range addrs {
		ac.channelz.ChannelMetrics.Target.Store(&addr.Addr)
		if ctx.Err() != nil {
			return errConnClosing
		}
		ac.mu.Lock()

		// Refresh the keepalive params from the ClientConn, where
		// adjustParams may have raised them.
		ac.cc.mu.RLock()
		ac.dopts.copts.KeepaliveParams = ac.cc.keepaliveParams
		ac.cc.mu.RUnlock()

		// Work on a copy of the connect options so the per-subconn creds
		// bundle override does not modify ac.dopts.
		copts := ac.dopts.copts
		if ac.scopts.CredsBundle != nil {
			copts.CredsBundle = ac.scopts.CredsBundle
		}
		ac.mu.Unlock()

		channelz.Infof(logger, ac.channelz, "Subchannel picks a new address %q to connect", addr.Addr)

		err := ac.createTransport(ctx, addr, copts, connectDeadline)
		if err == nil {
			return nil
		}
		if firstConnErr == nil {
			firstConnErr = err
		}
		ac.cc.updateConnectionError(err)
	}

	// Couldn't connect to any address.
	return firstConnErr
}

// createTransport creates a connection to addr. It returns an error if the
// address was not successfully connected, or updates ac appropriately with the
// new transport.
func (ac *addrConn) createTransport(ctx context.Context, addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error {
	addr.ServerName = ac.cc.getServerName(addr)
	// hctx is the context passed to startHealthCheck; it is cancelled when
	// the transport closes or fails to be created.
	hctx, hcancel := context.WithCancel(ctx)

	// onClose is handed to the transport as its close callback.
	onClose := func(r transport.GoAwayReason) {
		ac.mu.Lock()
		defer ac.mu.Unlock()
		// adjust params based on GoAwayReason
		ac.adjustParams(r)
		if ctx.Err() != nil {
			// Already shut down or connection attempt canceled.  tearDown() or
			// updateAddrs() already cleared the transport and canceled hctx
			// via ac.ctx, and we expected this connection to be closed, so do
			// nothing here.
			return
		}
		hcancel()
		if ac.transport == nil {
			// We're still connecting to this address, which could error.  Do
			// not update the connectivity state or resolve; these will happen
			// at the end of the tryAllAddrs connection loop in the event of an
			// error.
			return
		}
		ac.transport = nil
		// Refresh the name resolver on any connection loss.
		ac.cc.resolveNow(resolver.ResolveNowOptions{})
		// Always go idle and wait for the LB policy to initiate a new
		// connection attempt.
		ac.updateConnectivityState(connectivity.Idle, nil)
	}

	// The deadline bounds only transport creation: connectCtx is cancelled
	// when this function returns.
	connectCtx, cancel := context.WithDeadline(ctx, connectDeadline)
	defer cancel()
	copts.ChannelzParent = ac.channelz

	newTr, err := transport.NewHTTP2Client(connectCtx, ac.cc.ctx, addr, copts, onClose)
	if err != nil {
		if logger.V(2) {
			logger.Infof("Creating new client transport to %q: %v", addr, err)
		}
		// newTr is either nil, or closed.
		hcancel()
		channelz.Warningf(logger, ac.channelz, "grpc: addrConn.createTransport failed to connect to %s. Err: %v", addr, err)
		return err
	}

	ac.mu.Lock()
	defer ac.mu.Unlock()
	if ctx.Err() != nil {
		// This can happen if the subConn was removed while in `Connecting`
		// state. tearDown() would have set the state to `Shutdown`, but
		// would not have closed the transport since ac.transport would not
		// have been set at that point.
		//
		// We run this in a goroutine because newTr.Close() calls onClose()
		// inline, which requires locking ac.mu.
		//
		// The error we pass to Close() is immaterial since there are no open
		// streams at this point, so no trailers with error details will be sent
		// out. We just need to pass a non-nil error.
		//
		// This can also happen when updateAddrs is called during a connection
		// attempt.
		go newTr.Close(transport.ErrConnClosing)
		return nil
	}
	if hctx.Err() != nil {
		// onClose was already called for this connection, but the connection
		// was successfully established first. Consider it a success and set
		// the new state to Idle.
		ac.updateConnectivityState(connectivity.Idle, nil)
		return nil
	}
	ac.curAddr = addr
	ac.transport = newTr
	ac.startHealthCheck(hctx) // Will set state to READY if appropriate.
	return nil
}

// startHealthCheck starts the health checking stream (RPC) to watch the health
// stats of this connection if health checking is requested and configured.
//
// LB channel health checking is enabled when all requirements below are met:
// 1. it is not disabled by the user with the WithDisableHealthCheck DialOption
// 2. internal.HealthCheckFunc is set by importing the grpc/health package
// 3. a service config with non-empty healthCheckConfig field is provided
// 4. the load balancer requests it
//
// It sets addrConn to READY if the health checking stream is not started.
//
// Caller must hold ac.mu.
func (ac *addrConn) startHealthCheck(ctx context.Context) {
	// Unless the health check goroutine below takes over, every return path
	// marks the addrConn READY via this deferred call.
	var healthcheckManagingState bool
	defer func() {
		if !healthcheckManagingState {
			ac.updateConnectivityState(connectivity.Ready, nil)
		}
	}()

	if ac.cc.dopts.disableHealthCheck {
		return
	}
	healthCheckConfig := ac.cc.healthCheckConfig()
	if healthCheckConfig == nil {
		return
	}
	if !ac.scopts.HealthCheckEnabled {
		return
	}
	healthCheckFunc := internal.HealthCheckFunc
	if healthCheckFunc == nil {
		// The health package is not imported to set health check function.
		//
		// TODO: add a link to the health check doc in the error message.
		channelz.Error(logger, ac.channelz, "Health check is requested but health check function is not set.")
		return
	}

	healthcheckManagingState = true

	// Set up the health check helper functions.
	// Both helpers compare ac.transport against the transport captured here
	// and refuse to act once it has been replaced or cleared.
	currentTr := ac.transport
	newStream := func(method string) (any, error) {
		ac.mu.Lock()
		if ac.transport != currentTr {
			ac.mu.Unlock()
			return nil, status.Error(codes.Canceled, "the provided transport is no longer valid to use")
		}
		ac.mu.Unlock()
		return newNonRetryClientStream(ctx, &StreamDesc{ServerStreams: true}, method, currentTr, ac)
	}
	setConnectivityState := func(s connectivity.State, lastErr error) {
		ac.mu.Lock()
		defer ac.mu.Unlock()
		if ac.transport != currentTr {
			return
		}
		ac.updateConnectivityState(s, lastErr)
	}
	// Start the health checking stream.
	go func() {
		err := healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName)
		if err != nil {
			if status.Code(err) == codes.Unimplemented {
				channelz.Error(logger, ac.channelz, "Subchannel health check is unimplemented at server side, thus health check is disabled")
			} else {
				channelz.Errorf(logger, ac.channelz, "Health checking failed: %v", err)
			}
		}
	}()
}

// resetConnectBackoff closes the current resetBackoff channel (ending any
// backoff wait selecting on it), resets backoffIdx to zero and installs a
// fresh channel, all under ac.mu.
func (ac *addrConn) resetConnectBackoff() {
	ac.mu.Lock()
	close(ac.resetBackoff)
	ac.backoffIdx = 0
	ac.resetBackoff = make(chan struct{})
	ac.mu.Unlock()
}

// getReadyTransport returns the transport if ac's state is READY or nil if not.
func (ac *addrConn) getReadyTransport() transport.ClientTransport {
	ac.mu.Lock()
	defer ac.mu.Unlock()
	if ac.state == connectivity.Ready {
		return ac.transport
	}
	return nil
}

// tearDown starts to tear down the addrConn.
//
// Note that tearDown doesn't remove ac from ac.cc.conns, so the addrConn struct
// will leak. In most cases, call cc.removeAddrConn() instead.
//
// It is a no-op if ac is already in the Shutdown state. If a transport is
// set, it is closed gracefully when err is errConnDrain and hard-closed with
// err otherwise.
func (ac *addrConn) tearDown(err error) {
	ac.mu.Lock()
	if ac.state == connectivity.Shutdown {
		ac.mu.Unlock()
		return
	}
	curTr := ac.transport
	ac.transport = nil
	// We have to set the state to Shutdown before anything else to prevent races
	// between setting the state and logic that waits on context cancellation / etc.
	ac.updateConnectivityState(connectivity.Shutdown, nil)
	ac.cancel()
	ac.curAddr = resolver.Address{}

	channelz.AddTraceEvent(logger, ac.channelz, 0, &channelz.TraceEvent{
		Desc:     "Subchannel deleted",
		Severity: channelz.CtInfo,
		Parent: &channelz.TraceEvent{
			Desc:     fmt.Sprintf("Subchannel(id:%d) deleted", ac.channelz.ID),
			Severity: channelz.CtInfo,
		},
	})
	// TraceEvent needs to be called before RemoveEntry, as TraceEvent may add
	// trace reference to the entity being deleted, and thus prevent it from
	// being deleted right away.
	channelz.RemoveEntry(ac.channelz.ID)
	ac.mu.Unlock()

	// We have to release the lock before the call to GracefulClose/Close here
	// because both of them call onClose(), which requires locking ac.mu.
	if curTr != nil {
		if err == errConnDrain {
			// Close the transport gracefully when the subConn is being shutdown.
			//
			// GracefulClose() may be executed multiple times if:
			// - multiple GoAway frames are received from the server
			// - there are concurrent name resolver or balancer triggered
			//   address removal and GoAway
			curTr.GracefulClose()
		} else {
			// Hard close the transport when the channel is entering idle or is
			// being shutdown. In the case where the channel is being shutdown,
			// closing of transports is also taken care of by cancellation of cc.ctx.
			// But in the case where the channel is entering idle, we need to
			// explicitly close the transports here. Instead of distinguishing
			// between these two cases, it is simpler to close the transport
			// unconditionally here.
			curTr.Close(err)
		}
	}
}

// retryThrottler is a token bucket used to throttle retries. tokens is the
// current balance and is guarded by mu; the other fields are set when the
// throttler is created and not written by its methods.
type retryThrottler struct {
	max    float64 // Upper bound for tokens.
	thresh float64 // Retries are throttled while tokens <= thresh.
	ratio  float64 // Tokens credited per successful RPC.

	mu     sync.Mutex
	tokens float64 // TODO(dfawley): replace with atomic and remove lock.
}

// throttle subtracts a retry token from the pool and returns whether a retry
// should be throttled (disallowed) based upon the retry throttling policy in
// the service config.
//
// A nil receiver never throttles. The token balance is clamped at zero.
func (rt *retryThrottler) throttle() bool {
	if rt == nil {
		return false
	}
	rt.mu.Lock()
	defer rt.mu.Unlock()
	rt.tokens--
	if rt.tokens < 0 {
		rt.tokens = 0
	}
	return rt.tokens <= rt.thresh
}

// successfulRPC credits ratio tokens to the pool, capped at max. It is a
// no-op on a nil receiver.
func (rt *retryThrottler) successfulRPC() {
	if rt == nil {
		return
	}
	rt.mu.Lock()
	defer rt.mu.Unlock()
	rt.tokens += rt.ratio
	if rt.tokens > rt.max {
		rt.tokens = rt.max
	}
}

// incrCallsStarted increments the subchannel's started-call counter and
// stores the current time as the last call start timestamp.
func (ac *addrConn) incrCallsStarted() {
	ac.channelz.ChannelMetrics.CallsStarted.Add(1)
	ac.channelz.ChannelMetrics.LastCallStartedTimestamp.Store(time.Now().UnixNano())
}

// incrCallsSucceeded increments the subchannel's succeeded-call counter.
func (ac *addrConn) incrCallsSucceeded() {
	ac.channelz.ChannelMetrics.CallsSucceeded.Add(1)
}

// incrCallsFailed increments the subchannel's failed-call counter.
func (ac *addrConn) incrCallsFailed() {
	ac.channelz.ChannelMetrics.CallsFailed.Add(1)
}

// ErrClientConnTimeout indicates that the ClientConn cannot establish the
// underlying connections within the specified timeout.
//
// Deprecated: This error is never returned by grpc and should not be
// referenced by users.
var ErrClientConnTimeout = errors.New("grpc: timed out when dialing")

// getResolver finds the scheme in the cc's resolvers or the global registry.
// scheme should always be lowercase (typically by virtue of url.Parse()
// performing proper RFC3986 behavior).
1658 func (cc *ClientConn) getResolver(scheme string) resolver.Builder { 1659 for _, rb := range cc.dopts.resolvers { 1660 if scheme == rb.Scheme() { 1661 return rb 1662 } 1663 } 1664 return resolver.Get(scheme) 1665 } 1666 1667 func (cc *ClientConn) updateConnectionError(err error) { 1668 cc.lceMu.Lock() 1669 cc.lastConnectionError = err 1670 cc.lceMu.Unlock() 1671 } 1672 1673 func (cc *ClientConn) connectionError() error { 1674 cc.lceMu.Lock() 1675 defer cc.lceMu.Unlock() 1676 return cc.lastConnectionError 1677 } 1678 1679 // initParsedTargetAndResolverBuilder parses the user's dial target and stores 1680 // the parsed target in `cc.parsedTarget`. 1681 // 1682 // The resolver to use is determined based on the scheme in the parsed target 1683 // and the same is stored in `cc.resolverBuilder`. 1684 // 1685 // Doesn't grab cc.mu as this method is expected to be called only at Dial time. 1686 func (cc *ClientConn) initParsedTargetAndResolverBuilder() error { 1687 logger.Infof("original dial target is: %q", cc.target) 1688 1689 var rb resolver.Builder 1690 parsedTarget, err := parseTarget(cc.target) 1691 if err == nil { 1692 rb = cc.getResolver(parsedTarget.URL.Scheme) 1693 if rb != nil { 1694 cc.parsedTarget = parsedTarget 1695 cc.resolverBuilder = rb 1696 return nil 1697 } 1698 } 1699 1700 // We are here because the user's dial target did not contain a scheme or 1701 // specified an unregistered scheme. We should fallback to the default 1702 // scheme, except when a custom dialer is specified in which case, we should 1703 // always use passthrough scheme. For either case, we need to respect any overridden 1704 // global defaults set by the user. 
1705 defScheme := cc.dopts.defaultScheme 1706 if internal.UserSetDefaultScheme { 1707 defScheme = resolver.GetDefaultScheme() 1708 } 1709 1710 canonicalTarget := defScheme + ":///" + cc.target 1711 1712 parsedTarget, err = parseTarget(canonicalTarget) 1713 if err != nil { 1714 return err 1715 } 1716 rb = cc.getResolver(parsedTarget.URL.Scheme) 1717 if rb == nil { 1718 return fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.URL.Scheme) 1719 } 1720 cc.parsedTarget = parsedTarget 1721 cc.resolverBuilder = rb 1722 return nil 1723 } 1724 1725 // parseTarget uses RFC 3986 semantics to parse the given target into a 1726 // resolver.Target struct containing url. Query params are stripped from the 1727 // endpoint. 1728 func parseTarget(target string) (resolver.Target, error) { 1729 u, err := url.Parse(target) 1730 if err != nil { 1731 return resolver.Target{}, err 1732 } 1733 1734 return resolver.Target{URL: *u}, nil 1735 } 1736 1737 // encodeAuthority escapes the authority string based on valid chars defined in 1738 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2. 1739 func encodeAuthority(authority string) string { 1740 const upperhex = "0123456789ABCDEF" 1741 1742 // Return for characters that must be escaped as per 1743 // Valid chars are mentioned here: 1744 // https://datatracker.ietf.org/doc/html/rfc3986#section-3.2 1745 shouldEscape := func(c byte) bool { 1746 // Alphanum are always allowed. 1747 if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' { 1748 return false 1749 } 1750 switch c { 1751 case '-', '_', '.', '~': // Unreserved characters 1752 return false 1753 case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=': // Subdelim characters 1754 return false 1755 case ':', '[', ']', '@': // Authority related delimiters 1756 return false 1757 } 1758 // Everything else must be escaped. 
1759 return true 1760 } 1761 1762 hexCount := 0 1763 for i := 0; i < len(authority); i++ { 1764 c := authority[i] 1765 if shouldEscape(c) { 1766 hexCount++ 1767 } 1768 } 1769 1770 if hexCount == 0 { 1771 return authority 1772 } 1773 1774 required := len(authority) + 2*hexCount 1775 t := make([]byte, required) 1776 1777 j := 0 1778 // This logic is a barebones version of escape in the go net/url library. 1779 for i := 0; i < len(authority); i++ { 1780 switch c := authority[i]; { 1781 case shouldEscape(c): 1782 t[j] = '%' 1783 t[j+1] = upperhex[c>>4] 1784 t[j+2] = upperhex[c&15] 1785 j += 3 1786 default: 1787 t[j] = authority[i] 1788 j++ 1789 } 1790 } 1791 return string(t) 1792 } 1793 1794 // Determine channel authority. The order of precedence is as follows: 1795 // - user specified authority override using `WithAuthority` dial option 1796 // - creds' notion of server name for the authentication handshake 1797 // - endpoint from dial target of the form "scheme://[authority]/endpoint" 1798 // 1799 // Stores the determined authority in `cc.authority`. 1800 // 1801 // Returns a non-nil error if the authority returned by the transport 1802 // credentials do not match the authority configured through the dial option. 1803 // 1804 // Doesn't grab cc.mu as this method is expected to be called only at Dial time. 1805 func (cc *ClientConn) initAuthority() error { 1806 dopts := cc.dopts 1807 // Historically, we had two options for users to specify the serverName or 1808 // authority for a channel. One was through the transport credentials 1809 // (either in its constructor, or through the OverrideServerName() method). 1810 // The other option (for cases where WithInsecure() dial option was used) 1811 // was to use the WithAuthority() dial option. 
1812 // 1813 // A few things have changed since: 1814 // - `insecure` package with an implementation of the `TransportCredentials` 1815 // interface for the insecure case 1816 // - WithAuthority() dial option support for secure credentials 1817 authorityFromCreds := "" 1818 if creds := dopts.copts.TransportCredentials; creds != nil && creds.Info().ServerName != "" { 1819 authorityFromCreds = creds.Info().ServerName 1820 } 1821 authorityFromDialOption := dopts.authority 1822 if (authorityFromCreds != "" && authorityFromDialOption != "") && authorityFromCreds != authorityFromDialOption { 1823 return fmt.Errorf("ClientConn's authority from transport creds %q and dial option %q don't match", authorityFromCreds, authorityFromDialOption) 1824 } 1825 1826 endpoint := cc.parsedTarget.Endpoint() 1827 if authorityFromDialOption != "" { 1828 cc.authority = authorityFromDialOption 1829 } else if authorityFromCreds != "" { 1830 cc.authority = authorityFromCreds 1831 } else if auth, ok := cc.resolverBuilder.(resolver.AuthorityOverrider); ok { 1832 cc.authority = auth.OverrideAuthority(cc.parsedTarget) 1833 } else if strings.HasPrefix(endpoint, ":") { 1834 cc.authority = "localhost" + endpoint 1835 } else { 1836 cc.authority = encodeAuthority(endpoint) 1837 } 1838 return nil 1839 }