// Listing of gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/grpc/clientconn.go

/*
 *
 * Copyright 2014 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package grpc

import (
	"context"
	"errors"
	"fmt"
	"math"
	"net/url"
	"reflect"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"gitee.com/ks-custle/core-gm/grpc/balancer"
	"gitee.com/ks-custle/core-gm/grpc/balancer/base"
	"gitee.com/ks-custle/core-gm/grpc/codes"
	"gitee.com/ks-custle/core-gm/grpc/connectivity"
	"gitee.com/ks-custle/core-gm/grpc/credentials"
	"gitee.com/ks-custle/core-gm/grpc/internal/backoff"
	"gitee.com/ks-custle/core-gm/grpc/internal/channelz"
	"gitee.com/ks-custle/core-gm/grpc/internal/grpcsync"
	iresolver "gitee.com/ks-custle/core-gm/grpc/internal/resolver"
	"gitee.com/ks-custle/core-gm/grpc/internal/transport"
	"gitee.com/ks-custle/core-gm/grpc/keepalive"
	"gitee.com/ks-custle/core-gm/grpc/resolver"
	"gitee.com/ks-custle/core-gm/grpc/serviceconfig"
	"gitee.com/ks-custle/core-gm/grpc/status"

	_ "gitee.com/ks-custle/core-gm/grpc/balancer/roundrobin"           // To register roundrobin.
	_ "gitee.com/ks-custle/core-gm/grpc/internal/resolver/dns"         // To register dns resolver.
	_ "gitee.com/ks-custle/core-gm/grpc/internal/resolver/passthrough" // To register passthrough resolver.
	_ "gitee.com/ks-custle/core-gm/grpc/internal/resolver/unix"        // To register unix resolver.
)

const (
	// minConnectTimeout is the minimum time to give a connection to complete.
	minConnectTimeout = 20 * time.Second
	// grpclbName must match grpclbName in grpclb/grpclb.go.
	grpclbName = "grpclb"
)

var (
	// ErrClientConnClosing indicates that the operation is illegal because
	// the ClientConn is closing.
	//
	// Deprecated: this error should not be relied upon by users; use the status
	// code of Canceled instead.
	ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
	// errConnDrain indicates that the connection starts to be drained and does not accept any new RPCs.
	errConnDrain = errors.New("grpc: the connection is drained")
	// errConnClosing indicates that the connection is closing.
	errConnClosing = errors.New("grpc: the connection is closing")
	// invalidDefaultServiceConfigErrPrefix is used to prefix the json parsing error for the default
	// service config.
	invalidDefaultServiceConfigErrPrefix = "grpc: the provided default service config is invalid"
)

// The following errors are returned from Dial and DialContext.
var (
	// errNoTransportSecurity indicates that there is no transport security
	// being set for ClientConn. Users should either set one or explicitly
	// call WithInsecure DialOption to disable security.
	errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
	// errTransportCredsAndBundle indicates that creds bundle is used together
	// with other individual Transport Credentials.
	errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
	// errNoTransportCredsInBundle indicates that the configured creds bundle
	// returned a transport credentials which was nil.
	errNoTransportCredsInBundle = errors.New("grpc: credentials.Bundle must return non-nil transport credentials")
	// errTransportCredentialsMissing indicates that users want to transmit
	// security information (e.g., OAuth2 token) which requires secure
	// connection on an insecure connection.
	errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
)

// Client-side size defaults. Where these are consumed is outside this part of
// the file; the names suggest message-size limits and I/O buffer sizes.
const (
	defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4
	defaultClientMaxSendMessageSize    = math.MaxInt32
	// defaultWriteBufSize specifies the buffer size for sending frames.
	defaultWriteBufSize = 32 * 1024
	defaultReadBufSize  = 32 * 1024
)

// Dial creates a client connection to the given target.
//
// It is shorthand for DialContext called with context.Background().
func Dial(target string, opts ...DialOption) (*ClientConn, error) {
	return DialContext(context.Background(), target, opts...)
}

// defaultConfigSelector answers per-RPC config lookups from a single service
// config. The wrapped config may be nil; DialContext installs a selector with
// a nil config before any service config is known.
type defaultConfigSelector struct {
	sc *ServiceConfig
}

// SelectConfig returns an RPCConfig that carries the RPC's own context and the
// MethodConfig that getMethodConfig finds in dcs.sc for rpcInfo.Method. The
// returned error is always nil.
func (dcs *defaultConfigSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*iresolver.RPCConfig, error) {
	return &iresolver.RPCConfig{
		Context:      rpcInfo.Context,
		MethodConfig: getMethodConfig(dcs.sc, rpcInfo.Method),
	}, nil
}

// DialContext creates a client connection to the given target. By default, it's
// a non-blocking dial (the function won't wait for connections to be
// established, and connecting happens in the background). To make it a blocking
// dial, use WithBlock() dial option.
//
// In the non-blocking case, the ctx does not act against the connection. It
// only controls the setup steps.
//
// In the blocking case, ctx can be used to cancel or expire the pending
// connection. Once this function returns, the cancellation and expiration of
// ctx will be noop. Users should call ClientConn.Close to terminate all the
// pending operations after this function returns.
//
// The target name syntax is defined in
// https://github.com/grpc/grpc/blob/master/doc/naming.md.
// e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
	cc := &ClientConn{
		target:            target,
		csMgr:             &connectivityStateManager{},
		conns:             make(map[*addrConn]struct{}),
		dopts:             defaultDialOptions(),
		blockingpicker:    newPickerWrapper(),
		czData:            new(channelzData),
		firstResolveEvent: grpcsync.NewEvent(),
	}
	// Seed the atomic.Value with a typed nil so later loads always see a
	// *retryThrottler, and install a selector with no service config yet.
	cc.retryThrottler.Store((*retryThrottler)(nil))
	cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{nil})
	// The ClientConn's own context is rooted at Background, not at the
	// caller's ctx; the caller's ctx only governs the dial steps below.
	cc.ctx, cc.cancel = context.WithCancel(context.Background())

	// Apply user options on top of the defaults.
	for _, opt := range opts {
		opt.apply(&cc.dopts)
	}

	// Collapse the configured interceptors into a single unary and a single
	// stream interceptor stored back into cc.dopts.
	chainUnaryClientInterceptors(cc)
	chainStreamClientInterceptors(cc)

	// Any error return from here on closes the partially built ClientConn.
	// This defer reads the named result err, so it also sees the error set by
	// the ctx-handling defer registered further down (which runs first).
	defer func() {
		if err != nil {
			cc.Close()
		}
	}()

	if channelz.IsOn() {
		if cc.dopts.channelzParentID != 0 {
			// Nested channel: register under the parent and record a trace
			// event on both this channel and its parent.
			cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
			channelz.AddTraceEvent(logger, cc.channelzID, 0, &channelz.TraceEventDesc{
				Desc:     "Channel Created",
				Severity: channelz.CtInfo,
				Parent: &channelz.TraceEventDesc{
					Desc:     fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID),
					Severity: channelz.CtInfo,
				},
			})
		} else {
			cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target)
			channelz.Info(logger, cc.channelzID, "Channel Created")
		}
		cc.csMgr.channelzID = cc.channelzID
	}

	// Credential validation: exactly one of TransportCredentials or
	// CredsBundle must be set, and a bundle must yield transport credentials.
	if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
		return nil, errNoTransportSecurity
	}
	if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
		return nil, errTransportCredsAndBundle
	}
	if cc.dopts.copts.CredsBundle != nil && cc.dopts.copts.CredsBundle.TransportCredentials() == nil {
		return nil, errNoTransportCredsInBundle
	}
	transportCreds := cc.dopts.copts.TransportCredentials
	if transportCreds == nil {
		transportCreds = cc.dopts.copts.CredsBundle.TransportCredentials()
	}
	// Per-RPC credentials that demand transport security are rejected when
	// the transport credentials report the "insecure" protocol.
	if transportCreds.Info().SecurityProtocol == "insecure" {
		for _, cd := range cc.dopts.copts.PerRPCCredentials {
			if cd.RequireTransportSecurity() {
				return nil, errTransportCredentialsMissing
			}
		}
	}

	// Parse the default service config JSON, if one was supplied.
	if cc.dopts.defaultServiceConfigRawJSON != nil {
		scpr := parseServiceConfig(*cc.dopts.defaultServiceConfigRawJSON)
		if scpr.Err != nil {
			return nil, fmt.Errorf("%s: %v", invalidDefaultServiceConfigErrPrefix, scpr.Err)
		}
		// The assertion result is deliberately ignored: a Config of another
		// type leaves defaultServiceConfig nil.
		cc.dopts.defaultServiceConfig, _ = scpr.Config.(*ServiceConfig)
	}
	cc.mkp = cc.dopts.copts.KeepaliveParams

	// Always advertise grpcUA; a user-supplied agent is kept as a prefix.
	if cc.dopts.copts.UserAgent != "" {
		cc.dopts.copts.UserAgent += " " + grpcUA
	} else {
		cc.dopts.copts.UserAgent = grpcUA
	}

	if cc.dopts.timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
		defer cancel()
	}
	// On return, if ctx is already done, the result is forced to (nil, error):
	//   - err already equals ctx.Err(): only conn is cleared;
	//   - no error yet, or returnLastError is off: report ctx.Err();
	//   - otherwise: report ctx.Err() combined with the last error.
	// Deferred calls run in reverse order, so this runs after cancel() (when a
	// timeout was set) and before the cc.Close() defer registered above.
	defer func() {
		select {
		case <-ctx.Done():
			switch {
			case ctx.Err() == err:
				conn = nil
			case err == nil || !cc.dopts.returnLastError:
				conn, err = nil, ctx.Err()
			default:
				conn, err = nil, fmt.Errorf("%v: %v", ctx.Err(), err)
			}
		default:
		}
	}()

	scSet := false
	if cc.dopts.scChan != nil {
		// Try to get an initial service config.
		select {
		case sc, ok := <-cc.dopts.scChan:
			if ok {
				cc.sc = &sc
				cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc})
				scSet = true
			}
		default:
		}
	}
	if cc.dopts.bs == nil {
		cc.dopts.bs = backoff.DefaultExponential
	}

	// Determine the resolver to use.
	// Note: err here is the named result (same scope), not a shadow.
	resolverBuilder, err := cc.parseTargetAndFindResolver()
	if err != nil {
		return nil, err
	}
	// Endpoint is deprecated, use GetEndpoint() instead.
	//cc.authority, err = determineAuthority(cc.parsedTarget.Endpoint, cc.target, cc.dopts)
	cc.authority, err = determineAuthority(cc.parsedTarget.GetEndpoint(), cc.target, cc.dopts)
	if err != nil {
		return nil, err
	}
	channelz.Infof(logger, cc.channelzID, "Channel authority set to %q", cc.authority)

	if cc.dopts.scChan != nil && !scSet {
		// Blocking wait for the initial service config.
		select {
		case sc, ok := <-cc.dopts.scChan:
			if ok {
				cc.sc = &sc
				cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc})
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}
	if cc.dopts.scChan != nil {
		// scWatcher exits when scChan is closed or cc.ctx is done.
		go cc.scWatcher()
	}

	// Balancers get a clone of the transport credentials (nil when only a
	// bundle was configured).
	var credsClone credentials.TransportCredentials
	if creds := cc.dopts.copts.TransportCredentials; creds != nil {
		credsClone = creds.Clone()
	}
	cc.balancerBuildOpts = balancer.BuildOptions{
		DialCreds:        credsClone,
		CredsBundle:      cc.dopts.copts.CredsBundle,
		Dialer:           cc.dopts.copts.Dialer,
		Authority:        cc.authority,
		CustomUserAgent:  cc.dopts.copts.UserAgent,
		ChannelzParentID: cc.channelzID,
		Target:           cc.parsedTarget,
	}

	// Build the resolver.
	rWrapper, err := newCCResolverWrapper(cc, resolverBuilder)
	if err != nil {
		return nil, fmt.Errorf("failed to build resolver: %v", err)
	}
	cc.mu.Lock()
	cc.resolverWrapper = rWrapper
	cc.mu.Unlock()

	// A blocking dial blocks until the clientConn is ready.
	if cc.dopts.block {
		for {
			cc.Connect()
			s := cc.GetState()
			if s == connectivity.Ready {
				break
			} else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
				// Fail fast only when the last connection error declares
				// itself non-temporary via a Temporary() method.
				if err = cc.connectionError(); err != nil {
					terr, ok := err.(interface {
						Temporary() bool
					})
					if ok && !terr.Temporary() {
						return nil, err
					}
				}
			}
			if !cc.WaitForStateChange(ctx, s) {
				// ctx got timeout or canceled.
				if err = cc.connectionError(); err != nil && cc.dopts.returnLastError {
					return nil, err
				}
				return nil, ctx.Err()
			}
		}
	}

	return cc, nil
}

// chainUnaryClientInterceptors chains all unary client interceptors into one.
//
// The result is written back to cc.dopts.unaryInt: nil when no interceptor is
// configured, the interceptor itself when there is exactly one, and otherwise
// a wrapper that starts the chain at index 0.
func chainUnaryClientInterceptors(cc *ClientConn) {
	interceptors := cc.dopts.chainUnaryInts
	// Prepend dopts.unaryInt to the chaining interceptors if it exists, since unaryInt will
	// be executed before any other chained interceptors.
	if cc.dopts.unaryInt != nil {
		interceptors = append([]UnaryClientInterceptor{cc.dopts.unaryInt}, interceptors...)
	}
	var chainedInt UnaryClientInterceptor
	if len(interceptors) == 0 {
		chainedInt = nil
	} else if len(interceptors) == 1 {
		chainedInt = interceptors[0]
	} else {
		// Each interceptor receives an invoker that calls the next one; the
		// last one receives the caller's real invoker.
		chainedInt = func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, invoker UnaryInvoker, opts ...CallOption) error {
			return interceptors[0](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, 0, invoker), opts...)
		}
	}
	cc.dopts.unaryInt = chainedInt
}

// getChainUnaryInvoker recursively generates the chained unary invoker.
357 func getChainUnaryInvoker(interceptors []UnaryClientInterceptor, curr int, finalInvoker UnaryInvoker) UnaryInvoker { 358 if curr == len(interceptors)-1 { 359 return finalInvoker 360 } 361 return func(ctx context.Context, method string, req, reply interface{}, cc *ClientConn, opts ...CallOption) error { 362 return interceptors[curr+1](ctx, method, req, reply, cc, getChainUnaryInvoker(interceptors, curr+1, finalInvoker), opts...) 363 } 364 } 365 366 // chainStreamClientInterceptors chains all stream client interceptors into one. 367 func chainStreamClientInterceptors(cc *ClientConn) { 368 interceptors := cc.dopts.chainStreamInts 369 // Prepend dopts.streamInt to the chaining interceptors if it exists, since streamInt will 370 // be executed before any other chained interceptors. 371 if cc.dopts.streamInt != nil { 372 interceptors = append([]StreamClientInterceptor{cc.dopts.streamInt}, interceptors...) 373 } 374 var chainedInt StreamClientInterceptor 375 if len(interceptors) == 0 { 376 chainedInt = nil 377 } else if len(interceptors) == 1 { 378 chainedInt = interceptors[0] 379 } else { 380 chainedInt = func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, streamer Streamer, opts ...CallOption) (ClientStream, error) { 381 return interceptors[0](ctx, desc, cc, method, getChainStreamer(interceptors, 0, streamer), opts...) 382 } 383 } 384 cc.dopts.streamInt = chainedInt 385 } 386 387 // getChainStreamer recursively generate the chained client stream constructor. 388 func getChainStreamer(interceptors []StreamClientInterceptor, curr int, finalStreamer Streamer) Streamer { 389 if curr == len(interceptors)-1 { 390 return finalStreamer 391 } 392 return func(ctx context.Context, desc *StreamDesc, cc *ClientConn, method string, opts ...CallOption) (ClientStream, error) { 393 return interceptors[curr+1](ctx, desc, cc, method, getChainStreamer(interceptors, curr+1, finalStreamer), opts...) 
394 } 395 } 396 397 // connectivityStateManager keeps the connectivity.State of ClientConn. 398 // This struct will eventually be exported so the balancers can access it. 399 type connectivityStateManager struct { 400 mu sync.Mutex 401 state connectivity.State 402 notifyChan chan struct{} 403 channelzID int64 404 } 405 406 // updateState updates the connectivity.State of ClientConn. 407 // If there's a change it notifies goroutines waiting on state change to 408 // happen. 409 func (csm *connectivityStateManager) updateState(state connectivity.State) { 410 csm.mu.Lock() 411 defer csm.mu.Unlock() 412 if csm.state == connectivity.Shutdown { 413 return 414 } 415 if csm.state == state { 416 return 417 } 418 csm.state = state 419 channelz.Infof(logger, csm.channelzID, "Channel Connectivity change to %v", state) 420 if csm.notifyChan != nil { 421 // There are other goroutines waiting on this channel. 422 close(csm.notifyChan) 423 csm.notifyChan = nil 424 } 425 } 426 427 func (csm *connectivityStateManager) getState() connectivity.State { 428 csm.mu.Lock() 429 defer csm.mu.Unlock() 430 return csm.state 431 } 432 433 func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} { 434 csm.mu.Lock() 435 defer csm.mu.Unlock() 436 if csm.notifyChan == nil { 437 csm.notifyChan = make(chan struct{}) 438 } 439 return csm.notifyChan 440 } 441 442 // ClientConnInterface defines the functions clients need to perform unary and 443 // streaming RPCs. It is implemented by *ClientConn, and is only intended to 444 // be referenced by generated code. 445 type ClientConnInterface interface { 446 // Invoke performs a unary RPC and returns after the response is received 447 // into reply. 448 Invoke(ctx context.Context, method string, args interface{}, reply interface{}, opts ...CallOption) error 449 // NewStream begins a streaming RPC. 
450 NewStream(ctx context.Context, desc *StreamDesc, method string, opts ...CallOption) (ClientStream, error) 451 } 452 453 // Assert *ClientConn implements ClientConnInterface. 454 var _ ClientConnInterface = (*ClientConn)(nil) 455 456 // ClientConn represents a virtual connection to a conceptual endpoint, to 457 // perform RPCs. 458 // 459 // A ClientConn is free to have zero or more actual connections to the endpoint 460 // based on configuration, load, etc. It is also free to determine which actual 461 // endpoints to use and may change it every RPC, permitting client-side load 462 // balancing. 463 // 464 // A ClientConn encapsulates a range of functionality including name 465 // resolution, TCP connection establishment (with retries and backoff) and TLS 466 // handshakes. It also handles errors on established connections by 467 // re-resolving the name and reconnecting. 468 type ClientConn struct { 469 ctx context.Context 470 cancel context.CancelFunc 471 472 target string 473 parsedTarget resolver.Target 474 authority string 475 dopts dialOptions 476 csMgr *connectivityStateManager 477 478 balancerBuildOpts balancer.BuildOptions 479 blockingpicker *pickerWrapper 480 481 safeConfigSelector iresolver.SafeConfigSelector 482 483 mu sync.RWMutex 484 resolverWrapper *ccResolverWrapper 485 sc *ServiceConfig 486 conns map[*addrConn]struct{} 487 // Keepalive parameter can be updated if a GoAway is received. 488 mkp keepalive.ClientParameters 489 curBalancerName string 490 balancerWrapper *ccBalancerWrapper 491 retryThrottler atomic.Value 492 493 firstResolveEvent *grpcsync.Event 494 495 channelzID int64 // channelz unique identification number 496 czData *channelzData 497 498 lceMu sync.Mutex // protects lastConnectionError 499 lastConnectionError error 500 } 501 502 // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or 503 // ctx expires. A true value is returned in former case and false in latter. 
504 // 505 // # Experimental 506 // 507 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 508 // later release. 509 func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool { 510 ch := cc.csMgr.getNotifyChan() 511 if cc.csMgr.getState() != sourceState { 512 return true 513 } 514 select { 515 case <-ctx.Done(): 516 return false 517 case <-ch: 518 return true 519 } 520 } 521 522 // GetState returns the connectivity.State of ClientConn. 523 // 524 // # Experimental 525 // 526 // Notice: This API is EXPERIMENTAL and may be changed or removed in a later 527 // release. 528 func (cc *ClientConn) GetState() connectivity.State { 529 return cc.csMgr.getState() 530 } 531 532 // Connect causes all subchannels in the ClientConn to attempt to connect if 533 // the channel is idle. Does not wait for the connection attempts to begin 534 // before returning. 535 // 536 // # Experimental 537 // 538 // Notice: This API is EXPERIMENTAL and may be changed or removed in a later 539 // release. 540 func (cc *ClientConn) Connect() { 541 cc.mu.Lock() 542 defer cc.mu.Unlock() 543 if cc.balancerWrapper != nil && cc.balancerWrapper.exitIdle() { 544 return 545 } 546 for ac := range cc.conns { 547 go ac.connect() 548 } 549 } 550 551 func (cc *ClientConn) scWatcher() { 552 for { 553 select { 554 case sc, ok := <-cc.dopts.scChan: 555 if !ok { 556 return 557 } 558 cc.mu.Lock() 559 // TODO: load balance policy runtime change is ignored. 560 // We may revisit this decision in the future. 561 cc.sc = &sc 562 cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{&sc}) 563 cc.mu.Unlock() 564 case <-cc.ctx.Done(): 565 return 566 } 567 } 568 } 569 570 // waitForResolvedAddrs blocks until the resolver has provided addresses or the 571 // context expires. Returns nil unless the context expires first; otherwise 572 // returns a status error based on the context. 
func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error {
	// This is on the RPC path, so we use a fast path to avoid the
	// more-expensive "select" below after the resolver has returned once.
	if cc.firstResolveEvent.HasFired() {
		return nil
	}
	select {
	case <-cc.firstResolveEvent.Done():
		return nil
	case <-ctx.Done():
		// The caller's context ended first: convert it to a status error.
		return status.FromContextError(ctx.Err()).Err()
	case <-cc.ctx.Done():
		// The ClientConn's own context ended first.
		return ErrClientConnClosing
	}
}

// emptyServiceConfig is the parsed form of the JSON document "{}". It is set
// once in init and used as the fallback when neither the resolver nor the dial
// options provide a service config.
var emptyServiceConfig *ServiceConfig

// init parses the empty service config and panics if that ever fails.
func init() {
	cfg := parseServiceConfig("{}")
	if cfg.Err != nil {
		panic(fmt.Sprintf("impossible error parsing empty service config: %v", cfg.Err))
	}
	emptyServiceConfig = cfg.Config.(*ServiceConfig)
}

// maybeApplyDefaultServiceConfig applies a service config when the resolver
// did not supply a usable one. Precedence: the config already held in cc.sc
// (re-applied without replacing the config selector), then the default service
// config from the dial options, then emptyServiceConfig.
//
// It is called from updateResolverState with cc.mu held.
func (cc *ClientConn) maybeApplyDefaultServiceConfig(addrs []resolver.Address) {
	if cc.sc != nil {
		cc.applyServiceConfigAndBalancer(cc.sc, nil, addrs)
		return
	}
	if cc.dopts.defaultServiceConfig != nil {
		cc.applyServiceConfigAndBalancer(cc.dopts.defaultServiceConfig, &defaultConfigSelector{cc.dopts.defaultServiceConfig}, addrs)
	} else {
		cc.applyServiceConfigAndBalancer(emptyServiceConfig, &defaultConfigSelector{emptyServiceConfig}, addrs)
	}
}

// updateResolverState handles a state update or an error from the resolver.
//
// It selects and applies a service config, then forwards the resolver state
// and balancer config to the current balancer wrapper. It returns nil when the
// ClientConn is already closed, and balancer.ErrBadResolverState when err is
// non-nil or the resolver's service config is unusable; otherwise it returns
// whatever the balancer wrapper's updateClientConnState returns.
//
// firstResolveEvent is fired on every return path, which unblocks
// waitForResolvedAddrs. cc.mu is acquired here and released by hand on each
// exit, always before the balancer wrapper is called.
func (cc *ClientConn) updateResolverState(s resolver.State, err error) error {
	defer cc.firstResolveEvent.Fire()
	cc.mu.Lock()
	// Check if the ClientConn is already closed. Some fields (e.g.
	// balancerWrapper) are set to nil when closing the ClientConn, and could
	// cause nil pointer panic if we don't have this check.
	if cc.conns == nil {
		cc.mu.Unlock()
		return nil
	}

	if err != nil {
		// May need to apply the initial service config in case the resolver
		// doesn't support service configs, or doesn't provide a service config
		// with the new addresses.
		cc.maybeApplyDefaultServiceConfig(nil)

		if cc.balancerWrapper != nil {
			cc.balancerWrapper.resolverError(err)
		}

		// No addresses are valid with err set; return early.
		cc.mu.Unlock()
		return balancer.ErrBadResolverState
	}

	var ret error
	if cc.dopts.disableServiceConfig {
		channelz.Infof(logger, cc.channelzID, "ignoring service config from resolver (%v) and applying the default because service config is disabled", s.ServiceConfig)
		cc.maybeApplyDefaultServiceConfig(s.Addresses)
	} else if s.ServiceConfig == nil {
		cc.maybeApplyDefaultServiceConfig(s.Addresses)
		// TODO: do we need to apply a failing LB policy if there is no
		// default, per the error handling design?
	} else {
		if sc, ok := s.ServiceConfig.Config.(*ServiceConfig); s.ServiceConfig.Err == nil && ok {
			// Valid service config from the resolver. A config selector
			// attached to the resolver state takes precedence over the
			// method configs in the service config.
			configSelector := iresolver.GetConfigSelector(s)
			if configSelector != nil {
				if len(s.ServiceConfig.Config.(*ServiceConfig).Methods) != 0 {
					channelz.Infof(logger, cc.channelzID, "method configs in service config will be ignored due to presence of config selector")
				}
			} else {
				configSelector = &defaultConfigSelector{sc}
			}
			cc.applyServiceConfigAndBalancer(sc, configSelector, s.Addresses)
		} else {
			// Unusable service config. With a balancer already in place the
			// update still flows to it below (and ErrBadResolverState is
			// returned); without one, the channel is put into
			// TransientFailure with an error picker.
			ret = balancer.ErrBadResolverState
			if cc.balancerWrapper == nil {
				// This err shadows the parameter, which is nil on this path.
				var err error
				if s.ServiceConfig.Err != nil {
					err = status.Errorf(codes.Unavailable, "error parsing service config: %v", s.ServiceConfig.Err)
				} else {
					err = status.Errorf(codes.Unavailable, "illegal service config type: %T", s.ServiceConfig.Config)
				}
				cc.safeConfigSelector.UpdateConfigSelector(&defaultConfigSelector{cc.sc})
				cc.blockingpicker.updatePicker(base.NewErrPicker(err))
				cc.csMgr.updateState(connectivity.TransientFailure)
				cc.mu.Unlock()
				return ret
			}
		}
	}

	// The LB config from the service config is only passed along when no
	// balancer builder was fixed through the dial options.
	var balCfg serviceconfig.LoadBalancingConfig
	if cc.dopts.balancerBuilder == nil && cc.sc != nil && cc.sc.lbConfig != nil {
		balCfg = cc.sc.lbConfig.cfg
	}

	// Snapshot what is needed from cc, then drop the lock before calling into
	// the balancer wrapper.
	cbn := cc.curBalancerName
	bw := cc.balancerWrapper
	cc.mu.Unlock()
	if cbn != grpclbName {
		// Filter any grpclb addresses since we don't have the grpclb balancer.
		// The removal is done in place; i only advances when the element at i
		// is kept.
		for i := 0; i < len(s.Addresses); {
			if s.Addresses[i].Type == resolver.GRPCLB {
				copy(s.Addresses[i:], s.Addresses[i+1:])
				s.Addresses = s.Addresses[:len(s.Addresses)-1]
				continue
			}
			i++
		}
	}
	uccsErr := bw.updateClientConnState(&balancer.ClientConnState{ResolverState: s, BalancerConfig: balCfg})
	if ret == nil {
		ret = uccsErr // prefer ErrBadResolver state since any other error is
		// currently meaningless to the caller.
	}
	return ret
}

// switchBalancer starts the switching from current balancer to the balancer
// with the given name.
//
// It is a no-op when name matches the current balancer name
// (case-insensitively) or when a balancer builder was set through the dial
// options. If no builder is registered under name, pick_first is used instead.
//
// It will NOT send the current address list to the new balancer. If needed,
// caller of this function should send address list to the new balancer after
// this function returns.
//
// Caller must hold cc.mu. Note that cc.mu is temporarily released while the
// old balancer wrapper is being closed.
func (cc *ClientConn) switchBalancer(name string) {
	if strings.EqualFold(cc.curBalancerName, name) {
		return
	}

	channelz.Infof(logger, cc.channelzID, "ClientConn switching balancer to %q", name)
	if cc.dopts.balancerBuilder != nil {
		channelz.Info(logger, cc.channelzID, "ignoring balancer switching: Balancer DialOption used instead")
		return
	}
	if cc.balancerWrapper != nil {
		// Don't hold cc.mu while closing the balancers. The balancers may call
		// methods that require cc.mu (e.g. cc.NewSubConn()). Holding the mutex
		// would cause a deadlock in that case.
		cc.mu.Unlock()
		cc.balancerWrapper.close()
		cc.mu.Lock()
	}

	builder := balancer.Get(name)
	if builder == nil {
		channelz.Warningf(logger, cc.channelzID, "Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName)
		channelz.Infof(logger, cc.channelzID, "failed to get balancer builder for: %v, using pick_first instead", name)
		builder = newPickfirstBuilder()
	} else {
		channelz.Infof(logger, cc.channelzID, "Channel switches to new LB policy %q", name)
	}

	// Record the builder's own name, which may differ from the requested one
	// (for example after the pick_first fallback).
	cc.curBalancerName = builder.Name()
	cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts)
}

// handleSubConnStateChange forwards a SubConn state change to the current
// balancer wrapper while holding cc.mu. It does nothing when cc.conns is nil,
// which this file treats as "ClientConn already closed".
func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State, err error) {
	cc.mu.Lock()
	if cc.conns == nil {
		cc.mu.Unlock()
		return
	}
	// TODO(bar switching) send updates to all balancer wrappers when balancer
	// gracefully switching is supported.
	cc.balancerWrapper.handleSubConnStateChange(sc, s, err)
	cc.mu.Unlock()
}

// newAddrConn creates an addrConn for addrs and adds it to cc.conns.
//
// The new addrConn starts in the Idle state with a context derived from
// cc.ctx. ErrClientConnClosing is returned when cc.conns is nil.
//
// Caller needs to make sure len(addrs) > 0.
func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
	ac := &addrConn{
		state:        connectivity.Idle,
		cc:           cc,
		addrs:        addrs,
		scopts:       opts,
		dopts:        cc.dopts,
		czData:       new(channelzData),
		resetBackoff: make(chan struct{}),
	}
	ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
	// Track ac in cc. This needs to be done before any getTransport(...) is called.
	cc.mu.Lock()
	if cc.conns == nil {
		cc.mu.Unlock()
		return nil, ErrClientConnClosing
	}
	if channelz.IsOn() {
		// Register the subchannel under this channel and record a trace event
		// on both the subchannel and its parent.
		ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "")
		channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{
			Desc:     "Subchannel Created",
			Severity: channelz.CtInfo,
			Parent: &channelz.TraceEventDesc{
				Desc:     fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID),
				Severity: channelz.CtInfo,
			},
		})
	}
	cc.conns[ac] = struct{}{}
	cc.mu.Unlock()
	return ac, nil
}

// removeAddrConn removes the addrConn in the subConn from clientConn.
// It also tears down the ac with the given error. The teardown happens after
// cc.mu has been released, and is skipped entirely when cc.conns is nil.
func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) {
	cc.mu.Lock()
	if cc.conns == nil {
		cc.mu.Unlock()
		return
	}
	delete(cc.conns, ac)
	cc.mu.Unlock()
	ac.tearDown(err)
}

// channelzMetric returns a snapshot of the channel's channelz metrics: the
// current state, the target, and the call counters read atomically from
// cc.czData. The last-call-started value is stored as Unix nanoseconds and is
// converted to a time.Time here.
func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric {
	return &channelz.ChannelInternalMetric{
		State:                    cc.GetState(),
		Target:                   cc.target,
		CallsStarted:             atomic.LoadInt64(&cc.czData.callsStarted),
		CallsSucceeded:           atomic.LoadInt64(&cc.czData.callsSucceeded),
		CallsFailed:              atomic.LoadInt64(&cc.czData.callsFailed),
		LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)),
	}
}

// Target returns the target string of the ClientConn.
//
// # Experimental
//
// Notice: This API is EXPERIMENTAL and may be changed or removed in a
// later release.
819 func (cc *ClientConn) Target() string { 820 return cc.target 821 } 822 823 func (cc *ClientConn) incrCallsStarted() { 824 atomic.AddInt64(&cc.czData.callsStarted, 1) 825 atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano()) 826 } 827 828 func (cc *ClientConn) incrCallsSucceeded() { 829 atomic.AddInt64(&cc.czData.callsSucceeded, 1) 830 } 831 832 func (cc *ClientConn) incrCallsFailed() { 833 atomic.AddInt64(&cc.czData.callsFailed, 1) 834 } 835 836 // connect starts creating a transport. 837 // It does nothing if the ac is not IDLE. 838 // TODO(bar) Move this to the addrConn section. 839 func (ac *addrConn) connect() error { 840 ac.mu.Lock() 841 if ac.state == connectivity.Shutdown { 842 ac.mu.Unlock() 843 return errConnClosing 844 } 845 if ac.state != connectivity.Idle { 846 ac.mu.Unlock() 847 return nil 848 } 849 // Update connectivity state within the lock to prevent subsequent or 850 // concurrent calls from resetting the transport more than once. 851 ac.updateConnectivityState(connectivity.Connecting, nil) 852 ac.mu.Unlock() 853 854 ac.resetTransport() 855 return nil 856 } 857 858 // tryUpdateAddrs tries to update ac.addrs with the new addresses list. 859 // 860 // If ac is Connecting, it returns false. The caller should tear down the ac and 861 // create a new one. Note that the backoff will be reset when this happens. 862 // 863 // If ac is TransientFailure, it updates ac.addrs and returns true. The updated 864 // addresses will be picked up by retry in the next iteration after backoff. 865 // 866 // If ac is Shutdown or Idle, it updates ac.addrs and returns true. 867 // 868 // If ac is Ready, it checks whether current connected address of ac is in the 869 // new addrs list. 870 // - If true, it updates ac.addrs and returns true. The ac will keep using 871 // the existing connection. 872 // - If false, it does nothing and returns false. 
873 func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool { 874 ac.mu.Lock() 875 defer ac.mu.Unlock() 876 channelz.Infof(logger, ac.channelzID, "addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs) 877 if ac.state == connectivity.Shutdown || 878 ac.state == connectivity.TransientFailure || 879 ac.state == connectivity.Idle { 880 ac.addrs = addrs 881 return true 882 } 883 884 if ac.state == connectivity.Connecting { 885 return false 886 } 887 888 // ac.state is Ready, try to find the connected address. 889 var curAddrFound bool 890 for _, a := range addrs { 891 a.ServerName = ac.cc.getServerName(a) 892 if reflect.DeepEqual(ac.curAddr, a) { 893 curAddrFound = true 894 break 895 } 896 } 897 channelz.Infof(logger, ac.channelzID, "addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound) 898 if curAddrFound { 899 ac.addrs = addrs 900 } 901 902 return curAddrFound 903 } 904 905 // getServerName determines the serverName to be used in the connection 906 // handshake. The default value for the serverName is the authority on the 907 // ClientConn, which either comes from the user's dial target or through an 908 // authority override specified using the WithAuthority dial option. Name 909 // resolvers can specify a per-address override for the serverName through the 910 // resolver.Address.ServerName field which is used only if the WithAuthority 911 // dial option was not used. The rationale is that per-address authority 912 // overrides specified by the name resolver can represent a security risk, while 913 // an override specified by the user is more dependable since they probably know 914 // what they are doing. 
915 func (cc *ClientConn) getServerName(addr resolver.Address) string { 916 if cc.dopts.authority != "" { 917 return cc.dopts.authority 918 } 919 if addr.ServerName != "" { 920 return addr.ServerName 921 } 922 return cc.authority 923 } 924 925 func getMethodConfig(sc *ServiceConfig, method string) MethodConfig { 926 if sc == nil { 927 return MethodConfig{} 928 } 929 if m, ok := sc.Methods[method]; ok { 930 return m 931 } 932 i := strings.LastIndex(method, "/") 933 if m, ok := sc.Methods[method[:i+1]]; ok { 934 return m 935 } 936 return sc.Methods[""] 937 } 938 939 // GetMethodConfig gets the method config of the input method. 940 // If there's an exact match for input method (i.e. /service/method), we return 941 // the corresponding MethodConfig. 942 // If there isn't an exact match for the input method, we look for the service's default 943 // config under the service (i.e /service/) and then for the default for all services (empty string). 944 // 945 // If there is a default MethodConfig for the service, we return it. 946 // Otherwise, we return an empty MethodConfig. 947 func (cc *ClientConn) GetMethodConfig(method string) MethodConfig { 948 // TODO: Avoid the locking here. 
949 cc.mu.RLock() 950 defer cc.mu.RUnlock() 951 return getMethodConfig(cc.sc, method) 952 } 953 954 func (cc *ClientConn) healthCheckConfig() *healthCheckConfig { 955 cc.mu.RLock() 956 defer cc.mu.RUnlock() 957 if cc.sc == nil { 958 return nil 959 } 960 return cc.sc.healthCheckConfig 961 } 962 963 func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) { 964 t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickInfo{ 965 Ctx: ctx, 966 FullMethodName: method, 967 }) 968 if err != nil { 969 return nil, nil, toRPCErr(err) 970 } 971 return t, done, nil 972 } 973 974 func (cc *ClientConn) applyServiceConfigAndBalancer(sc *ServiceConfig, configSelector iresolver.ConfigSelector, addrs []resolver.Address) { 975 if sc == nil { 976 // should never reach here. 977 return 978 } 979 cc.sc = sc 980 if configSelector != nil { 981 cc.safeConfigSelector.UpdateConfigSelector(configSelector) 982 } 983 984 if cc.sc.retryThrottling != nil { 985 newThrottler := &retryThrottler{ 986 tokens: cc.sc.retryThrottling.MaxTokens, 987 max: cc.sc.retryThrottling.MaxTokens, 988 thresh: cc.sc.retryThrottling.MaxTokens / 2, 989 ratio: cc.sc.retryThrottling.TokenRatio, 990 } 991 cc.retryThrottler.Store(newThrottler) 992 } else { 993 cc.retryThrottler.Store((*retryThrottler)(nil)) 994 } 995 996 if cc.dopts.balancerBuilder == nil { 997 // Only look at balancer types and switch balancer if balancer dial 998 // option is not set. 
999 var newBalancerName string 1000 if cc.sc != nil && cc.sc.lbConfig != nil { 1001 newBalancerName = cc.sc.lbConfig.name 1002 } else { 1003 var isGRPCLB bool 1004 for _, a := range addrs { 1005 if a.Type == resolver.GRPCLB { 1006 isGRPCLB = true 1007 break 1008 } 1009 } 1010 if isGRPCLB { 1011 newBalancerName = grpclbName 1012 } else if cc.sc != nil && cc.sc.LB != nil { 1013 newBalancerName = *cc.sc.LB 1014 } else { 1015 newBalancerName = PickFirstBalancerName 1016 } 1017 } 1018 cc.switchBalancer(newBalancerName) 1019 } else if cc.balancerWrapper == nil { 1020 // Balancer dial option was set, and this is the first time handling 1021 // resolved addresses. Build a balancer with dopts.balancerBuilder. 1022 cc.curBalancerName = cc.dopts.balancerBuilder.Name() 1023 cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts) 1024 } 1025 } 1026 1027 func (cc *ClientConn) resolveNow(o resolver.ResolveNowOptions) { 1028 cc.mu.RLock() 1029 r := cc.resolverWrapper 1030 cc.mu.RUnlock() 1031 if r == nil { 1032 return 1033 } 1034 go r.resolveNow(o) 1035 } 1036 1037 // ResetConnectBackoff wakes up all subchannels in transient failure and causes 1038 // them to attempt another connection immediately. It also resets the backoff 1039 // times used for subsequent attempts regardless of the current state. 1040 // 1041 // In general, this function should not be used. Typical service or network 1042 // outages result in a reasonable client reconnection strategy by default. 1043 // However, if a previously unavailable network becomes available, this may be 1044 // used to trigger an immediate reconnect. 1045 // 1046 // # Experimental 1047 // 1048 // Notice: This API is EXPERIMENTAL and may be changed or removed in a 1049 // later release. 
1050 func (cc *ClientConn) ResetConnectBackoff() { 1051 cc.mu.Lock() 1052 conns := cc.conns 1053 cc.mu.Unlock() 1054 for ac := range conns { 1055 ac.resetConnectBackoff() 1056 } 1057 } 1058 1059 // Close tears down the ClientConn and all underlying connections. 1060 func (cc *ClientConn) Close() error { 1061 defer cc.cancel() 1062 1063 cc.mu.Lock() 1064 if cc.conns == nil { 1065 cc.mu.Unlock() 1066 return ErrClientConnClosing 1067 } 1068 conns := cc.conns 1069 cc.conns = nil 1070 cc.csMgr.updateState(connectivity.Shutdown) 1071 1072 rWrapper := cc.resolverWrapper 1073 cc.resolverWrapper = nil 1074 bWrapper := cc.balancerWrapper 1075 cc.balancerWrapper = nil 1076 cc.mu.Unlock() 1077 1078 cc.blockingpicker.close() 1079 1080 if bWrapper != nil { 1081 bWrapper.close() 1082 } 1083 if rWrapper != nil { 1084 rWrapper.close() 1085 } 1086 1087 for ac := range conns { 1088 ac.tearDown(ErrClientConnClosing) 1089 } 1090 if channelz.IsOn() { 1091 ted := &channelz.TraceEventDesc{ 1092 Desc: "Channel Deleted", 1093 Severity: channelz.CtInfo, 1094 } 1095 if cc.dopts.channelzParentID != 0 { 1096 ted.Parent = &channelz.TraceEventDesc{ 1097 Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID), 1098 Severity: channelz.CtInfo, 1099 } 1100 } 1101 channelz.AddTraceEvent(logger, cc.channelzID, 0, ted) 1102 // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to 1103 // the entity being deleted, and thus prevent it from being deleted right away. 1104 channelz.RemoveEntry(cc.channelzID) 1105 } 1106 return nil 1107 } 1108 1109 // addrConn is a network connection to a given address. 
1110 type addrConn struct { 1111 ctx context.Context 1112 cancel context.CancelFunc 1113 1114 cc *ClientConn 1115 dopts dialOptions 1116 acbw balancer.SubConn 1117 scopts balancer.NewSubConnOptions 1118 1119 // transport is set when there's a viable transport (note: ac state may not be READY as LB channel 1120 // health checking may require server to report healthy to set ac to READY), and is reset 1121 // to nil when the current transport should no longer be used to create a stream (e.g. after GoAway 1122 // is received, transport is closed, ac has been torn down). 1123 transport transport.ClientTransport // The current transport. 1124 1125 mu sync.Mutex 1126 curAddr resolver.Address // The current address. 1127 addrs []resolver.Address // All addresses that the resolver resolved to. 1128 1129 // Use updateConnectivityState for updating addrConn's connectivity state. 1130 state connectivity.State 1131 1132 backoffIdx int // Needs to be stateful for resetConnectBackoff. 1133 resetBackoff chan struct{} 1134 1135 channelzID int64 // channelz unique identification number. 1136 czData *channelzData 1137 } 1138 1139 // Note: this requires a lock on ac.mu. 1140 func (ac *addrConn) updateConnectivityState(s connectivity.State, lastErr error) { 1141 if ac.state == s { 1142 return 1143 } 1144 ac.state = s 1145 channelz.Infof(logger, ac.channelzID, "Subchannel Connectivity change to %v", s) 1146 ac.cc.handleSubConnStateChange(ac.acbw, s, lastErr) 1147 } 1148 1149 // adjustParams updates parameters used to create transports upon 1150 // receiving a GoAway. 
func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
	switch r {
	case transport.GoAwayTooManyPings:
		// Double this ac's keepalive time and raise the ClientConn-wide
		// keepalive time (cc.mkp.Time) to that value if it is larger.
		v := 2 * ac.dopts.copts.KeepaliveParams.Time
		ac.cc.mu.Lock()
		if v > ac.cc.mkp.Time {
			ac.cc.mkp.Time = v
		}
		ac.cc.mu.Unlock()
	}
}

// resetTransport makes one connection attempt across all of ac.addrs.
//
// It returns immediately if the ac is already Shutdown. Otherwise it moves the
// ac to Connecting and tries every address under a shared deadline. On
// failure it requests re-resolution, moves to TransientFailure, waits for the
// backoff timer (or a backoff reset, or ac.ctx cancellation) and then moves
// to Idle. On success it resets the backoff index.
func (ac *addrConn) resetTransport() {
	ac.mu.Lock()
	if ac.state == connectivity.Shutdown {
		ac.mu.Unlock()
		return
	}

	// Snapshot the fields needed for this attempt while holding ac.mu.
	addrs := ac.addrs
	backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
	// This will be the duration that dial gets to finish.
	dialDuration := minConnectTimeout
	if ac.dopts.minConnectTimeout != nil {
		dialDuration = ac.dopts.minConnectTimeout()
	}

	if dialDuration < backoffFor {
		// Give dial more time as we keep failing to connect.
		dialDuration = backoffFor
	}
	// We can potentially spend all the time trying the first address, and
	// if the server accepts the connection and then hangs, the following
	// addresses will never be tried.
	//
	// The spec doesn't mention what should be done for multiple addresses.
	// https://github.com/grpc/grpc/blob/master/doc/connection-backoff.md#proposed-backoff-algorithm
	connectDeadline := time.Now().Add(dialDuration)

	ac.updateConnectivityState(connectivity.Connecting, nil)
	ac.mu.Unlock()

	if err := ac.tryAllAddrs(addrs, connectDeadline); err != nil {
		ac.cc.resolveNow(resolver.ResolveNowOptions{})
		// After exhausting all addresses, the addrConn enters
		// TRANSIENT_FAILURE.
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return
		}
		ac.updateConnectivityState(connectivity.TransientFailure, err)

		// Backoff.
		// Capture the reset channel under the lock; resetConnectBackoff
		// closes this channel and replaces it with a new one.
		b := ac.resetBackoff
		ac.mu.Unlock()

		timer := time.NewTimer(backoffFor)
		select {
		case <-timer.C:
			// Backoff ran to completion: advance the backoff index.
			ac.mu.Lock()
			ac.backoffIdx++
			ac.mu.Unlock()
		case <-b:
			// Backoff was reset; stop waiting without advancing the index.
			timer.Stop()
		case <-ac.ctx.Done():
			// ac.ctx was cancelled; leave the state untouched.
			timer.Stop()
			return
		}

		ac.mu.Lock()
		if ac.state != connectivity.Shutdown {
			ac.updateConnectivityState(connectivity.Idle, err)
		}
		ac.mu.Unlock()
		return
	}
	// Success; reset backoff.
	ac.mu.Lock()
	ac.backoffIdx = 0
	ac.mu.Unlock()
}

// tryAllAddrs tries to create a connection to each of the addresses in order,
// stopping at the first successful one. It returns nil on success (the new
// transport is installed on ac by createTransport), errConnClosing if the ac
// is found Shutdown before an attempt, or the error from the first failed
// attempt if no address could be connected.
func (ac *addrConn) tryAllAddrs(addrs []resolver.Address, connectDeadline time.Time) error {
	var firstConnErr error
	for _, addr := range addrs {
		ac.mu.Lock()
		if ac.state == connectivity.Shutdown {
			ac.mu.Unlock()
			return errConnClosing
		}

		// Refresh the keepalive parameters from the ClientConn before each
		// attempt; adjustParams may have changed cc.mkp.
		ac.cc.mu.RLock()
		ac.dopts.copts.KeepaliveParams = ac.cc.mkp
		ac.cc.mu.RUnlock()

		// copts is a copy, so the per-SubConn creds bundle override does not
		// modify ac.dopts.
		copts := ac.dopts.copts
		if ac.scopts.CredsBundle != nil {
			copts.CredsBundle = ac.scopts.CredsBundle
		}
		ac.mu.Unlock()

		channelz.Infof(logger, ac.channelzID, "Subchannel picks a new address %q to connect", addr.Addr)

		err := ac.createTransport(addr, copts, connectDeadline)
		if err == nil {
			return nil
		}
		if firstConnErr == nil {
			firstConnErr = err
		}
		// Record the most recent connection error on the ClientConn.
		ac.cc.updateConnectionError(err)
	}

	// Couldn't connect to any address.
	return firstConnErr
}

// createTransport creates a connection to addr. It returns an error if the
// address was not successfully connected, or updates ac appropriately with the
// new transport.
func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time) error {
	// TODO: Delete prefaceReceived and move the logic to wait for it into the
	// transport.
	prefaceReceived := grpcsync.NewEvent()
	connClosed := grpcsync.NewEvent()

	addr.ServerName = ac.cc.getServerName(addr)
	// hctx is the context handed to the health check; onClose cancels it.
	hctx, hcancel := context.WithCancel(ac.ctx)
	hcStarted := false // protected by ac.mu

	// onClose is handed to the transport as its close callback. It always
	// fires connClosed; it only clears ac.transport and moves the ac to Idle
	// when this transport was actually installed and has not been handled
	// before.
	onClose := func() {
		ac.mu.Lock()
		defer ac.mu.Unlock()
		defer connClosed.Fire()
		if !hcStarted || hctx.Err() != nil {
			// We didn't start the health check or set the state to READY, so
			// no need to do anything else here.
			//
			// OR, we have already cancelled the health check context, meaning
			// we have already called onClose once for this transport.  In this
			// case it would be dangerous to clear the transport and update the
			// state, since there may be a new transport in this addrConn.
			return
		}
		hcancel()
		ac.transport = nil
		// Refresh the name resolver
		ac.cc.resolveNow(resolver.ResolveNowOptions{})
		if ac.state != connectivity.Shutdown {
			ac.updateConnectivityState(connectivity.Idle, nil)
		}
	}

	// onGoAway is handed to the transport as its GoAway callback: it adjusts
	// connection parameters for the given reason, then treats the transport
	// as closed. ac.mu is released before onClose, which locks it again.
	onGoAway := func(r transport.GoAwayReason) {
		ac.mu.Lock()
		ac.adjustParams(r)
		ac.mu.Unlock()
		onClose()
	}

	connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline)
	defer cancel()
	if channelz.IsOn() {
		copts.ChannelzParentID = ac.channelzID
	}

	newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, addr, copts, func() { prefaceReceived.Fire() }, onGoAway, onClose)
	if err != nil {
		// newTr is either nil, or closed.
		channelz.Warningf(logger, ac.channelzID, "grpc: addrConn.createTransport failed to connect to %v. Err: %v", addr, err)
		return err
	}

	// Wait for whichever happens first: the connect deadline/cancellation,
	// the server preface, or the transport closing.
	select {
	case <-connectCtx.Done():
		// We didn't get the preface in time.
		// The error we pass to Close() is immaterial since there are no open
		// streams at this point, so no trailers with error details will be sent
		// out. We just need to pass a non-nil error.
		newTr.Close(transport.ErrConnClosing)
		if connectCtx.Err() == context.DeadlineExceeded {
			err := errors.New("failed to receive server preface within timeout")
			channelz.Warningf(logger, ac.channelzID, "grpc: addrConn.createTransport failed to connect to %v: %v", addr, err)
			return err
		}
		// The context ended for a reason other than the deadline (i.e. it
		// was cancelled); this is not reported as a connection error.
		return nil
	case <-prefaceReceived.Done():
		// We got the preface - huzzah! things are good.
		ac.mu.Lock()
		defer ac.mu.Unlock()
		if connClosed.HasFired() {
			// onClose called first; go idle but do nothing else.
			if ac.state != connectivity.Shutdown {
				ac.updateConnectivityState(connectivity.Idle, nil)
			}
			return nil
		}
		if ac.state == connectivity.Shutdown {
			// This can happen if the subConn was removed while in `Connecting`
			// state. tearDown() would have set the state to `Shutdown`, but
			// would not have closed the transport since ac.transport would not
			// been set at that point.
			//
			// We run this in a goroutine because newTr.Close() calls onClose()
			// inline, which requires locking ac.mu.
			//
			// The error we pass to Close() is immaterial since there are no open
			// streams at this point, so no trailers with error details will be sent
			// out. We just need to pass a non-nil error.
			go newTr.Close(transport.ErrConnClosing)
			return nil
		}
		// Install the new transport and let startHealthCheck decide when
		// the ac becomes READY.
		ac.curAddr = addr
		ac.transport = newTr
		hcStarted = true
		ac.startHealthCheck(hctx) // Will set state to READY if appropriate.
		return nil
	case <-connClosed.Done():
		// The transport has already closed.  If we received the preface, too,
		// this is not an error.
		select {
		case <-prefaceReceived.Done():
			return nil
		default:
			return errors.New("connection closed before server preface received")
		}
	}
}

// startHealthCheck starts the health checking stream (RPC) to watch the health
// stats of this connection if health checking is requested and configured.
//
// LB channel health checking is enabled when all requirements below are met:
// 1. it is not disabled by the user with the WithDisableHealthCheck DialOption
// 2. internal.HealthCheckFunc is set by importing the grpc/health package
// 3. a service config with non-empty healthCheckConfig field is provided
// 4. the load balancer requests it
//
// It sets addrConn to READY if the health checking stream is not started.
//
// Caller must hold ac.mu.
func (ac *addrConn) startHealthCheck(ctx context.Context) {
	// Unless the health check takes over state management below, the
	// deferred function marks the ac READY on every return path.
	var healthcheckManagingState bool
	defer func() {
		if !healthcheckManagingState {
			ac.updateConnectivityState(connectivity.Ready, nil)
		}
	}()

	if ac.cc.dopts.disableHealthCheck {
		return
	}
	healthCheckConfig := ac.cc.healthCheckConfig()
	if healthCheckConfig == nil {
		return
	}
	if !ac.scopts.HealthCheckEnabled {
		return
	}
	healthCheckFunc := ac.cc.dopts.healthCheckFunc
	if healthCheckFunc == nil {
		// The health package is not imported to set health check function.
		//
		// TODO: add a link to the health check doc in the error message.
		channelz.Error(logger, ac.channelzID, "Health check is requested but health check function is not set.")
		return
	}

	healthcheckManagingState = true

	// Set up the health check helper functions.
	// Both helpers compare ac.transport against the transport captured here
	// and refuse to act once the ac has moved on to a different transport.
	currentTr := ac.transport
	newStream := func(method string) (interface{}, error) {
		ac.mu.Lock()
		if ac.transport != currentTr {
			ac.mu.Unlock()
			return nil, status.Error(codes.Canceled, "the provided transport is no longer valid to use")
		}
		ac.mu.Unlock()
		return newNonRetryClientStream(ctx, &StreamDesc{ServerStreams: true}, method, currentTr, ac)
	}
	setConnectivityState := func(s connectivity.State, lastErr error) {
		ac.mu.Lock()
		defer ac.mu.Unlock()
		if ac.transport != currentTr {
			return
		}
		ac.updateConnectivityState(s, lastErr)
	}
	// Start the health checking stream.
	go func() {
		err := ac.cc.dopts.healthCheckFunc(ctx, newStream, setConnectivityState, healthCheckConfig.ServiceName)
		if err != nil {
			if status.Code(err) == codes.Unimplemented {
				channelz.Error(logger, ac.channelzID, "Subchannel health check is unimplemented at server side, thus health check is disabled")
			} else {
				channelz.Errorf(logger, ac.channelzID, "HealthCheckFunc exits with unexpected error %v", err)
			}
		}
	}()
}

// resetConnectBackoff closes the current resetBackoff channel, which wakes
// any goroutine selecting on it, sets the backoff index back to zero, and
// installs a fresh channel for the next reset.
func (ac *addrConn) resetConnectBackoff() {
	ac.mu.Lock()
	close(ac.resetBackoff)
	ac.backoffIdx = 0
	ac.resetBackoff = make(chan struct{})
	ac.mu.Unlock()
}

// getReadyTransport returns the transport if ac's state is READY or nil if not.
func (ac *addrConn) getReadyTransport() transport.ClientTransport {
	ac.mu.Lock()
	defer ac.mu.Unlock()
	if ac.state == connectivity.Ready {
		return ac.transport
	}
	return nil
}

// tearDown starts to tear down the addrConn.
//
// Note that tearDown doesn't remove ac from ac.cc.conns, so the addrConn struct
// will leak. In most cases, call cc.removeAddrConn() instead.
1479 func (ac *addrConn) tearDown(err error) { 1480 ac.mu.Lock() 1481 if ac.state == connectivity.Shutdown { 1482 ac.mu.Unlock() 1483 return 1484 } 1485 curTr := ac.transport 1486 ac.transport = nil 1487 // We have to set the state to Shutdown before anything else to prevent races 1488 // between setting the state and logic that waits on context cancellation / etc. 1489 ac.updateConnectivityState(connectivity.Shutdown, nil) 1490 ac.cancel() 1491 ac.curAddr = resolver.Address{} 1492 if err == errConnDrain && curTr != nil { 1493 // GracefulClose(...) may be executed multiple times when 1494 // i) receiving multiple GoAway frames from the server; or 1495 // ii) there are concurrent name resolver/Balancer triggered 1496 // address removal and GoAway. 1497 // We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu. 1498 ac.mu.Unlock() 1499 curTr.GracefulClose() 1500 ac.mu.Lock() 1501 } 1502 if channelz.IsOn() { 1503 channelz.AddTraceEvent(logger, ac.channelzID, 0, &channelz.TraceEventDesc{ 1504 Desc: "Subchannel Deleted", 1505 Severity: channelz.CtInfo, 1506 Parent: &channelz.TraceEventDesc{ 1507 Desc: fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID), 1508 Severity: channelz.CtInfo, 1509 }, 1510 }) 1511 // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to 1512 // the entity being deleted, and thus prevent it from being deleted right away. 
1513 channelz.RemoveEntry(ac.channelzID) 1514 } 1515 ac.mu.Unlock() 1516 } 1517 1518 func (ac *addrConn) getState() connectivity.State { 1519 ac.mu.Lock() 1520 defer ac.mu.Unlock() 1521 return ac.state 1522 } 1523 1524 func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric { 1525 ac.mu.Lock() 1526 addr := ac.curAddr.Addr 1527 ac.mu.Unlock() 1528 return &channelz.ChannelInternalMetric{ 1529 State: ac.getState(), 1530 Target: addr, 1531 CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted), 1532 CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded), 1533 CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed), 1534 LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)), 1535 } 1536 } 1537 1538 func (ac *addrConn) incrCallsStarted() { 1539 atomic.AddInt64(&ac.czData.callsStarted, 1) 1540 atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano()) 1541 } 1542 1543 func (ac *addrConn) incrCallsSucceeded() { 1544 atomic.AddInt64(&ac.czData.callsSucceeded, 1) 1545 } 1546 1547 func (ac *addrConn) incrCallsFailed() { 1548 atomic.AddInt64(&ac.czData.callsFailed, 1) 1549 } 1550 1551 type retryThrottler struct { 1552 max float64 1553 thresh float64 1554 ratio float64 1555 1556 mu sync.Mutex 1557 tokens float64 // TODO(dfawley): replace with atomic and remove lock. 1558 } 1559 1560 // throttle subtracts a retry token from the pool and returns whether a retry 1561 // should be throttled (disallowed) based upon the retry throttling policy in 1562 // the service config. 
1563 func (rt *retryThrottler) throttle() bool { 1564 if rt == nil { 1565 return false 1566 } 1567 rt.mu.Lock() 1568 defer rt.mu.Unlock() 1569 rt.tokens-- 1570 if rt.tokens < 0 { 1571 rt.tokens = 0 1572 } 1573 return rt.tokens <= rt.thresh 1574 } 1575 1576 func (rt *retryThrottler) successfulRPC() { 1577 if rt == nil { 1578 return 1579 } 1580 rt.mu.Lock() 1581 defer rt.mu.Unlock() 1582 rt.tokens += rt.ratio 1583 if rt.tokens > rt.max { 1584 rt.tokens = rt.max 1585 } 1586 } 1587 1588 type channelzChannel struct { 1589 cc *ClientConn 1590 } 1591 1592 func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric { 1593 return c.cc.channelzMetric() 1594 } 1595 1596 // ErrClientConnTimeout indicates that the ClientConn cannot establish the 1597 // underlying connections within the specified timeout. 1598 // 1599 // Deprecated: This error is never returned by grpc and should not be 1600 // referenced by users. 1601 var ErrClientConnTimeout = errors.New("grpc: timed out when dialing") 1602 1603 func (cc *ClientConn) getResolver(scheme string) resolver.Builder { 1604 for _, rb := range cc.dopts.resolvers { 1605 if scheme == rb.Scheme() { 1606 return rb 1607 } 1608 } 1609 return resolver.Get(scheme) 1610 } 1611 1612 func (cc *ClientConn) updateConnectionError(err error) { 1613 cc.lceMu.Lock() 1614 cc.lastConnectionError = err 1615 cc.lceMu.Unlock() 1616 } 1617 1618 func (cc *ClientConn) connectionError() error { 1619 cc.lceMu.Lock() 1620 defer cc.lceMu.Unlock() 1621 return cc.lastConnectionError 1622 } 1623 1624 func (cc *ClientConn) parseTargetAndFindResolver() (resolver.Builder, error) { 1625 channelz.Infof(logger, cc.channelzID, "original dial target is: %q", cc.target) 1626 1627 var rb resolver.Builder 1628 parsedTarget, err := parseTarget(cc.target) 1629 if err != nil { 1630 channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", cc.target, err) 1631 } else { 1632 channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", 
parsedTarget) 1633 // Target.Scheme is deprecated, use Target.GetScheme() instead. 1634 //rb = cc.getResolver(parsedTarget.Scheme) 1635 rb = cc.getResolver(parsedTarget.GetScheme()) 1636 if rb != nil { 1637 cc.parsedTarget = parsedTarget 1638 return rb, nil 1639 } 1640 } 1641 1642 // We are here because the user's dial target did not contain a scheme or 1643 // specified an unregistered scheme. We should fallback to the default 1644 // scheme, except when a custom dialer is specified in which case, we should 1645 // always use passthrough scheme. 1646 defScheme := resolver.GetDefaultScheme() 1647 channelz.Infof(logger, cc.channelzID, "fallback to scheme %q", defScheme) 1648 canonicalTarget := defScheme + ":///" + cc.target 1649 1650 parsedTarget, err = parseTarget(canonicalTarget) 1651 if err != nil { 1652 channelz.Infof(logger, cc.channelzID, "dial target %q parse failed: %v", canonicalTarget, err) 1653 return nil, err 1654 } 1655 channelz.Infof(logger, cc.channelzID, "parsed dial target is: %+v", parsedTarget) 1656 // Target.Scheme is deprecated, use Target.GetScheme() instead. 1657 //rb = cc.getResolver(parsedTarget.Scheme) 1658 rb = cc.getResolver(parsedTarget.GetScheme()) 1659 if rb == nil { 1660 // Target.Scheme is deprecated, use Target.GetScheme() instead. 1661 //return nil, fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.Scheme) 1662 return nil, fmt.Errorf("could not get resolver for default scheme: %q", parsedTarget.GetScheme()) 1663 } 1664 cc.parsedTarget = parsedTarget 1665 return rb, nil 1666 } 1667 1668 // parseTarget uses RFC 3986 semantics to parse the given target into a 1669 // resolver.Target struct containing scheme, authority and endpoint. Query 1670 // params are stripped from the endpoint. 
1671 func parseTarget(target string) (resolver.Target, error) { 1672 u, err := url.Parse(target) 1673 if err != nil { 1674 return resolver.Target{}, err 1675 } 1676 // For targets of the form "[scheme]://[authority]/endpoint, the endpoint 1677 // value returned from url.Parse() contains a leading "/". Although this is 1678 // in accordance with RFC 3986, we do not want to break existing resolver 1679 // implementations which expect the endpoint without the leading "/". So, we 1680 // end up stripping the leading "/" here. But this will result in an 1681 // incorrect parsing for something like "unix:///path/to/socket". Since we 1682 // own the "unix" resolver, we can workaround in the unix resolver by using 1683 // the `URL` field instead of the `Endpoint` field. 1684 1685 // Target.Endpoint已弃用,这里无需事先计算,获取endpoint的逻辑已移动到Target.GetEndpoint()方法中 1686 //endpoint := u.Path 1687 //if endpoint == "" { 1688 // endpoint = u.Opaque 1689 //} 1690 //endpoint = strings.TrimPrefix(endpoint, "/") 1691 return resolver.Target{ 1692 // Target.Scheme、Target.Authority、Target.Endpoint are deprecated. 1693 //Scheme: u.Scheme, 1694 //Authority: u.Host, 1695 //Endpoint: endpoint, 1696 URL: *u, 1697 }, nil 1698 } 1699 1700 // Determine channel authority. The order of precedence is as follows: 1701 // - user specified authority override using `WithAuthority` dial option 1702 // - creds' notion of server name for the authentication handshake 1703 // - endpoint from dial target of the form "scheme://[authority]/endpoint" 1704 func determineAuthority(endpoint, target string, dopts dialOptions) (string, error) { 1705 // Historically, we had two options for users to specify the serverName or 1706 // authority for a channel. One was through the transport credentials 1707 // (either in its constructor, or through the OverrideServerName() method). 1708 // The other option (for cases where WithInsecure() dial option was used) 1709 // was to use the WithAuthority() dial option. 
1710 // 1711 // A few things have changed since: 1712 // - `insecure` package with an implementation of the `TransportCredentials` 1713 // interface for the insecure case 1714 // - WithAuthority() dial option support for secure credentials 1715 authorityFromCreds := "" 1716 if creds := dopts.copts.TransportCredentials; creds != nil && creds.Info().ServerName != "" { 1717 authorityFromCreds = creds.Info().ServerName 1718 } 1719 authorityFromDialOption := dopts.authority 1720 if (authorityFromCreds != "" && authorityFromDialOption != "") && authorityFromCreds != authorityFromDialOption { 1721 return "", fmt.Errorf("ClientConn's authority from transport creds %q and dial option %q don't match", authorityFromCreds, authorityFromDialOption) 1722 } 1723 1724 switch { 1725 case authorityFromDialOption != "": 1726 return authorityFromDialOption, nil 1727 case authorityFromCreds != "": 1728 return authorityFromCreds, nil 1729 case strings.HasPrefix(target, "unix:") || strings.HasPrefix(target, "unix-abstract:"): 1730 // TODO: remove when the unix resolver implements optional interface to 1731 // return channel authority. 1732 return "localhost", nil 1733 case strings.HasPrefix(endpoint, ":"): 1734 return "localhost" + endpoint, nil 1735 default: 1736 // TODO: Define an optional interface on the resolver builder to return 1737 // the channel authority given the user's dial target. For resolvers 1738 // which don't implement this interface, we will use the endpoint from 1739 // "scheme://authority/endpoint" as the default authority. 1740 return endpoint, nil 1741 } 1742 }