dubbo.apache.org/dubbo-go/v3@v3.1.1/xds/balancer/cdsbalancer/cdsbalancer.go (about) 1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 /* 19 * 20 * Copyright 2019 gRPC authors. 21 * 22 */ 23 24 // Package cdsbalancer implements a balancer to handle CDS responses. 25 package cdsbalancer 26 27 import ( 28 "encoding/json" 29 "errors" 30 "fmt" 31 ) 32 33 import ( 34 dubbogoLogger "github.com/dubbogo/gost/log/logger" 35 36 "google.golang.org/grpc/balancer" 37 "google.golang.org/grpc/balancer/base" 38 39 "google.golang.org/grpc/connectivity" 40 41 "google.golang.org/grpc/credentials" 42 43 "google.golang.org/grpc/resolver" 44 45 "google.golang.org/grpc/serviceconfig" 46 ) 47 48 import ( 49 "dubbo.apache.org/dubbo-go/v3/xds/balancer/clusterresolver" 50 "dubbo.apache.org/dubbo-go/v3/xds/balancer/ringhash" 51 "dubbo.apache.org/dubbo-go/v3/xds/client" 52 "dubbo.apache.org/dubbo-go/v3/xds/client/resource" 53 "dubbo.apache.org/dubbo-go/v3/xds/credentials/certprovider" 54 "dubbo.apache.org/dubbo-go/v3/xds/utils/buffer" 55 xdsinternal "dubbo.apache.org/dubbo-go/v3/xds/utils/credentials/xds" 56 "dubbo.apache.org/dubbo-go/v3/xds/utils/grpcsync" 57 "dubbo.apache.org/dubbo-go/v3/xds/utils/pretty" 58 internalserviceconfig "dubbo.apache.org/dubbo-go/v3/xds/utils/serviceconfig" 59 ) 60 61 const ( 62 cdsName = "cds_experimental" 63 ) 64 65 var ( 66 errBalancerClosed = errors.New("cdsBalancer is closed") 67 68 // newChildBalancer is a helper function to build a new cluster_resolver 69 // balancer and will be overridden in unittests. 70 newChildBalancer = func(cc balancer.ClientConn, opts balancer.BuildOptions) (balancer.Balancer, error) { 71 builder := balancer.Get(clusterresolver.Name) 72 if builder == nil { 73 return nil, fmt.Errorf("xds: no balancer builder with name %v", clusterresolver.Name) 74 } 75 // We directly pass the parent clientConn to the underlying 76 // cluster_resolver balancer because the cdsBalancer does not deal with 77 // subConns. 78 return builder.Build(cc, opts), nil 79 } 80 buildProvider = buildProviderFunc 81 ) 82 83 func init() { 84 balancer.Register(bb{}) 85 } 86 87 // bb implements the balancer.Builder interface to help build a cdsBalancer. 88 // It also implements the balancer.ConfigParser interface to help parse the 89 // JSON service config, to be passed to the cdsBalancer. 90 type bb struct{} 91 92 // Build creates a new CDS balancer with the ClientConn. 93 func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 94 b := &cdsBalancer{ 95 bOpts: opts, 96 updateCh: buffer.NewUnbounded(), 97 closed: grpcsync.NewEvent(), 98 done: grpcsync.NewEvent(), 99 xdsHI: xdsinternal.NewHandshakeInfo(nil, nil), 100 } 101 b.logger = dubbogoLogger.GetLogger() 102 b.logger.Infof("Created") 103 var creds credentials.TransportCredentials 104 switch { 105 case opts.DialCreds != nil: 106 creds = opts.DialCreds 107 case opts.CredsBundle != nil: 108 creds = opts.CredsBundle.TransportCredentials() 109 } 110 if xc, ok := creds.(interface{ UsesXDS() bool }); ok && xc.UsesXDS() { 111 b.xdsCredsInUse = true 112 } 113 b.logger.Infof("xDS credentials in use: %v", b.xdsCredsInUse) 114 b.clusterHandler = newClusterHandler(b) 115 b.ccw = &ccWrapper{ 116 ClientConn: cc, 117 xdsHI: b.xdsHI, 118 } 119 go b.run() 120 return b 121 } 122 123 // Name returns the name of balancers built by this builder. 124 func (bb) Name() string { 125 return cdsName 126 } 127 128 // lbConfig represents the loadBalancingConfig section of the service config 129 // for the cdsBalancer. 130 type lbConfig struct { 131 serviceconfig.LoadBalancingConfig 132 ClusterName string `json:"Cluster"` 133 } 134 135 // ParseConfig parses the JSON load balancer config provided into an 136 // internal form or returns an error if the config is invalid. 137 func (bb) ParseConfig(c json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { 138 var cfg lbConfig 139 if err := json.Unmarshal(c, &cfg); err != nil { 140 return nil, fmt.Errorf("xds: unable to unmarshal lbconfig: %s, error: %v", string(c), err) 141 } 142 return &cfg, nil 143 } 144 145 // ccUpdate wraps a clientConn update received from gRPC (pushed from the 146 // xdsResolver). A valid clusterName causes the cdsBalancer to register a CDS 147 // watcher with the xdsClient, while a non-nil error causes it to cancel the 148 // existing watch and propagate the error to the underlying cluster_resolver 149 // balancer. 150 type ccUpdate struct { 151 clusterName string 152 err error 153 } 154 155 // scUpdate wraps a subConn update received from gRPC. This is directly passed 156 // on to the cluster_resolver balancer. 157 type scUpdate struct { 158 subConn balancer.SubConn 159 state balancer.SubConnState 160 } 161 162 type exitIdle struct{} 163 164 // cdsBalancer implements a CDS based LB policy. It instantiates a 165 // cluster_resolver balancer to further resolve the serviceName received from 166 // CDS, into localities and endpoints. Implements the balancer.Balancer 167 // interface which is exposed to gRPC and implements the balancer.ClientConn 168 // interface which is exposed to the cluster_resolver balancer. 169 type cdsBalancer struct { 170 ccw *ccWrapper // ClientConn interface passed to child LB. 171 bOpts balancer.BuildOptions // BuildOptions passed to child LB. 172 updateCh *buffer.Unbounded // Channel for gRPC and xdsClient updates. 173 xdsClient client.XDSClient // xDS client to watch Cluster resource. 174 clusterHandler *clusterHandler // To watch the clusters. 175 childLB balancer.Balancer 176 logger dubbogoLogger.Logger 177 closed *grpcsync.Event 178 done *grpcsync.Event 179 180 // The certificate providers are cached here to that they can be closed when 181 // a new provider is to be created. 182 cachedRoot certprovider.Provider 183 cachedIdentity certprovider.Provider 184 xdsHI *xdsinternal.HandshakeInfo 185 xdsCredsInUse bool 186 } 187 188 // handleClientConnUpdate handles a ClientConnUpdate received from gRPC. Good 189 // updates lead to registration of a CDS watch. Updates with error lead to 190 // cancellation of existing watch and propagation of the same error to the 191 // cluster_resolver balancer. 192 func (b *cdsBalancer) handleClientConnUpdate(update *ccUpdate) { 193 // We first handle errors, if any, and then proceed with handling the 194 // update, only if the status quo has changed. 195 if err := update.err; err != nil { 196 b.handleErrorFromUpdate(err, true) 197 return 198 } 199 b.clusterHandler.updateRootCluster(update.clusterName) 200 } 201 202 // handleSecurityConfig processes the security configuration received from the 203 // management server, creates appropriate certificate provider plugins, and 204 // updates the HandhakeInfo which is added as an address attribute in 205 // NewSubConn() calls. 206 func (b *cdsBalancer) handleSecurityConfig(config *resource.SecurityConfig) error { 207 // If xdsCredentials are not in use, i.e, the user did not want to get 208 // security configuration from an xDS server, we should not be acting on the 209 // received security config here. Doing so poses a security threat. 210 if !b.xdsCredsInUse { 211 return nil 212 } 213 214 // Security config being nil is a valid case where the management server has 215 // not sent any security configuration. The xdsCredentials implementation 216 // handles this by delegating to its fallback credentials. 217 if config == nil { 218 // We need to explicitly set the fields to nil here since this might be 219 // a case of switching from a good security configuration to an empty 220 // one where fallback credentials are to be used. 221 b.xdsHI.SetRootCertProvider(nil) 222 b.xdsHI.SetIdentityCertProvider(nil) 223 b.xdsHI.SetSANMatchers(nil) 224 return nil 225 } 226 227 bc := b.xdsClient.BootstrapConfig() 228 if bc == nil || bc.CertProviderConfigs == nil { 229 // Bootstrap did not find any certificate provider configs, but the user 230 // has specified xdsCredentials and the management server has sent down 231 // security configuration. 232 return errors.New("xds: certificate_providers config missing in bootstrap file") 233 } 234 cpc := bc.CertProviderConfigs 235 236 // A root provider is required whether we are using TLS or mTLS. 237 rootProvider, err := buildProvider(cpc, config.RootInstanceName, config.RootCertName, false, true) 238 if err != nil { 239 return err 240 } 241 242 // The identity provider is only present when using mTLS. 243 var identityProvider certprovider.Provider 244 if name, cert := config.IdentityInstanceName, config.IdentityCertName; name != "" { 245 var err error 246 identityProvider, err = buildProvider(cpc, name, cert, true, false) 247 if err != nil { 248 return err 249 } 250 } 251 252 // Close the old providers and cache the new ones. 253 if b.cachedRoot != nil { 254 b.cachedRoot.Close() 255 } 256 if b.cachedIdentity != nil { 257 b.cachedIdentity.Close() 258 } 259 b.cachedRoot = rootProvider 260 b.cachedIdentity = identityProvider 261 262 // We set all fields here, even if some of them are nil, since they 263 // could have been non-nil earlier. 264 b.xdsHI.SetRootCertProvider(rootProvider) 265 b.xdsHI.SetIdentityCertProvider(identityProvider) 266 b.xdsHI.SetSANMatchers(config.SubjectAltNameMatchers) 267 return nil 268 } 269 270 func buildProviderFunc(configs map[string]*certprovider.BuildableConfig, instanceName, certName string, wantIdentity, wantRoot bool) (certprovider.Provider, error) { 271 cfg, ok := configs[instanceName] 272 if !ok { 273 return nil, fmt.Errorf("certificate provider instance %q not found in bootstrap file", instanceName) 274 } 275 provider, err := cfg.Build(certprovider.BuildOptions{ 276 CertName: certName, 277 WantIdentity: wantIdentity, 278 WantRoot: wantRoot, 279 }) 280 if err != nil { 281 // This error is not expected since the bootstrap process parses the 282 // config and makes sure that it is acceptable to the plugin. Still, it 283 // is possible that the plugin parses the config successfully, but its 284 // Build() method errors out. 285 return nil, fmt.Errorf("xds: failed to get security plugin instance (%+v): %v", cfg, err) 286 } 287 return provider, nil 288 } 289 290 // handleWatchUpdate handles a watch update from the xDS Client. Good updates 291 // lead to clientConn updates being invoked on the underlying cluster_resolver balancer. 292 func (b *cdsBalancer) handleWatchUpdate(update clusterHandlerUpdate) { 293 if err := update.err; err != nil { 294 b.logger.Warnf("Watch error from xds-client %p: %v", b.xdsClient, err) 295 b.handleErrorFromUpdate(err, false) 296 return 297 } 298 299 b.logger.Infof("Watch update from xds-client %p, content: %+v, security config: %v", b.xdsClient, pretty.ToJSON(update.updates), pretty.ToJSON(update.securityCfg)) 300 301 // Process the security config from the received update before building the 302 // child policy or forwarding the update to it. We do this because the child 303 // policy may try to create a new subConn inline. Processing the security 304 // configuration here and setting up the handshakeInfo will make sure that 305 // such attempts are handled properly. 306 if err := b.handleSecurityConfig(update.securityCfg); err != nil { 307 // If the security config is invalid, for example, if the provider 308 // instance is not found in the bootstrap config, we need to put the 309 // channel in transient failure. 310 b.logger.Warnf("Invalid security config update from xds-client %p: %v", b.xdsClient, err) 311 b.handleErrorFromUpdate(err, false) 312 return 313 } 314 315 // The first good update from the watch API leads to the instantiation of an 316 // cluster_resolver balancer. Further updates/errors are propagated to the existing 317 // cluster_resolver balancer. 318 if b.childLB == nil { 319 childLB, err := newChildBalancer(b.ccw, b.bOpts) 320 if err != nil { 321 b.logger.Errorf("Failed to create child policy of type %s, %v", clusterresolver.Name, err) 322 return 323 } 324 b.childLB = childLB 325 b.logger.Infof("Created child policy %p of type %s", b.childLB, clusterresolver.Name) 326 } 327 328 dms := make([]clusterresolver.DiscoveryMechanism, len(update.updates)) 329 for i, cu := range update.updates { 330 switch cu.ClusterType { 331 case resource.ClusterTypeEDS: 332 dms[i] = clusterresolver.DiscoveryMechanism{ 333 Type: clusterresolver.DiscoveryMechanismTypeEDS, 334 Cluster: cu.ClusterName, 335 EDSServiceName: cu.EDSServiceName, 336 MaxConcurrentRequests: cu.MaxRequests, 337 } 338 if cu.EnableLRS { 339 // An empty string here indicates that the cluster_resolver balancer should use the 340 // same xDS server for load reporting as it does for EDS 341 // requests/responses. 342 dms[i].LoadReportingServerName = new(string) 343 344 } 345 case resource.ClusterTypeLogicalDNS: 346 dms[i] = clusterresolver.DiscoveryMechanism{ 347 Type: clusterresolver.DiscoveryMechanismTypeLogicalDNS, 348 DNSHostname: cu.DNSHostName, 349 } 350 default: 351 b.logger.Infof("unexpected cluster type %v when handling update from cluster handler", cu.ClusterType) 352 } 353 } 354 lbCfg := &clusterresolver.LBConfig{ 355 DiscoveryMechanisms: dms, 356 } 357 358 // lbPolicy is set only when the policy is ringhash. The default (when it's 359 // not set) is roundrobin. And similarly, we only need to set XDSLBPolicy 360 // for ringhash (it also defaults to roundrobin). 361 if lbp := update.lbPolicy; lbp != nil { 362 lbCfg.XDSLBPolicy = &internalserviceconfig.BalancerConfig{ 363 Name: ringhash.Name, 364 Config: &ringhash.LBConfig{ 365 MinRingSize: lbp.MinimumRingSize, 366 MaxRingSize: lbp.MaximumRingSize, 367 }, 368 } 369 } 370 371 ccState := balancer.ClientConnState{ 372 ResolverState: client.SetClient(resolver.State{}, b.xdsClient), 373 BalancerConfig: lbCfg, 374 } 375 if err := b.childLB.UpdateClientConnState(ccState); err != nil { 376 b.logger.Errorf("xds: cluster_resolver balancer.UpdateClientConnState(%+v) returned error: %v", ccState, err) 377 } 378 } 379 380 // run is a long-running goroutine which handles all updates from gRPC. All 381 // methods which are invoked directly by gRPC or xdsClient simply push an 382 // update onto a channel which is read and acted upon right here. 383 func (b *cdsBalancer) run() { 384 for { 385 select { 386 case u := <-b.updateCh.Get(): 387 b.updateCh.Load() 388 switch update := u.(type) { 389 case *ccUpdate: 390 b.handleClientConnUpdate(update) 391 case *scUpdate: 392 // SubConn updates are passthrough and are simply handed over to 393 // the underlying cluster_resolver balancer. 394 if b.childLB == nil { 395 b.logger.Errorf("xds: received scUpdate {%+v} with no cluster_resolver balancer", update) 396 break 397 } 398 b.childLB.UpdateSubConnState(update.subConn, update.state) 399 case exitIdle: 400 if b.childLB == nil { 401 b.logger.Errorf("xds: received ExitIdle with no child balancer") 402 break 403 } 404 // This implementation assumes the child balancer supports 405 // ExitIdle (but still checks for the interface's existence to 406 // avoid a panic if not). If the child does not, no subconns 407 // will be connected. 408 if ei, ok := b.childLB.(balancer.ExitIdler); ok { 409 ei.ExitIdle() 410 } 411 } 412 case u := <-b.clusterHandler.updateChannel: 413 b.handleWatchUpdate(u) 414 case <-b.closed.Done(): 415 b.clusterHandler.close() 416 if b.childLB != nil { 417 b.childLB.Close() 418 b.childLB = nil 419 } 420 if b.cachedRoot != nil { 421 b.cachedRoot.Close() 422 } 423 if b.cachedIdentity != nil { 424 b.cachedIdentity.Close() 425 } 426 b.logger.Infof("Shutdown") 427 b.done.Fire() 428 return 429 } 430 } 431 } 432 433 // handleErrorFromUpdate handles both the error from parent ClientConn (from 434 // resolver) and the error from xds client (from the watcher). fromParent is 435 // true if error is from parent ClientConn. 436 // 437 // If the error is connection error, it's passed down to the child policy. 438 // Nothing needs to be done in CDS (e.g. it doesn't go into fallback). 439 // 440 // If the error is resource-not-found: 441 // - If it's from resolver, it means LDS resources were removed. The CDS watch 442 // should be canceled. 443 // - If it's from xds client, it means CDS resource were removed. The CDS 444 // watcher should keep watching. 445 // 446 // In both cases, the error will be forwarded to the child balancer. And if 447 // error is resource-not-found, the child balancer will stop watching EDS. 448 func (b *cdsBalancer) handleErrorFromUpdate(err error, fromParent bool) { 449 // This is not necessary today, because xds client never sends connection 450 // errors. 451 if fromParent && resource.ErrType(err) == resource.ErrorTypeResourceNotFound { 452 b.clusterHandler.close() 453 } 454 if b.childLB != nil { 455 if resource.ErrType(err) != resource.ErrorTypeConnection { 456 // Connection errors will be sent to the child balancers directly. 457 // There's no need to forward them. 458 b.childLB.ResolverError(err) 459 } 460 } else { 461 // If child balancer was never created, fail the RPCs with 462 // errors. 463 b.ccw.UpdateState(balancer.State{ 464 ConnectivityState: connectivity.TransientFailure, 465 Picker: base.NewErrPicker(err), 466 }) 467 } 468 } 469 470 // UpdateClientConnState receives the serviceConfig (which contains the 471 // clusterName to watch for in CDS) and the xdsClient object from the 472 // xdsResolver. 473 func (b *cdsBalancer) UpdateClientConnState(state balancer.ClientConnState) error { 474 if b.closed.HasFired() { 475 b.logger.Warnf("xds: received ClientConnState {%+v} after cdsBalancer was closed", state) 476 return errBalancerClosed 477 } 478 479 if b.xdsClient == nil { 480 c := client.FromResolverState(state.ResolverState) 481 if c == nil { 482 return balancer.ErrBadResolverState 483 } 484 b.xdsClient = c 485 } 486 487 b.logger.Infof("Received update from resolver, balancer config: %+v", pretty.ToJSON(state.BalancerConfig)) 488 // The errors checked here should ideally never happen because the 489 // ServiceConfig in this case is prepared by the xdsResolver and is not 490 // something that is received on the wire. 491 lbCfg, ok := state.BalancerConfig.(*lbConfig) 492 if !ok { 493 b.logger.Warnf("xds: unexpected LoadBalancingConfig type: %T", state.BalancerConfig) 494 return balancer.ErrBadResolverState 495 } 496 if lbCfg.ClusterName == "" { 497 b.logger.Warnf("xds: no clusterName found in LoadBalancingConfig: %+v", lbCfg) 498 return balancer.ErrBadResolverState 499 } 500 b.updateCh.Put(&ccUpdate{clusterName: lbCfg.ClusterName}) 501 return nil 502 } 503 504 // ResolverError handles errors reported by the xdsResolver. 505 func (b *cdsBalancer) ResolverError(err error) { 506 if b.closed.HasFired() { 507 b.logger.Warnf("xds: received resolver error {%v} after cdsBalancer was closed", err) 508 return 509 } 510 b.updateCh.Put(&ccUpdate{err: err}) 511 } 512 513 // UpdateSubConnState handles subConn updates from gRPC. 514 func (b *cdsBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) { 515 if b.closed.HasFired() { 516 b.logger.Warnf("xds: received subConn update {%v, %v} after cdsBalancer was closed", sc, state) 517 return 518 } 519 b.updateCh.Put(&scUpdate{subConn: sc, state: state}) 520 } 521 522 // Close cancels the CDS watch, closes the child policy and closes the 523 // cdsBalancer. 524 func (b *cdsBalancer) Close() { 525 b.closed.Fire() 526 <-b.done.Done() 527 } 528 529 func (b *cdsBalancer) ExitIdle() { 530 b.updateCh.Put(exitIdle{}) 531 } 532 533 // ccWrapper wraps the balancer.ClientConn passed to the CDS balancer at 534 // creation and intercepts the NewSubConn() and UpdateAddresses() call from the 535 // child policy to add security configuration required by xDS credentials. 536 // 537 // Other methods of the balancer.ClientConn interface are not overridden and 538 // hence get the original implementation. 539 type ccWrapper struct { 540 balancer.ClientConn 541 542 // The certificate providers in this HandshakeInfo are updated based on the 543 // received security configuration in the Cluster resource. 544 xdsHI *xdsinternal.HandshakeInfo 545 } 546 547 // NewSubConn intercepts NewSubConn() calls from the child policy and adds an 548 // address attribute which provides all information required by the xdsCreds 549 // handshaker to perform the TLS handshake. 550 func (ccw *ccWrapper) NewSubConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (balancer.SubConn, error) { 551 newAddrs := make([]resolver.Address, len(addrs)) 552 for i, addr := range addrs { 553 newAddrs[i] = xdsinternal.SetHandshakeInfo(addr, ccw.xdsHI) 554 } 555 return ccw.ClientConn.NewSubConn(newAddrs, opts) 556 } 557 558 func (ccw *ccWrapper) UpdateAddresses(sc balancer.SubConn, addrs []resolver.Address) { 559 newAddrs := make([]resolver.Address, len(addrs)) 560 for i, addr := range addrs { 561 newAddrs[i] = xdsinternal.SetHandshakeInfo(addr, ccw.xdsHI) 562 } 563 ccw.ClientConn.UpdateAddresses(sc, newAddrs) 564 }