google.golang.org/grpc@v1.72.2/xds/internal/balancer/clusterresolver/clusterresolver.go (about) 1 /* 2 * 3 * Copyright 2019 gRPC authors. 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 */ 18 19 // Package clusterresolver contains the implementation of the 20 // cluster_resolver_experimental LB policy which resolves endpoint addresses 21 // using a list of one or more discovery mechanisms. 22 package clusterresolver 23 24 import ( 25 "encoding/json" 26 "errors" 27 "fmt" 28 29 "google.golang.org/grpc/attributes" 30 "google.golang.org/grpc/balancer" 31 "google.golang.org/grpc/balancer/base" 32 "google.golang.org/grpc/connectivity" 33 "google.golang.org/grpc/internal/balancer/nop" 34 "google.golang.org/grpc/internal/buffer" 35 "google.golang.org/grpc/internal/grpclog" 36 "google.golang.org/grpc/internal/grpcsync" 37 "google.golang.org/grpc/internal/pretty" 38 "google.golang.org/grpc/resolver" 39 "google.golang.org/grpc/serviceconfig" 40 "google.golang.org/grpc/xds/internal/balancer/outlierdetection" 41 "google.golang.org/grpc/xds/internal/balancer/priority" 42 "google.golang.org/grpc/xds/internal/xdsclient" 43 "google.golang.org/grpc/xds/internal/xdsclient/xdsresource" 44 ) 45 46 // Name is the name of the cluster_resolver balancer. 47 const Name = "cluster_resolver_experimental" 48 49 var ( 50 errBalancerClosed = errors.New("cdsBalancer is closed") 51 newChildBalancer = func(bb balancer.Builder, cc balancer.ClientConn, o balancer.BuildOptions) balancer.Balancer { 52 return bb.Build(cc, o) 53 } 54 ) 55 56 func init() { 57 balancer.Register(bb{}) 58 } 59 60 type bb struct{} 61 62 // Build helps implement the balancer.Builder interface. 63 func (bb) Build(cc balancer.ClientConn, opts balancer.BuildOptions) balancer.Balancer { 64 priorityBuilder := balancer.Get(priority.Name) 65 if priorityBuilder == nil { 66 logger.Errorf("%q LB policy is needed but not registered", priority.Name) 67 return nop.NewBalancer(cc, fmt.Errorf("%q LB policy is needed but not registered", priority.Name)) 68 } 69 priorityConfigParser, ok := priorityBuilder.(balancer.ConfigParser) 70 if !ok { 71 logger.Errorf("%q LB policy does not implement a config parser", priority.Name) 72 return nop.NewBalancer(cc, fmt.Errorf("%q LB policy does not implement a config parser", priority.Name)) 73 } 74 75 b := &clusterResolverBalancer{ 76 bOpts: opts, 77 updateCh: buffer.NewUnbounded(), 78 closed: grpcsync.NewEvent(), 79 done: grpcsync.NewEvent(), 80 81 priorityBuilder: priorityBuilder, 82 priorityConfigParser: priorityConfigParser, 83 } 84 b.logger = prefixLogger(b) 85 b.logger.Infof("Created") 86 87 b.resourceWatcher = newResourceResolver(b, b.logger) 88 b.cc = &ccWrapper{ 89 ClientConn: cc, 90 b: b, 91 resourceWatcher: b.resourceWatcher, 92 } 93 94 go b.run() 95 return b 96 } 97 98 func (bb) Name() string { 99 return Name 100 } 101 102 func (bb) ParseConfig(j json.RawMessage) (serviceconfig.LoadBalancingConfig, error) { 103 odBuilder := balancer.Get(outlierdetection.Name) 104 if odBuilder == nil { 105 // Shouldn't happen, registered through imported Outlier Detection, 106 // defensive programming. 107 return nil, fmt.Errorf("%q LB policy is needed but not registered", outlierdetection.Name) 108 } 109 odParser, ok := odBuilder.(balancer.ConfigParser) 110 if !ok { 111 // Shouldn't happen, imported Outlier Detection builder has this method. 112 return nil, fmt.Errorf("%q LB policy does not implement a config parser", outlierdetection.Name) 113 } 114 115 var cfg *LBConfig 116 if err := json.Unmarshal(j, &cfg); err != nil { 117 return nil, fmt.Errorf("unable to unmarshal balancer config %s into cluster-resolver config, error: %v", string(j), err) 118 } 119 120 for i, dm := range cfg.DiscoveryMechanisms { 121 lbCfg, err := odParser.ParseConfig(dm.OutlierDetection) 122 if err != nil { 123 return nil, fmt.Errorf("error parsing Outlier Detection config %v: %v", dm.OutlierDetection, err) 124 } 125 odCfg, ok := lbCfg.(*outlierdetection.LBConfig) 126 if !ok { 127 // Shouldn't happen, Parser built at build time with Outlier Detection 128 // builder pulled from gRPC LB Registry. 129 return nil, fmt.Errorf("odParser returned config with unexpected type %T: %v", lbCfg, lbCfg) 130 } 131 cfg.DiscoveryMechanisms[i].outlierDetection = *odCfg 132 } 133 if err := json.Unmarshal(cfg.XDSLBPolicy, &cfg.xdsLBPolicy); err != nil { 134 // This will never occur, valid configuration is emitted from the xDS 135 // Client. Validity is already checked in the xDS Client, however, this 136 // double validation is present because Unmarshalling and Validating are 137 // coupled into one json.Unmarshal operation. We will switch this in 138 // the future to two separate operations. 139 return nil, fmt.Errorf("error unmarshalling xDS LB Policy: %v", err) 140 } 141 return cfg, nil 142 } 143 144 // ccUpdate wraps a clientConn update received from gRPC. 145 type ccUpdate struct { 146 state balancer.ClientConnState 147 err error 148 } 149 150 type exitIdle struct{} 151 152 // clusterResolverBalancer resolves endpoint addresses using a list of one or 153 // more discovery mechanisms. 154 type clusterResolverBalancer struct { 155 cc balancer.ClientConn 156 bOpts balancer.BuildOptions 157 updateCh *buffer.Unbounded // Channel for updates from gRPC. 158 resourceWatcher *resourceResolver 159 logger *grpclog.PrefixLogger 160 closed *grpcsync.Event 161 done *grpcsync.Event 162 163 priorityBuilder balancer.Builder 164 priorityConfigParser balancer.ConfigParser 165 166 config *LBConfig 167 configRaw *serviceconfig.ParseResult 168 xdsClient xdsclient.XDSClient // xDS client to watch EDS resource. 169 attrsWithClient *attributes.Attributes // Attributes with xdsClient attached to be passed to the child policies. 170 171 child balancer.Balancer 172 priorities []priorityConfig 173 watchUpdateReceived bool 174 } 175 176 // handleClientConnUpdate handles a ClientConnUpdate received from gRPC. 177 // 178 // A good update results in creation of endpoint resolvers for the configured 179 // discovery mechanisms. An update with an error results in cancellation of any 180 // existing endpoint resolution and propagation of the same to the child policy. 181 func (b *clusterResolverBalancer) handleClientConnUpdate(update *ccUpdate) { 182 if err := update.err; err != nil { 183 b.handleErrorFromUpdate(err, true) 184 return 185 } 186 187 if b.logger.V(2) { 188 b.logger.Infof("Received new balancer config: %v", pretty.ToJSON(update.state.BalancerConfig)) 189 } 190 191 cfg, _ := update.state.BalancerConfig.(*LBConfig) 192 if cfg == nil { 193 b.logger.Warningf("Ignoring unsupported balancer configuration of type: %T", update.state.BalancerConfig) 194 return 195 } 196 197 b.config = cfg 198 b.configRaw = update.state.ResolverState.ServiceConfig 199 b.resourceWatcher.updateMechanisms(cfg.DiscoveryMechanisms) 200 201 // The child policy is created only after all configured discovery 202 // mechanisms have been successfully returned endpoints. If that is not the 203 // case, we return early. 204 if !b.watchUpdateReceived { 205 return 206 } 207 b.updateChildConfig() 208 } 209 210 // handleResourceUpdate handles a resource update or error from the resource 211 // resolver by propagating the same to the child LB policy. 212 func (b *clusterResolverBalancer) handleResourceUpdate(update *resourceUpdate) { 213 b.watchUpdateReceived = true 214 b.priorities = update.priorities 215 216 // An update from the resource resolver contains resolved endpoint addresses 217 // for all configured discovery mechanisms ordered by priority. This is used 218 // to generate configuration for the priority LB policy. 219 b.updateChildConfig() 220 221 if update.onDone != nil { 222 update.onDone() 223 } 224 } 225 226 // updateChildConfig builds child policy configuration using endpoint addresses 227 // returned by the resource resolver and child policy configuration provided by 228 // parent LB policy. 229 // 230 // A child policy is created if one doesn't already exist. The newly built 231 // configuration is then pushed to the child policy. 232 func (b *clusterResolverBalancer) updateChildConfig() { 233 if b.child == nil { 234 b.child = newChildBalancer(b.priorityBuilder, b.cc, b.bOpts) 235 } 236 237 childCfgBytes, endpoints, err := buildPriorityConfigJSON(b.priorities, &b.config.xdsLBPolicy) 238 if err != nil { 239 b.logger.Warningf("Failed to build child policy config: %v", err) 240 return 241 } 242 childCfg, err := b.priorityConfigParser.ParseConfig(childCfgBytes) 243 if err != nil { 244 b.logger.Warningf("Failed to parse child policy config. This should never happen because the config was generated: %v", err) 245 return 246 } 247 if b.logger.V(2) { 248 b.logger.Infof("Built child policy config: %s", pretty.ToJSON(childCfg)) 249 } 250 251 flattenedAddrs := make([]resolver.Address, len(endpoints)) 252 for i := range endpoints { 253 for j := range endpoints[i].Addresses { 254 addr := endpoints[i].Addresses[j] 255 addr.BalancerAttributes = endpoints[i].Attributes 256 // If the endpoint has multiple addresses, only the first is added 257 // to the flattened address list. This ensures that LB policies 258 // that don't support endpoints create only one subchannel to a 259 // backend. 260 if j == 0 { 261 flattenedAddrs[i] = addr 262 } 263 // BalancerAttributes need to be present in endpoint addresses. This 264 // temporary workaround is required to make load reporting work 265 // with the old pickfirst policy which creates SubConns with multiple 266 // addresses. Since the addresses can be from different localities, 267 // an Address.BalancerAttribute is used to identify the locality of the 268 // address used by the transport. This workaround can be removed once 269 // the old pickfirst is removed. 270 // See https://github.com/grpc/grpc-go/issues/7339 271 endpoints[i].Addresses[j] = addr 272 } 273 } 274 if err := b.child.UpdateClientConnState(balancer.ClientConnState{ 275 ResolverState: resolver.State{ 276 Endpoints: endpoints, 277 Addresses: flattenedAddrs, 278 ServiceConfig: b.configRaw, 279 Attributes: b.attrsWithClient, 280 }, 281 BalancerConfig: childCfg, 282 }); err != nil { 283 b.logger.Warningf("Failed to push config to child policy: %v", err) 284 } 285 } 286 287 // handleErrorFromUpdate handles errors from the parent LB policy and endpoint 288 // resolvers. fromParent is true if error is from the parent LB policy. In both 289 // cases, the error is propagated to the child policy, if one exists. 290 func (b *clusterResolverBalancer) handleErrorFromUpdate(err error, fromParent bool) { 291 b.logger.Warningf("Received error: %v", err) 292 293 // A resource-not-found error from the parent LB policy means that the LDS 294 // or CDS resource was removed. This should result in endpoint resolvers 295 // being stopped here. 296 // 297 // A resource-not-found error from the EDS endpoint resolver means that the 298 // EDS resource was removed. No action needs to be taken for this, and we 299 // should continue watching the same EDS resource. 300 if fromParent && xdsresource.ErrType(err) == xdsresource.ErrorTypeResourceNotFound { 301 b.resourceWatcher.stop(false) 302 } 303 304 if b.child != nil { 305 b.child.ResolverError(err) 306 return 307 } 308 b.cc.UpdateState(balancer.State{ 309 ConnectivityState: connectivity.TransientFailure, 310 Picker: base.NewErrPicker(err), 311 }) 312 } 313 314 // run is a long-running goroutine that handles updates from gRPC and endpoint 315 // resolvers. The methods handling the individual updates simply push them onto 316 // a channel which is read and acted upon from here. 317 func (b *clusterResolverBalancer) run() { 318 for { 319 select { 320 case u, ok := <-b.updateCh.Get(): 321 if !ok { 322 return 323 } 324 b.updateCh.Load() 325 switch update := u.(type) { 326 case *ccUpdate: 327 b.handleClientConnUpdate(update) 328 case exitIdle: 329 if b.child == nil { 330 // This is not necessarily an error. The EDS/DNS watch may 331 // not have returned a list of endpoints yet, so the child 332 // may not be built. 333 if b.logger.V(2) { 334 b.logger.Infof("xds: received ExitIdle with no child balancer") 335 } 336 break 337 } 338 // This implementation assumes the child balancer supports 339 // ExitIdle (but still checks for the interface's existence to 340 // avoid a panic if not). If the child does not, no subconns 341 // will be connected. 342 if ei, ok := b.child.(balancer.ExitIdler); ok { 343 ei.ExitIdle() 344 } 345 } 346 case u := <-b.resourceWatcher.updateChannel: 347 b.handleResourceUpdate(u) 348 349 // Close results in stopping the endpoint resolvers and closing the 350 // underlying child policy and is the only way to exit this goroutine. 351 case <-b.closed.Done(): 352 b.resourceWatcher.stop(true) 353 354 if b.child != nil { 355 b.child.Close() 356 b.child = nil 357 } 358 b.updateCh.Close() 359 // This is the *ONLY* point of return from this function. 360 b.logger.Infof("Shutdown") 361 b.done.Fire() 362 return 363 } 364 } 365 } 366 367 // Following are methods to implement the balancer interface. 368 369 func (b *clusterResolverBalancer) UpdateClientConnState(state balancer.ClientConnState) error { 370 if b.closed.HasFired() { 371 b.logger.Warningf("Received update from gRPC {%+v} after close", state) 372 return errBalancerClosed 373 } 374 375 if b.xdsClient == nil { 376 c := xdsclient.FromResolverState(state.ResolverState) 377 if c == nil { 378 return balancer.ErrBadResolverState 379 } 380 b.xdsClient = c 381 b.attrsWithClient = state.ResolverState.Attributes 382 } 383 384 b.updateCh.Put(&ccUpdate{state: state}) 385 return nil 386 } 387 388 // ResolverError handles errors reported by the xdsResolver. 389 func (b *clusterResolverBalancer) ResolverError(err error) { 390 if b.closed.HasFired() { 391 b.logger.Warningf("Received resolver error {%v} after close", err) 392 return 393 } 394 b.updateCh.Put(&ccUpdate{err: err}) 395 } 396 397 // UpdateSubConnState handles subConn updates from gRPC. 398 func (b *clusterResolverBalancer) UpdateSubConnState(sc balancer.SubConn, state balancer.SubConnState) { 399 b.logger.Errorf("UpdateSubConnState(%v, %+v) called unexpectedly", sc, state) 400 } 401 402 // Close closes the cdsBalancer and the underlying child balancer. 403 func (b *clusterResolverBalancer) Close() { 404 b.closed.Fire() 405 <-b.done.Done() 406 } 407 408 func (b *clusterResolverBalancer) ExitIdle() { 409 b.updateCh.Put(exitIdle{}) 410 } 411 412 // ccWrapper overrides ResolveNow(), so that re-resolution from the child 413 // policies will trigger the DNS resolver in cluster_resolver balancer. It 414 // also intercepts NewSubConn calls in case children don't set the 415 // StateListener, to allow redirection to happen via this cluster_resolver 416 // balancer. 417 type ccWrapper struct { 418 balancer.ClientConn 419 b *clusterResolverBalancer 420 resourceWatcher *resourceResolver 421 } 422 423 func (c *ccWrapper) ResolveNow(resolver.ResolveNowOptions) { 424 c.resourceWatcher.resolveNow() 425 }