// google.golang.org/grpc@v1.74.2/xds/internal/clients/xdsclient/xdsclient.go

/*
 *
 * Copyright 2025 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package xdsclient provides an xDS (* Discovery Service) client.
//
// It allows applications to:
//   - Create xDS client instances with in-memory configurations.
//   - Register watches for named resources (e.g. listeners, routes, or
//     clusters).
//   - Receive resources via an ADS (Aggregated Discovery Service) stream.
//
// This enables applications to dynamically discover and configure resources
// such as listeners, routes, clusters, and endpoints from an xDS management
// server.
31 package xdsclient 32 33 import ( 34 "context" 35 "errors" 36 "fmt" 37 "sync" 38 "sync/atomic" 39 "time" 40 41 "google.golang.org/grpc/internal/grpclog" 42 "google.golang.org/grpc/xds/internal/clients" 43 clientsinternal "google.golang.org/grpc/xds/internal/clients/internal" 44 "google.golang.org/grpc/xds/internal/clients/internal/backoff" 45 "google.golang.org/grpc/xds/internal/clients/internal/syncutil" 46 xdsclientinternal "google.golang.org/grpc/xds/internal/clients/xdsclient/internal" 47 "google.golang.org/grpc/xds/internal/clients/xdsclient/internal/xdsresource" 48 "google.golang.org/grpc/xds/internal/clients/xdsclient/metrics" 49 "google.golang.org/protobuf/proto" 50 51 v3statuspb "github.com/envoyproxy/go-control-plane/envoy/service/status/v3" 52 ) 53 54 const ( 55 defaultWatchExpiryTimeout = 15 * time.Second 56 name = "xds-client" 57 ) 58 59 var ( 60 defaultExponentialBackoff = backoff.DefaultExponential.Backoff 61 ) 62 63 func init() { 64 xdsclientinternal.WatchExpiryTimeout = defaultWatchExpiryTimeout 65 xdsclientinternal.StreamBackoff = defaultExponentialBackoff 66 xdsclientinternal.ResourceWatchStateForTesting = resourceWatchStateForTesting 67 } 68 69 // XDSClient is a client which queries a set of discovery APIs (collectively 70 // termed as xDS) on a remote management server, to discover 71 // various dynamic resources. 72 type XDSClient struct { 73 // The following fields are initialized at creation time and are read-only 74 // after that, and therefore can be accessed without a mutex. 75 done *syncutil.Event // Fired when the client is closed. 76 topLevelAuthority *authority // The top-level authority, used only for old-style names without an authority. 77 authorities map[string]*authority // Map from authority names in config to authority struct. 78 config *Config // Complete xDS client configuration. 79 watchExpiryTimeout time.Duration // Expiry timeout for ADS watch. 
80 backoff func(int) time.Duration // Backoff for ADS and LRS stream failures. 81 transportBuilder clients.TransportBuilder // Builder to create transports to xDS server. 82 resourceTypes map[string]ResourceType // Registry of resource types, for parsing incoming ADS responses. 83 serializer *syncutil.CallbackSerializer // Serializer for invoking resource watcher callbacks. 84 serializerClose func() // Function to close the serializer. 85 logger *grpclog.PrefixLogger 86 target string 87 metricsReporter clients.MetricsReporter 88 89 // The XDSClient owns a bunch of channels to individual xDS servers 90 // specified in the xDS client configuration. Authorities acquire references 91 // to these channels based on server configs within the authority config. 92 // The XDSClient maintains a list of interested authorities for each of 93 // these channels, and forwards updates from the channels to each of these 94 // authorities. 95 // 96 // Once all references to a channel are dropped, the channel is closed. 97 channelsMu sync.Mutex 98 xdsActiveChannels map[ServerConfig]*channelState // Map from server config to in-use xdsChannels. 99 } 100 101 // New returns a new xDS Client configured with the provided config. 102 func New(config Config) (*XDSClient, error) { 103 switch { 104 case config.Node.ID == "": 105 return nil, errors.New("xdsclient: node ID is empty") 106 case config.ResourceTypes == nil: 107 return nil, errors.New("xdsclient: resource types map is nil") 108 case config.TransportBuilder == nil: 109 return nil, errors.New("xdsclient: transport builder is nil") 110 case config.Authorities == nil && config.Servers == nil: 111 return nil, errors.New("xdsclient: no servers or authorities specified") 112 } 113 114 client, err := newClient(&config, name) 115 if err != nil { 116 return nil, err 117 } 118 return client, nil 119 } 120 121 // SetWatchExpiryTimeoutForTesting override the default watch expiry timeout 122 // with provided timeout value. 
123 func (c *XDSClient) SetWatchExpiryTimeoutForTesting(watchExpiryTimeout time.Duration) { 124 c.watchExpiryTimeout = watchExpiryTimeout 125 } 126 127 // newClient returns a new XDSClient with the given config. 128 func newClient(config *Config, target string) (*XDSClient, error) { 129 ctx, cancel := context.WithCancel(context.Background()) 130 c := &XDSClient{ 131 target: target, 132 done: syncutil.NewEvent(), 133 authorities: make(map[string]*authority), 134 config: config, 135 watchExpiryTimeout: xdsclientinternal.WatchExpiryTimeout, 136 backoff: xdsclientinternal.StreamBackoff, 137 serializer: syncutil.NewCallbackSerializer(ctx), 138 serializerClose: cancel, 139 transportBuilder: config.TransportBuilder, 140 resourceTypes: config.ResourceTypes, 141 xdsActiveChannels: make(map[ServerConfig]*channelState), 142 metricsReporter: config.MetricsReporter, 143 } 144 145 for name, cfg := range config.Authorities { 146 // If server configs are specified in the authorities map, use that. 147 // Else, use the top-level server configs. 148 serverCfg := config.Servers 149 if len(cfg.XDSServers) >= 1 { 150 serverCfg = cfg.XDSServers 151 } 152 c.authorities[name] = newAuthority(authorityBuildOptions{ 153 serverConfigs: serverCfg, 154 name: name, 155 serializer: c.serializer, 156 getChannelForADS: c.getChannelForADS, 157 logPrefix: clientPrefix(c), 158 target: target, 159 metricsReporter: c.metricsReporter, 160 }) 161 } 162 c.topLevelAuthority = newAuthority(authorityBuildOptions{ 163 serverConfigs: config.Servers, 164 name: "", 165 serializer: c.serializer, 166 getChannelForADS: c.getChannelForADS, 167 logPrefix: clientPrefix(c), 168 target: target, 169 metricsReporter: c.metricsReporter, 170 }) 171 c.logger = prefixLogger(c) 172 173 return c, nil 174 } 175 176 // Close closes the xDS client and releases all resources. 
177 func (c *XDSClient) Close() { 178 if c.done.HasFired() { 179 return 180 } 181 c.done.Fire() 182 183 c.topLevelAuthority.close() 184 for _, a := range c.authorities { 185 a.close() 186 } 187 188 // Channel close cannot be invoked with the lock held, because it can race 189 // with stream failure happening at the same time. The latter will callback 190 // into the XDSClient and will attempt to grab the lock. This will result 191 // in a deadlock. So instead, we release the lock and wait for all active 192 // channels to be closed. 193 var channelsToClose []*xdsChannel 194 c.channelsMu.Lock() 195 for _, cs := range c.xdsActiveChannels { 196 channelsToClose = append(channelsToClose, cs.channel) 197 } 198 c.xdsActiveChannels = nil 199 c.channelsMu.Unlock() 200 for _, c := range channelsToClose { 201 c.close() 202 } 203 204 c.serializerClose() 205 <-c.serializer.Done() 206 207 c.logger.Infof("Shutdown") 208 } 209 210 // getChannelForADS returns an xdsChannel for the given server configuration. 211 // 212 // If an xdsChannel exists for the given server configuration, it is returned. 213 // Else a new one is created. It also ensures that the calling authority is 214 // added to the set of interested authorities for the returned channel. 215 // 216 // It returns the xdsChannel and a function to release the calling authority's 217 // reference on the channel. The caller must call the cancel function when it is 218 // no longer interested in this channel. 219 // 220 // A non-nil error is returned if an xdsChannel was not created. 
221 func (c *XDSClient) getChannelForADS(serverConfig *ServerConfig, callingAuthority *authority) (*xdsChannel, func(), error) { 222 if c.done.HasFired() { 223 return nil, nil, errors.New("xds: the xDS client is closed") 224 } 225 226 initLocked := func(s *channelState) { 227 if c.logger.V(2) { 228 c.logger.Infof("Adding authority %q to the set of interested authorities for channel [%p]", callingAuthority.name, s.channel) 229 } 230 s.interestedAuthorities[callingAuthority] = true 231 } 232 deInitLocked := func(s *channelState) { 233 if c.logger.V(2) { 234 c.logger.Infof("Removing authority %q from the set of interested authorities for channel [%p]", callingAuthority.name, s.channel) 235 } 236 delete(s.interestedAuthorities, callingAuthority) 237 } 238 239 return c.getOrCreateChannel(serverConfig, initLocked, deInitLocked) 240 } 241 242 // getOrCreateChannel returns an xdsChannel for the given server configuration. 243 // 244 // If an active xdsChannel exists for the given server configuration, it is 245 // returned. If an idle xdsChannel exists for the given server configuration, it 246 // is revived from the idle cache and returned. Else a new one is created. 247 // 248 // The initLocked function runs some initialization logic before the channel is 249 // returned. This includes adding the calling authority to the set of interested 250 // authorities for the channel or incrementing the count of the number of LRS 251 // calls on the channel. 252 // 253 // The deInitLocked function runs some cleanup logic when the returned cleanup 254 // function is called. This involves removing the calling authority from the set 255 // of interested authorities for the channel or decrementing the count of the 256 // number of LRS calls on the channel. 257 // 258 // Both initLocked and deInitLocked are called with the c.channelsMu held. 259 // 260 // Returns the xdsChannel and a cleanup function to be invoked when the channel 261 // is no longer required. 
A non-nil error is returned if an xdsChannel was not 262 // created. 263 func (c *XDSClient) getOrCreateChannel(serverConfig *ServerConfig, initLocked, deInitLocked func(*channelState)) (*xdsChannel, func(), error) { 264 c.channelsMu.Lock() 265 defer c.channelsMu.Unlock() 266 267 if c.logger.V(2) { 268 c.logger.Infof("Received request for a reference to an xdsChannel for server config %q", serverConfig) 269 } 270 271 // Use an existing channel, if one exists for this server config. 272 if st, ok := c.xdsActiveChannels[*serverConfig]; ok { 273 if c.logger.V(2) { 274 c.logger.Infof("Reusing an existing xdsChannel for server config %q", serverConfig) 275 } 276 initLocked(st) 277 return st.channel, c.releaseChannel(serverConfig, st, deInitLocked), nil 278 } 279 280 if c.logger.V(2) { 281 c.logger.Infof("Creating a new xdsChannel for server config %q", serverConfig) 282 } 283 284 // Create a new transport and create a new xdsChannel, and add it to the 285 // map of xdsChannels. 286 tr, err := c.transportBuilder.Build(serverConfig.ServerIdentifier) 287 if err != nil { 288 return nil, func() {}, fmt.Errorf("xds: failed to create transport for server config %v: %v", serverConfig, err) 289 } 290 state := &channelState{ 291 parent: c, 292 serverConfig: serverConfig, 293 interestedAuthorities: make(map[*authority]bool), 294 } 295 channel, err := newXDSChannel(xdsChannelOpts{ 296 transport: tr, 297 serverConfig: serverConfig, 298 clientConfig: c.config, 299 eventHandler: state, 300 backoff: c.backoff, 301 watchExpiryTimeout: c.watchExpiryTimeout, 302 logPrefix: clientPrefix(c), 303 }) 304 if err != nil { 305 return nil, func() {}, fmt.Errorf("xds: failed to create a new channel for server config %v: %v", serverConfig, err) 306 } 307 state.channel = channel 308 c.xdsActiveChannels[*serverConfig] = state 309 initLocked(state) 310 return state.channel, c.releaseChannel(serverConfig, state, deInitLocked), nil 311 } 312 313 // releaseChannel is a function that is called when a 
reference to an xdsChannel 314 // needs to be released. It handles closing channels with no active references. 315 // 316 // The function takes the following parameters: 317 // - serverConfig: the server configuration for the xdsChannel 318 // - state: the state of the xdsChannel 319 // - deInitLocked: a function that performs any necessary cleanup for the xdsChannel 320 // 321 // The function returns another function that can be called to release the 322 // reference to the xdsChannel. This returned function is idempotent, meaning 323 // it can be called multiple times without any additional effect. 324 func (c *XDSClient) releaseChannel(serverConfig *ServerConfig, state *channelState, deInitLocked func(*channelState)) func() { 325 return sync.OnceFunc(func() { 326 c.channelsMu.Lock() 327 328 if c.logger.V(2) { 329 c.logger.Infof("Received request to release a reference to an xdsChannel for server config %q", serverConfig) 330 } 331 deInitLocked(state) 332 333 // The channel has active users. Do nothing and return. 334 if len(state.interestedAuthorities) != 0 { 335 if c.logger.V(2) { 336 c.logger.Infof("xdsChannel %p has other active references", state.channel) 337 } 338 c.channelsMu.Unlock() 339 return 340 } 341 342 delete(c.xdsActiveChannels, *serverConfig) 343 if c.logger.V(2) { 344 c.logger.Infof("Closing xdsChannel [%p] for server config %s", state.channel, serverConfig) 345 } 346 channelToClose := state.channel 347 c.channelsMu.Unlock() 348 349 channelToClose.close() 350 }) 351 } 352 353 // DumpResources returns the status and contents of all xDS resources being 354 // watched by the xDS client. 355 func (c *XDSClient) DumpResources() ([]byte, error) { 356 retCfg := c.topLevelAuthority.dumpResources() 357 for _, a := range c.authorities { 358 retCfg = append(retCfg, a.dumpResources()...) 
359 } 360 361 nodeProto := clientsinternal.NodeProto(c.config.Node) 362 nodeProto.ClientFeatures = []string{clientFeatureNoOverprovisioning, clientFeatureResourceWrapper} 363 resp := &v3statuspb.ClientStatusResponse{} 364 resp.Config = append(resp.Config, &v3statuspb.ClientConfig{ 365 Node: nodeProto, 366 GenericXdsConfigs: retCfg, 367 }) 368 return proto.Marshal(resp) 369 } 370 371 // channelState represents the state of an xDS channel. It tracks the number of 372 // LRS references, the authorities interested in the channel, and the server 373 // configuration used for the channel. 374 // 375 // It receives callbacks for events on the underlying ADS stream and invokes 376 // corresponding callbacks on interested authorities. 377 type channelState struct { 378 parent *XDSClient 379 serverConfig *ServerConfig 380 381 // Access to the following fields should be protected by the parent's 382 // channelsMu. 383 channel *xdsChannel 384 interestedAuthorities map[*authority]bool 385 } 386 387 func (cs *channelState) adsStreamFailure(err error) { 388 if cs.parent.done.HasFired() { 389 return 390 } 391 392 if xdsresource.ErrType(err) != xdsresource.ErrTypeStreamFailedAfterRecv && cs.parent.metricsReporter != nil { 393 cs.parent.metricsReporter.ReportMetric(&metrics.ServerFailure{ 394 ServerURI: cs.serverConfig.ServerIdentifier.ServerURI, 395 }) 396 } 397 398 cs.parent.channelsMu.Lock() 399 defer cs.parent.channelsMu.Unlock() 400 for authority := range cs.interestedAuthorities { 401 authority.adsStreamFailure(cs.serverConfig, err) 402 } 403 } 404 405 func (cs *channelState) adsResourceUpdate(typ ResourceType, updates map[string]dataAndErrTuple, md xdsresource.UpdateMetadata, onDone func()) { 406 if cs.parent.done.HasFired() { 407 return 408 } 409 410 cs.parent.channelsMu.Lock() 411 defer cs.parent.channelsMu.Unlock() 412 413 if len(cs.interestedAuthorities) == 0 { 414 onDone() 415 return 416 } 417 418 authorityCnt := new(atomic.Int64) 419 
authorityCnt.Add(int64(len(cs.interestedAuthorities))) 420 done := func() { 421 if authorityCnt.Add(-1) == 0 { 422 onDone() 423 } 424 } 425 for authority := range cs.interestedAuthorities { 426 authority.adsResourceUpdate(cs.serverConfig, typ, updates, md, done) 427 } 428 } 429 430 func (cs *channelState) adsResourceDoesNotExist(typ ResourceType, resourceName string) { 431 if cs.parent.done.HasFired() { 432 return 433 } 434 435 cs.parent.channelsMu.Lock() 436 defer cs.parent.channelsMu.Unlock() 437 for authority := range cs.interestedAuthorities { 438 authority.adsResourceDoesNotExist(typ, resourceName) 439 } 440 } 441 442 func resourceWatchStateForTesting(c *XDSClient, rType ResourceType, resourceName string) (xdsresource.ResourceWatchState, error) { 443 c.channelsMu.Lock() 444 defer c.channelsMu.Unlock() 445 446 for _, state := range c.xdsActiveChannels { 447 if st, err := state.channel.ads.adsResourceWatchStateForTesting(rType, resourceName); err == nil { 448 return st, nil 449 } 450 } 451 return xdsresource.ResourceWatchState{}, fmt.Errorf("unable to find watch state for resource type %q and name %q", rType.TypeName, resourceName) 452 }