gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/go-control-plane/pkg/cache/v3/simple.go (about) 1 // Copyright 2018 Envoyproxy Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cache 16 17 import ( 18 "context" 19 "fmt" 20 "gitee.com/ks-custle/core-gm/go-control-plane/pkg/server/stream/v3" 21 "sync" 22 "sync/atomic" 23 "time" 24 25 "gitee.com/ks-custle/core-gm/go-control-plane/pkg/cache/types" 26 "gitee.com/ks-custle/core-gm/go-control-plane/pkg/log" 27 ) 28 29 // SnapshotCache is a snapshot-based cache that maintains a single versioned 30 // snapshot of responses per node. SnapshotCache consistently replies with the 31 // latest snapshot. For the protocol to work correctly in ADS mode, EDS/RDS 32 // requests are responded only when all resources in the snapshot xDS response 33 // are named as part of the request. It is expected that the CDS response names 34 // all EDS clusters, and the LDS response names all RDS routes in a snapshot, 35 // to ensure that Envoy makes the request for all EDS clusters or RDS routes 36 // eventually. 37 // 38 // SnapshotCache can operate as a REST or regular xDS backend. The snapshot 39 // can be partial, e.g. only include RDS or EDS resources. 40 type SnapshotCache interface { 41 Cache 42 43 // SetSnapshot sets a response snapshot for a node. For ADS, the snapshots 44 // should have distinct versions and be internally consistent (e.g. all 45 // referenced resources must be included in the snapshot). 46 // 47 // This method will cause the server to respond to all open watches, for which 48 // the version differs from the snapshot version. 49 SetSnapshot(ctx context.Context, node string, snapshot Snapshot) error 50 51 // GetSnapshot gets the snapshot for a node. 52 GetSnapshot(node string) (Snapshot, error) 53 54 // ClearSnapshot removes all status and snapshot information associated with a node. 55 ClearSnapshot(node string) 56 57 // GetStatusInfo retrieves status information for a node ID. 58 GetStatusInfo(string) StatusInfo 59 60 // GetStatusKeys retrieves node IDs for all statuses. 61 GetStatusKeys() []string 62 } 63 64 type snapshotCache struct { 65 // watchCount and deltaWatchCount are atomic counters incremented for each watch respectively. They need to 66 // be the first fields in the struct to guarantee 64-bit alignment, 67 // which is a requirement for atomic operations on 64-bit operands to work on 68 // 32-bit machines. 69 watchCount int64 70 deltaWatchCount int64 71 72 log log.Logger 73 74 // ads flag to hold responses until all resources are named 75 ads bool 76 77 // snapshots are cached resources indexed by node IDs 78 snapshots map[string]Snapshot 79 80 // status information for all nodes indexed by node IDs 81 status map[string]*statusInfo 82 83 // hash is the hashing function for Envoy nodes 84 hash NodeHash 85 86 mu sync.RWMutex 87 } 88 89 // NewSnapshotCache initializes a simple cache. 90 // 91 // ADS flag forces a delay in responding to streaming requests until all 92 // resources are explicitly named in the request. This avoids the problem of a 93 // partial request over a single stream for a subset of resources which would 94 // require generating a fresh version for acknowledgement. ADS flag requires 95 // snapshot consistency. For non-ADS case (and fetch), multiple partial 96 // requests are sent across multiple streams and re-using the snapshot version 97 // is OK. 98 // 99 // Logger is optional. 100 func NewSnapshotCache(ads bool, hash NodeHash, logger log.Logger) SnapshotCache { 101 return newSnapshotCache(ads, hash, logger) 102 } 103 104 func newSnapshotCache(ads bool, hash NodeHash, logger log.Logger) *snapshotCache { 105 cache := &snapshotCache{ 106 log: logger, 107 ads: ads, 108 snapshots: make(map[string]Snapshot), 109 status: make(map[string]*statusInfo), 110 hash: hash, 111 } 112 113 return cache 114 } 115 116 // NewSnapshotCacheWithHeartbeating initializes a simple cache that sends periodic heartbeat 117 // responses for resources with a TTL. 118 // 119 // ADS flag forces a delay in responding to streaming requests until all 120 // resources are explicitly named in the request. This avoids the problem of a 121 // partial request over a single stream for a subset of resources which would 122 // require generating a fresh version for acknowledgement. ADS flag requires 123 // snapshot consistency. For non-ADS case (and fetch), multiple partial 124 // requests are sent across multiple streams and re-using the snapshot version 125 // is OK. 126 // 127 // Logger is optional. 128 // 129 // The context provides a way to cancel the heartbeating routine, while the heartbeatInterval 130 // parameter controls how often heartbeating occurs. 131 func NewSnapshotCacheWithHeartbeating(ctx context.Context, ads bool, hash NodeHash, logger log.Logger, heartbeatInterval time.Duration) SnapshotCache { 132 cache := newSnapshotCache(ads, hash, logger) 133 go func() { 134 t := time.NewTicker(heartbeatInterval) 135 136 for { 137 select { 138 case <-t.C: 139 cache.mu.Lock() 140 for node := range cache.status { 141 // TODO(snowp): Omit heartbeats if a real response has been sent recently. 142 cache.sendHeartbeats(ctx, node) 143 } 144 cache.mu.Unlock() 145 case <-ctx.Done(): 146 return 147 } 148 } 149 }() 150 return cache 151 } 152 153 func (cache *snapshotCache) sendHeartbeats(ctx context.Context, node string) { 154 snapshot := cache.snapshots[node] 155 if info, ok := cache.status[node]; ok { 156 info.mu.Lock() 157 for id, watch := range info.watches { 158 // Respond with the current version regardless of whether the version has changed. 159 version := snapshot.GetVersion(watch.Request.TypeUrl) 160 resources := snapshot.GetResourcesAndTTL(watch.Request.TypeUrl) 161 162 // TODO(snowp): Construct this once per type instead of once per watch. 163 resourcesWithTTL := map[string]types.ResourceWithTTL{} 164 for k, v := range resources { 165 if v.TTL != nil { 166 resourcesWithTTL[k] = v 167 } 168 } 169 170 if len(resourcesWithTTL) == 0 { 171 continue 172 } 173 if cache.log != nil { 174 cache.log.Debugf("respond open watch %d%v with heartbeat for version %q", id, watch.Request.ResourceNames, version) 175 } 176 177 _ = cache.respond(ctx, watch.Request, watch.Response, resourcesWithTTL, version, true) 178 179 // The watch must be deleted and we must rely on the client to ack this response to create a new watch. 180 delete(info.watches, id) 181 } 182 info.mu.Unlock() 183 } 184 } 185 186 // SetSnapshot updates a snapshot for a node. 187 func (cache *snapshotCache) SetSnapshot(ctx context.Context, node string, snapshot Snapshot) error { 188 cache.mu.Lock() 189 defer cache.mu.Unlock() 190 191 // update the existing entry 192 cache.snapshots[node] = snapshot 193 194 // trigger existing watches for which version changed 195 if info, ok := cache.status[node]; ok { 196 info.mu.Lock() 197 defer info.mu.Unlock() 198 for id, watch := range info.watches { 199 version := snapshot.GetVersion(watch.Request.TypeUrl) 200 if version != watch.Request.VersionInfo { 201 if cache.log != nil { 202 cache.log.Debugf("respond open watch %d%v with new version %q", id, watch.Request.ResourceNames, version) 203 } 204 resources := snapshot.GetResourcesAndTTL(watch.Request.TypeUrl) 205 err := cache.respond(ctx, watch.Request, watch.Response, resources, version, false) 206 if err != nil { 207 return err 208 } 209 210 // discard the watch 211 delete(info.watches, id) 212 } 213 } 214 215 // We only calculate version hashes when using delta. We don't 216 // want to do this when using SOTW so we can avoid unnecessary 217 // computational cost if not using delta. 218 if len(info.deltaWatches) > 0 { 219 err := snapshot.ConstructVersionMap() 220 if err != nil { 221 return err 222 } 223 } 224 225 // process our delta watches 226 for id, watch := range info.deltaWatches { 227 res, err := cache.respondDelta( 228 ctx, 229 &snapshot, 230 watch.Request, 231 watch.Response, 232 watch.StreamState, 233 ) 234 if err != nil { 235 return err 236 } 237 // If we detect a nil response here, that means there has been no state change 238 // so we don't want to respond or remove any existing resource watches 239 if res != nil { 240 delete(info.deltaWatches, id) 241 } 242 } 243 } 244 245 return nil 246 } 247 248 // GetSnapshot gets the snapshot for a node, and returns an error if not found. 249 func (cache *snapshotCache) GetSnapshot(node string) (Snapshot, error) { 250 cache.mu.RLock() 251 defer cache.mu.RUnlock() 252 253 snap, ok := cache.snapshots[node] 254 if !ok { 255 return Snapshot{}, fmt.Errorf("no snapshot found for node %s", node) 256 } 257 return snap, nil 258 } 259 260 // ClearSnapshot clears snapshot and info for a node. 261 func (cache *snapshotCache) ClearSnapshot(node string) { 262 cache.mu.Lock() 263 defer cache.mu.Unlock() 264 265 delete(cache.snapshots, node) 266 delete(cache.status, node) 267 } 268 269 // nameSet creates a map from a string slice to value true. 270 func nameSet(names []string) map[string]bool { 271 set := make(map[string]bool) 272 for _, name := range names { 273 set[name] = true 274 } 275 return set 276 } 277 278 // superset checks that all resources are listed in the names set. 279 func superset(names map[string]bool, resources map[string]types.ResourceWithTTL) error { 280 for resourceName := range resources { 281 if _, exists := names[resourceName]; !exists { 282 return fmt.Errorf("%q not listed", resourceName) 283 } 284 } 285 return nil 286 } 287 288 // CreateWatch returns a watch for an xDS request. 289 func (cache *snapshotCache) CreateWatch(request *Request, value chan Response) func() { 290 nodeID := cache.hash.ID(request.Node) 291 292 cache.mu.Lock() 293 defer cache.mu.Unlock() 294 295 info, ok := cache.status[nodeID] 296 if !ok { 297 info = newStatusInfo(request.Node) 298 cache.status[nodeID] = info 299 } 300 301 // update last watch request time 302 info.mu.Lock() 303 info.lastWatchRequestTime = time.Now() 304 info.mu.Unlock() 305 306 snapshot, exists := cache.snapshots[nodeID] 307 version := snapshot.GetVersion(request.TypeUrl) 308 309 // if the requested version is up-to-date or missing a response, leave an open watch 310 if !exists || request.VersionInfo == version { 311 watchID := cache.nextWatchID() 312 if cache.log != nil { 313 cache.log.Debugf("open watch %d for %s%v from nodeID %q, version %q", watchID, 314 request.TypeUrl, request.ResourceNames, nodeID, request.VersionInfo) 315 } 316 info.mu.Lock() 317 info.watches[watchID] = ResponseWatch{Request: request, Response: value} 318 info.mu.Unlock() 319 return cache.cancelWatch(nodeID, watchID) 320 } 321 322 // otherwise, the watch may be responded immediately 323 resources := snapshot.GetResourcesAndTTL(request.TypeUrl) 324 _ = cache.respond(context.Background(), request, value, resources, version, false) 325 326 return nil 327 } 328 329 func (cache *snapshotCache) nextWatchID() int64 { 330 return atomic.AddInt64(&cache.watchCount, 1) 331 } 332 333 // cancellation function for cleaning stale watches 334 func (cache *snapshotCache) cancelWatch(nodeID string, watchID int64) func() { 335 return func() { 336 // uses the cache mutex 337 cache.mu.Lock() 338 defer cache.mu.Unlock() 339 if info, ok := cache.status[nodeID]; ok { 340 info.mu.Lock() 341 delete(info.watches, watchID) 342 info.mu.Unlock() 343 } 344 } 345 } 346 347 // Respond to a watch with the snapshot value. The value channel should have capacity not to block. 348 // TODO(kuat) do not respond always, see issue https://github.com/envoyproxy/go-control-plane/issues/46 349 func (cache *snapshotCache) respond(ctx context.Context, request *Request, value chan Response, resources map[string]types.ResourceWithTTL, version string, heartbeat bool) error { 350 // for ADS, the request names must match the snapshot names 351 // if they do not, then the watch is never responded, and it is expected that envoy makes another request 352 if len(request.ResourceNames) != 0 && cache.ads { 353 if err := superset(nameSet(request.ResourceNames), resources); err != nil { 354 if cache.log != nil { 355 cache.log.Warnf("ADS mode: not responding to request: %v", err) 356 } 357 return nil 358 } 359 } 360 if cache.log != nil { 361 cache.log.Debugf("respond %s%v version %q with version %q", 362 request.TypeUrl, request.ResourceNames, request.VersionInfo, version) 363 } 364 365 select { 366 case value <- createResponse(ctx, request, resources, version, heartbeat): 367 return nil 368 case <-ctx.Done(): 369 return context.Canceled 370 } 371 } 372 373 func createResponse(ctx context.Context, request *Request, resources map[string]types.ResourceWithTTL, version string, heartbeat bool) Response { 374 filtered := make([]types.ResourceWithTTL, 0, len(resources)) 375 376 // Reply only with the requested resources. Envoy may ask each resource 377 // individually in a separate stream. It is ok to reply with the same version 378 // on separate streams since requests do not share their response versions. 379 if len(request.ResourceNames) != 0 { 380 set := nameSet(request.ResourceNames) 381 for name, resource := range resources { 382 if set[name] { 383 filtered = append(filtered, resource) 384 } 385 } 386 } else { 387 for _, resource := range resources { 388 filtered = append(filtered, resource) 389 } 390 } 391 392 return &RawResponse{ 393 Request: request, 394 Version: version, 395 Resources: filtered, 396 Heartbeat: heartbeat, 397 Ctx: ctx, 398 } 399 } 400 401 // CreateDeltaWatch returns a watch for a delta xDS request which implements the Simple SnapshotCache. 402 func (cache *snapshotCache) CreateDeltaWatch(request *DeltaRequest, state stream.StreamState, value chan DeltaResponse) func() { 403 nodeID := cache.hash.ID(request.Node) 404 t := request.GetTypeUrl() 405 406 cache.mu.Lock() 407 defer cache.mu.Unlock() 408 409 info, ok := cache.status[nodeID] 410 if !ok { 411 info = newStatusInfo(request.Node) 412 cache.status[nodeID] = info 413 } 414 415 // update last watch request time 416 info.SetLastDeltaWatchRequestTime(time.Now()) 417 418 // find the current cache snapshot for the provided node 419 snapshot, exists := cache.snapshots[nodeID] 420 421 // There are three different cases that leads to a delayed watch trigger: 422 // - no snapshot exists for the requested nodeID 423 // - a snapshot exists, but we failed to initialize its version map 424 // - we attempted to issue a response, but the caller is already up to date 425 delayedResponse := !exists 426 if exists { 427 err := snapshot.ConstructVersionMap() 428 if err != nil { 429 if cache.log != nil { 430 cache.log.Errorf("failed to compute version for snapshot resources inline, waiting for next snapshot update") 431 } 432 } 433 response, err := cache.respondDelta(context.Background(), &snapshot, request, value, state) 434 if err != nil { 435 if cache.log != nil { 436 cache.log.Errorf("failed to respond with delta response, waiting for next snapshot update: %s", err) 437 } 438 } 439 440 delayedResponse = response == nil 441 } 442 443 if delayedResponse { 444 watchID := cache.nextDeltaWatchID() 445 if cache.log != nil { 446 cache.log.Infof("open delta watch ID:%d for %s Resources:%v from nodeID: %q, system version %q", watchID, 447 t, state.GetResourceVersions(), nodeID, snapshot.GetVersion(t)) 448 } 449 450 info.SetDeltaResponseWatch(watchID, DeltaResponseWatch{Request: request, Response: value, StreamState: state}) 451 452 return cache.cancelDeltaWatch(nodeID, watchID) 453 } 454 455 return nil 456 } 457 458 // Respond to a delta watch with the provided snapshot value. If the response is nil, there has been no state change. 459 func (cache *snapshotCache) respondDelta(ctx context.Context, snapshot *Snapshot, request *DeltaRequest, value chan DeltaResponse, state stream.StreamState) (*RawDeltaResponse, error) { 460 resp := createDeltaResponse(ctx, request, state, resourceContainer{ 461 resourceMap: snapshot.GetResources(request.TypeUrl), 462 versionMap: snapshot.GetVersionMap(request.TypeUrl), 463 systemVersion: snapshot.GetVersion(request.TypeUrl), 464 }) 465 466 // Only send a response if there were changes 467 // We want to respond immediately for the first wildcard request in a stream, even if the response is empty 468 // otherwise, envoy won't complete initialization 469 if len(resp.Resources) > 0 || len(resp.RemovedResources) > 0 || (state.IsWildcard() && state.IsFirst()) { 470 if cache.log != nil { 471 cache.log.Debugf("node: %s, sending delta response with resources: %v removed resources %v wildcard: %t", 472 request.GetNode().GetId(), resp.Resources, resp.RemovedResources, state.IsWildcard()) 473 } 474 select { 475 case value <- resp: 476 return resp, nil 477 case <-ctx.Done(): 478 return resp, context.Canceled 479 } 480 } 481 return nil, nil 482 } 483 484 func (cache *snapshotCache) nextDeltaWatchID() int64 { 485 return atomic.AddInt64(&cache.deltaWatchCount, 1) 486 } 487 488 // cancellation function for cleaning stale delta watches 489 func (cache *snapshotCache) cancelDeltaWatch(nodeID string, watchID int64) func() { 490 return func() { 491 cache.mu.Lock() 492 defer cache.mu.Unlock() 493 if info, ok := cache.status[nodeID]; ok { 494 info.mu.Lock() 495 delete(info.deltaWatches, watchID) 496 info.mu.Unlock() 497 } 498 } 499 } 500 501 // Fetch implements the cache fetch function. 502 // Fetch is called on multiple streams, so responding to individual names with the same version works. 503 func (cache *snapshotCache) Fetch(ctx context.Context, request *Request) (Response, error) { 504 nodeID := cache.hash.ID(request.Node) 505 506 cache.mu.RLock() 507 defer cache.mu.RUnlock() 508 509 if snapshot, exists := cache.snapshots[nodeID]; exists { 510 // Respond only if the request version is distinct from the current snapshot state. 511 // It might be beneficial to hold the request since Envoy will re-attempt the refresh. 512 version := snapshot.GetVersion(request.TypeUrl) 513 if request.VersionInfo == version { 514 if cache.log != nil { 515 cache.log.Warnf("skip fetch: version up to date") 516 } 517 return nil, &types.SkipFetchError{} 518 } 519 520 resources := snapshot.GetResourcesAndTTL(request.TypeUrl) 521 out := createResponse(ctx, request, resources, version, false) 522 return out, nil 523 } 524 525 return nil, fmt.Errorf("missing snapshot for %q", nodeID) 526 } 527 528 // GetStatusInfo retrieves the status info for the node. 529 func (cache *snapshotCache) GetStatusInfo(node string) StatusInfo { 530 cache.mu.RLock() 531 defer cache.mu.RUnlock() 532 533 info, exists := cache.status[node] 534 if !exists { 535 if cache.log != nil { 536 cache.log.Warnf("node does not exist") 537 } 538 return nil 539 } 540 541 return info 542 } 543 544 // GetStatusKeys retrieves all node IDs in the status map. 545 func (cache *snapshotCache) GetStatusKeys() []string { 546 cache.mu.RLock() 547 defer cache.mu.RUnlock() 548 549 out := make([]string, 0, len(cache.status)) 550 for id := range cache.status { 551 out = append(out, id) 552 } 553 554 return out 555 }