gitee.com/ks-custle/core-gm@v0.0.0-20230922171213-b83bdd97b62c/go-control-plane/pkg/cache/v3/simple.go (about)

     1  // Copyright 2018 Envoyproxy Authors
     2  //
     3  //   Licensed under the Apache License, Version 2.0 (the "License");
     4  //   you may not use this file except in compliance with the License.
     5  //   You may obtain a copy of the License at
     6  //
     7  //       http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  //   Unless required by applicable law or agreed to in writing, software
    10  //   distributed under the License is distributed on an "AS IS" BASIS,
    11  //   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  //   See the License for the specific language governing permissions and
    13  //   limitations under the License.
    14  
    15  package cache
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"gitee.com/ks-custle/core-gm/go-control-plane/pkg/server/stream/v3"
    21  	"sync"
    22  	"sync/atomic"
    23  	"time"
    24  
    25  	"gitee.com/ks-custle/core-gm/go-control-plane/pkg/cache/types"
    26  	"gitee.com/ks-custle/core-gm/go-control-plane/pkg/log"
    27  )
    28  
// SnapshotCache is a snapshot-based cache that maintains a single versioned
// snapshot of responses per node. SnapshotCache consistently replies with the
// latest snapshot. For the protocol to work correctly in ADS mode, EDS/RDS
// requests are responded to only when all resources in the snapshot xDS
// response are named as part of the request. It is expected that the CDS
// response names all EDS clusters, and the LDS response names all RDS routes
// in a snapshot, to ensure that Envoy eventually makes the request for all EDS
// clusters or RDS routes.
//
// SnapshotCache can operate as a REST or regular xDS backend. The snapshot
// can be partial, e.g. only include RDS or EDS resources.
type SnapshotCache interface {
	Cache

	// SetSnapshot sets a response snapshot for a node. For ADS, the snapshots
	// should have distinct versions and be internally consistent (e.g. all
	// referenced resources must be included in the snapshot).
	//
	// This method will cause the server to respond to all open watches whose
	// version differs from the snapshot version.
	SetSnapshot(ctx context.Context, node string, snapshot Snapshot) error

	// GetSnapshot gets the snapshot for a node. The implementation in this
	// file returns an error when no snapshot is stored for the node.
	GetSnapshot(node string) (Snapshot, error)

	// ClearSnapshot removes all status and snapshot information associated with a node.
	ClearSnapshot(node string)

	// GetStatusInfo retrieves status information for a node ID. The
	// implementation in this file returns nil when the node has no status entry.
	GetStatusInfo(string) StatusInfo

	// GetStatusKeys retrieves node IDs for all statuses.
	GetStatusKeys() []string
}
    63  
// snapshotCache is the SnapshotCache implementation returned by
// NewSnapshotCache and NewSnapshotCacheWithHeartbeating.
type snapshotCache struct {
	// watchCount and deltaWatchCount are atomic counters, incremented once for
	// each state-of-the-world watch and each delta watch respectively. They need
	// to be the first fields in the struct to guarantee 64-bit alignment,
	// which is a requirement for atomic operations on 64-bit operands to work on
	// 32-bit machines.
	watchCount      int64
	deltaWatchCount int64

	// log is an optional logger; methods skip logging when it is nil.
	log log.Logger

	// ads flag to hold responses until all resources are named
	ads bool

	// snapshots are cached resources indexed by node IDs
	snapshots map[string]Snapshot

	// status information for all nodes indexed by node IDs
	status map[string]*statusInfo

	// hash is the hashing function for Envoy nodes
	hash NodeHash

	// mu is held by the methods of this type while they read or modify the
	// snapshots and status maps.
	mu sync.RWMutex
}
    88  
// NewSnapshotCache initializes a simple cache.
//
// ADS flag forces a delay in responding to streaming requests until all
// resources are explicitly named in the request. This avoids the problem of a
// partial request over a single stream for a subset of resources which would
// require generating a fresh version for acknowledgement. ADS flag requires
// snapshot consistency. For non-ADS case (and fetch), multiple partial
// requests are sent across multiple streams and re-using the snapshot version
// is OK.
//
// Logger is optional; the cache skips logging when it is nil.
func NewSnapshotCache(ads bool, hash NodeHash, logger log.Logger) SnapshotCache {
	return newSnapshotCache(ads, hash, logger)
}
   103  
   104  func newSnapshotCache(ads bool, hash NodeHash, logger log.Logger) *snapshotCache {
   105  	cache := &snapshotCache{
   106  		log:       logger,
   107  		ads:       ads,
   108  		snapshots: make(map[string]Snapshot),
   109  		status:    make(map[string]*statusInfo),
   110  		hash:      hash,
   111  	}
   112  
   113  	return cache
   114  }
   115  
   116  // NewSnapshotCacheWithHeartbeating initializes a simple cache that sends periodic heartbeat
   117  // responses for resources with a TTL.
   118  //
   119  // ADS flag forces a delay in responding to streaming requests until all
   120  // resources are explicitly named in the request. This avoids the problem of a
   121  // partial request over a single stream for a subset of resources which would
   122  // require generating a fresh version for acknowledgement. ADS flag requires
   123  // snapshot consistency. For non-ADS case (and fetch), multiple partial
   124  // requests are sent across multiple streams and re-using the snapshot version
   125  // is OK.
   126  //
   127  // Logger is optional.
   128  //
   129  // The context provides a way to cancel the heartbeating routine, while the heartbeatInterval
   130  // parameter controls how often heartbeating occurs.
   131  func NewSnapshotCacheWithHeartbeating(ctx context.Context, ads bool, hash NodeHash, logger log.Logger, heartbeatInterval time.Duration) SnapshotCache {
   132  	cache := newSnapshotCache(ads, hash, logger)
   133  	go func() {
   134  		t := time.NewTicker(heartbeatInterval)
   135  
   136  		for {
   137  			select {
   138  			case <-t.C:
   139  				cache.mu.Lock()
   140  				for node := range cache.status {
   141  					// TODO(snowp): Omit heartbeats if a real response has been sent recently.
   142  					cache.sendHeartbeats(ctx, node)
   143  				}
   144  				cache.mu.Unlock()
   145  			case <-ctx.Done():
   146  				return
   147  			}
   148  		}
   149  	}()
   150  	return cache
   151  }
   152  
   153  func (cache *snapshotCache) sendHeartbeats(ctx context.Context, node string) {
   154  	snapshot := cache.snapshots[node]
   155  	if info, ok := cache.status[node]; ok {
   156  		info.mu.Lock()
   157  		for id, watch := range info.watches {
   158  			// Respond with the current version regardless of whether the version has changed.
   159  			version := snapshot.GetVersion(watch.Request.TypeUrl)
   160  			resources := snapshot.GetResourcesAndTTL(watch.Request.TypeUrl)
   161  
   162  			// TODO(snowp): Construct this once per type instead of once per watch.
   163  			resourcesWithTTL := map[string]types.ResourceWithTTL{}
   164  			for k, v := range resources {
   165  				if v.TTL != nil {
   166  					resourcesWithTTL[k] = v
   167  				}
   168  			}
   169  
   170  			if len(resourcesWithTTL) == 0 {
   171  				continue
   172  			}
   173  			if cache.log != nil {
   174  				cache.log.Debugf("respond open watch %d%v with heartbeat for version %q", id, watch.Request.ResourceNames, version)
   175  			}
   176  
   177  			_ = cache.respond(ctx, watch.Request, watch.Response, resourcesWithTTL, version, true)
   178  
   179  			// The watch must be deleted and we must rely on the client to ack this response to create a new watch.
   180  			delete(info.watches, id)
   181  		}
   182  		info.mu.Unlock()
   183  	}
   184  }
   185  
   186  // SetSnapshot updates a snapshot for a node.
   187  func (cache *snapshotCache) SetSnapshot(ctx context.Context, node string, snapshot Snapshot) error {
   188  	cache.mu.Lock()
   189  	defer cache.mu.Unlock()
   190  
   191  	// update the existing entry
   192  	cache.snapshots[node] = snapshot
   193  
   194  	// trigger existing watches for which version changed
   195  	if info, ok := cache.status[node]; ok {
   196  		info.mu.Lock()
   197  		defer info.mu.Unlock()
   198  		for id, watch := range info.watches {
   199  			version := snapshot.GetVersion(watch.Request.TypeUrl)
   200  			if version != watch.Request.VersionInfo {
   201  				if cache.log != nil {
   202  					cache.log.Debugf("respond open watch %d%v with new version %q", id, watch.Request.ResourceNames, version)
   203  				}
   204  				resources := snapshot.GetResourcesAndTTL(watch.Request.TypeUrl)
   205  				err := cache.respond(ctx, watch.Request, watch.Response, resources, version, false)
   206  				if err != nil {
   207  					return err
   208  				}
   209  
   210  				// discard the watch
   211  				delete(info.watches, id)
   212  			}
   213  		}
   214  
   215  		// We only calculate version hashes when using delta. We don't
   216  		// want to do this when using SOTW so we can avoid unnecessary
   217  		// computational cost if not using delta.
   218  		if len(info.deltaWatches) > 0 {
   219  			err := snapshot.ConstructVersionMap()
   220  			if err != nil {
   221  				return err
   222  			}
   223  		}
   224  
   225  		// process our delta watches
   226  		for id, watch := range info.deltaWatches {
   227  			res, err := cache.respondDelta(
   228  				ctx,
   229  				&snapshot,
   230  				watch.Request,
   231  				watch.Response,
   232  				watch.StreamState,
   233  			)
   234  			if err != nil {
   235  				return err
   236  			}
   237  			// If we detect a nil response here, that means there has been no state change
   238  			// so we don't want to respond or remove any existing resource watches
   239  			if res != nil {
   240  				delete(info.deltaWatches, id)
   241  			}
   242  		}
   243  	}
   244  
   245  	return nil
   246  }
   247  
   248  // GetSnapshot gets the snapshot for a node, and returns an error if not found.
   249  func (cache *snapshotCache) GetSnapshot(node string) (Snapshot, error) {
   250  	cache.mu.RLock()
   251  	defer cache.mu.RUnlock()
   252  
   253  	snap, ok := cache.snapshots[node]
   254  	if !ok {
   255  		return Snapshot{}, fmt.Errorf("no snapshot found for node %s", node)
   256  	}
   257  	return snap, nil
   258  }
   259  
   260  // ClearSnapshot clears snapshot and info for a node.
   261  func (cache *snapshotCache) ClearSnapshot(node string) {
   262  	cache.mu.Lock()
   263  	defer cache.mu.Unlock()
   264  
   265  	delete(cache.snapshots, node)
   266  	delete(cache.status, node)
   267  }
   268  
   269  // nameSet creates a map from a string slice to value true.
   270  func nameSet(names []string) map[string]bool {
   271  	set := make(map[string]bool)
   272  	for _, name := range names {
   273  		set[name] = true
   274  	}
   275  	return set
   276  }
   277  
   278  // superset checks that all resources are listed in the names set.
   279  func superset(names map[string]bool, resources map[string]types.ResourceWithTTL) error {
   280  	for resourceName := range resources {
   281  		if _, exists := names[resourceName]; !exists {
   282  			return fmt.Errorf("%q not listed", resourceName)
   283  		}
   284  	}
   285  	return nil
   286  }
   287  
   288  // CreateWatch returns a watch for an xDS request.
   289  func (cache *snapshotCache) CreateWatch(request *Request, value chan Response) func() {
   290  	nodeID := cache.hash.ID(request.Node)
   291  
   292  	cache.mu.Lock()
   293  	defer cache.mu.Unlock()
   294  
   295  	info, ok := cache.status[nodeID]
   296  	if !ok {
   297  		info = newStatusInfo(request.Node)
   298  		cache.status[nodeID] = info
   299  	}
   300  
   301  	// update last watch request time
   302  	info.mu.Lock()
   303  	info.lastWatchRequestTime = time.Now()
   304  	info.mu.Unlock()
   305  
   306  	snapshot, exists := cache.snapshots[nodeID]
   307  	version := snapshot.GetVersion(request.TypeUrl)
   308  
   309  	// if the requested version is up-to-date or missing a response, leave an open watch
   310  	if !exists || request.VersionInfo == version {
   311  		watchID := cache.nextWatchID()
   312  		if cache.log != nil {
   313  			cache.log.Debugf("open watch %d for %s%v from nodeID %q, version %q", watchID,
   314  				request.TypeUrl, request.ResourceNames, nodeID, request.VersionInfo)
   315  		}
   316  		info.mu.Lock()
   317  		info.watches[watchID] = ResponseWatch{Request: request, Response: value}
   318  		info.mu.Unlock()
   319  		return cache.cancelWatch(nodeID, watchID)
   320  	}
   321  
   322  	// otherwise, the watch may be responded immediately
   323  	resources := snapshot.GetResourcesAndTTL(request.TypeUrl)
   324  	_ = cache.respond(context.Background(), request, value, resources, version, false)
   325  
   326  	return nil
   327  }
   328  
// nextWatchID atomically increments watchCount and returns the new value,
// which CreateWatch uses as the ID of a newly opened watch.
func (cache *snapshotCache) nextWatchID() int64 {
	return atomic.AddInt64(&cache.watchCount, 1)
}
   332  
   333  // cancellation function for cleaning stale watches
   334  func (cache *snapshotCache) cancelWatch(nodeID string, watchID int64) func() {
   335  	return func() {
   336  		// uses the cache mutex
   337  		cache.mu.Lock()
   338  		defer cache.mu.Unlock()
   339  		if info, ok := cache.status[nodeID]; ok {
   340  			info.mu.Lock()
   341  			delete(info.watches, watchID)
   342  			info.mu.Unlock()
   343  		}
   344  	}
   345  }
   346  
   347  // Respond to a watch with the snapshot value. The value channel should have capacity not to block.
   348  // TODO(kuat) do not respond always, see issue https://github.com/envoyproxy/go-control-plane/issues/46
   349  func (cache *snapshotCache) respond(ctx context.Context, request *Request, value chan Response, resources map[string]types.ResourceWithTTL, version string, heartbeat bool) error {
   350  	// for ADS, the request names must match the snapshot names
   351  	// if they do not, then the watch is never responded, and it is expected that envoy makes another request
   352  	if len(request.ResourceNames) != 0 && cache.ads {
   353  		if err := superset(nameSet(request.ResourceNames), resources); err != nil {
   354  			if cache.log != nil {
   355  				cache.log.Warnf("ADS mode: not responding to request: %v", err)
   356  			}
   357  			return nil
   358  		}
   359  	}
   360  	if cache.log != nil {
   361  		cache.log.Debugf("respond %s%v version %q with version %q",
   362  			request.TypeUrl, request.ResourceNames, request.VersionInfo, version)
   363  	}
   364  
   365  	select {
   366  	case value <- createResponse(ctx, request, resources, version, heartbeat):
   367  		return nil
   368  	case <-ctx.Done():
   369  		return context.Canceled
   370  	}
   371  }
   372  
   373  func createResponse(ctx context.Context, request *Request, resources map[string]types.ResourceWithTTL, version string, heartbeat bool) Response {
   374  	filtered := make([]types.ResourceWithTTL, 0, len(resources))
   375  
   376  	// Reply only with the requested resources. Envoy may ask each resource
   377  	// individually in a separate stream. It is ok to reply with the same version
   378  	// on separate streams since requests do not share their response versions.
   379  	if len(request.ResourceNames) != 0 {
   380  		set := nameSet(request.ResourceNames)
   381  		for name, resource := range resources {
   382  			if set[name] {
   383  				filtered = append(filtered, resource)
   384  			}
   385  		}
   386  	} else {
   387  		for _, resource := range resources {
   388  			filtered = append(filtered, resource)
   389  		}
   390  	}
   391  
   392  	return &RawResponse{
   393  		Request:   request,
   394  		Version:   version,
   395  		Resources: filtered,
   396  		Heartbeat: heartbeat,
   397  		Ctx:       ctx,
   398  	}
   399  }
   400  
   401  // CreateDeltaWatch returns a watch for a delta xDS request which implements the Simple SnapshotCache.
   402  func (cache *snapshotCache) CreateDeltaWatch(request *DeltaRequest, state stream.StreamState, value chan DeltaResponse) func() {
   403  	nodeID := cache.hash.ID(request.Node)
   404  	t := request.GetTypeUrl()
   405  
   406  	cache.mu.Lock()
   407  	defer cache.mu.Unlock()
   408  
   409  	info, ok := cache.status[nodeID]
   410  	if !ok {
   411  		info = newStatusInfo(request.Node)
   412  		cache.status[nodeID] = info
   413  	}
   414  
   415  	// update last watch request time
   416  	info.SetLastDeltaWatchRequestTime(time.Now())
   417  
   418  	// find the current cache snapshot for the provided node
   419  	snapshot, exists := cache.snapshots[nodeID]
   420  
   421  	// There are three different cases that leads to a delayed watch trigger:
   422  	// - no snapshot exists for the requested nodeID
   423  	// - a snapshot exists, but we failed to initialize its version map
   424  	// - we attempted to issue a response, but the caller is already up to date
   425  	delayedResponse := !exists
   426  	if exists {
   427  		err := snapshot.ConstructVersionMap()
   428  		if err != nil {
   429  			if cache.log != nil {
   430  				cache.log.Errorf("failed to compute version for snapshot resources inline, waiting for next snapshot update")
   431  			}
   432  		}
   433  		response, err := cache.respondDelta(context.Background(), &snapshot, request, value, state)
   434  		if err != nil {
   435  			if cache.log != nil {
   436  				cache.log.Errorf("failed to respond with delta response, waiting for next snapshot update: %s", err)
   437  			}
   438  		}
   439  
   440  		delayedResponse = response == nil
   441  	}
   442  
   443  	if delayedResponse {
   444  		watchID := cache.nextDeltaWatchID()
   445  		if cache.log != nil {
   446  			cache.log.Infof("open delta watch ID:%d for %s Resources:%v from nodeID: %q, system version %q", watchID,
   447  				t, state.GetResourceVersions(), nodeID, snapshot.GetVersion(t))
   448  		}
   449  
   450  		info.SetDeltaResponseWatch(watchID, DeltaResponseWatch{Request: request, Response: value, StreamState: state})
   451  
   452  		return cache.cancelDeltaWatch(nodeID, watchID)
   453  	}
   454  
   455  	return nil
   456  }
   457  
   458  // Respond to a delta watch with the provided snapshot value. If the response is nil, there has been no state change.
   459  func (cache *snapshotCache) respondDelta(ctx context.Context, snapshot *Snapshot, request *DeltaRequest, value chan DeltaResponse, state stream.StreamState) (*RawDeltaResponse, error) {
   460  	resp := createDeltaResponse(ctx, request, state, resourceContainer{
   461  		resourceMap:   snapshot.GetResources(request.TypeUrl),
   462  		versionMap:    snapshot.GetVersionMap(request.TypeUrl),
   463  		systemVersion: snapshot.GetVersion(request.TypeUrl),
   464  	})
   465  
   466  	// Only send a response if there were changes
   467  	// We want to respond immediately for the first wildcard request in a stream, even if the response is empty
   468  	// otherwise, envoy won't complete initialization
   469  	if len(resp.Resources) > 0 || len(resp.RemovedResources) > 0 || (state.IsWildcard() && state.IsFirst()) {
   470  		if cache.log != nil {
   471  			cache.log.Debugf("node: %s, sending delta response with resources: %v removed resources %v wildcard: %t",
   472  				request.GetNode().GetId(), resp.Resources, resp.RemovedResources, state.IsWildcard())
   473  		}
   474  		select {
   475  		case value <- resp:
   476  			return resp, nil
   477  		case <-ctx.Done():
   478  			return resp, context.Canceled
   479  		}
   480  	}
   481  	return nil, nil
   482  }
   483  
// nextDeltaWatchID atomically increments deltaWatchCount and returns the new
// value, which CreateDeltaWatch uses as the ID of a newly opened delta watch.
func (cache *snapshotCache) nextDeltaWatchID() int64 {
	return atomic.AddInt64(&cache.deltaWatchCount, 1)
}
   487  
   488  // cancellation function for cleaning stale delta watches
   489  func (cache *snapshotCache) cancelDeltaWatch(nodeID string, watchID int64) func() {
   490  	return func() {
   491  		cache.mu.Lock()
   492  		defer cache.mu.Unlock()
   493  		if info, ok := cache.status[nodeID]; ok {
   494  			info.mu.Lock()
   495  			delete(info.deltaWatches, watchID)
   496  			info.mu.Unlock()
   497  		}
   498  	}
   499  }
   500  
   501  // Fetch implements the cache fetch function.
   502  // Fetch is called on multiple streams, so responding to individual names with the same version works.
   503  func (cache *snapshotCache) Fetch(ctx context.Context, request *Request) (Response, error) {
   504  	nodeID := cache.hash.ID(request.Node)
   505  
   506  	cache.mu.RLock()
   507  	defer cache.mu.RUnlock()
   508  
   509  	if snapshot, exists := cache.snapshots[nodeID]; exists {
   510  		// Respond only if the request version is distinct from the current snapshot state.
   511  		// It might be beneficial to hold the request since Envoy will re-attempt the refresh.
   512  		version := snapshot.GetVersion(request.TypeUrl)
   513  		if request.VersionInfo == version {
   514  			if cache.log != nil {
   515  				cache.log.Warnf("skip fetch: version up to date")
   516  			}
   517  			return nil, &types.SkipFetchError{}
   518  		}
   519  
   520  		resources := snapshot.GetResourcesAndTTL(request.TypeUrl)
   521  		out := createResponse(ctx, request, resources, version, false)
   522  		return out, nil
   523  	}
   524  
   525  	return nil, fmt.Errorf("missing snapshot for %q", nodeID)
   526  }
   527  
   528  // GetStatusInfo retrieves the status info for the node.
   529  func (cache *snapshotCache) GetStatusInfo(node string) StatusInfo {
   530  	cache.mu.RLock()
   531  	defer cache.mu.RUnlock()
   532  
   533  	info, exists := cache.status[node]
   534  	if !exists {
   535  		if cache.log != nil {
   536  			cache.log.Warnf("node does not exist")
   537  		}
   538  		return nil
   539  	}
   540  
   541  	return info
   542  }
   543  
   544  // GetStatusKeys retrieves all node IDs in the status map.
   545  func (cache *snapshotCache) GetStatusKeys() []string {
   546  	cache.mu.RLock()
   547  	defer cache.mu.RUnlock()
   548  
   549  	out := make([]string, 0, len(cache.status))
   550  	for id := range cache.status {
   551  		out = append(out, id)
   552  	}
   553  
   554  	return out
   555  }