github.com/cilium/cilium@v1.16.2/pkg/kvstore/store/watchstore.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package store
     5  
     6  import (
     7  	"context"
     8  	"strings"
     9  	"sync/atomic"
    10  
    11  	"github.com/prometheus/client_golang/prometheus"
    12  	"github.com/sirupsen/logrus"
    13  
    14  	"github.com/cilium/cilium/pkg/kvstore"
    15  	"github.com/cilium/cilium/pkg/logging/logfields"
    16  	"github.com/cilium/cilium/pkg/metrics"
    17  	"github.com/cilium/cilium/pkg/metrics/metric"
    18  )
    19  
// WatchStore abstracts the operations allowing to synchronize key/value pairs
// from a kvstore, emitting the corresponding events to the registered Observer.
type WatchStore interface {
	// Watch starts watching the specified kvstore prefix, blocking until the context is closed.
	// Depending on the implementation, it might be executed multiple times.
	Watch(ctx context.Context, backend WatchStoreBackend, prefix string)

	// NumEntries returns the number of entries synchronized from the store.
	NumEntries() uint64

	// Synced returns whether the initial list of entries has been retrieved from
	// the kvstore, and new events are currently being watched.
	Synced() bool

	// Drain emits a deletion event for each known key. It shall be called only
	// when no watch operation is in progress.
	Drain()
}
    38  
// WatchStoreBackend represents the subset of kvstore.BackendOperations leveraged
// by WatchStore implementations.
type WatchStoreBackend interface {
	// ListAndWatch creates a new watcher for the given prefix after listing the existing keys.
	ListAndWatch(ctx context.Context, prefix string, chanSize int) *kvstore.Watcher
}
    45  
    46  type RWSOpt func(*restartableWatchStore)
    47  
// RWSWithOnSyncCallback registers a function to be executed after
// listing all keys from the kvstore for the first time. Multiple
// callback functions can be registered.
func RWSWithOnSyncCallback(callback func(ctx context.Context)) RWSOpt {
	return func(rws *restartableWatchStore) {
		rws.onSyncCallbacks = append(rws.onSyncCallbacks, callback)
	}
}
    56  
// RWSWithEntriesMetric registers a Prometheus gauge metric that is kept
// in sync with the number of entries synchronized from the kvstore.
func RWSWithEntriesMetric(gauge prometheus.Gauge) RWSOpt {
	return func(rws *restartableWatchStore) {
		rws.entriesMetric = gauge
	}
}
    64  
// rwsEntry tracks a single key synchronized from the kvstore. The stale flag
// is set on all entries when a new Watch execution starts, and cleared when
// the key is re-observed (the entry is replaced on upsert); entries still
// stale once the initial listing completes are drained as deletions.
type rwsEntry struct {
	key   Key
	stale bool
}
    69  
// restartableWatchStore implements the WatchStore interface, supporting
// multiple executions of the Watch() operation (granted that the previous one
// already terminated). This allows to transparently handle the case in which
// we had to create a new etcd connection (for instance following a failure)
// which refers to the same remote cluster.
type restartableWatchStore struct {
	// source is the name of the remote cluster the entries originate from.
	source     string
	keyCreator KeyCreator
	observer   Observer

	// watching guards against concurrently running Watch()/Drain()
	// operations; synced reports whether the initial listing completed.
	watching        atomic.Bool
	synced          atomic.Bool
	onSyncCallbacks []func(ctx context.Context)

	// Using a separate entries counter avoids the need for synchronizing the
	// access to the state map, since the only concurrent reader is represented
	// by the NumEntries() function.
	state      map[string]*rwsEntry
	numEntries atomic.Uint64

	log           *logrus.Entry
	entriesMetric prometheus.Gauge
	syncMetric    metric.Vec[metric.Gauge]
}
    94  
// newRestartableWatchStore returns a WatchStore instance which supports
// restarting the watch operation multiple times, automatically handling
// the emission of deletion events for all stale entries (if enabled). It
// shall be restarted only once the previous Watch execution terminated.
func newRestartableWatchStore(clusterName string, keyCreator KeyCreator, observer Observer, m *Metrics, opts ...RWSOpt) WatchStore {
	rws := &restartableWatchStore{
		source:     clusterName,
		keyCreator: keyCreator,
		observer:   observer,

		state: make(map[string]*rwsEntry),

		// Defaults: package-level logger, no-op gauge; both may be
		// overridden through the functional options below.
		log:           log,
		entriesMetric: metrics.NoOpGauge,
		syncMetric:    m.KVStoreInitialSyncCompleted,
	}

	for _, opt := range opts {
		opt(rws)
	}

	rws.log = rws.log.WithField(logfields.ClusterName, rws.source)
	return rws
}
   119  
// Watch starts watching the specified kvstore prefix, blocking until the context is closed.
// It might be executed multiple times, granted that the previous execution already terminated.
func (rws *restartableWatchStore) Watch(ctx context.Context, backend WatchStoreBackend, prefix string) {
	// Append a trailing "/" to the prefix, to make sure that we watch only
	// sub-elements belonging to that prefix, and not to sibling prefixes
	// (for instance in case the last part of the prefix is the cluster name,
	// and one is the substring of another).
	if !strings.HasSuffix(prefix, "/") {
		prefix = prefix + "/"
	}

	rws.log = rws.log.WithField(logfields.Prefix, prefix)
	syncedMetric := rws.syncMetric.WithLabelValues(
		kvstore.GetScopeFromKey(prefix), rws.source, "read")

	rws.log.Info("Starting restartable watch store")
	syncedMetric.Set(metrics.BoolToFloat64(false))

	// Atomically claim the running flag: starting a second Watch (or a
	// Drain) while one is still in progress is a programming error.
	if rws.watching.Swap(true) {
		rws.log.Panic("Cannot start the watch store while still running")
	}

	defer func() {
		rws.log.Info("Stopped restartable watch store")
		syncedMetric.Set(metrics.BoolToFloat64(false))
		rws.watching.Store(false)
		rws.synced.Store(false)
	}()

	// Mark all known keys as stale. Keys re-observed during the initial
	// listing get their entry replaced (hence no longer stale); the ones
	// still stale when the listing completes are drained as deletions.
	for _, entry := range rws.state {
		entry.stale = true
	}

	// The events channel is closed when the context is closed.
	watcher := backend.ListAndWatch(ctx, prefix, 0)
	for event := range watcher.Events {
		if event.Typ == kvstore.EventTypeListDone {
			rws.log.Debug("Initial synchronization completed")
			// Emit deletion events for entries that disappeared from the
			// kvstore while we were not watching.
			rws.drainKeys(true)
			syncedMetric.Set(metrics.BoolToFloat64(true))
			rws.synced.Store(true)

			for _, callback := range rws.onSyncCallbacks {
				callback(ctx)
			}

			// Clear the list of callbacks so that they don't get executed
			// a second time in case of reconnections.
			rws.onSyncCallbacks = nil

			continue
		}

		// Keys are tracked relative to the watched prefix.
		key := strings.TrimPrefix(event.Key, prefix)
		rws.log.WithFields(logrus.Fields{
			logfields.Key:   key,
			logfields.Event: event.Typ,
		}).Debug("Received event from kvstore")

		switch event.Typ {
		case kvstore.EventTypeCreate, kvstore.EventTypeModify:
			rws.handleUpsert(key, event.Value)
		case kvstore.EventTypeDelete:
			rws.handleDelete(key)
		}
	}
}
   188  
   189  // NumEntries returns the number of entries synchronized from the store.
   190  func (rws *restartableWatchStore) NumEntries() uint64 {
   191  	return rws.numEntries.Load()
   192  }
   193  
   194  // Synced returns whether the initial list of entries has been retrieved from
   195  // the kvstore, and new events are currently being watched.
   196  func (rws *restartableWatchStore) Synced() bool {
   197  	return rws.synced.Load()
   198  }
   199  
   200  // Drain emits a deletion event for each known key. It shall be called only
   201  // when no watch operation is in progress.
   202  func (rws *restartableWatchStore) Drain() {
   203  	if rws.watching.Swap(true) {
   204  		rws.log.Panic("Cannot drain the watch store while still running")
   205  	}
   206  	defer rws.watching.Store(false)
   207  
   208  	rws.log.Info("Draining restartable watch store")
   209  	rws.drainKeys(false)
   210  	rws.log.Info("Drained restartable watch store")
   211  }
   212  
   213  // drainKeys emits synthetic deletion events:
   214  // * staleOnly == true: for all keys marked as stale;
   215  // * staleOnly == false: for all known keys;
   216  func (rws *restartableWatchStore) drainKeys(staleOnly bool) {
   217  	for key, entry := range rws.state {
   218  		if !staleOnly || entry.stale {
   219  			rws.log.WithField(logfields.Key, key).Debug("Emitting deletion event for stale key")
   220  			rws.handleDelete(key)
   221  		}
   222  	}
   223  }
   224  
   225  func (rws *restartableWatchStore) handleUpsert(key string, value []byte) {
   226  	entry := &rwsEntry{key: rws.keyCreator()}
   227  	if err := entry.key.Unmarshal(key, value); err != nil {
   228  		rws.log.WithFields(logrus.Fields{
   229  			logfields.Key:   key,
   230  			logfields.Value: string(value),
   231  		}).WithError(err).Warning("Unable to unmarshal value")
   232  		return
   233  	}
   234  
   235  	rws.state[key] = entry
   236  	rws.numEntries.Store(uint64(len(rws.state)))
   237  	rws.entriesMetric.Set(float64(len(rws.state)))
   238  	rws.observer.OnUpdate(entry.key)
   239  }
   240  
   241  func (rws *restartableWatchStore) handleDelete(key string) {
   242  	entry, ok := rws.state[key]
   243  	if !ok {
   244  		rws.log.WithField(logfields.Key, key).Warning("Received deletion event for unknown key")
   245  		return
   246  	}
   247  
   248  	delete(rws.state, key)
   249  	rws.numEntries.Store(uint64(len(rws.state)))
   250  	rws.entriesMetric.Set(float64(len(rws.state)))
   251  	rws.observer.OnDelete(entry.key)
   252  }