github.com/cilium/cilium@v1.16.2/pkg/kvstore/store/syncstore.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package store
     5  
     6  import (
     7  	"bytes"
     8  	"context"
     9  	"fmt"
    10  	"path"
    11  	"strings"
    12  	"sync"
    13  	"sync/atomic"
    14  
    15  	"github.com/prometheus/client_golang/prometheus"
    16  	"github.com/sirupsen/logrus"
    17  	"k8s.io/client-go/util/workqueue"
    18  
    19  	"github.com/cilium/cilium/pkg/kvstore"
    20  	"github.com/cilium/cilium/pkg/lock"
    21  	"github.com/cilium/cilium/pkg/logging/logfields"
    22  	"github.com/cilium/cilium/pkg/metrics"
    23  	"github.com/cilium/cilium/pkg/time"
    24  )
    25  
// SyncStore abstracts the operations that allow synchronizing key/value pairs
// into a kvstore.
//
// Implementations may perform the kvstore operations asynchronously (the
// workqueue-based implementation in this file does), in which case the errors
// returned by UpsertKey and DeleteKey only reflect local failures, not the
// outcome of the kvstore operation itself.
type SyncStore interface {
	// Run starts the SyncStore logic, blocking until the context is closed.
	Run(ctx context.Context)

	// UpsertKey upserts a key/value pair into the kvstore. The kvstore key is
	// derived from the key name, and the value from its marshaled form.
	UpsertKey(ctx context.Context, key Key) error

	// DeleteKey removes a key from the kvstore. Only the name of the key is
	// required to identify the entry to be removed.
	DeleteKey(ctx context.Context, key NamedKey) error

	// Synced triggers the insertion of the "synced" key associated with this
	// store into the kvstore once all upsertions already issued have completed
	// successfully, eventually executing all specified callbacks (if any).
	// Only the first invocation takes effect.
	Synced(ctx context.Context, callbacks ...func(ctx context.Context)) error
}
    44  
// SyncStoreBackend represents the subset of kvstore.BackendOperations leveraged
// by SyncStore implementations.
type SyncStoreBackend interface {
	// Update creates or updates a key. The lease parameter selects whether
	// the lease is attached to the key being written.
	Update(ctx context.Context, key string, value []byte, lease bool) error
	// Delete deletes a key.
	Delete(ctx context.Context, key string) error

	// RegisterLeaseExpiredObserver registers a function which is executed when
	// the lease associated with a key having the given prefix is detected as expired.
	// NOTE(review): the workqueue-based store passes a nil function on shutdown,
	// presumably to unregister the observer — confirm against the backend.
	RegisterLeaseExpiredObserver(prefix string, fn func(key string))
}
    57  
// wqSyncStore implements the SyncStore interface leveraging a workqueue to
// coalesce update/delete requests and handle retries in case of errors.
//
// The workqueue carries two kinds of items: key names (strings), which are
// resolved against state to decide between an upsert and a delete, and
// syncCanary values, which trigger the insertion of the "synced" key.
type wqSyncStore struct {
	backend SyncStoreBackend // kvstore operations used to write and delete keys
	prefix  string           // kvstore prefix prepended to every key name
	source  string           // cluster name; used as metrics label and in the synced key path

	workers   uint // number of goroutines spawned by Run() to drain the workqueue
	withLease bool // whether the lease is attached to upserted keys

	limiter   workqueue.RateLimiter // rate limiter backing the workqueue
	workqueue workqueue.RateLimitingInterface
	state     lock.Map[string, []byte] // map[NamedKey.GetKeyName()]Key.Marshal()

	synced          atomic.Bool                // Synced() has been triggered
	pendingSync     lock.Map[string, struct{}] // the set of keys still to sync
	syncedKey       string                     // last element of the synced key path (defaults to prefix)
	syncedCallbacks []func(context.Context)    // callbacks registered through Synced()

	log          *logrus.Entry
	queuedMetric prometheus.Gauge   // current length of the workqueue
	errorsMetric prometheus.Counter // number of failed (and requeued) operations
	syncedMetric prometheus.Gauge   // whether the synced key has been written
}
    82  
    83  type syncCanary struct{ skipCallbacks bool }
    84  
// WSSOpt is a functional option customizing a workqueue-based SyncStore at
// construction time. Options are applied in order, after the defaults are set.
type WSSOpt func(*wqSyncStore)
    86  
    87  // WSSWithRateLimiter sets the rate limiting algorithm to be used when requeueing failed events.
    88  func WSSWithRateLimiter(limiter workqueue.RateLimiter) WSSOpt {
    89  	return func(wss *wqSyncStore) {
    90  		wss.limiter = limiter
    91  	}
    92  }
    93  
    94  // WSSWithWorkers configures the number of workers spawned by Run() to handle update/delete operations.
    95  func WSSWithWorkers(workers uint) WSSOpt {
    96  	return func(wss *wqSyncStore) {
    97  		wss.workers = workers
    98  	}
    99  }
   100  
   101  // WSSWithoutLease disables attaching the lease to upserted keys.
   102  func WSSWithoutLease() WSSOpt {
   103  	return func(wss *wqSyncStore) {
   104  		wss.withLease = false
   105  	}
   106  }
   107  
   108  // WSSWithSyncedKeyOverride overrides the "synced" key inserted into the kvstore
   109  // when initial synchronization completed (by default it corresponds to the prefix).
   110  func WSSWithSyncedKeyOverride(key string) WSSOpt {
   111  	return func(wss *wqSyncStore) {
   112  		wss.syncedKey = key
   113  	}
   114  }
   115  
   116  // NewWorkqueueSyncStore returns a SyncStore instance which leverages a workqueue
   117  // to coalescence update/delete requests and handle retries in case of errors.
   118  func newWorkqueueSyncStore(clusterName string, backend SyncStoreBackend, prefix string, m *Metrics, opts ...WSSOpt) SyncStore {
   119  	wss := &wqSyncStore{
   120  		backend: backend,
   121  		prefix:  prefix,
   122  		source:  clusterName,
   123  
   124  		workers:   1,
   125  		withLease: true,
   126  		limiter:   workqueue.DefaultControllerRateLimiter(),
   127  		syncedKey: prefix,
   128  
   129  		log: log.WithField(logfields.Prefix, prefix),
   130  	}
   131  
   132  	for _, opt := range opts {
   133  		opt(wss)
   134  	}
   135  
   136  	wss.log = wss.log.WithField(logfields.ClusterName, wss.source)
   137  	wss.workqueue = workqueue.NewRateLimitingQueue(wss.limiter)
   138  	wss.queuedMetric = m.KVStoreSyncQueueSize.WithLabelValues(kvstore.GetScopeFromKey(prefix), wss.source)
   139  	wss.errorsMetric = m.KVStoreSyncErrors.WithLabelValues(kvstore.GetScopeFromKey(prefix), wss.source)
   140  	wss.syncedMetric = m.KVStoreInitialSyncCompleted.WithLabelValues(kvstore.GetScopeFromKey(prefix), wss.source, "write")
   141  	return wss
   142  }
   143  
   144  // Run starts the SyncStore logic, blocking until the context is closed.
   145  func (wss *wqSyncStore) Run(ctx context.Context) {
   146  	var wg sync.WaitGroup
   147  
   148  	wss.syncedMetric.Set(metrics.BoolToFloat64(false))
   149  	defer wss.syncedMetric.Set(metrics.BoolToFloat64(false))
   150  
   151  	wss.backend.RegisterLeaseExpiredObserver(wss.prefix, wss.handleExpiredLease)
   152  	wss.backend.RegisterLeaseExpiredObserver(wss.getSyncedKey(), wss.handleExpiredLease)
   153  
   154  	wss.log.WithField(logfields.Workers, wss.workers).Info("Starting workqueue-based sync store")
   155  	wg.Add(int(wss.workers))
   156  	for i := uint(0); i < wss.workers; i++ {
   157  		go func() {
   158  			defer wg.Done()
   159  			for wss.processNextItem(ctx) {
   160  			}
   161  		}()
   162  	}
   163  
   164  	<-ctx.Done()
   165  
   166  	wss.backend.RegisterLeaseExpiredObserver(wss.prefix, nil)
   167  	wss.backend.RegisterLeaseExpiredObserver(wss.getSyncedKey(), nil)
   168  
   169  	wss.log.Info("Shutting down workqueue-based sync store")
   170  	wss.workqueue.ShutDown()
   171  	wg.Wait()
   172  }
   173  
   174  // UpsertKey registers the key for asynchronous upsertion in the kvstore, if the
   175  // corresponding value has changed. It returns an error in case it is impossible
   176  // to marshal the value, while kvstore failures are automatically handled through
   177  // a retry mechanism.
   178  func (wss *wqSyncStore) UpsertKey(_ context.Context, k Key) error {
   179  	key := k.GetKeyName()
   180  	value, err := k.Marshal()
   181  	if err != nil {
   182  		return fmt.Errorf("failed marshaling key %q: %w", k, err)
   183  	}
   184  
   185  	prevValue, loaded := wss.state.Swap(key, value)
   186  	if loaded && bytes.Equal(prevValue, value) {
   187  		wss.log.WithField(logfields.Key, k).Debug("ignoring upsert request for already up-to-date key")
   188  	} else {
   189  		if !wss.synced.Load() {
   190  			wss.pendingSync.Store(key, struct{}{})
   191  		}
   192  
   193  		wss.workqueue.Add(key)
   194  		wss.queuedMetric.Set(float64(wss.workqueue.Len()))
   195  	}
   196  
   197  	return nil
   198  }
   199  
   200  // DeleteKey registers the key for asynchronous deletion from the kvstore, if it
   201  // was known to be present. It never returns an error, because kvstore failures
   202  // are automatically handled through a retry mechanism.
   203  func (wss *wqSyncStore) DeleteKey(_ context.Context, k NamedKey) error {
   204  	key := k.GetKeyName()
   205  	if _, loaded := wss.state.LoadAndDelete(key); loaded {
   206  		wss.workqueue.Add(key)
   207  		wss.queuedMetric.Set(float64(wss.workqueue.Len()))
   208  	} else {
   209  		wss.log.WithField(logfields.Key, key).Debug("ignoring delete request for non-existing key")
   210  	}
   211  
   212  	return nil
   213  }
   214  
   215  func (wss *wqSyncStore) Synced(_ context.Context, callbacks ...func(ctx context.Context)) error {
   216  	if synced := wss.synced.Swap(true); !synced {
   217  		wss.syncedCallbacks = callbacks
   218  		wss.workqueue.Add(syncCanary{})
   219  	}
   220  	return nil
   221  }
   222  
   223  func (wss *wqSyncStore) processNextItem(ctx context.Context) bool {
   224  	// Retrieve the next key to process from the workqueue.
   225  	key, shutdown := wss.workqueue.Get()
   226  	wss.queuedMetric.Set(float64(wss.workqueue.Len()))
   227  	if shutdown {
   228  		return false
   229  	}
   230  
   231  	// We call Done here so the workqueue knows we have finished
   232  	// processing this item.
   233  	defer func() {
   234  		wss.workqueue.Done(key)
   235  		// This ensures that the metric is correctly updated in case of requeues.
   236  		wss.queuedMetric.Set(float64(wss.workqueue.Len()))
   237  	}()
   238  
   239  	// Run the handler, passing it the key to be processed as parameter.
   240  	if err := wss.handle(ctx, key); err != nil {
   241  		// Put the item back on the workqueue to handle any transient errors.
   242  		wss.errorsMetric.Inc()
   243  		wss.workqueue.AddRateLimited(key)
   244  		return true
   245  	}
   246  
   247  	// Since no error occurred, forget this item so it does not get queued again
   248  	// until another change happens.
   249  	wss.workqueue.Forget(key)
   250  	if skey, ok := key.(string); ok {
   251  		wss.pendingSync.Delete(skey)
   252  	}
   253  	return true
   254  }
   255  
   256  func (wss *wqSyncStore) handle(ctx context.Context, key interface{}) error {
   257  	if value, ok := key.(syncCanary); ok {
   258  		return wss.handleSync(ctx, value.skipCallbacks)
   259  	}
   260  
   261  	if value, ok := wss.state.Load(key.(string)); ok {
   262  		return wss.handleUpsert(ctx, key.(string), value)
   263  	}
   264  
   265  	return wss.handleDelete(ctx, key.(string))
   266  }
   267  
   268  func (wss *wqSyncStore) handleUpsert(ctx context.Context, key string, value []byte) error {
   269  	scopedLog := wss.log.WithField(logfields.Key, key)
   270  
   271  	err := wss.backend.Update(ctx, wss.keyPath(key), value, wss.withLease)
   272  	if err != nil {
   273  		scopedLog.WithError(err).Warning("Failed upserting key in kvstore. Retrying...")
   274  		return err
   275  	}
   276  
   277  	scopedLog.Debug("Upserted key in kvstore")
   278  	return nil
   279  }
   280  
   281  func (wss *wqSyncStore) handleDelete(ctx context.Context, key string) error {
   282  	scopedLog := wss.log.WithField(logfields.Key, key)
   283  
   284  	if err := wss.backend.Delete(ctx, wss.keyPath(key)); err != nil {
   285  		scopedLog.WithError(err).Warning("Failed deleting key from kvstore. Retrying...")
   286  		return err
   287  	}
   288  
   289  	scopedLog.Debug("Deleted key from kvstore")
   290  	return nil
   291  }
   292  
   293  func (wss *wqSyncStore) handleSync(ctx context.Context, skipCallbacks bool) error {
   294  	// This could be replaced by wss.toSync.Len() == 0 if it only existed...
   295  	syncCompleted := true
   296  	wss.pendingSync.Range(func(string, struct{}) bool {
   297  		syncCompleted = false
   298  		return false
   299  	})
   300  
   301  	if !syncCompleted {
   302  		return fmt.Errorf("there are still keys to be synchronized")
   303  	}
   304  
   305  	key := wss.getSyncedKey()
   306  	scopedLog := wss.log.WithField(logfields.Key, key)
   307  
   308  	err := wss.backend.Update(ctx, key, []byte(time.Now().Format(time.RFC3339)), wss.withLease)
   309  	if err != nil {
   310  		scopedLog.WithError(err).Warning("Failed upserting synced key in kvstore. Retrying...")
   311  		return err
   312  	}
   313  
   314  	wss.log.Info("Initial synchronization from the external source completed")
   315  	wss.syncedMetric.Set(metrics.BoolToFloat64(true))
   316  
   317  	// Execute any callback that might have been registered.
   318  	if !skipCallbacks {
   319  		for _, callback := range wss.syncedCallbacks {
   320  			callback(ctx)
   321  		}
   322  	}
   323  
   324  	return nil
   325  }
   326  
   327  // handleExpiredLease gets executed when the lease attached to a given key expired,
   328  // and is responsible for enqueuing the given key to recreate it.
   329  func (wss *wqSyncStore) handleExpiredLease(key string) {
   330  	defer wss.queuedMetric.Set(float64(wss.workqueue.Len()))
   331  
   332  	if key == wss.getSyncedKey() {
   333  		// Re-enqueue the creation of the sync canary, but make sure that
   334  		// the registered callbacks are not executed a second time.
   335  		wss.workqueue.Add(syncCanary{skipCallbacks: true})
   336  		return
   337  	}
   338  
   339  	key = strings.TrimPrefix(strings.TrimPrefix(key, wss.prefix), "/")
   340  	_, ok := wss.state.Load(key)
   341  	if ok {
   342  		wss.log.WithField(logfields.Key, key).Debug("enqueuing upsert request for key as the attached lease expired")
   343  		if !wss.synced.Load() {
   344  			wss.pendingSync.Store(key, struct{}{})
   345  		}
   346  
   347  		wss.workqueue.Add(key)
   348  	}
   349  }
   350  
   351  // keyPath returns the absolute kvstore path of a key
   352  func (wss *wqSyncStore) keyPath(key string) string {
   353  	// WARNING - STABLE API: The composition of the absolute key path
   354  	// cannot be changed without breaking up and downgrades.
   355  	return path.Join(wss.prefix, key)
   356  }
   357  
   358  func (wss *wqSyncStore) getSyncedKey() string {
   359  	return path.Join(kvstore.SyncedPrefix, wss.source, wss.syncedKey)
   360  }