github.com/cilium/cilium@v1.16.2/pkg/kvstore/store/watchstore.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package store 5 6 import ( 7 "context" 8 "strings" 9 "sync/atomic" 10 11 "github.com/prometheus/client_golang/prometheus" 12 "github.com/sirupsen/logrus" 13 14 "github.com/cilium/cilium/pkg/kvstore" 15 "github.com/cilium/cilium/pkg/logging/logfields" 16 "github.com/cilium/cilium/pkg/metrics" 17 "github.com/cilium/cilium/pkg/metrics/metric" 18 ) 19 20 // WatchStore abstracts the operations allowing to synchronize key/value pairs 21 // from a kvstore, emitting the corresponding events. 22 type WatchStore interface { 23 // Watch starts watching the specified kvstore prefix, blocking until the context is closed. 24 // Depending on the implementation, it might be executed multiple times. 25 Watch(ctx context.Context, backend WatchStoreBackend, prefix string) 26 27 // NumEntries returns the number of entries synchronized from the store. 28 NumEntries() uint64 29 30 // Synced returns whether the initial list of entries has been retrieved from 31 // the kvstore, and new events are currently being watched. 32 Synced() bool 33 34 // Drain emits a deletion event for each known key. It shall be called only 35 // when no watch operation is in progress. 36 Drain() 37 } 38 39 // WatchStoreBackend represents the subset of kvstore.BackendOperations leveraged 40 // by WatchStore implementations. 41 type WatchStoreBackend interface { 42 // ListAndWatch creates a new watcher for the given prefix after listing the existing keys. 43 ListAndWatch(ctx context.Context, prefix string, chanSize int) *kvstore.Watcher 44 } 45 46 type RWSOpt func(*restartableWatchStore) 47 48 // WSWithOnSyncCallback registers a function to be executed after 49 // listing all keys from the kvstore for the first time. Multiple 50 // callback functions can be registered. 51 func RWSWithOnSyncCallback(callback func(ctx context.Context)) RWSOpt { 52 return func(rws *restartableWatchStore) { 53 rws.onSyncCallbacks = append(rws.onSyncCallbacks, callback) 54 } 55 } 56 57 // WSWithEntriesGauge registers a Prometheus gauge metric that is kept 58 // in sync with the number of entries synchronized from the kvstore. 59 func RWSWithEntriesMetric(gauge prometheus.Gauge) RWSOpt { 60 return func(rws *restartableWatchStore) { 61 rws.entriesMetric = gauge 62 } 63 } 64 65 type rwsEntry struct { 66 key Key 67 stale bool 68 } 69 70 // restartableWatchStore implements the WatchStore interface, supporting 71 // multiple executions of the Watch() operation (granted that the previous one 72 // already terminated). This allows to transparently handle the case in which 73 // we had to create a new etcd connection (for instance following a failure) 74 // which refers to the same remote cluster. 75 type restartableWatchStore struct { 76 source string 77 keyCreator KeyCreator 78 observer Observer 79 80 watching atomic.Bool 81 synced atomic.Bool 82 onSyncCallbacks []func(ctx context.Context) 83 84 // Using a separate entries counter avoids the need for synchronizing the 85 // access to the state map, since the only concurrent reader is represented 86 // by the NumEntries() function. 87 state map[string]*rwsEntry 88 numEntries atomic.Uint64 89 90 log *logrus.Entry 91 entriesMetric prometheus.Gauge 92 syncMetric metric.Vec[metric.Gauge] 93 } 94 95 // NewRestartableWatchStore returns a WatchStore instance which supports 96 // restarting the watch operation multiple times, automatically handling 97 // the emission of deletion events for all stale entries (if enabled). It 98 // shall be restarted only once the previous Watch execution terminated. 99 func newRestartableWatchStore(clusterName string, keyCreator KeyCreator, observer Observer, m *Metrics, opts ...RWSOpt) WatchStore { 100 rws := &restartableWatchStore{ 101 source: clusterName, 102 keyCreator: keyCreator, 103 observer: observer, 104 105 state: make(map[string]*rwsEntry), 106 107 log: log, 108 entriesMetric: metrics.NoOpGauge, 109 syncMetric: m.KVStoreInitialSyncCompleted, 110 } 111 112 for _, opt := range opts { 113 opt(rws) 114 } 115 116 rws.log = rws.log.WithField(logfields.ClusterName, rws.source) 117 return rws 118 } 119 120 // Watch starts watching the specified kvstore prefix, blocking until the context is closed. 121 // It might be executed multiple times, granted that the previous execution already terminated. 122 func (rws *restartableWatchStore) Watch(ctx context.Context, backend WatchStoreBackend, prefix string) { 123 // Append a trailing "/" to the prefix, to make sure that we watch only 124 // sub-elements belonging to that prefix, and not to sibling prefixes 125 // (for instance in case the last part of the prefix is the cluster name, 126 // and one is the substring of another). 127 if !strings.HasSuffix(prefix, "/") { 128 prefix = prefix + "/" 129 } 130 131 rws.log = rws.log.WithField(logfields.Prefix, prefix) 132 syncedMetric := rws.syncMetric.WithLabelValues( 133 kvstore.GetScopeFromKey(prefix), rws.source, "read") 134 135 rws.log.Info("Starting restartable watch store") 136 syncedMetric.Set(metrics.BoolToFloat64(false)) 137 138 if rws.watching.Swap(true) { 139 rws.log.Panic("Cannot start the watch store while still running") 140 } 141 142 defer func() { 143 rws.log.Info("Stopped restartable watch store") 144 syncedMetric.Set(metrics.BoolToFloat64(false)) 145 rws.watching.Store(false) 146 rws.synced.Store(false) 147 }() 148 149 // Mark all known keys as stale. 150 for _, entry := range rws.state { 151 entry.stale = true 152 } 153 154 // The events channel is closed when the context is closed. 155 watcher := backend.ListAndWatch(ctx, prefix, 0) 156 for event := range watcher.Events { 157 if event.Typ == kvstore.EventTypeListDone { 158 rws.log.Debug("Initial synchronization completed") 159 rws.drainKeys(true) 160 syncedMetric.Set(metrics.BoolToFloat64(true)) 161 rws.synced.Store(true) 162 163 for _, callback := range rws.onSyncCallbacks { 164 callback(ctx) 165 } 166 167 // Clear the list of callbacks so that they don't get executed 168 // a second time in case of reconnections. 169 rws.onSyncCallbacks = nil 170 171 continue 172 } 173 174 key := strings.TrimPrefix(event.Key, prefix) 175 rws.log.WithFields(logrus.Fields{ 176 logfields.Key: key, 177 logfields.Event: event.Typ, 178 }).Debug("Received event from kvstore") 179 180 switch event.Typ { 181 case kvstore.EventTypeCreate, kvstore.EventTypeModify: 182 rws.handleUpsert(key, event.Value) 183 case kvstore.EventTypeDelete: 184 rws.handleDelete(key) 185 } 186 } 187 } 188 189 // NumEntries returns the number of entries synchronized from the store. 190 func (rws *restartableWatchStore) NumEntries() uint64 { 191 return rws.numEntries.Load() 192 } 193 194 // Synced returns whether the initial list of entries has been retrieved from 195 // the kvstore, and new events are currently being watched. 196 func (rws *restartableWatchStore) Synced() bool { 197 return rws.synced.Load() 198 } 199 200 // Drain emits a deletion event for each known key. It shall be called only 201 // when no watch operation is in progress. 202 func (rws *restartableWatchStore) Drain() { 203 if rws.watching.Swap(true) { 204 rws.log.Panic("Cannot drain the watch store while still running") 205 } 206 defer rws.watching.Store(false) 207 208 rws.log.Info("Draining restartable watch store") 209 rws.drainKeys(false) 210 rws.log.Info("Drained restartable watch store") 211 } 212 213 // drainKeys emits synthetic deletion events: 214 // * staleOnly == true: for all keys marked as stale; 215 // * staleOnly == false: for all known keys; 216 func (rws *restartableWatchStore) drainKeys(staleOnly bool) { 217 for key, entry := range rws.state { 218 if !staleOnly || entry.stale { 219 rws.log.WithField(logfields.Key, key).Debug("Emitting deletion event for stale key") 220 rws.handleDelete(key) 221 } 222 } 223 } 224 225 func (rws *restartableWatchStore) handleUpsert(key string, value []byte) { 226 entry := &rwsEntry{key: rws.keyCreator()} 227 if err := entry.key.Unmarshal(key, value); err != nil { 228 rws.log.WithFields(logrus.Fields{ 229 logfields.Key: key, 230 logfields.Value: string(value), 231 }).WithError(err).Warning("Unable to unmarshal value") 232 return 233 } 234 235 rws.state[key] = entry 236 rws.numEntries.Store(uint64(len(rws.state))) 237 rws.entriesMetric.Set(float64(len(rws.state))) 238 rws.observer.OnUpdate(entry.key) 239 } 240 241 func (rws *restartableWatchStore) handleDelete(key string) { 242 entry, ok := rws.state[key] 243 if !ok { 244 rws.log.WithField(logfields.Key, key).Warning("Received deletion event for unknown key") 245 return 246 } 247 248 delete(rws.state, key) 249 rws.numEntries.Store(uint64(len(rws.state))) 250 rws.entriesMetric.Set(float64(len(rws.state))) 251 rws.observer.OnDelete(entry.key) 252 }