github.com/cilium/statedb@v0.3.2/reconciler/incremental.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package reconciler 5 6 import ( 7 "context" 8 "errors" 9 "iter" 10 "time" 11 12 "github.com/cilium/hive/cell" 13 "github.com/cilium/statedb" 14 ) 15 16 // incrementalRound is the shared context for incremental reconciliation and retries. 17 type incrementalRound[Obj comparable] struct { 18 metrics Metrics 19 moduleID cell.FullModuleID 20 config *config[Obj] 21 retries *retries 22 primaryIndexer statedb.Indexer[Obj] 23 db *statedb.DB 24 ctx context.Context 25 txn statedb.ReadTxn 26 table statedb.RWTable[Obj] 27 28 // numReconciled counts the number of objects that have been reconciled in this 29 // round, both for new & changed objects and for retried objects. If 30 // Config.IncrementalBatchSize is reached the round is stopped. 31 // This allows for timely reporting of status when lot of objects have changed and 32 // reconciliation per object is slow. 33 numReconciled int 34 35 // results collects the results of update operations. 36 // The results are committed in a separate write transaction in order to 37 // not lock the table while reconciling. If an object has changed in the meanwhile 38 // the stale reconciliation result for that object is dropped. 39 results map[Obj]opResult 40 } 41 42 // opResult is the outcome from reconciling a single object 43 type opResult struct { 44 original any // the original object 45 rev statedb.Revision // revision of the object 46 err error 47 id uint64 // the "pending" identifier 48 } 49 50 func (r *reconciler[Obj]) incremental(ctx context.Context, txn statedb.ReadTxn, changes iter.Seq2[statedb.Change[Obj], statedb.Revision]) []error { 51 round := incrementalRound[Obj]{ 52 moduleID: r.ModuleID, 53 metrics: r.config.Metrics, 54 config: &r.config, 55 retries: r.retries, 56 primaryIndexer: r.primaryIndexer, 57 db: r.DB, 58 ctx: ctx, 59 txn: txn, 60 table: r.config.Table, 61 results: make(map[Obj]opResult), 62 } 63 64 // Reconcile new and changed objects using either Operations 65 // or BatchOperations. 66 if r.config.BatchOperations != nil { 67 round.batch(changes) 68 } else { 69 round.single(changes) 70 } 71 72 // Process objects that need to be retried that were not cleared. 73 round.processRetries() 74 75 // Finally commit the status updates. 76 newErrors := round.commitStatus() 77 78 // Since all failures are retried, we can return the errors from the retry 79 // queue which includes both errors occurred in this round and the old 80 // errors. 81 errs := round.retries.errors() 82 round.metrics.ReconciliationErrors(r.ModuleID, newErrors, len(errs)) 83 return errs 84 } 85 86 func (round *incrementalRound[Obj]) single(changes iter.Seq2[statedb.Change[Obj], statedb.Revision]) { 87 // Iterate in revision order through new and changed objects. 88 for change, rev := range changes { 89 obj := change.Object 90 91 status := round.config.GetObjectStatus(obj) 92 if !change.Deleted && !status.IsPendingOrRefreshing() { 93 // Only process objects that are pending reconciliation, e.g. 94 // changed from outside. 95 // Failures (e.g. StatusKindError) are processed via the retry queue. 96 continue 97 } 98 99 // Clear retries as the object has changed. 100 round.retries.Clear(obj) 101 102 round.processSingle(obj, rev, change.Deleted) 103 round.numReconciled++ 104 if round.numReconciled >= round.config.IncrementalRoundSize { 105 break 106 } 107 } 108 } 109 110 func (round *incrementalRound[Obj]) batch(changes iter.Seq2[statedb.Change[Obj], statedb.Revision]) { 111 ops := round.config.BatchOperations 112 updateBatch := []BatchEntry[Obj]{} 113 deleteBatch := []BatchEntry[Obj]{} 114 115 for change, rev := range changes { 116 obj := change.Object 117 118 status := round.config.GetObjectStatus(obj) 119 if !change.Deleted && !status.IsPendingOrRefreshing() { 120 // Only process objects that are pending reconciliation, e.g. 121 // changed from outside. 122 // Failures (e.g. StatusKindError) are processed via the retry queue. 123 continue 124 } 125 126 // Clear an existing retry as the object has changed. 127 round.retries.Clear(obj) 128 129 // Clone the object so we or the operations can mutate it. 130 orig := obj 131 obj = round.config.CloneObject(obj) 132 133 if change.Deleted { 134 deleteBatch = append(deleteBatch, BatchEntry[Obj]{Object: obj, Revision: rev, original: orig}) 135 } else { 136 updateBatch = append(updateBatch, BatchEntry[Obj]{Object: obj, Revision: rev, original: orig}) 137 } 138 139 round.numReconciled++ 140 if round.numReconciled >= round.config.IncrementalRoundSize { 141 break 142 } 143 } 144 145 // Process the delete batch first to make room. 146 if len(deleteBatch) > 0 { 147 start := time.Now() 148 ops.DeleteBatch(round.ctx, round.txn, deleteBatch) 149 round.metrics.ReconciliationDuration( 150 round.moduleID, 151 OpDelete, 152 time.Since(start), 153 ) 154 for _, entry := range deleteBatch { 155 if entry.Result != nil { 156 // Delete failed, queue a retry for it. 157 round.retries.Add(entry.original, entry.Revision, true, entry.Result) 158 } 159 } 160 } 161 162 // And then the update batch. 163 if len(updateBatch) > 0 { 164 start := time.Now() 165 ops.UpdateBatch(round.ctx, round.txn, updateBatch) 166 round.metrics.ReconciliationDuration( 167 round.moduleID, 168 OpUpdate, 169 time.Since(start), 170 ) 171 172 for _, entry := range updateBatch { 173 status := round.config.GetObjectStatus(entry.Object) 174 if entry.Result == nil { 175 round.retries.Clear(entry.Object) 176 } 177 round.results[entry.Object] = opResult{rev: entry.Revision, id: status.id, err: entry.Result, original: entry.original} 178 } 179 } 180 } 181 182 func (round *incrementalRound[Obj]) processRetries() { 183 now := time.Now() 184 for round.numReconciled < round.config.IncrementalRoundSize { 185 item, ok := round.retries.Top() 186 if !ok || item.retryAt.After(now) { 187 break 188 } 189 round.retries.Pop() 190 round.processSingle(item.object.(Obj), item.rev, item.delete) 191 round.numReconciled++ 192 } 193 } 194 195 func (round *incrementalRound[Obj]) processSingle(obj Obj, rev statedb.Revision, delete bool) { 196 start := time.Now() 197 198 var ( 199 err error 200 op string 201 ) 202 if delete { 203 op = OpDelete 204 err = round.config.Operations.Delete(round.ctx, round.txn, obj) 205 if err != nil { 206 // Deletion failed. Retry again later. 207 round.retries.Add(obj, rev, true, err) 208 } 209 } else { 210 // Clone the object so it can be mutated by Update() 211 orig := obj 212 obj = round.config.CloneObject(obj) 213 op = OpUpdate 214 err = round.config.Operations.Update(round.ctx, round.txn, obj) 215 status := round.config.GetObjectStatus(obj) 216 round.results[obj] = opResult{original: orig, id: status.id, rev: rev, err: err} 217 } 218 round.metrics.ReconciliationDuration(round.moduleID, op, time.Since(start)) 219 220 if err == nil { 221 round.retries.Clear(obj) 222 } 223 } 224 225 func (round *incrementalRound[Obj]) commitStatus() (numErrors int) { 226 if len(round.results) == 0 { 227 // Nothing to commit. 228 return 229 } 230 231 wtxn := round.db.WriteTxn(round.table) 232 defer wtxn.Commit() 233 234 // Commit status for updated objects. 235 for obj, result := range round.results { 236 // Update the object if it is unchanged. It may happen that the object has 237 // been updated in the meanwhile, in which case we skip updating the status 238 // and reprocess the object on the next round. 239 240 var status Status 241 if result.err == nil { 242 status = StatusDone() 243 } else { 244 status = StatusError(result.err) 245 numErrors++ 246 } 247 248 current, exists, err := round.table.CompareAndSwap(wtxn, result.rev, round.config.SetObjectStatus(obj, status)) 249 if errors.Is(err, statedb.ErrRevisionNotEqual) && exists { 250 // The object had changed. Check if the pending status still carries the same 251 // identifier and if so update the object. This is an optimization for supporting 252 // multiple reconcilers per object to avoid repeating work when only the 253 // reconciliation status had changed. 254 // 255 // The limitation of this approach is that we cannot support the reconciler 256 // modifying the object during reconciliation as the following will forget 257 // the changes. 258 currentStatus := round.config.GetObjectStatus(current) 259 if currentStatus.Kind == StatusKindPending && currentStatus.id == result.id { 260 current = round.config.CloneObject(current) 261 current = round.config.SetObjectStatus(current, status) 262 round.table.Insert(wtxn, current) 263 } 264 } 265 266 if result.err != nil && err == nil { 267 // Reconciliation of the object had failed and the status was updated 268 // successfully (object had not changed). Queue the retry for the object. 269 newRevision := round.table.Revision(wtxn) 270 round.retries.Add(result.original.(Obj), newRevision, false, result.err) 271 } 272 } 273 return 274 }