github.com/cilium/statedb@v0.3.2/reconciler/reconciler.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package reconciler 5 6 import ( 7 "context" 8 "fmt" 9 "iter" 10 "time" 11 12 "github.com/cilium/hive/cell" 13 "github.com/cilium/statedb" 14 ) 15 16 type reconciler[Obj comparable] struct { 17 Params 18 config config[Obj] 19 retries *retries 20 externalPruneTrigger chan struct{} 21 primaryIndexer statedb.Indexer[Obj] 22 } 23 24 func (r *reconciler[Obj]) Prune() { 25 select { 26 case r.externalPruneTrigger <- struct{}{}: 27 default: 28 } 29 } 30 31 func (r *reconciler[Obj]) reconcileLoop(ctx context.Context, health cell.Health) error { 32 var pruneTickerChan <-chan time.Time 33 if r.config.PruneInterval > 0 { 34 pruneTicker := time.NewTicker(r.config.PruneInterval) 35 defer pruneTicker.Stop() 36 pruneTickerChan = pruneTicker.C 37 } 38 39 // Create the change iterator to watch for inserts and deletes to the table. 40 wtxn := r.DB.WriteTxn(r.config.Table) 41 changeIterator, err := r.config.Table.Changes(wtxn) 42 txn := wtxn.Commit() 43 if err != nil { 44 return fmt.Errorf("watching for changes failed: %w", err) 45 } 46 47 tableWatchChan := closedWatchChannel 48 49 externalPrune := false 50 51 tableInitialized := false 52 _, tableInitWatch := r.config.Table.Initialized(txn) 53 54 for { 55 // Throttle a bit before reconciliation to allow for a bigger batch to arrive and 56 // for objects to settle. 57 if err := r.config.RateLimiter.Wait(ctx); err != nil { 58 return err 59 } 60 61 prune := false 62 63 // Wait for trigger 64 select { 65 case <-ctx.Done(): 66 return ctx.Err() 67 case <-r.retries.Wait(): 68 // Object(s) are ready to be retried 69 case <-tableWatchChan: 70 // Table has changed 71 case <-tableInitWatch: 72 tableInitialized = true 73 tableInitWatch = nil 74 75 // Do an immediate pruning now as the table has finished 76 // initializing and pruning is enabled. 77 prune = r.config.PruneInterval != 0 78 case <-pruneTickerChan: 79 prune = true 80 case <-r.externalPruneTrigger: 81 externalPrune = true 82 } 83 84 // Grab a new snapshot and refresh the changes iterator to read 85 // in the new changes. 86 txn = r.DB.ReadTxn() 87 var changes iter.Seq2[statedb.Change[Obj], statedb.Revision] 88 changes, tableWatchChan = changeIterator.Next(txn) 89 90 // Perform incremental reconciliation and retries of previously failed 91 // objects. 92 errs := r.incremental(ctx, txn, changes) 93 94 if tableInitialized && (prune || externalPrune) { 95 if err := r.prune(ctx, txn); err != nil { 96 errs = append(errs, err) 97 } 98 externalPrune = false 99 } 100 101 if len(errs) == 0 { 102 health.OK( 103 fmt.Sprintf("OK, %d object(s)", r.config.Table.NumObjects(txn))) 104 } else { 105 health.Degraded( 106 fmt.Sprintf("%d error(s)", len(errs)), 107 joinErrors(errs)) 108 } 109 } 110 } 111 112 // prune performs the Prune operation to delete unexpected objects in the target system. 113 func (r *reconciler[Obj]) prune(ctx context.Context, txn statedb.ReadTxn) error { 114 iter := r.config.Table.All(txn) 115 start := time.Now() 116 err := r.config.Operations.Prune(ctx, txn, iter) 117 if err != nil { 118 r.Log.Warn("Reconciler: failed to prune objects", "error", err, "pruneInterval", r.config.PruneInterval) 119 err = fmt.Errorf("prune: %w", err) 120 } 121 r.config.Metrics.PruneDuration(r.ModuleID, time.Since(start)) 122 r.config.Metrics.PruneError(r.ModuleID, err) 123 return err 124 } 125 126 func (r *reconciler[Obj]) refreshLoop(ctx context.Context, health cell.Health) error { 127 lastRevision := statedb.Revision(0) 128 129 refreshTimer := time.NewTimer(0) 130 defer refreshTimer.Stop() 131 132 for { 133 // Wait until it's time to refresh. 134 select { 135 case <-ctx.Done(): 136 return nil 137 138 case <-refreshTimer.C: 139 } 140 141 durationUntilRefresh := r.config.RefreshInterval 142 143 // Iterate over the objects in revision order, e.g. oldest modification first. 144 // We look for objects that are older than [RefreshInterval] and mark them for 145 // refresh in order for them to be reconciled again. 146 seq := r.config.Table.LowerBound(r.DB.ReadTxn(), statedb.ByRevision[Obj](lastRevision+1)) 147 indexer := r.config.Table.PrimaryIndexer() 148 149 for obj, rev := range seq { 150 status := r.config.GetObjectStatus(obj) 151 152 // The duration elapsed since this object was last updated. 153 updatedSince := time.Since(status.UpdatedAt) 154 155 // Have we reached an object that is newer than RefreshInterval? 156 // If so, wait until this now oldest object's UpdatedAt exceeds RefreshInterval. 157 if updatedSince < r.config.RefreshInterval { 158 durationUntilRefresh = r.config.RefreshInterval - updatedSince 159 break 160 } 161 162 lastRevision = rev 163 164 if status.Kind == StatusKindDone { 165 if r.config.RefreshRateLimiter != nil { 166 // Limit the rate at which objects are marked for refresh to avoid disrupting 167 // normal work. 168 if err := r.config.RefreshRateLimiter.Wait(ctx); err != nil { 169 break 170 } 171 } 172 173 // Mark the object for refreshing. We make the assumption that refreshing is spread over 174 // time enough that batching of the writes is not useful here. 175 wtxn := r.DB.WriteTxn(r.config.Table) 176 obj, newRev, ok := r.config.Table.Get(wtxn, indexer.QueryFromObject(obj)) 177 if ok && rev == newRev { 178 obj = r.config.SetObjectStatus(r.config.CloneObject(obj), StatusRefreshing()) 179 r.config.Table.Insert(wtxn, obj) 180 } 181 wtxn.Commit() 182 } 183 } 184 185 refreshTimer.Reset(durationUntilRefresh) 186 health.OK(fmt.Sprintf("Next refresh in %s", durationUntilRefresh)) 187 } 188 }