github.com/cilium/statedb@v0.3.2/reconciler/reconciler.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package reconciler
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"iter"
    10  	"time"
    11  
    12  	"github.com/cilium/hive/cell"
    13  	"github.com/cilium/statedb"
    14  )
    15  
    16  type reconciler[Obj comparable] struct {
    17  	Params
    18  	config               config[Obj]
    19  	retries              *retries
    20  	externalPruneTrigger chan struct{}
    21  	primaryIndexer       statedb.Indexer[Obj]
    22  }
    23  
    24  func (r *reconciler[Obj]) Prune() {
    25  	select {
    26  	case r.externalPruneTrigger <- struct{}{}:
    27  	default:
    28  	}
    29  }
    30  
    31  func (r *reconciler[Obj]) reconcileLoop(ctx context.Context, health cell.Health) error {
    32  	var pruneTickerChan <-chan time.Time
    33  	if r.config.PruneInterval > 0 {
    34  		pruneTicker := time.NewTicker(r.config.PruneInterval)
    35  		defer pruneTicker.Stop()
    36  		pruneTickerChan = pruneTicker.C
    37  	}
    38  
    39  	// Create the change iterator to watch for inserts and deletes to the table.
    40  	wtxn := r.DB.WriteTxn(r.config.Table)
    41  	changeIterator, err := r.config.Table.Changes(wtxn)
    42  	txn := wtxn.Commit()
    43  	if err != nil {
    44  		return fmt.Errorf("watching for changes failed: %w", err)
    45  	}
    46  
    47  	tableWatchChan := closedWatchChannel
    48  
    49  	externalPrune := false
    50  
    51  	tableInitialized := false
    52  	_, tableInitWatch := r.config.Table.Initialized(txn)
    53  
    54  	for {
    55  		// Throttle a bit before reconciliation to allow for a bigger batch to arrive and
    56  		// for objects to settle.
    57  		if err := r.config.RateLimiter.Wait(ctx); err != nil {
    58  			return err
    59  		}
    60  
    61  		prune := false
    62  
    63  		// Wait for trigger
    64  		select {
    65  		case <-ctx.Done():
    66  			return ctx.Err()
    67  		case <-r.retries.Wait():
    68  			// Object(s) are ready to be retried
    69  		case <-tableWatchChan:
    70  			// Table has changed
    71  		case <-tableInitWatch:
    72  			tableInitialized = true
    73  			tableInitWatch = nil
    74  
    75  			// Do an immediate pruning now as the table has finished
    76  			// initializing and pruning is enabled.
    77  			prune = r.config.PruneInterval != 0
    78  		case <-pruneTickerChan:
    79  			prune = true
    80  		case <-r.externalPruneTrigger:
    81  			externalPrune = true
    82  		}
    83  
    84  		// Grab a new snapshot and refresh the changes iterator to read
    85  		// in the new changes.
    86  		txn = r.DB.ReadTxn()
    87  		var changes iter.Seq2[statedb.Change[Obj], statedb.Revision]
    88  		changes, tableWatchChan = changeIterator.Next(txn)
    89  
    90  		// Perform incremental reconciliation and retries of previously failed
    91  		// objects.
    92  		errs := r.incremental(ctx, txn, changes)
    93  
    94  		if tableInitialized && (prune || externalPrune) {
    95  			if err := r.prune(ctx, txn); err != nil {
    96  				errs = append(errs, err)
    97  			}
    98  			externalPrune = false
    99  		}
   100  
   101  		if len(errs) == 0 {
   102  			health.OK(
   103  				fmt.Sprintf("OK, %d object(s)", r.config.Table.NumObjects(txn)))
   104  		} else {
   105  			health.Degraded(
   106  				fmt.Sprintf("%d error(s)", len(errs)),
   107  				joinErrors(errs))
   108  		}
   109  	}
   110  }
   111  
   112  // prune performs the Prune operation to delete unexpected objects in the target system.
   113  func (r *reconciler[Obj]) prune(ctx context.Context, txn statedb.ReadTxn) error {
   114  	iter := r.config.Table.All(txn)
   115  	start := time.Now()
   116  	err := r.config.Operations.Prune(ctx, txn, iter)
   117  	if err != nil {
   118  		r.Log.Warn("Reconciler: failed to prune objects", "error", err, "pruneInterval", r.config.PruneInterval)
   119  		err = fmt.Errorf("prune: %w", err)
   120  	}
   121  	r.config.Metrics.PruneDuration(r.ModuleID, time.Since(start))
   122  	r.config.Metrics.PruneError(r.ModuleID, err)
   123  	return err
   124  }
   125  
   126  func (r *reconciler[Obj]) refreshLoop(ctx context.Context, health cell.Health) error {
   127  	lastRevision := statedb.Revision(0)
   128  
   129  	refreshTimer := time.NewTimer(0)
   130  	defer refreshTimer.Stop()
   131  
   132  	for {
   133  		// Wait until it's time to refresh.
   134  		select {
   135  		case <-ctx.Done():
   136  			return nil
   137  
   138  		case <-refreshTimer.C:
   139  		}
   140  
   141  		durationUntilRefresh := r.config.RefreshInterval
   142  
   143  		// Iterate over the objects in revision order, e.g. oldest modification first.
   144  		// We look for objects that are older than [RefreshInterval] and mark them for
   145  		// refresh in order for them to be reconciled again.
   146  		seq := r.config.Table.LowerBound(r.DB.ReadTxn(), statedb.ByRevision[Obj](lastRevision+1))
   147  		indexer := r.config.Table.PrimaryIndexer()
   148  
   149  		for obj, rev := range seq {
   150  			status := r.config.GetObjectStatus(obj)
   151  
   152  			// The duration elapsed since this object was last updated.
   153  			updatedSince := time.Since(status.UpdatedAt)
   154  
   155  			// Have we reached an object that is newer than RefreshInterval?
   156  			// If so, wait until this now oldest object's UpdatedAt exceeds RefreshInterval.
   157  			if updatedSince < r.config.RefreshInterval {
   158  				durationUntilRefresh = r.config.RefreshInterval - updatedSince
   159  				break
   160  			}
   161  
   162  			lastRevision = rev
   163  
   164  			if status.Kind == StatusKindDone {
   165  				if r.config.RefreshRateLimiter != nil {
   166  					// Limit the rate at which objects are marked for refresh to avoid disrupting
   167  					// normal work.
   168  					if err := r.config.RefreshRateLimiter.Wait(ctx); err != nil {
   169  						break
   170  					}
   171  				}
   172  
   173  				// Mark the object for refreshing. We make the assumption that refreshing is spread over
   174  				// time enough that batching of the writes is not useful here.
   175  				wtxn := r.DB.WriteTxn(r.config.Table)
   176  				obj, newRev, ok := r.config.Table.Get(wtxn, indexer.QueryFromObject(obj))
   177  				if ok && rev == newRev {
   178  					obj = r.config.SetObjectStatus(r.config.CloneObject(obj), StatusRefreshing())
   179  					r.config.Table.Insert(wtxn, obj)
   180  				}
   181  				wtxn.Commit()
   182  			}
   183  		}
   184  
   185  		refreshTimer.Reset(durationUntilRefresh)
   186  		health.OK(fmt.Sprintf("Next refresh in %s", durationUntilRefresh))
   187  	}
   188  }