github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/protectedts/ptreconcile/reconciler.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  // Package ptreconcile provides logic to reconcile protected timestamp records
    12  // with state associated with their metadata.
    13  package ptreconcile
    14  
    15  import (
    16  	"context"
    17  	"math/rand"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/keys"
    21  	"github.com/cockroachdb/cockroach/pkg/kv"
    22  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
    23  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts"
    24  	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb"
    25  	"github.com/cockroachdb/cockroach/pkg/settings"
    26  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    27  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    28  	"github.com/cockroachdb/cockroach/pkg/util/log"
    29  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    30  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    31  	"github.com/cockroachdb/errors"
    32  )
    33  
// ReconcileInterval is the interval between two consecutive reconciliation
// runs of the Reconciler. The run loop perturbs the configured value with a
// small random jitter before each wait.
//
// NB: nothing in this file special-cases a zero interval; the run loop simply
// re-arms its timer with no delay, so zero does not disable reconciliation.
var ReconcileInterval = settings.RegisterPublicNonNegativeDurationSetting(
	"kv.protectedts.reconciliation.interval",
	"the frequency for reconciling jobs with protected timestamp records",
	5*time.Minute,
)
    41  
// StatusFunc is used to check on the status of a Record based on its Meta
// field.
//
// The Reconciler calls it with the transaction in which the record would be
// released and with the record's raw Meta bytes. Returning shouldRemove=true
// makes the Reconciler release the record in that same transaction; returning
// an error fails the transaction for that record.
type StatusFunc func(
	ctx context.Context, txn *kv.Txn, meta []byte,
) (shouldRemove bool, _ error)
    47  
// StatusFuncs maps from MetaType to a StatusFunc. The Reconciler looks up each
// record's MetaType in this map; records whose MetaType has no entry are
// logged and skipped.
type StatusFuncs map[string]StatusFunc
    50  
// Config configures a Reconciler.
type Config struct {
	// Settings provides the setting values from which ReconcileInterval is
	// read and on which changes to it are observed.
	Settings *cluster.Settings
	// Stores is used to ensure that we only run the reconciliation loop on
	// a node whose local stores report holding the meta1 lease.
	Stores  *kvserver.Stores
	// DB supplies the clock reading for each run and the transactions in which
	// records are checked and released.
	DB      *kv.DB
	// Storage is used to release records which should be removed.
	Storage protectedts.Storage
	// Cache is refreshed at the start of each run and then iterated to find the
	// records to examine.
	Cache   protectedts.Cache

	// We want a map from metaType to a function which determines whether we
	// should clean it up.
	StatusFuncs StatusFuncs
}
    64  
// Reconciler runs a loop to reconcile the protected timestamps with external
// state. Each record's status is determined using the record's meta type and
// meta in conjunction with the configured StatusFunc.
type Reconciler struct {
	settings    *cluster.Settings   // source of the ReconcileInterval value
	localStores *kvserver.Stores    // consulted for the meta1 lease check
	db          *kv.DB              // clock and transactions
	cache       protectedts.Cache   // refreshed and iterated on every run
	pts         protectedts.Storage // used to release records
	metrics     Metrics             // exposed via Metrics()
	statusFuncs StatusFuncs         // keyed by a record's MetaType
}
    77  
    78  // NewReconciler constructs a Reconciler.
    79  func NewReconciler(cfg Config) *Reconciler {
    80  	return &Reconciler{
    81  		settings:    cfg.Settings,
    82  		localStores: cfg.Stores,
    83  		db:          cfg.DB,
    84  		cache:       cfg.Cache,
    85  		pts:         cfg.Storage,
    86  		metrics:     makeMetrics(),
    87  		statusFuncs: cfg.StatusFuncs,
    88  	}
    89  }
    90  
    91  // Metrics returns the Reconciler's metrics.
    92  func (r *Reconciler) Metrics() *Metrics {
    93  	return &r.metrics
    94  }
    95  
    96  // Start will start the Reconciler.
    97  func (r *Reconciler) Start(ctx context.Context, stopper *stop.Stopper) error {
    98  	return stopper.RunAsyncTask(ctx, "protectedts-reconciliation", func(ctx context.Context) {
    99  		r.run(ctx, stopper)
   100  	})
   101  }
   102  
   103  func (r *Reconciler) run(ctx context.Context, stopper *stop.Stopper) {
   104  	reconcileIntervalChanged := make(chan struct{}, 1)
   105  	ReconcileInterval.SetOnChange(&r.settings.SV, func() {
   106  		select {
   107  		case reconcileIntervalChanged <- struct{}{}:
   108  		default:
   109  		}
   110  	})
   111  	lastReconciled := time.Time{}
   112  	getInterval := func() time.Duration {
   113  		interval := ReconcileInterval.Get(&r.settings.SV)
   114  		const jitterFrac = .1
   115  		return time.Duration(float64(interval) * (1 + (rand.Float64()-.5)*jitterFrac))
   116  	}
   117  	timer := timeutil.NewTimer()
   118  	for {
   119  		timer.Reset(timeutil.Until(lastReconciled.Add(getInterval())))
   120  		select {
   121  		case <-timer.C:
   122  			timer.Read = true
   123  			r.reconcile(ctx)
   124  			lastReconciled = timeutil.Now()
   125  		case <-reconcileIntervalChanged:
   126  			// Go back around again.
   127  		case <-stopper.ShouldQuiesce():
   128  			return
   129  		case <-ctx.Done():
   130  			return
   131  		}
   132  	}
   133  }
   134  
   135  func (r *Reconciler) isMeta1Leaseholder(ctx context.Context, now hlc.Timestamp) (bool, error) {
   136  	return r.localStores.IsMeta1Leaseholder(now)
   137  }
   138  
   139  func (r *Reconciler) reconcile(ctx context.Context) {
   140  	now := r.db.Clock().Now()
   141  	isLeaseholder, err := r.isMeta1Leaseholder(ctx, now)
   142  	if err != nil {
   143  		log.Errorf(ctx, "failed to determine whether the local store contains the meta1 lease: %v", err)
   144  		return
   145  	}
   146  	if !isLeaseholder {
   147  		return
   148  	}
   149  	if err := r.cache.Refresh(ctx, now); err != nil {
   150  		log.Errorf(ctx, "failed to refresh the protected timestamp cache to %v: %v", now, err)
   151  		return
   152  	}
   153  	r.cache.Iterate(ctx, keys.MinKey, keys.MaxKey, func(rec *ptpb.Record) (wantMore bool) {
   154  		task, ok := r.statusFuncs[rec.MetaType]
   155  		if !ok {
   156  			// NB: We don't expect to ever hit this case outside of testing.
   157  			log.Infof(ctx, "found protected timestamp record with unknown meta type %q, skipping", rec.MetaType)
   158  			return true
   159  		}
   160  		var didRemove bool
   161  		if err := r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) (err error) {
   162  			didRemove = false // reset for retries
   163  			shouldRemove, err := task(ctx, txn, rec.Meta)
   164  			if err != nil {
   165  				return err
   166  			}
   167  			if !shouldRemove {
   168  				return nil
   169  			}
   170  			err = r.pts.Release(ctx, txn, rec.ID)
   171  			if err != nil && !errors.Is(err, protectedts.ErrNotExists) {
   172  				return err
   173  			}
   174  			didRemove = true
   175  			return nil
   176  		}); err != nil {
   177  			r.metrics.ReconciliationErrors.Inc(1)
   178  			log.Errorf(ctx, "failed to reconcile protected timestamp with id %s: %v",
   179  				rec.ID.String(), err)
   180  		} else {
   181  			r.metrics.RecordsProcessed.Inc(1)
   182  			if didRemove {
   183  				r.metrics.RecordsRemoved.Inc(1)
   184  			}
   185  		}
   186  		return true
   187  	})
   188  	r.metrics.ReconcilationRuns.Inc(1)
   189  }