github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/protectedts/ptreconcile/reconciler.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // Package ptreconcile provides logic to reconcile protected timestamp records 12 // with state associated with their metadata. 13 package ptreconcile 14 15 import ( 16 "context" 17 "math/rand" 18 "time" 19 20 "github.com/cockroachdb/cockroach/pkg/keys" 21 "github.com/cockroachdb/cockroach/pkg/kv" 22 "github.com/cockroachdb/cockroach/pkg/kv/kvserver" 23 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts" 24 "github.com/cockroachdb/cockroach/pkg/kv/kvserver/protectedts/ptpb" 25 "github.com/cockroachdb/cockroach/pkg/settings" 26 "github.com/cockroachdb/cockroach/pkg/settings/cluster" 27 "github.com/cockroachdb/cockroach/pkg/util/hlc" 28 "github.com/cockroachdb/cockroach/pkg/util/log" 29 "github.com/cockroachdb/cockroach/pkg/util/stop" 30 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 31 "github.com/cockroachdb/errors" 32 ) 33 34 // ReconcileInterval is the interval between two generations of the reports. 35 // When set to zero - disables the report generation. 36 var ReconcileInterval = settings.RegisterPublicNonNegativeDurationSetting( 37 "kv.protectedts.reconciliation.interval", 38 "the frequency for reconciling jobs with protected timestamp records", 39 5*time.Minute, 40 ) 41 42 // StatusFunc is used to check on the status of a Record based on its Meta 43 // field. 44 type StatusFunc func( 45 ctx context.Context, txn *kv.Txn, meta []byte, 46 ) (shouldRemove bool, _ error) 47 48 // StatusFuncs maps from MetaType to a StatusFunc. 49 type StatusFuncs map[string]StatusFunc 50 51 // Config configures a Reconciler. 52 type Config struct { 53 Settings *cluster.Settings 54 // Stores is used to ensure that we only run the reconciliation loop on 55 Stores *kvserver.Stores 56 DB *kv.DB 57 Storage protectedts.Storage 58 Cache protectedts.Cache 59 60 // We want a map from metaType to a function which determines whether we 61 // should clean it up. 62 StatusFuncs StatusFuncs 63 } 64 65 // Reconciler runs an a loop to reconcile the protected timestamps with external 66 // state. Each record's status is determined using the record's meta type and 67 // meta in conjunction with the configured StatusFunc. 68 type Reconciler struct { 69 settings *cluster.Settings 70 localStores *kvserver.Stores 71 db *kv.DB 72 cache protectedts.Cache 73 pts protectedts.Storage 74 metrics Metrics 75 statusFuncs StatusFuncs 76 } 77 78 // NewReconciler constructs a Reconciler. 79 func NewReconciler(cfg Config) *Reconciler { 80 return &Reconciler{ 81 settings: cfg.Settings, 82 localStores: cfg.Stores, 83 db: cfg.DB, 84 cache: cfg.Cache, 85 pts: cfg.Storage, 86 metrics: makeMetrics(), 87 statusFuncs: cfg.StatusFuncs, 88 } 89 } 90 91 // Metrics returns the Reconciler's metrics. 92 func (r *Reconciler) Metrics() *Metrics { 93 return &r.metrics 94 } 95 96 // Start will start the Reconciler. 97 func (r *Reconciler) Start(ctx context.Context, stopper *stop.Stopper) error { 98 return stopper.RunAsyncTask(ctx, "protectedts-reconciliation", func(ctx context.Context) { 99 r.run(ctx, stopper) 100 }) 101 } 102 103 func (r *Reconciler) run(ctx context.Context, stopper *stop.Stopper) { 104 reconcileIntervalChanged := make(chan struct{}, 1) 105 ReconcileInterval.SetOnChange(&r.settings.SV, func() { 106 select { 107 case reconcileIntervalChanged <- struct{}{}: 108 default: 109 } 110 }) 111 lastReconciled := time.Time{} 112 getInterval := func() time.Duration { 113 interval := ReconcileInterval.Get(&r.settings.SV) 114 const jitterFrac = .1 115 return time.Duration(float64(interval) * (1 + (rand.Float64()-.5)*jitterFrac)) 116 } 117 timer := timeutil.NewTimer() 118 for { 119 timer.Reset(timeutil.Until(lastReconciled.Add(getInterval()))) 120 select { 121 case <-timer.C: 122 timer.Read = true 123 r.reconcile(ctx) 124 lastReconciled = timeutil.Now() 125 case <-reconcileIntervalChanged: 126 // Go back around again. 127 case <-stopper.ShouldQuiesce(): 128 return 129 case <-ctx.Done(): 130 return 131 } 132 } 133 } 134 135 func (r *Reconciler) isMeta1Leaseholder(ctx context.Context, now hlc.Timestamp) (bool, error) { 136 return r.localStores.IsMeta1Leaseholder(now) 137 } 138 139 func (r *Reconciler) reconcile(ctx context.Context) { 140 now := r.db.Clock().Now() 141 isLeaseholder, err := r.isMeta1Leaseholder(ctx, now) 142 if err != nil { 143 log.Errorf(ctx, "failed to determine whether the local store contains the meta1 lease: %v", err) 144 return 145 } 146 if !isLeaseholder { 147 return 148 } 149 if err := r.cache.Refresh(ctx, now); err != nil { 150 log.Errorf(ctx, "failed to refresh the protected timestamp cache to %v: %v", now, err) 151 return 152 } 153 r.cache.Iterate(ctx, keys.MinKey, keys.MaxKey, func(rec *ptpb.Record) (wantMore bool) { 154 task, ok := r.statusFuncs[rec.MetaType] 155 if !ok { 156 // NB: We don't expect to ever hit this case outside of testing. 157 log.Infof(ctx, "found protected timestamp record with unknown meta type %q, skipping", rec.MetaType) 158 return true 159 } 160 var didRemove bool 161 if err := r.db.Txn(ctx, func(ctx context.Context, txn *kv.Txn) (err error) { 162 didRemove = false // reset for retries 163 shouldRemove, err := task(ctx, txn, rec.Meta) 164 if err != nil { 165 return err 166 } 167 if !shouldRemove { 168 return nil 169 } 170 err = r.pts.Release(ctx, txn, rec.ID) 171 if err != nil && !errors.Is(err, protectedts.ErrNotExists) { 172 return err 173 } 174 didRemove = true 175 return nil 176 }); err != nil { 177 r.metrics.ReconciliationErrors.Inc(1) 178 log.Errorf(ctx, "failed to reconcile protected timestamp with id %s: %v", 179 rec.ID.String(), err) 180 } else { 181 r.metrics.RecordsProcessed.Inc(1) 182 if didRemove { 183 r.metrics.RecordsRemoved.Inc(1) 184 } 185 } 186 return true 187 }) 188 r.metrics.ReconcilationRuns.Inc(1) 189 }