// Source: github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/ts_maintenance_queue.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
)

const (
	// TimeSeriesMaintenanceInterval is the minimum interval between two
	// time series maintenance runs on a replica. The queue's timer also
	// divides this interval by the store's replica count to pace processing.
	TimeSeriesMaintenanceInterval = 24 * time.Hour // daily

	// TimeSeriesMaintenanceMemoryBudget is the maximum amount of memory that
	// should be consumed by time series maintenance operations at any one
	// time. It is passed to MaintainTimeSeries as the budget for each run.
	TimeSeriesMaintenanceMemoryBudget = int64(8 * 1024 * 1024) // 8MB
)

// TimeSeriesDataStore is an interface defined in this package (kvserver) that
// can be implemented by the higher-level time series system. This allows the
// queues in this package to run periodic time series maintenance; importantly,
// this maintenance can then be informed by data from the local store.
41 type TimeSeriesDataStore interface { 42 ContainsTimeSeries(roachpb.RKey, roachpb.RKey) bool 43 MaintainTimeSeries( 44 context.Context, 45 storage.Reader, 46 roachpb.RKey, 47 roachpb.RKey, 48 *kv.DB, 49 *mon.BytesMonitor, 50 int64, 51 hlc.Timestamp, 52 ) error 53 } 54 55 // timeSeriesMaintenanceQueue identifies replicas that contain time series 56 // data and performs necessary data maintenance on the time series located in 57 // the replica. Currently, maintenance involves pruning time series data older 58 // than a certain threshold. 59 // 60 // Logic for time series maintenance is implemented in a higher level time 61 // series package; this queue uses the TimeSeriesDataStore interface to call 62 // into that logic. 63 // 64 // Once a replica is selected for processing, data changes are executed against 65 // the cluster using a KV client; changes are not restricted to the data in the 66 // replica being processed. These tasks could therefore be performed without a 67 // replica queue; however, a replica queue has been chosen to initiate this task 68 // for a few reasons: 69 // * The queue naturally distributes the workload across the cluster in 70 // proportion to the number of ranges containing time series data. 71 // * Access to the local replica is a convenient way to discover the names of 72 // time series stored on the cluster; the names are required in order to 73 // effectively prune time series without expensive distributed scans. 74 // 75 // Data changes executed by this queue are idempotent; it is explicitly safe 76 // for multiple nodes to attempt to prune the same time series concurrently. 77 // In this situation, each node would compute the same delete range based on 78 // the current timestamp; the first will succeed, all others will become 79 // a no-op. 
80 type timeSeriesMaintenanceQueue struct { 81 *baseQueue 82 tsData TimeSeriesDataStore 83 replicaCountFn func() int 84 db *kv.DB 85 mem mon.BytesMonitor 86 } 87 88 // newTimeSeriesMaintenanceQueue returns a new instance of 89 // timeSeriesMaintenanceQueue. 90 func newTimeSeriesMaintenanceQueue( 91 store *Store, db *kv.DB, g *gossip.Gossip, tsData TimeSeriesDataStore, 92 ) *timeSeriesMaintenanceQueue { 93 q := &timeSeriesMaintenanceQueue{ 94 tsData: tsData, 95 replicaCountFn: store.ReplicaCount, 96 db: db, 97 mem: mon.MakeUnlimitedMonitor( 98 context.Background(), 99 "timeseries-maintenance-queue", 100 mon.MemoryResource, 101 nil, 102 nil, 103 // Begin logging messages if we exceed our planned memory usage by 104 // more than triple. 105 TimeSeriesMaintenanceMemoryBudget*3, 106 store.cfg.Settings, 107 ), 108 } 109 q.baseQueue = newBaseQueue( 110 "timeSeriesMaintenance", q, store, g, 111 queueConfig{ 112 maxSize: defaultQueueMaxSize, 113 needsLease: true, 114 needsSystemConfig: false, 115 acceptsUnsplitRanges: true, 116 successes: store.metrics.TimeSeriesMaintenanceQueueSuccesses, 117 failures: store.metrics.TimeSeriesMaintenanceQueueFailures, 118 pending: store.metrics.TimeSeriesMaintenanceQueuePending, 119 processingNanos: store.metrics.TimeSeriesMaintenanceQueueProcessingNanos, 120 }, 121 ) 122 123 return q 124 } 125 126 func (q *timeSeriesMaintenanceQueue) shouldQueue( 127 ctx context.Context, now hlc.Timestamp, repl *Replica, _ *config.SystemConfig, 128 ) (shouldQ bool, priority float64) { 129 if !repl.store.cfg.TestingKnobs.DisableLastProcessedCheck { 130 lpTS, err := repl.getQueueLastProcessed(ctx, q.name) 131 if err != nil { 132 return false, 0 133 } 134 shouldQ, priority = shouldQueueAgain(now, lpTS, TimeSeriesMaintenanceInterval) 135 if !shouldQ { 136 return 137 } 138 } 139 desc := repl.Desc() 140 if q.tsData.ContainsTimeSeries(desc.StartKey, desc.EndKey) { 141 return 142 } 143 return false, 0 144 } 145 146 func (q *timeSeriesMaintenanceQueue) process( 147 
ctx context.Context, repl *Replica, _ *config.SystemConfig, 148 ) error { 149 desc := repl.Desc() 150 snap := repl.store.Engine().NewSnapshot() 151 now := repl.store.Clock().Now() 152 defer snap.Close() 153 if err := q.tsData.MaintainTimeSeries( 154 ctx, snap, desc.StartKey, desc.EndKey, q.db, &q.mem, TimeSeriesMaintenanceMemoryBudget, now, 155 ); err != nil { 156 return err 157 } 158 // Update the last processed time for this queue. 159 if err := repl.setQueueLastProcessed(ctx, q.name, now); err != nil { 160 log.VErrEventf(ctx, 2, "failed to update last processed time: %v", err) 161 } 162 return nil 163 } 164 165 func (q *timeSeriesMaintenanceQueue) timer(duration time.Duration) time.Duration { 166 // An interval between replicas to space consistency checks out over 167 // the check interval. 168 replicaCount := q.replicaCountFn() 169 if replicaCount == 0 { 170 return 0 171 } 172 replInterval := TimeSeriesMaintenanceInterval / time.Duration(replicaCount) 173 if replInterval < duration { 174 return 0 175 } 176 return replInterval - duration 177 } 178 179 func (*timeSeriesMaintenanceQueue) purgatoryChan() <-chan time.Time { 180 return nil 181 }