github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/pruning.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package ts 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/keys" 17 "github.com/cockroachdb/cockroach/pkg/kv" 18 "github.com/cockroachdb/cockroach/pkg/roachpb" 19 "github.com/cockroachdb/cockroach/pkg/storage" 20 "github.com/cockroachdb/cockroach/pkg/util/hlc" 21 ) 22 23 var ( 24 firstTSRKey = roachpb.RKey(keys.TimeseriesPrefix) 25 lastTSRKey = firstTSRKey.PrefixEnd() 26 ) 27 28 type timeSeriesResolutionInfo struct { 29 Name string 30 Resolution Resolution 31 } 32 33 // findTimeSeries searches the supplied engine over the supplied key range, 34 // identifying time series which have stored data in the range, along with the 35 // resolutions at which time series data is stored. A unique name/resolution 36 // pair will only be identified once, even if the range contains keys for that 37 // name/resolution pair at multiple timestamps or from multiple sources. 38 // 39 // An engine snapshot is used, rather than a client, because this function is 40 // intended to be called by a storage queue which can inspect the local data for 41 // a single range without the need for expensive network calls. 42 func (tsdb *DB) findTimeSeries( 43 snapshot storage.Reader, startKey, endKey roachpb.RKey, now hlc.Timestamp, 44 ) ([]timeSeriesResolutionInfo, error) { 45 var results []timeSeriesResolutionInfo 46 47 // Set start boundary for the search, which is the lesser of the range start 48 // key and the beginning of time series data. 49 start := storage.MakeMVCCMetadataKey(startKey.AsRawKey()) 50 next := storage.MakeMVCCMetadataKey(keys.TimeseriesPrefix) 51 if next.Less(start) { 52 next = start 53 } 54 55 // Set end boundary for the search, which is the lesser of the range end key 56 // and the end of time series data. 57 end := storage.MakeMVCCMetadataKey(endKey.AsRawKey()) 58 lastTS := storage.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd()) 59 if lastTS.Less(end) { 60 end = lastTS 61 } 62 63 thresholds := tsdb.computeThresholds(now.WallTime) 64 65 iter := snapshot.NewIterator(storage.IterOptions{UpperBound: endKey.AsRawKey()}) 66 defer iter.Close() 67 68 for iter.SeekGE(next); ; iter.SeekGE(next) { 69 if ok, err := iter.Valid(); err != nil { 70 return nil, err 71 } else if !ok || !iter.UnsafeKey().Less(end) { 72 break 73 } 74 foundKey := iter.Key().Key 75 76 // Extract the name and resolution from the discovered key. 77 name, _, res, tsNanos, err := DecodeDataKey(foundKey) 78 if err != nil { 79 return nil, err 80 } 81 // Skip this time series if there's nothing to prune. We check the 82 // oldest (first) time series record's timestamp against the 83 // pruning threshold. 84 if threshold, ok := thresholds[res]; !ok || threshold > tsNanos { 85 results = append(results, timeSeriesResolutionInfo{ 86 Name: name, 87 Resolution: res, 88 }) 89 } 90 91 // Set 'next' is initialized to the next possible time series key 92 // which could belong to a previously undiscovered time series. 93 next = storage.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd()) 94 } 95 96 return results, nil 97 } 98 99 // pruneTimeSeries will prune data for the supplied set of time series. Time 100 // series series are identified by name and resolution. 101 // 102 // For each time series supplied, the pruning operation will delete all data 103 // older than a constant threshold. The threshold is different depending on the 104 // resolution; typically, lower-resolution time series data will be retained for 105 // a longer period. 106 // 107 // If data is stored at a resolution which is not known to the system, it is 108 // assumed that the resolution has been deprecated and all data for that time 109 // series at that resolution will be deleted. 110 // 111 // As range deletion of inline data is an idempotent operation, it is safe to 112 // run this operation concurrently on multiple nodes at the same time. 113 func (tsdb *DB) pruneTimeSeries( 114 ctx context.Context, db *kv.DB, timeSeriesList []timeSeriesResolutionInfo, now hlc.Timestamp, 115 ) error { 116 thresholds := tsdb.computeThresholds(now.WallTime) 117 118 b := &kv.Batch{} 119 for _, timeSeries := range timeSeriesList { 120 // Time series data for a specific resolution falls in a contiguous key 121 // range, and can be deleted with a DelRange command. 122 // The start key is the prefix unique to this name/resolution pair. 123 start := makeDataKeySeriesPrefix(timeSeries.Name, timeSeries.Resolution) 124 125 // The end key can be created by generating a time series key with the 126 // threshold timestamp for the resolution. If the resolution is not 127 // supported, the start key's PrefixEnd is used instead (which will clear 128 // the time series entirely). 129 var end roachpb.Key 130 threshold, ok := thresholds[timeSeries.Resolution] 131 if ok { 132 end = MakeDataKey(timeSeries.Name, "", timeSeries.Resolution, threshold) 133 } else { 134 end = start.PrefixEnd() 135 } 136 137 b.AddRawRequest(&roachpb.DeleteRangeRequest{ 138 RequestHeader: roachpb.RequestHeader{ 139 Key: start, 140 EndKey: end, 141 }, 142 Inline: true, 143 }) 144 } 145 146 return db.Run(ctx, b) 147 }