github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/pruning.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package ts
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/keys"
    17  	"github.com/cockroachdb/cockroach/pkg/kv"
    18  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    19  	"github.com/cockroachdb/cockroach/pkg/storage"
    20  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    21  )
    22  
    23  var (
    24  	firstTSRKey = roachpb.RKey(keys.TimeseriesPrefix)
    25  	lastTSRKey  = firstTSRKey.PrefixEnd()
    26  )
    27  
    28  type timeSeriesResolutionInfo struct {
    29  	Name       string
    30  	Resolution Resolution
    31  }
    32  
    33  // findTimeSeries searches the supplied engine over the supplied key range,
    34  // identifying time series which have stored data in the range, along with the
    35  // resolutions at which time series data is stored. A unique name/resolution
    36  // pair will only be identified once, even if the range contains keys for that
    37  // name/resolution pair at multiple timestamps or from multiple sources.
    38  //
    39  // An engine snapshot is used, rather than a client, because this function is
    40  // intended to be called by a storage queue which can inspect the local data for
    41  // a single range without the need for expensive network calls.
    42  func (tsdb *DB) findTimeSeries(
    43  	snapshot storage.Reader, startKey, endKey roachpb.RKey, now hlc.Timestamp,
    44  ) ([]timeSeriesResolutionInfo, error) {
    45  	var results []timeSeriesResolutionInfo
    46  
    47  	// Set start boundary for the search, which is the lesser of the range start
    48  	// key and the beginning of time series data.
    49  	start := storage.MakeMVCCMetadataKey(startKey.AsRawKey())
    50  	next := storage.MakeMVCCMetadataKey(keys.TimeseriesPrefix)
    51  	if next.Less(start) {
    52  		next = start
    53  	}
    54  
    55  	// Set end boundary for the search, which is the lesser of the range end key
    56  	// and the end of time series data.
    57  	end := storage.MakeMVCCMetadataKey(endKey.AsRawKey())
    58  	lastTS := storage.MakeMVCCMetadataKey(keys.TimeseriesPrefix.PrefixEnd())
    59  	if lastTS.Less(end) {
    60  		end = lastTS
    61  	}
    62  
    63  	thresholds := tsdb.computeThresholds(now.WallTime)
    64  
    65  	iter := snapshot.NewIterator(storage.IterOptions{UpperBound: endKey.AsRawKey()})
    66  	defer iter.Close()
    67  
    68  	for iter.SeekGE(next); ; iter.SeekGE(next) {
    69  		if ok, err := iter.Valid(); err != nil {
    70  			return nil, err
    71  		} else if !ok || !iter.UnsafeKey().Less(end) {
    72  			break
    73  		}
    74  		foundKey := iter.Key().Key
    75  
    76  		// Extract the name and resolution from the discovered key.
    77  		name, _, res, tsNanos, err := DecodeDataKey(foundKey)
    78  		if err != nil {
    79  			return nil, err
    80  		}
    81  		// Skip this time series if there's nothing to prune. We check the
    82  		// oldest (first) time series record's timestamp against the
    83  		// pruning threshold.
    84  		if threshold, ok := thresholds[res]; !ok || threshold > tsNanos {
    85  			results = append(results, timeSeriesResolutionInfo{
    86  				Name:       name,
    87  				Resolution: res,
    88  			})
    89  		}
    90  
    91  		// Set 'next' is initialized to the next possible time series key
    92  		// which could belong to a previously undiscovered time series.
    93  		next = storage.MakeMVCCMetadataKey(makeDataKeySeriesPrefix(name, res).PrefixEnd())
    94  	}
    95  
    96  	return results, nil
    97  }
    98  
    99  // pruneTimeSeries will prune data for the supplied set of time series. Time
   100  // series series are identified by name and resolution.
   101  //
   102  // For each time series supplied, the pruning operation will delete all data
   103  // older than a constant threshold. The threshold is different depending on the
   104  // resolution; typically, lower-resolution time series data will be retained for
   105  // a longer period.
   106  //
   107  // If data is stored at a resolution which is not known to the system, it is
   108  // assumed that the resolution has been deprecated and all data for that time
   109  // series at that resolution will be deleted.
   110  //
   111  // As range deletion of inline data is an idempotent operation, it is safe to
   112  // run this operation concurrently on multiple nodes at the same time.
   113  func (tsdb *DB) pruneTimeSeries(
   114  	ctx context.Context, db *kv.DB, timeSeriesList []timeSeriesResolutionInfo, now hlc.Timestamp,
   115  ) error {
   116  	thresholds := tsdb.computeThresholds(now.WallTime)
   117  
   118  	b := &kv.Batch{}
   119  	for _, timeSeries := range timeSeriesList {
   120  		// Time series data for a specific resolution falls in a contiguous key
   121  		// range, and can be deleted with a DelRange command.
   122  		// The start key is the prefix unique to this name/resolution pair.
   123  		start := makeDataKeySeriesPrefix(timeSeries.Name, timeSeries.Resolution)
   124  
   125  		// The end key can be created by generating a time series key with the
   126  		// threshold timestamp for the resolution. If the resolution is not
   127  		// supported, the start key's PrefixEnd is used instead (which will clear
   128  		// the time series entirely).
   129  		var end roachpb.Key
   130  		threshold, ok := thresholds[timeSeries.Resolution]
   131  		if ok {
   132  			end = MakeDataKey(timeSeries.Name, "", timeSeries.Resolution, threshold)
   133  		} else {
   134  			end = start.PrefixEnd()
   135  		}
   136  
   137  		b.AddRawRequest(&roachpb.DeleteRangeRequest{
   138  			RequestHeader: roachpb.RequestHeader{
   139  				Key:    start,
   140  				EndKey: end,
   141  			},
   142  			Inline: true,
   143  		})
   144  	}
   145  
   146  	return db.Run(ctx, b)
   147  }