github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvserver/ts_maintenance_queue.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package kvserver

import (
	"context"
	"time"

	"github.com/cockroachdb/cockroach/pkg/config"
	"github.com/cockroachdb/cockroach/pkg/gossip"
	"github.com/cockroachdb/cockroach/pkg/kv"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/mon"
)

const (
	// TimeSeriesMaintenanceInterval is the minimum interval between two
	// time series maintenance runs on a replica.
	TimeSeriesMaintenanceInterval = 24 * time.Hour // daily

	// TimeSeriesMaintenanceMemoryBudget is the maximum amount of memory that
	// should be consumed by time series maintenance operations at any one time.
	TimeSeriesMaintenanceMemoryBudget = int64(8 * 1024 * 1024) // 8MB
)

// TimeSeriesDataStore is an interface defined in the kvserver package that can
// be implemented by the higher-level time series system. This allows the
// storage queues to run periodic time series maintenance; importantly, this
// maintenance can then be informed by data from the local store.
type TimeSeriesDataStore interface {
	ContainsTimeSeries(start, end roachpb.RKey) bool
	MaintainTimeSeries(
		ctx context.Context,
		reader storage.Reader,
		start, end roachpb.RKey,
		db *kv.DB,
		mem *mon.BytesMonitor,
		budgetBytes int64,
		now hlc.Timestamp,
	) error
}
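
// As a purely illustrative sketch (not part of this package; the production
// implementation is provided by the higher-level time series package), a
// no-op implementation of the interface could look like:
//
//	type noopTSData struct{}
//
//	func (noopTSData) ContainsTimeSeries(start, end roachpb.RKey) bool {
//		return false // claim no time series data, so nothing is ever queued
//	}
//
//	func (noopTSData) MaintainTimeSeries(
//		ctx context.Context, reader storage.Reader, start, end roachpb.RKey,
//		db *kv.DB, mem *mon.BytesMonitor, budgetBytes int64, now hlc.Timestamp,
//	) error {
//		return nil // nothing to prune
//	}
//
// Wiring such a store in would cause shouldQueue (below) to always decline.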

// timeSeriesMaintenanceQueue identifies replicas that contain time series
// data and performs necessary data maintenance on the time series located in
// the replica. Currently, maintenance involves pruning time series data older
// than a certain threshold.
//
// Logic for time series maintenance is implemented in a higher-level time
// series package; this queue uses the TimeSeriesDataStore interface to call
// into that logic.
//
// Once a replica is selected for processing, data changes are executed against
// the cluster using a KV client; changes are not restricted to the data in the
// replica being processed. These tasks could therefore be performed without a
// replica queue; however, a replica queue has been chosen to initiate this task
// for a few reasons:
// * The queue naturally distributes the workload across the cluster in
// proportion to the number of ranges containing time series data.
// * Access to the local replica is a convenient way to discover the names of
// time series stored on the cluster; the names are required in order to
// effectively prune time series without expensive distributed scans.
//
// Data changes executed by this queue are idempotent; it is explicitly safe
// for multiple nodes to attempt to prune the same time series concurrently.
// In this situation, each node would compute the same delete range based on
// the current timestamp; the first will succeed, and all others will become
// no-ops.
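//
// For example (an illustrative sketch; the actual cutoff computation lives in
// the time series package, and the retention variable below is hypothetical):
//
//	cutoff := now.Add(-retention.Nanoseconds(), 0)
//	// All nodes pruning in the same maintenance window derive the same
//	// cutoff, so their DeleteRange requests over [start, cutoff) coincide.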
type timeSeriesMaintenanceQueue struct {
	*baseQueue
	tsData         TimeSeriesDataStore
	replicaCountFn func() int
	db             *kv.DB
	mem            mon.BytesMonitor
}

// newTimeSeriesMaintenanceQueue returns a new instance of
// timeSeriesMaintenanceQueue.
func newTimeSeriesMaintenanceQueue(
	store *Store, db *kv.DB, g *gossip.Gossip, tsData TimeSeriesDataStore,
) *timeSeriesMaintenanceQueue {
	q := &timeSeriesMaintenanceQueue{
		tsData:         tsData,
		replicaCountFn: store.ReplicaCount,
		db:             db,
		mem: mon.MakeUnlimitedMonitor(
			context.Background(),
			"timeseries-maintenance-queue",
			mon.MemoryResource,
			nil, // curCount: no metric tracking of current usage
			nil, // maxHist: no histogram of maximum usage
			// Begin logging messages if we exceed our planned memory usage
			// by more than a factor of three.
			TimeSeriesMaintenanceMemoryBudget*3,
			store.cfg.Settings,
		),
	}
	q.baseQueue = newBaseQueue(
		"timeSeriesMaintenance", q, store, g,
		queueConfig{
			maxSize:              defaultQueueMaxSize,
			needsLease:           true,
			needsSystemConfig:    false,
			acceptsUnsplitRanges: true,
			successes:            store.metrics.TimeSeriesMaintenanceQueueSuccesses,
			failures:             store.metrics.TimeSeriesMaintenanceQueueFailures,
			pending:              store.metrics.TimeSeriesMaintenanceQueuePending,
			processingNanos:      store.metrics.TimeSeriesMaintenanceQueueProcessingNanos,
		},
	)

	return q
}
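
// As a hedged illustration (the exact accounting pattern inside
// MaintainTimeSeries is up to the time series package), a consumer would
// typically charge its allocations against this monitor via a bound account:
//
//	acc := q.mem.MakeBoundAccount()
//	defer acc.Close(ctx)
//	if err := acc.Grow(ctx, batchSize); err != nil {
//		return err // over budget; batchSize is a hypothetical allocation size
//	}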

func (q *timeSeriesMaintenanceQueue) shouldQueue(
	ctx context.Context, now hlc.Timestamp, repl *Replica, _ *config.SystemConfig,
) (shouldQ bool, priority float64) {
	if !repl.store.cfg.TestingKnobs.DisableLastProcessedCheck {
		lpTS, err := repl.getQueueLastProcessed(ctx, q.name)
		if err != nil {
			return false, 0
		}
		shouldQ, priority = shouldQueueAgain(now, lpTS, TimeSeriesMaintenanceInterval)
		if !shouldQ {
			return
		}
	}
	desc := repl.Desc()
	if q.tsData.ContainsTimeSeries(desc.StartKey, desc.EndKey) {
		// Return explicitly: if the last-processed check above was skipped via
		// the testing knob, the named return values still hold their zero
		// (false) defaults, and a bare return would incorrectly decline to
		// queue the replica.
		return true, priority
	}
	return false, 0
}
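
// As a worked example, assuming shouldQueueAgain scales the priority by how
// overdue the replica is relative to the interval (the convention used by the
// other last-processed-based queues in this package): a replica last processed
// 48h ago with a 24h interval is queued at priority ~2.0, while one processed
// 12h ago is not queued at all.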

func (q *timeSeriesMaintenanceQueue) process(
	ctx context.Context, repl *Replica, _ *config.SystemConfig,
) error {
	desc := repl.Desc()
	// Take an engine snapshot so that maintenance sees a consistent view of
	// the local store while it scans for time series names.
	snap := repl.store.Engine().NewSnapshot()
	now := repl.store.Clock().Now()
	defer snap.Close()
	if err := q.tsData.MaintainTimeSeries(
		ctx, snap, desc.StartKey, desc.EndKey, q.db, &q.mem, TimeSeriesMaintenanceMemoryBudget, now,
	); err != nil {
		return err
	}
	// Update the last processed time for this queue.
	if err := repl.setQueueLastProcessed(ctx, q.name, now); err != nil {
		log.VErrEventf(ctx, 2, "failed to update last processed time: %v", err)
	}
	return nil
}
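
// The last-processed timestamp read by shouldQueue and written above is kept
// in a replica-local key; a sketch of the underlying read (assuming the
// QueueLastProcessedKey helper in pkg/keys and MVCCGetProto in pkg/storage):
//
//	key := keys.QueueLastProcessedKey(desc.StartKey, q.name)
//	var ts hlc.Timestamp
//	found, err := storage.MVCCGetProto(
//		ctx, snap, key, hlc.Timestamp{}, &ts, storage.MVCCGetOptions{},
//	)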

func (q *timeSeriesMaintenanceQueue) timer(duration time.Duration) time.Duration {
	// Space maintenance runs out across the store's replicas so that they are
	// spread evenly over the maintenance interval.
	replicaCount := q.replicaCountFn()
	if replicaCount == 0 {
		return 0
	}
	replInterval := TimeSeriesMaintenanceInterval / time.Duration(replicaCount)
	if replInterval < duration {
		return 0
	}
	return replInterval - duration
}
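
// For example, with 1,000 replicas on the store the per-replica interval is
// 24h/1000 ≈ 86.4s; if processing the previous replica took 10s (duration),
// the queue waits another ~76.4s. Once processing takes longer than the
// per-replica interval, the wait drops to zero and the queue runs
// continuously.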

func (*timeSeriesMaintenanceQueue) purgatoryChan() <-chan time.Time {
	// Returning nil means this queue has no purgatory: replicas whose
	// processing fails are simply retried on a later scanner pass.
	return nil
}