github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/db.go (about)

     1  // Copyright 2015 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package ts
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/kv"
    19  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    20  	"github.com/cockroachdb/cockroach/pkg/settings"
    21  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    22  	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
    23  	"github.com/cockroachdb/cockroach/pkg/util/log"
    24  	"github.com/cockroachdb/cockroach/pkg/util/stop"
    25  )
    26  
    27  var (
    28  	resolution1nsDefaultRollupThreshold = time.Second
    29  	// The deprecated prune threshold for the 10s resolution was created before
    30  	// time series rollups were enabled. It is still used in the transition period
    31  	// during an upgrade before the cluster version is finalized. After the
    32  	// version upgrade, the rollup threshold is used instead.
    33  	deprecatedResolution10sDefaultPruneThreshold = 30 * 24 * time.Hour
    34  	resolution10sDefaultRollupThreshold          = 10 * 24 * time.Hour
    35  	resolution30mDefaultPruneThreshold           = 90 * 24 * time.Hour
    36  	resolution50nsDefaultPruneThreshold          = 1 * time.Millisecond
    37  )
    38  
    39  // TimeseriesStorageEnabled controls whether to store timeseries data to disk.
    40  var TimeseriesStorageEnabled = settings.RegisterPublicBoolSetting(
    41  	"timeseries.storage.enabled",
    42  	"if set, periodic timeseries data is stored within the cluster; disabling is not recommended "+
    43  		"unless you are storing the data elsewhere",
    44  	true,
    45  )
    46  
    47  // Resolution10sStorageTTL defines the maximum age of data that will be retained
    48  // at he 10 second resolution. Data older than this is subject to being "rolled
    49  // up" into the 30 minute resolution and then deleted.
    50  var Resolution10sStorageTTL = settings.RegisterPublicDurationSetting(
    51  	"timeseries.storage.resolution_10s.ttl",
    52  	"the maximum age of time series data stored at the 10 second resolution. Data older than this "+
    53  		"is subject to rollup and deletion.",
    54  	resolution10sDefaultRollupThreshold,
    55  )
    56  
    57  // deprecatedResolution30StoreDuration is retained for backward compatibility during a version upgrade.
    58  var deprecatedResolution30StoreDuration = func() *settings.DurationSetting {
    59  	s := settings.RegisterDurationSetting(
    60  		"timeseries.storage.30m_resolution_ttl", "replaced by timeseries.storage.resolution_30m.ttl",
    61  		resolution30mDefaultPruneThreshold,
    62  	)
    63  	s.SetRetired()
    64  	return s
    65  }()
    66  
    67  func init() {
    68  	// The setting is not used any more, but we need to keep its
    69  	// definition for backward compatibility until the next release
    70  	// cycle.
    71  	_ = deprecatedResolution30StoreDuration
    72  }
    73  
    74  // Resolution30mStorageTTL defines the maximum age of data that will be
    75  // retained at he 30 minute resolution. Data older than this is subject to
    76  // deletion.
    77  var Resolution30mStorageTTL = settings.RegisterPublicDurationSetting(
    78  	"timeseries.storage.resolution_30m.ttl",
    79  	"the maximum age of time series data stored at the 30 minute resolution. Data older than this "+
    80  		"is subject to deletion.",
    81  	resolution30mDefaultPruneThreshold,
    82  )
    83  
    84  // DB provides Cockroach's Time Series API.
    85  type DB struct {
    86  	db      *kv.DB
    87  	st      *cluster.Settings
    88  	metrics *TimeSeriesMetrics
    89  
    90  	// pruneAgeByResolution maintains a suggested maximum age per resolution; data
    91  	// which is older than the given threshold for a resolution is considered
    92  	// eligible for deletion. Thresholds are specified in nanoseconds.
    93  	pruneThresholdByResolution map[Resolution]func() int64
    94  
    95  	// forceRowFormat is set to true if the database should write in the old row
    96  	// format, regardless of the current cluster setting. Currently only set to
    97  	// true in tests to verify backwards compatibility.
    98  	forceRowFormat bool
    99  }
   100  
   101  // NewDB creates a new DB instance.
   102  func NewDB(db *kv.DB, settings *cluster.Settings) *DB {
   103  	pruneThresholdByResolution := map[Resolution]func() int64{
   104  		Resolution10s: func() int64 {
   105  			return Resolution10sStorageTTL.Get(&settings.SV).Nanoseconds()
   106  		},
   107  		Resolution30m:  func() int64 { return Resolution30mStorageTTL.Get(&settings.SV).Nanoseconds() },
   108  		resolution1ns:  func() int64 { return resolution1nsDefaultRollupThreshold.Nanoseconds() },
   109  		resolution50ns: func() int64 { return resolution50nsDefaultPruneThreshold.Nanoseconds() },
   110  	}
   111  	return &DB{
   112  		db:                         db,
   113  		st:                         settings,
   114  		metrics:                    NewTimeSeriesMetrics(),
   115  		pruneThresholdByResolution: pruneThresholdByResolution,
   116  	}
   117  }
   118  
   119  // A DataSource can be queryied for a slice of time series data.
   120  type DataSource interface {
   121  	GetTimeSeriesData() []tspb.TimeSeriesData
   122  }
   123  
   124  // poller maintains information for a polling process started by PollSource().
   125  type poller struct {
   126  	log.AmbientContext
   127  	db        *DB
   128  	source    DataSource
   129  	frequency time.Duration
   130  	r         Resolution
   131  	stopper   *stop.Stopper
   132  }
   133  
   134  // PollSource begins a Goroutine which periodically queries the supplied
   135  // DataSource for time series data, storing the returned data in the server.
   136  // Stored data will be sampled using the provided Resolution. The polling
   137  // process will continue until the provided stop.Stopper is stopped.
   138  func (db *DB) PollSource(
   139  	ambient log.AmbientContext,
   140  	source DataSource,
   141  	frequency time.Duration,
   142  	r Resolution,
   143  	stopper *stop.Stopper,
   144  ) {
   145  	ambient.AddLogTag("ts-poll", nil)
   146  	p := &poller{
   147  		AmbientContext: ambient,
   148  		db:             db,
   149  		source:         source,
   150  		frequency:      frequency,
   151  		r:              r,
   152  		stopper:        stopper,
   153  	}
   154  	p.start()
   155  }
   156  
   157  // start begins the goroutine for this poller, which will periodically request
   158  // time series data from the DataSource and store it.
   159  func (p *poller) start() {
   160  	p.stopper.RunWorker(context.TODO(), func(context.Context) {
   161  		// Poll once immediately.
   162  		p.poll()
   163  		ticker := time.NewTicker(p.frequency)
   164  		defer ticker.Stop()
   165  		for {
   166  			select {
   167  			case <-ticker.C:
   168  				p.poll()
   169  			case <-p.stopper.ShouldStop():
   170  				return
   171  			}
   172  		}
   173  	})
   174  }
   175  
   176  // poll retrieves data from the underlying DataSource a single time, storing any
   177  // returned time series data on the server.
   178  func (p *poller) poll() {
   179  	if !TimeseriesStorageEnabled.Get(&p.db.st.SV) {
   180  		return
   181  	}
   182  
   183  	bgCtx := p.AnnotateCtx(context.Background())
   184  	if err := p.stopper.RunTask(bgCtx, "ts.poller: poll", func(bgCtx context.Context) {
   185  		data := p.source.GetTimeSeriesData()
   186  		if len(data) == 0 {
   187  			return
   188  		}
   189  
   190  		ctx, span := p.AnnotateCtxWithSpan(bgCtx, "ts-poll")
   191  		defer span.Finish()
   192  
   193  		if err := p.db.StoreData(ctx, p.r, data); err != nil {
   194  			log.Warningf(ctx, "error writing time series data: %s", err)
   195  		}
   196  	}); err != nil {
   197  		log.Warningf(bgCtx, "%v", err)
   198  	}
   199  }
   200  
   201  // StoreData writes the supplied time series data to the cockroach server.
   202  // Stored data will be sampled at the supplied resolution.
   203  func (db *DB) StoreData(ctx context.Context, r Resolution, data []tspb.TimeSeriesData) error {
   204  	if r.IsRollup() {
   205  		return fmt.Errorf(
   206  			"invalid attempt to store time series data in rollup resolution %s", r.String(),
   207  		)
   208  	}
   209  	if TimeseriesStorageEnabled.Get(&db.st.SV) {
   210  		if err := db.tryStoreData(ctx, r, data); err != nil {
   211  			db.metrics.WriteErrors.Inc(1)
   212  			return err
   213  		}
   214  	}
   215  	return nil
   216  }
   217  
   218  func (db *DB) tryStoreData(ctx context.Context, r Resolution, data []tspb.TimeSeriesData) error {
   219  	var kvs []roachpb.KeyValue
   220  	var totalSizeOfKvs int64
   221  	var totalSamples int64
   222  
   223  	// Process data collection: data is converted to internal format, and a key
   224  	// is generated for each internal message.
   225  	for _, d := range data {
   226  		idatas, err := d.ToInternal(r.SlabDuration(), r.SampleDuration(), db.WriteColumnar())
   227  		if err != nil {
   228  			return err
   229  		}
   230  		for _, idata := range idatas {
   231  			var value roachpb.Value
   232  			if err := value.SetProto(&idata); err != nil {
   233  				return err
   234  			}
   235  			key := MakeDataKey(d.Name, d.Source, r, idata.StartTimestampNanos)
   236  			kvs = append(kvs, roachpb.KeyValue{
   237  				Key:   key,
   238  				Value: value,
   239  			})
   240  			totalSamples += int64(idata.SampleCount())
   241  			totalSizeOfKvs += int64(len(value.RawBytes)+len(key)) + sizeOfTimestamp
   242  		}
   243  	}
   244  
   245  	if err := db.storeKvs(ctx, kvs); err != nil {
   246  		return err
   247  	}
   248  
   249  	db.metrics.WriteSamples.Inc(totalSamples)
   250  	db.metrics.WriteBytes.Inc(totalSizeOfKvs)
   251  	return nil
   252  }
   253  
   254  // storeRollup writes the supplied time series rollup data to the cockroach
   255  // server.
   256  func (db *DB) storeRollup(ctx context.Context, r Resolution, data []rollupData) error {
   257  	if !r.IsRollup() {
   258  		return fmt.Errorf(
   259  			"invalid attempt to store rollup data in non-rollup resolution %s", r.String(),
   260  		)
   261  	}
   262  	if TimeseriesStorageEnabled.Get(&db.st.SV) {
   263  		if err := db.tryStoreRollup(ctx, r, data); err != nil {
   264  			db.metrics.WriteErrors.Inc(1)
   265  			return err
   266  		}
   267  	}
   268  	return nil
   269  }
   270  
   271  func (db *DB) tryStoreRollup(ctx context.Context, r Resolution, data []rollupData) error {
   272  	var kvs []roachpb.KeyValue
   273  
   274  	for _, d := range data {
   275  		idatas, err := d.toInternal(r.SlabDuration(), r.SampleDuration())
   276  		if err != nil {
   277  			return err
   278  		}
   279  		for _, idata := range idatas {
   280  			var value roachpb.Value
   281  			if err := value.SetProto(&idata); err != nil {
   282  				return err
   283  			}
   284  			key := MakeDataKey(d.name, d.source, r, idata.StartTimestampNanos)
   285  			kvs = append(kvs, roachpb.KeyValue{
   286  				Key:   key,
   287  				Value: value,
   288  			})
   289  		}
   290  	}
   291  
   292  	return db.storeKvs(ctx, kvs)
   293  	// TODO(mrtracy): metrics for rollups stored
   294  }
   295  
   296  func (db *DB) storeKvs(ctx context.Context, kvs []roachpb.KeyValue) error {
   297  	b := &kv.Batch{}
   298  	for _, kv := range kvs {
   299  		b.AddRawRequest(&roachpb.MergeRequest{
   300  			RequestHeader: roachpb.RequestHeader{
   301  				Key: kv.Key,
   302  			},
   303  			Value: kv.Value,
   304  		})
   305  	}
   306  
   307  	return db.db.Run(ctx, b)
   308  }
   309  
   310  // computeThresholds returns a map of timestamps for each resolution supported
   311  // by the system. Data at a resolution which is older than the threshold
   312  // timestamp for that resolution is considered eligible for deletion.
   313  func (db *DB) computeThresholds(timestamp int64) map[Resolution]int64 {
   314  	result := make(map[Resolution]int64, len(db.pruneThresholdByResolution))
   315  	for k, v := range db.pruneThresholdByResolution {
   316  		result[k] = timestamp - v()
   317  	}
   318  	return result
   319  }
   320  
   321  // PruneThreshold returns the pruning threshold duration for this resolution,
   322  // expressed in nanoseconds. This duration determines how old time series data
   323  // must be before it is eligible for pruning.
   324  func (db *DB) PruneThreshold(r Resolution) int64 {
   325  	threshold, ok := db.pruneThresholdByResolution[r]
   326  	if !ok {
   327  		panic(fmt.Sprintf("no prune threshold found for resolution value %v", r))
   328  	}
   329  	return threshold()
   330  }
   331  
   332  // Metrics gets the TimeSeriesMetrics structure used by this DB instance.
   333  func (db *DB) Metrics() *TimeSeriesMetrics {
   334  	return db.metrics
   335  }
   336  
   337  // WriteColumnar returns true if this DB should write data in the newer columnar
   338  // format.
   339  func (db *DB) WriteColumnar() bool {
   340  	return !db.forceRowFormat
   341  }
   342  
   343  // WriteRollups returns true if this DB should write rollups for resolutions
   344  // targeted for a rollup resolution.
   345  func (db *DB) WriteRollups() bool {
   346  	return !db.forceRowFormat
   347  }