github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/testmodel/db.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package testmodel
    12  
    13  import (
    14  	"fmt"
    15  	"sort"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
    20  )
    21  
    22  // ModelDB is a purely in-memory model of CockroachDB's time series database,
    23  // where time series can be stored and queried.
    24  type ModelDB struct {
    25  	data                    map[string]DataSeries
    26  	metricNameToDataSources map[string]map[string]struct{}
    27  	seenDataSources         map[string]struct{}
    28  }
    29  
    30  // NewModelDB instantiates a new ModelDB instance.
    31  func NewModelDB() *ModelDB {
    32  	return &ModelDB{
    33  		data:                    make(map[string]DataSeries),
    34  		metricNameToDataSources: make(map[string]map[string]struct{}),
    35  		seenDataSources:         make(map[string]struct{}),
    36  	}
    37  }
    38  
    39  type seriesVisitor func(string, string, DataSeries) (DataSeries, bool)
    40  
    41  // UniqueSourceCount returns the total number of unique data sources that have been
    42  // encountered by queries.
    43  func (mdb *ModelDB) UniqueSourceCount() int64 {
    44  	return int64(len(mdb.seenDataSources))
    45  }
    46  
    47  // VisitAllSeries calls the provided visitor function on every recorded
    48  // series. The visitor function can optionally return a DataSeries which will
    49  // completely replace the data series present in the model for that series.
    50  func (mdb *ModelDB) VisitAllSeries(visitor seriesVisitor) {
    51  	for k := range mdb.data {
    52  		metricName, dataSource := splitSeriesName(k)
    53  		replacement, replace := visitor(metricName, dataSource, mdb.data[k])
    54  		if replace {
    55  			mdb.data[k] = replacement
    56  		}
    57  	}
    58  }
    59  
    60  // VisitSeries calls the provided visitor function for all series with the given
    61  // metric name. The visitor function can optionally return a DataSeries which
    62  // will completely replace the data series present in the model for that series.
    63  func (mdb *ModelDB) VisitSeries(name string, visitor seriesVisitor) {
    64  	sourceMap, ok := mdb.metricNameToDataSources[name]
    65  	if !ok {
    66  		return
    67  	}
    68  
    69  	for source := range sourceMap {
    70  		replacement, replace := visitor(name, source, mdb.getSeriesData(name, source))
    71  		if replace {
    72  			mdb.data[seriesName(name, source)] = replacement
    73  		}
    74  	}
    75  }
    76  
    77  // Record stores the given data series for the supplied metric and data source,
    78  // merging it with any previously recorded data for the same series.
    79  func (mdb *ModelDB) Record(metricName, dataSource string, data DataSeries) {
    80  	dataSources, ok := mdb.metricNameToDataSources[metricName]
    81  	if !ok {
    82  		dataSources = make(map[string]struct{})
    83  		mdb.metricNameToDataSources[metricName] = dataSources
    84  	}
    85  	dataSources[dataSource] = struct{}{}
    86  	mdb.seenDataSources[dataSource] = struct{}{}
    87  
    88  	seriesName := seriesName(metricName, dataSource)
    89  	mdb.data[seriesName] = append(mdb.data[seriesName], data...)
    90  	sort.Stable(mdb.data[seriesName])
    91  	mdb.data[seriesName] = mdb.data[seriesName].removeDuplicates()
    92  }
    93  
    94  // Query retrieves aggregated data from the model database in the same way that
    95  // data is currently queried from CockroachDB's time series database. Each query
    96  // has a named metric, an optional set of sources, and a number of specified
    97  // aggregation options:
    98  //
    99  // + A downsampler function, which is used to group series by resolution
   100  // + An aggregation function, which is used to group multiples series by
   101  // timestamp
   102  // + A derivative option, which transforms the returned series into a rate of
   103  // change.
   104  //
   105  // Each query has a sample duration (determines the length of the group-by-time
   106  // interval), a slab duration (used to emulate certain effects of CockroachDB's
   107  // current time series tests), a start and end time, and an 'interpolation
   108  // limit' which is a maximum gap size above which missing data is not filled.
   109  // When fills are performed, linear interpolation is always used.
   110  func (mdb *ModelDB) Query(
   111  	name string,
   112  	sources []string,
   113  	downsample, agg tspb.TimeSeriesQueryAggregator,
   114  	derivative tspb.TimeSeriesQueryDerivative,
   115  	slabDuration, sampleDuration, start, end, interpolationLimit, now int64,
   116  ) DataSeries {
   117  	start = normalizeTime(start, sampleDuration)
   118  	// Check query bounds against the provided current time. Queries in the future
   119  	// are disallowed; "future" is considered to be at or later than the sample
   120  	// period containing the current system time (represented by the "now"
   121  	// timestamp)
   122  	cutoff := now - sampleDuration
   123  	if start > cutoff {
   124  		return nil
   125  	}
   126  	if end > cutoff {
   127  		end = cutoff
   128  	}
   129  
   130  	// Add one nanosecond to end because the real CockroachDB system is currently
   131  	// inclusive of end boundary.
   132  	end++
   133  
   134  	// If explicit sources were not specified, use every source currently
   135  	// available for this particular metric.
   136  	if len(sources) == 0 {
   137  		sourceMap, ok := mdb.metricNameToDataSources[name]
   138  		if !ok {
   139  			return nil
   140  		}
   141  		sources = make([]string, 0, len(sourceMap))
   142  		for k := range sourceMap {
   143  			sources = append(sources, k)
   144  		}
   145  	}
   146  
   147  	queryData := make([]DataSeries, 0, len(sources))
   148  	for _, source := range sources {
   149  		queryData = append(queryData, mdb.getSeriesData(name, source))
   150  	}
   151  
   152  	// BeforeFill keeps a list of data series before interpolation. This is used
   153  	// to emulate an odd property of the current CockroachDB time series model:
   154  	// see intersectTimestamps method for details.
   155  	beforeFill := make([]DataSeries, len(queryData))
   156  
   157  	// Process data according to query parameters.
   158  	// The adjusted start and end are needed in order to simulate the slabbing
   159  	// behavior of the real CockroachDB system; it affects how interpolation will
   160  	// behave in existing tests with an interpolation limit of 0. TODO(mrtracy):
   161  	// Remove tests with interpolation limit 0.
   162  	adjustedStart := normalizeTime(start-interpolationLimit, slabDuration)
   163  	adjustedEnd := normalizeTime(end+interpolationLimit-1, slabDuration) + slabDuration
   164  	for i := range queryData {
   165  		data := queryData[i]
   166  
   167  		// Slice to relevant period.
   168  		data = data.TimeSlice(adjustedStart, adjustedEnd)
   169  
   170  		// Group by resolution according to the provided sampleDuration.
   171  		data = data.GroupByResolution(sampleDuration, getAggFunction(downsample))
   172  
   173  		// Save snapshot of data before filling.
   174  		beforeFill[i] = data
   175  
   176  		// Fill in missing data points using linear interpolation. To match existing
   177  		// behavior, do not interpolate if there is only a single series.
   178  		if len(queryData) > 1 {
   179  			data = data.fillForResolution(
   180  				sampleDuration,
   181  				func(before DataSeries, after DataSeries, res int64) DataSeries {
   182  					// Do not fill if this gap exceeds the interpolation limit.
   183  					start := before[len(before)-1]
   184  					end := after[0]
   185  					if interpolationLimit > 0 && end.TimestampNanos-start.TimestampNanos > interpolationLimit {
   186  						return nil
   187  					}
   188  
   189  					return fillFuncLinearInterpolate(before, after, res)
   190  				},
   191  			)
   192  		}
   193  
   194  		// Convert series to its rate-of-change if specified.
   195  		if derivative != tspb.TimeSeriesQueryDerivative_NONE {
   196  			data = data.rateOfChange(time.Second.Nanoseconds())
   197  			if derivative == tspb.TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE {
   198  				data = data.nonNegative()
   199  			}
   200  		}
   201  
   202  		queryData[i] = data
   203  	}
   204  
   205  	result := groupSeriesByTimestamp(queryData, getAggFunction(agg))
   206  	result = result.TimeSlice(start, end)
   207  	result = result.intersectTimestamps(beforeFill...)
   208  	return result
   209  }
   210  
   211  func (mdb *ModelDB) getSeriesData(metricName, dataSource string) DataSeries {
   212  	seriesName := seriesName(metricName, dataSource)
   213  	data, ok := mdb.data[seriesName]
   214  	if !ok {
   215  		return nil
   216  	}
   217  	return data
   218  }
   219  
   220  func seriesName(metricName, dataSource string) string {
   221  	return fmt.Sprintf("%s$$%s", metricName, dataSource)
   222  }
   223  
   224  func splitSeriesName(seriesName string) (string, string) {
   225  	split := strings.Split(seriesName, "$$")
   226  	if len(split) != 2 {
   227  		panic(fmt.Sprintf("attempt to split invalid series name %s", seriesName))
   228  	}
   229  	return split[0], split[1]
   230  }