github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ts/testmodel/db.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package testmodel 12 13 import ( 14 "fmt" 15 "sort" 16 "strings" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/ts/tspb" 20 ) 21 22 // ModelDB is a purely in-memory model of CockroachDB's time series database, 23 // where time series can be stored and queried. 24 type ModelDB struct { 25 data map[string]DataSeries 26 metricNameToDataSources map[string]map[string]struct{} 27 seenDataSources map[string]struct{} 28 } 29 30 // NewModelDB instantiates a new ModelDB instance. 31 func NewModelDB() *ModelDB { 32 return &ModelDB{ 33 data: make(map[string]DataSeries), 34 metricNameToDataSources: make(map[string]map[string]struct{}), 35 seenDataSources: make(map[string]struct{}), 36 } 37 } 38 39 type seriesVisitor func(string, string, DataSeries) (DataSeries, bool) 40 41 // UniqueSourceCount returns the total number of unique data sources that have been 42 // encountered by queries. 43 func (mdb *ModelDB) UniqueSourceCount() int64 { 44 return int64(len(mdb.seenDataSources)) 45 } 46 47 // VisitAllSeries calls the provided visitor function on every recorded 48 // series. The visitor function can optionally return a DataSeries which will 49 // completely replace the data series present in the model for that series. 50 func (mdb *ModelDB) VisitAllSeries(visitor seriesVisitor) { 51 for k := range mdb.data { 52 metricName, dataSource := splitSeriesName(k) 53 replacement, replace := visitor(metricName, dataSource, mdb.data[k]) 54 if replace { 55 mdb.data[k] = replacement 56 } 57 } 58 } 59 60 // VisitSeries calls the provided visitor function for all series with the given 61 // metric name. The visitor function can optionally return a DataSeries which 62 // will completely replace the data series present in the model for that series. 63 func (mdb *ModelDB) VisitSeries(name string, visitor seriesVisitor) { 64 sourceMap, ok := mdb.metricNameToDataSources[name] 65 if !ok { 66 return 67 } 68 69 for source := range sourceMap { 70 replacement, replace := visitor(name, source, mdb.getSeriesData(name, source)) 71 if replace { 72 mdb.data[seriesName(name, source)] = replacement 73 } 74 } 75 } 76 77 // Record stores the given data series for the supplied metric and data source, 78 // merging it with any previously recorded data for the same series. 79 func (mdb *ModelDB) Record(metricName, dataSource string, data DataSeries) { 80 dataSources, ok := mdb.metricNameToDataSources[metricName] 81 if !ok { 82 dataSources = make(map[string]struct{}) 83 mdb.metricNameToDataSources[metricName] = dataSources 84 } 85 dataSources[dataSource] = struct{}{} 86 mdb.seenDataSources[dataSource] = struct{}{} 87 88 seriesName := seriesName(metricName, dataSource) 89 mdb.data[seriesName] = append(mdb.data[seriesName], data...) 90 sort.Stable(mdb.data[seriesName]) 91 mdb.data[seriesName] = mdb.data[seriesName].removeDuplicates() 92 } 93 94 // Query retrieves aggregated data from the model database in the same way that 95 // data is currently queried from CockroachDB's time series database. Each query 96 // has a named metric, an optional set of sources, and a number of specified 97 // aggregation options: 98 // 99 // + A downsampler function, which is used to group series by resolution 100 // + An aggregation function, which is used to group multiples series by 101 // timestamp 102 // + A derivative option, which transforms the returned series into a rate of 103 // change. 104 // 105 // Each query has a sample duration (determines the length of the group-by-time 106 // interval), a slab duration (used to emulate certain effects of CockroachDB's 107 // current time series tests), a start and end time, and an 'interpolation 108 // limit' which is a maximum gap size above which missing data is not filled. 109 // When fills are performed, linear interpolation is always used. 110 func (mdb *ModelDB) Query( 111 name string, 112 sources []string, 113 downsample, agg tspb.TimeSeriesQueryAggregator, 114 derivative tspb.TimeSeriesQueryDerivative, 115 slabDuration, sampleDuration, start, end, interpolationLimit, now int64, 116 ) DataSeries { 117 start = normalizeTime(start, sampleDuration) 118 // Check query bounds against the provided current time. Queries in the future 119 // are disallowed; "future" is considered to be at or later than the sample 120 // period containing the current system time (represented by the "now" 121 // timestamp) 122 cutoff := now - sampleDuration 123 if start > cutoff { 124 return nil 125 } 126 if end > cutoff { 127 end = cutoff 128 } 129 130 // Add one nanosecond to end because the real CockroachDB system is currently 131 // inclusive of end boundary. 132 end++ 133 134 // If explicit sources were not specified, use every source currently 135 // available for this particular metric. 136 if len(sources) == 0 { 137 sourceMap, ok := mdb.metricNameToDataSources[name] 138 if !ok { 139 return nil 140 } 141 sources = make([]string, 0, len(sourceMap)) 142 for k := range sourceMap { 143 sources = append(sources, k) 144 } 145 } 146 147 queryData := make([]DataSeries, 0, len(sources)) 148 for _, source := range sources { 149 queryData = append(queryData, mdb.getSeriesData(name, source)) 150 } 151 152 // BeforeFill keeps a list of data series before interpolation. This is used 153 // to emulate an odd property of the current CockroachDB time series model: 154 // see intersectTimestamps method for details. 155 beforeFill := make([]DataSeries, len(queryData)) 156 157 // Process data according to query parameters. 158 // The adjusted start and end are needed in order to simulate the slabbing 159 // behavior of the real CockroachDB system; it affects how interpolation will 160 // behave in existing tests with an interpolation limit of 0. TODO(mrtracy): 161 // Remove tests with interpolation limit 0. 162 adjustedStart := normalizeTime(start-interpolationLimit, slabDuration) 163 adjustedEnd := normalizeTime(end+interpolationLimit-1, slabDuration) + slabDuration 164 for i := range queryData { 165 data := queryData[i] 166 167 // Slice to relevant period. 168 data = data.TimeSlice(adjustedStart, adjustedEnd) 169 170 // Group by resolution according to the provided sampleDuration. 171 data = data.GroupByResolution(sampleDuration, getAggFunction(downsample)) 172 173 // Save snapshot of data before filling. 174 beforeFill[i] = data 175 176 // Fill in missing data points using linear interpolation. To match existing 177 // behavior, do not interpolate if there is only a single series. 178 if len(queryData) > 1 { 179 data = data.fillForResolution( 180 sampleDuration, 181 func(before DataSeries, after DataSeries, res int64) DataSeries { 182 // Do not fill if this gap exceeds the interpolation limit. 183 start := before[len(before)-1] 184 end := after[0] 185 if interpolationLimit > 0 && end.TimestampNanos-start.TimestampNanos > interpolationLimit { 186 return nil 187 } 188 189 return fillFuncLinearInterpolate(before, after, res) 190 }, 191 ) 192 } 193 194 // Convert series to its rate-of-change if specified. 195 if derivative != tspb.TimeSeriesQueryDerivative_NONE { 196 data = data.rateOfChange(time.Second.Nanoseconds()) 197 if derivative == tspb.TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE { 198 data = data.nonNegative() 199 } 200 } 201 202 queryData[i] = data 203 } 204 205 result := groupSeriesByTimestamp(queryData, getAggFunction(agg)) 206 result = result.TimeSlice(start, end) 207 result = result.intersectTimestamps(beforeFill...) 208 return result 209 } 210 211 func (mdb *ModelDB) getSeriesData(metricName, dataSource string) DataSeries { 212 seriesName := seriesName(metricName, dataSource) 213 data, ok := mdb.data[seriesName] 214 if !ok { 215 return nil 216 } 217 return data 218 } 219 220 func seriesName(metricName, dataSource string) string { 221 return fmt.Sprintf("%s$$%s", metricName, dataSource) 222 } 223 224 func splitSeriesName(seriesName string) (string, string) { 225 split := strings.Split(seriesName, "$$") 226 if len(split) != 2 { 227 panic(fmt.Sprintf("attempt to split invalid series name %s", seriesName)) 228 } 229 return split[0], split[1] 230 }