github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/testexporter/correctness/simple.go (about) 1 package correctness 2 3 import ( 4 "context" 5 "flag" 6 "fmt" 7 "math" 8 "sort" 9 "time" 10 11 "github.com/go-kit/log/level" 12 v1 "github.com/prometheus/client_golang/api/prometheus/v1" 13 "github.com/prometheus/client_golang/prometheus" 14 "github.com/prometheus/client_golang/prometheus/promauto" 15 "github.com/prometheus/common/model" 16 17 "github.com/cortexproject/cortex/pkg/util/spanlogger" 18 ) 19 20 const ( 21 namespace = "prometheus" 22 subsystem = "test_exporter" 23 ) 24 25 var sampleResult = promauto.NewCounterVec( 26 prometheus.CounterOpts{ 27 Subsystem: subsystem, 28 Name: "sample_result_total", 29 Help: "Number of samples that succeed / fail.", 30 }, 31 []string{"test_name", "result"}, 32 ) 33 34 type simpleTestCase struct { 35 prometheus.GaugeFunc 36 name string 37 expectedValueAt func(time.Time) float64 38 cfg CommonTestConfig 39 } 40 41 type CommonTestConfig struct { 42 testTimeEpsilon time.Duration 43 testEpsilon float64 44 ScrapeInterval time.Duration 45 samplesEpsilon float64 46 timeQueryStart TimeValue 47 durationQuerySince time.Duration 48 } 49 50 func (cfg *CommonTestConfig) RegisterFlags(f *flag.FlagSet) { 51 f.DurationVar(&cfg.testTimeEpsilon, "test-time-epsilion", 1*time.Second, "Amount samples are allowed to be off by") 52 f.Float64Var(&cfg.testEpsilon, "test-epsilion", 0.01, "Amount samples are allowed to be off by this %%") 53 f.DurationVar(&cfg.ScrapeInterval, "scrape-interval", 15*time.Second, "Expected scrape interval.") 54 f.Float64Var(&cfg.samplesEpsilon, "test-samples-epsilon", 0.1, "Amount that the number of samples are allowed to be off by") 55 56 // By default, we only query for values from when this process started 57 cfg.timeQueryStart = NewTimeValue(time.Now()) 58 f.Var(&cfg.timeQueryStart, "test-query-start", "Minimum start date for queries") 59 f.DurationVar(&cfg.durationQuerySince, "test-query-since", 0, "Duration in the past to test. Overrides -test-query-start") 60 } 61 62 // NewSimpleTestCase makes a new simpleTestCase 63 func NewSimpleTestCase(name string, f func(time.Time) float64, cfg CommonTestConfig) Case { 64 return &simpleTestCase{ 65 GaugeFunc: prometheus.NewGaugeFunc( 66 prometheus.GaugeOpts{ 67 Namespace: namespace, 68 Subsystem: subsystem, 69 Name: name, 70 Help: name, 71 }, 72 func() float64 { 73 return f(time.Now()) 74 }, 75 ), 76 name: name, 77 expectedValueAt: f, 78 cfg: cfg, 79 } 80 } 81 82 func (tc *simpleTestCase) Stop() { 83 } 84 85 func (tc *simpleTestCase) Name() string { 86 return tc.name 87 } 88 89 func (tc *simpleTestCase) ExpectedValueAt(t time.Time) float64 { 90 return tc.expectedValueAt(t) 91 } 92 93 func (tc *simpleTestCase) Query(ctx context.Context, client v1.API, selectors string, start time.Time, duration time.Duration) ([]model.SamplePair, error) { 94 log, ctx := spanlogger.New(ctx, "simpleTestCase.Query") 95 defer log.Finish() 96 97 metricName := prometheus.BuildFQName(namespace, subsystem, tc.name) 98 query := fmt.Sprintf("%s{%s}[%dm]", metricName, selectors, duration/time.Minute) 99 level.Info(log).Log("query", query) 100 101 value, wrngs, err := client.Query(ctx, query, start) 102 if err != nil { 103 return nil, err 104 } 105 if wrngs != nil { 106 level.Warn(log).Log( 107 "query", query, 108 "start", start, 109 "warnings", wrngs, 110 ) 111 } 112 if value.Type() != model.ValMatrix { 113 return nil, fmt.Errorf("didn't get matrix from Prom") 114 } 115 116 ms, ok := value.(model.Matrix) 117 if !ok { 118 return nil, fmt.Errorf("didn't get matrix from Prom") 119 } 120 121 // sort samples belonging to different series by first timestamp of the batch 122 sort.Slice(ms, func(i, j int) bool { 123 if len(ms[i].Values) == 0 { 124 return true 125 } 126 if len(ms[j].Values) == 0 { 127 return true 128 } 129 130 return ms[i].Values[0].Timestamp.Before(ms[j].Values[0].Timestamp) 131 }) 132 133 var result []model.SamplePair 134 for _, stream := range ms { 135 result = append(result, stream.Values...) 136 } 137 return result, nil 138 } 139 140 func (tc *simpleTestCase) Test(ctx context.Context, client v1.API, selectors string, start time.Time, duration time.Duration) (bool, error) { 141 log := spanlogger.FromContext(ctx) 142 pairs, err := tc.Query(ctx, client, selectors, start, duration) 143 if err != nil { 144 level.Info(log).Log("err", err) 145 return false, err 146 } 147 148 return verifySamples(spanlogger.FromContext(ctx), tc, pairs, duration, tc.cfg), nil 149 } 150 151 func (tc *simpleTestCase) MinQueryTime() time.Time { 152 return calculateMinQueryTime(tc.cfg.durationQuerySince, tc.cfg.timeQueryStart) 153 } 154 155 func verifySamples(log *spanlogger.SpanLogger, tc Case, pairs []model.SamplePair, duration time.Duration, cfg CommonTestConfig) bool { 156 for _, pair := range pairs { 157 correct := timeEpsilonCorrect(tc.ExpectedValueAt, pair, cfg.testTimeEpsilon) || valueEpsilonCorrect(tc.ExpectedValueAt, pair, cfg.testEpsilon) 158 if correct { 159 sampleResult.WithLabelValues(tc.Name(), success).Inc() 160 } else { 161 sampleResult.WithLabelValues(tc.Name(), fail).Inc() 162 level.Error(log).Log("msg", "wrong value", "at", pair.Timestamp, "expected", tc.ExpectedValueAt(pair.Timestamp.Time()), "actual", pair.Value) 163 log.Error(fmt.Errorf("wrong value")) 164 return false 165 } 166 } 167 168 // when verifying a deleted series we get samples for very short interval. As small as 1 or 2 missing/extra samples can cause test to fail. 169 if duration > 5*time.Minute { 170 expectedNumSamples := int(duration / cfg.ScrapeInterval) 171 if !epsilonCorrect(float64(len(pairs)), float64(expectedNumSamples), cfg.samplesEpsilon) { 172 level.Error(log).Log("msg", "wrong number of samples", "expected", expectedNumSamples, "actual", len(pairs)) 173 log.Error(fmt.Errorf("wrong number of samples")) 174 return false 175 } 176 } else { 177 expectedNumSamples := int(duration / cfg.ScrapeInterval) 178 if math.Abs(float64(expectedNumSamples-len(pairs))) > 2 { 179 level.Error(log).Log("msg", "wrong number of samples", "expected", expectedNumSamples, "actual", len(pairs)) 180 log.Error(fmt.Errorf("wrong number of samples")) 181 return false 182 } 183 } 184 185 return true 186 } 187 188 func timeEpsilonCorrect(f func(time.Time) float64, pair model.SamplePair, testTimeEpsilon time.Duration) bool { 189 minExpected := f(pair.Timestamp.Time().Add(-testTimeEpsilon)) 190 maxExpected := f(pair.Timestamp.Time().Add(testTimeEpsilon)) 191 if minExpected > maxExpected { 192 minExpected, maxExpected = maxExpected, minExpected 193 } 194 return minExpected < float64(pair.Value) && float64(pair.Value) < maxExpected 195 } 196 197 func valueEpsilonCorrect(f func(time.Time) float64, pair model.SamplePair, testEpsilon float64) bool { 198 return epsilonCorrect(float64(pair.Value), f(pair.Timestamp.Time()), testEpsilon) 199 } 200 201 func epsilonCorrect(actual, expected, epsilon float64) bool { 202 delta := math.Abs((actual - expected) / expected) 203 return delta < epsilon 204 } 205 206 func calculateMinQueryTime(durationQuerySince time.Duration, timeQueryStart TimeValue) time.Time { 207 minQueryTime := startTime 208 if durationQuerySince != 0 { 209 minQueryTime = time.Now().Add(-durationQuerySince) 210 } else if timeQueryStart.set { 211 minQueryTime = timeQueryStart.Time 212 } 213 return minQueryTime 214 }