github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/testexporter/correctness/runner.go (about) 1 package correctness 2 3 import ( 4 "context" 5 "flag" 6 "fmt" 7 "math/rand" 8 "net/http" 9 "sync" 10 "time" 11 12 "github.com/go-kit/log/level" 13 "github.com/opentracing-contrib/go-stdlib/nethttp" 14 opentracing "github.com/opentracing/opentracing-go" 15 "github.com/prometheus/client_golang/api" 16 v1 "github.com/prometheus/client_golang/api/prometheus/v1" 17 "github.com/prometheus/client_golang/prometheus" 18 "github.com/prometheus/client_golang/prometheus/promauto" 19 "github.com/prometheus/client_golang/prometheus/promhttp" 20 "github.com/weaveworks/common/user" 21 22 "github.com/cortexproject/cortex/pkg/util/spanlogger" 23 ) 24 25 const ( 26 success = "success" 27 fail = "fail" 28 ) 29 30 var ( 31 testcaseResult = promauto.NewCounterVec( 32 prometheus.CounterOpts{ 33 Subsystem: subsystem, 34 Name: "test_case_result_total", 35 Help: "Number of test cases by test name, that succeed / fail.", 36 }, 37 []string{"name", "result"}, 38 ) 39 startTime = time.Now() 40 ) 41 42 // RunnerConfig is config, for the runner. 43 type RunnerConfig struct { 44 testRate float64 45 testQueryMinSize time.Duration 46 testQueryMaxSize time.Duration 47 PrometheusAddr string 48 UserID string 49 ExtraSelectors string 50 EnableDeleteSeriesTest bool 51 CommonTestConfig CommonTestConfig 52 DeleteSeriesTestConfig DeleteSeriesTestConfig 53 } 54 55 // RegisterFlags does what it says. 56 func (cfg *RunnerConfig) RegisterFlags(f *flag.FlagSet) { 57 f.Float64Var(&cfg.testRate, "test-rate", 1, "Query QPS") 58 f.DurationVar(&cfg.testQueryMinSize, "test-query-min-size", 5*time.Minute, "The min query size to Prometheus.") 59 f.DurationVar(&cfg.testQueryMaxSize, "test-query-max-size", 60*time.Minute, "The max query size to Prometheus.") 60 61 f.StringVar(&cfg.PrometheusAddr, "prometheus-address", "", "Address of Prometheus instance to query.") 62 f.StringVar(&cfg.UserID, "user-id", "", "UserID to send to Cortex.") 63 64 f.StringVar(&cfg.ExtraSelectors, "extra-selectors", "", "Extra selectors to be included in queries, eg to identify different instances of this job.") 65 f.BoolVar(&cfg.EnableDeleteSeriesTest, "enable-delete-series-test", false, "Enable tests for checking deletion of series.") 66 67 cfg.CommonTestConfig.RegisterFlags(f) 68 cfg.DeleteSeriesTestConfig.RegisterFlags(f) 69 } 70 71 // Runner runs a bunch of test cases, periodically checking their value. 72 type Runner struct { 73 cfg RunnerConfig 74 mtx sync.RWMutex 75 cases []Case 76 quit chan struct{} 77 wg sync.WaitGroup 78 client v1.API 79 } 80 81 // NewRunner makes a new Runner. 82 func NewRunner(cfg RunnerConfig) (*Runner, error) { 83 apiCfg := api.Config{ 84 Address: cfg.PrometheusAddr, 85 } 86 if cfg.UserID != "" { 87 apiCfg.RoundTripper = &nethttp.Transport{ 88 RoundTripper: promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) { 89 _ = user.InjectOrgIDIntoHTTPRequest(user.InjectOrgID(context.Background(), cfg.UserID), req) 90 return api.DefaultRoundTripper.RoundTrip(req) 91 }), 92 } 93 } else { 94 apiCfg.RoundTripper = &nethttp.Transport{} 95 } 96 97 client, err := api.NewClient(apiCfg) 98 if err != nil { 99 return nil, err 100 } 101 102 tc := &Runner{ 103 cfg: cfg, 104 quit: make(chan struct{}), 105 client: v1.NewAPI(tracingClient{client}), 106 } 107 108 tc.wg.Add(1) 109 go tc.verifyLoop() 110 return tc, nil 111 } 112 113 type tracingClient struct { 114 api.Client 115 } 116 117 func (t tracingClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, error) { 118 req = req.WithContext(ctx) 119 req, tr := nethttp.TraceRequest(opentracing.GlobalTracer(), req) 120 ctx = req.Context() 121 defer tr.Finish() 122 return t.Client.Do(ctx, req) 123 } 124 125 // Stop the checking goroutine. 126 func (r *Runner) Stop() { 127 close(r.quit) 128 r.wg.Wait() 129 130 for _, tc := range r.cases { 131 tc.Stop() 132 } 133 } 134 135 // Add a new TestCase. 136 func (r *Runner) Add(tc Case) { 137 r.mtx.Lock() 138 defer r.mtx.Unlock() 139 r.cases = append(r.cases, tc) 140 } 141 142 // Describe implements prometheus.Collector. 143 func (r *Runner) Describe(c chan<- *prometheus.Desc) { 144 r.mtx.RLock() 145 defer r.mtx.RUnlock() 146 for _, t := range r.cases { 147 t.Describe(c) 148 } 149 } 150 151 // Collect implements prometheus.Collector. 152 func (r *Runner) Collect(c chan<- prometheus.Metric) { 153 r.mtx.RLock() 154 defer r.mtx.RUnlock() 155 for _, t := range r.cases { 156 t.Collect(c) 157 } 158 } 159 160 func (r *Runner) verifyLoop() { 161 defer r.wg.Done() 162 163 ticker := time.NewTicker(time.Second / time.Duration(r.cfg.testRate)) 164 defer ticker.Stop() 165 166 for { 167 select { 168 case <-r.quit: 169 return 170 case <-ticker.C: 171 r.runRandomTest() 172 } 173 } 174 } 175 176 func (r *Runner) runRandomTest() { 177 r.mtx.Lock() 178 tc := r.cases[rand.Intn(len(r.cases))] 179 r.mtx.Unlock() 180 181 ctx := context.Background() 182 log, ctx := spanlogger.New(ctx, "runRandomTest") 183 span, trace := opentracing.SpanFromContext(ctx), "<none>" 184 if span != nil { 185 trace = fmt.Sprintf("%s", span.Context()) 186 } 187 188 minQueryTime := tc.MinQueryTime() 189 level.Info(log).Log("name", tc.Name(), "trace", trace, "minTime", minQueryTime) 190 defer log.Finish() 191 192 // pick a random time to start testStart and now 193 // pick a random length between minDuration and maxDuration 194 now := time.Now() 195 start := minQueryTime.Add(time.Duration(rand.Int63n(int64(now.Sub(minQueryTime))))) 196 duration := r.cfg.testQueryMinSize + 197 time.Duration(rand.Int63n(int64(r.cfg.testQueryMaxSize)-int64(r.cfg.testQueryMinSize))) 198 if start.Add(-duration).Before(minQueryTime) { 199 duration = start.Sub(minQueryTime) 200 } 201 if duration < r.cfg.testQueryMinSize { 202 return 203 } 204 205 // round off duration to minutes because we anyways have a window in minutes while doing queries. 206 duration = (duration / time.Minute) * time.Minute 207 level.Info(log).Log("start", start, "duration", duration) 208 209 passed, err := tc.Test(ctx, r.client, r.cfg.ExtraSelectors, start, duration) 210 if err != nil { 211 level.Error(log).Log("err", err) 212 } 213 214 if passed { 215 testcaseResult.WithLabelValues(tc.Name(), success).Inc() 216 } else { 217 testcaseResult.WithLabelValues(tc.Name(), fail).Inc() 218 } 219 }