github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/testexporter/correctness/runner.go (about)

     1  package correctness
     2  
     3  import (
     4  	"context"
     5  	"flag"
     6  	"fmt"
     7  	"math/rand"
     8  	"net/http"
     9  	"sync"
    10  	"time"
    11  
    12  	"github.com/go-kit/log/level"
    13  	"github.com/opentracing-contrib/go-stdlib/nethttp"
    14  	opentracing "github.com/opentracing/opentracing-go"
    15  	"github.com/prometheus/client_golang/api"
    16  	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
    17  	"github.com/prometheus/client_golang/prometheus"
    18  	"github.com/prometheus/client_golang/prometheus/promauto"
    19  	"github.com/prometheus/client_golang/prometheus/promhttp"
    20  	"github.com/weaveworks/common/user"
    21  
    22  	"github.com/cortexproject/cortex/pkg/util/spanlogger"
    23  )
    24  
// Values used for the "result" label of the testcaseResult counter.
const (
	success = "success"
	fail    = "fail"
)
    29  
var (
	// testcaseResult counts executed test cases, labelled by the test case
	// name and by the result of the run.
	testcaseResult = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Subsystem: subsystem,
			Name:      "test_case_result_total",
			Help:      "Number of test cases by test name, that succeed / fail.",
		},
		[]string{"name", "result"},
	)
	// startTime is captured once, when the package is initialised.
	startTime = time.Now()
)
    41  
// RunnerConfig holds the settings for a Runner. The unexported fields can
// only be populated through the flags bound by RegisterFlags.
type RunnerConfig struct {
	// testRate is the number of test queries to issue per second.
	testRate float64
	// testQueryMinSize and testQueryMaxSize bound the randomly chosen query
	// duration.
	testQueryMinSize time.Duration
	testQueryMaxSize time.Duration
	// PrometheusAddr is the address of the Prometheus-compatible API to query.
	PrometheusAddr string
	// UserID, when non-empty, is injected as the org ID into every outgoing
	// request.
	UserID string
	// ExtraSelectors is handed unchanged to every test case's Test call.
	ExtraSelectors string
	// EnableDeleteSeriesTest turns on the series-deletion tests; it is not
	// read in this file, so where it is consumed should be confirmed.
	EnableDeleteSeriesTest bool
	// CommonTestConfig and DeleteSeriesTestConfig are nested configs that
	// register their own flags.
	CommonTestConfig       CommonTestConfig
	DeleteSeriesTestConfig DeleteSeriesTestConfig
}
    54  
    55  // RegisterFlags does what it says.
    56  func (cfg *RunnerConfig) RegisterFlags(f *flag.FlagSet) {
    57  	f.Float64Var(&cfg.testRate, "test-rate", 1, "Query QPS")
    58  	f.DurationVar(&cfg.testQueryMinSize, "test-query-min-size", 5*time.Minute, "The min query size to Prometheus.")
    59  	f.DurationVar(&cfg.testQueryMaxSize, "test-query-max-size", 60*time.Minute, "The max query size to Prometheus.")
    60  
    61  	f.StringVar(&cfg.PrometheusAddr, "prometheus-address", "", "Address of Prometheus instance to query.")
    62  	f.StringVar(&cfg.UserID, "user-id", "", "UserID to send to Cortex.")
    63  
    64  	f.StringVar(&cfg.ExtraSelectors, "extra-selectors", "", "Extra selectors to be included in queries, eg to identify different instances of this job.")
    65  	f.BoolVar(&cfg.EnableDeleteSeriesTest, "enable-delete-series-test", false, "Enable tests for checking deletion of series.")
    66  
    67  	cfg.CommonTestConfig.RegisterFlags(f)
    68  	cfg.DeleteSeriesTestConfig.RegisterFlags(f)
    69  }
    70  
// Runner runs a bunch of test cases, periodically checking their value.
// It also exposes the Describe and Collect methods of prometheus.Collector
// by delegating to every registered case.
type Runner struct {
	cfg RunnerConfig
	// mtx guards cases.
	mtx   sync.RWMutex
	cases []Case
	// quit is closed by Stop to terminate the verify loop.
	quit chan struct{}
	// wg tracks the verify loop goroutine so Stop can wait for it to exit.
	wg sync.WaitGroup
	// client is the query API handed to each test case when it is run.
	client v1.API
}
    80  
    81  // NewRunner makes a new Runner.
    82  func NewRunner(cfg RunnerConfig) (*Runner, error) {
    83  	apiCfg := api.Config{
    84  		Address: cfg.PrometheusAddr,
    85  	}
    86  	if cfg.UserID != "" {
    87  		apiCfg.RoundTripper = &nethttp.Transport{
    88  			RoundTripper: promhttp.RoundTripperFunc(func(req *http.Request) (*http.Response, error) {
    89  				_ = user.InjectOrgIDIntoHTTPRequest(user.InjectOrgID(context.Background(), cfg.UserID), req)
    90  				return api.DefaultRoundTripper.RoundTrip(req)
    91  			}),
    92  		}
    93  	} else {
    94  		apiCfg.RoundTripper = &nethttp.Transport{}
    95  	}
    96  
    97  	client, err := api.NewClient(apiCfg)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	tc := &Runner{
   103  		cfg:    cfg,
   104  		quit:   make(chan struct{}),
   105  		client: v1.NewAPI(tracingClient{client}),
   106  	}
   107  
   108  	tc.wg.Add(1)
   109  	go tc.verifyLoop()
   110  	return tc, nil
   111  }
   112  
// tracingClient wraps an api.Client; its Do method runs each request through
// nethttp.TraceRequest with the global tracer before delegating to the
// embedded client.
type tracingClient struct {
	api.Client
}
   116  
   117  func (t tracingClient) Do(ctx context.Context, req *http.Request) (*http.Response, []byte, error) {
   118  	req = req.WithContext(ctx)
   119  	req, tr := nethttp.TraceRequest(opentracing.GlobalTracer(), req)
   120  	ctx = req.Context()
   121  	defer tr.Finish()
   122  	return t.Client.Do(ctx, req)
   123  }
   124  
   125  // Stop the checking goroutine.
   126  func (r *Runner) Stop() {
   127  	close(r.quit)
   128  	r.wg.Wait()
   129  
   130  	for _, tc := range r.cases {
   131  		tc.Stop()
   132  	}
   133  }
   134  
   135  // Add a new TestCase.
   136  func (r *Runner) Add(tc Case) {
   137  	r.mtx.Lock()
   138  	defer r.mtx.Unlock()
   139  	r.cases = append(r.cases, tc)
   140  }
   141  
   142  // Describe implements prometheus.Collector.
   143  func (r *Runner) Describe(c chan<- *prometheus.Desc) {
   144  	r.mtx.RLock()
   145  	defer r.mtx.RUnlock()
   146  	for _, t := range r.cases {
   147  		t.Describe(c)
   148  	}
   149  }
   150  
   151  // Collect implements prometheus.Collector.
   152  func (r *Runner) Collect(c chan<- prometheus.Metric) {
   153  	r.mtx.RLock()
   154  	defer r.mtx.RUnlock()
   155  	for _, t := range r.cases {
   156  		t.Collect(c)
   157  	}
   158  }
   159  
   160  func (r *Runner) verifyLoop() {
   161  	defer r.wg.Done()
   162  
   163  	ticker := time.NewTicker(time.Second / time.Duration(r.cfg.testRate))
   164  	defer ticker.Stop()
   165  
   166  	for {
   167  		select {
   168  		case <-r.quit:
   169  			return
   170  		case <-ticker.C:
   171  			r.runRandomTest()
   172  		}
   173  	}
   174  }
   175  
   176  func (r *Runner) runRandomTest() {
   177  	r.mtx.Lock()
   178  	tc := r.cases[rand.Intn(len(r.cases))]
   179  	r.mtx.Unlock()
   180  
   181  	ctx := context.Background()
   182  	log, ctx := spanlogger.New(ctx, "runRandomTest")
   183  	span, trace := opentracing.SpanFromContext(ctx), "<none>"
   184  	if span != nil {
   185  		trace = fmt.Sprintf("%s", span.Context())
   186  	}
   187  
   188  	minQueryTime := tc.MinQueryTime()
   189  	level.Info(log).Log("name", tc.Name(), "trace", trace, "minTime", minQueryTime)
   190  	defer log.Finish()
   191  
   192  	// pick a random time to start testStart and now
   193  	// pick a random length between minDuration and maxDuration
   194  	now := time.Now()
   195  	start := minQueryTime.Add(time.Duration(rand.Int63n(int64(now.Sub(minQueryTime)))))
   196  	duration := r.cfg.testQueryMinSize +
   197  		time.Duration(rand.Int63n(int64(r.cfg.testQueryMaxSize)-int64(r.cfg.testQueryMinSize)))
   198  	if start.Add(-duration).Before(minQueryTime) {
   199  		duration = start.Sub(minQueryTime)
   200  	}
   201  	if duration < r.cfg.testQueryMinSize {
   202  		return
   203  	}
   204  
   205  	// round off duration to minutes because we anyways have a window in minutes while doing queries.
   206  	duration = (duration / time.Minute) * time.Minute
   207  	level.Info(log).Log("start", start, "duration", duration)
   208  
   209  	passed, err := tc.Test(ctx, r.client, r.cfg.ExtraSelectors, start, duration)
   210  	if err != nil {
   211  		level.Error(log).Log("err", err)
   212  	}
   213  
   214  	if passed {
   215  		testcaseResult.WithLabelValues(tc.Name(), success).Inc()
   216  	} else {
   217  		testcaseResult.WithLabelValues(tc.Name(), fail).Inc()
   218  	}
   219  }