github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/ts_util.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"net/http"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
    19  	"github.com/cockroachdb/cockroach/pkg/util/httputil"
    20  )
    21  
// tsQueryType selects which flavor of a time series metric a tsQuery
// retrieves. Tests in this file only ever need either the "total" or the
// "rate" of a metric, so this enum stands in for the downsampler, source
// aggregator and derivative settings of tspb.Query; getMetrics translates
// each value into the corresponding tspb.Query.
type tsQueryType int
    26  
const (
	// total indicates to query the total of the metric. Specifically,
	// getMetrics builds a tspb.Query whose downsampler is AVG, whose source
	// aggregator is SUM, and whose derivative is left unset.
	total tsQueryType = iota
	// rate indicates to query the rate of change of the metric. Specifically,
	// getMetrics builds a tspb.Query whose downsampler is AVG, whose source
	// aggregator is SUM, and whose derivative is NON_NEGATIVE_DERIVATIVE.
	rate
)
    37  
// tsQuery describes a single time series to fetch with getMetrics.
type tsQuery struct {
	// name is the metric name placed in tspb.Query.Name, for example
	// "cr.node.txn.commits".
	name string
	// queryType selects whether the total or the rate of the metric is
	// requested.
	queryType tsQueryType
}
    42  
    43  func mustGetMetrics(
    44  	t *test, adminURL string, start, end time.Time, tsQueries []tsQuery,
    45  ) tspb.TimeSeriesQueryResponse {
    46  	response, err := getMetrics(adminURL, start, end, tsQueries)
    47  	if err != nil {
    48  		t.Fatal(err)
    49  	}
    50  	return response
    51  }
    52  
    53  func getMetrics(
    54  	adminURL string, start, end time.Time, tsQueries []tsQuery,
    55  ) (tspb.TimeSeriesQueryResponse, error) {
    56  	url := "http://" + adminURL + "/ts/query"
    57  	queries := make([]tspb.Query, len(tsQueries))
    58  	for i := 0; i < len(tsQueries); i++ {
    59  		switch tsQueries[i].queryType {
    60  		case total:
    61  			queries[i] = tspb.Query{
    62  				Name:             tsQueries[i].name,
    63  				Downsampler:      tspb.TimeSeriesQueryAggregator_AVG.Enum(),
    64  				SourceAggregator: tspb.TimeSeriesQueryAggregator_SUM.Enum(),
    65  			}
    66  		case rate:
    67  			queries[i] = tspb.Query{
    68  				Name:             tsQueries[i].name,
    69  				Downsampler:      tspb.TimeSeriesQueryAggregator_AVG.Enum(),
    70  				SourceAggregator: tspb.TimeSeriesQueryAggregator_SUM.Enum(),
    71  				Derivative:       tspb.TimeSeriesQueryDerivative_NON_NEGATIVE_DERIVATIVE.Enum(),
    72  			}
    73  		default:
    74  			panic("unexpected")
    75  		}
    76  	}
    77  	request := tspb.TimeSeriesQueryRequest{
    78  		StartNanos: start.UnixNano(),
    79  		EndNanos:   end.UnixNano(),
    80  		// Ask for one minute intervals. We can't just ask for the whole hour
    81  		// because the time series query system does not support downsampling
    82  		// offsets.
    83  		SampleNanos: (1 * time.Minute).Nanoseconds(),
    84  		Queries:     queries,
    85  	}
    86  	var response tspb.TimeSeriesQueryResponse
    87  	err := httputil.PostJSON(http.Client{Timeout: 500 * time.Millisecond}, url, &request, &response)
    88  	return response, err
    89  
    90  }
    91  
    92  func verifyTxnPerSecond(
    93  	ctx context.Context,
    94  	c *cluster,
    95  	t *test,
    96  	adminNode nodeListOption,
    97  	start, end time.Time,
    98  	txnTarget, maxPercentTimeUnderTarget float64,
    99  ) {
   100  	// Query needed information over the timespan of the query.
   101  	adminURL := c.ExternalAdminUIAddr(ctx, adminNode)[0]
   102  	response := mustGetMetrics(t, adminURL, start, end, []tsQuery{
   103  		{name: "cr.node.txn.commits", queryType: rate},
   104  		{name: "cr.node.txn.commits", queryType: total},
   105  	})
   106  
   107  	// Drop the first two minutes of datapoints as a "ramp-up" period.
   108  	perMinute := response.Results[0].Datapoints[2:]
   109  	cumulative := response.Results[1].Datapoints[2:]
   110  
   111  	// Check average txns per second over the entire test was above the target.
   112  	totalTxns := cumulative[len(cumulative)-1].Value - cumulative[0].Value
   113  	avgTxnPerSec := totalTxns / float64(end.Sub(start)/time.Second)
   114  
   115  	if avgTxnPerSec < txnTarget {
   116  		t.Fatalf("average txns per second %f was under target %f", avgTxnPerSec, txnTarget)
   117  	} else {
   118  		t.l.Printf("average txns per second: %f", avgTxnPerSec)
   119  	}
   120  
   121  	// Verify that less than the specified limit of each individual one minute
   122  	// period was underneath the target.
   123  	minutesBelowTarget := 0.0
   124  	for _, dp := range perMinute {
   125  		if dp.Value < txnTarget {
   126  			minutesBelowTarget++
   127  		}
   128  	}
   129  	if perc := minutesBelowTarget / float64(len(perMinute)); perc > maxPercentTimeUnderTarget {
   130  		t.Fatalf(
   131  			"spent %f%% of time below target of %f txn/s, wanted no more than %f%%",
   132  			perc*100, txnTarget, maxPercentTimeUnderTarget*100,
   133  		)
   134  	} else {
   135  		t.l.Printf("spent %f%% of time below target of %f txn/s", perc*100, txnTarget)
   136  	}
   137  }
   138  
   139  func verifyLookupsPerSec(
   140  	ctx context.Context,
   141  	c *cluster,
   142  	t *test,
   143  	adminNode nodeListOption,
   144  	start, end time.Time,
   145  	rangeLookupsTarget float64,
   146  ) {
   147  	// Query needed information over the timespan of the query.
   148  	adminURL := c.ExternalAdminUIAddr(ctx, adminNode)[0]
   149  	response := mustGetMetrics(t, adminURL, start, end, []tsQuery{
   150  		{name: "cr.node.distsender.rangelookups", queryType: rate},
   151  	})
   152  
   153  	// Drop the first two minutes of datapoints as a "ramp-up" period.
   154  	perMinute := response.Results[0].Datapoints[2:]
   155  
   156  	// Verify that each individual one minute periods were below the target.
   157  	for _, dp := range perMinute {
   158  		if dp.Value > rangeLookupsTarget {
   159  			t.Fatalf("Found minute interval with %f lookup/sec above target of %f lookup/sec\n", dp.Value, rangeLookupsTarget)
   160  		} else {
   161  			t.l.Printf("Found minute interval with %f lookup/sec\n", dp.Value)
   162  		}
   163  	}
   164  }