github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/follower_reads.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"bufio"
	"context"
	gosql "database/sql"
	"math/rand"
	"net/http"
	"regexp"
	"strconv"
	"time"

	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
	"github.com/cockroachdb/cockroach/pkg/util/httputil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"golang.org/x/sync/errgroup"
)

func registerFollowerReads(r *testRegistry) {
	r.Add(testSpec{
		Name:       "follower-reads/nodes=3",
		Owner:      OwnerKV,
		Cluster:    makeClusterSpec(3 /* nodeCount */, cpu(2), geo()),
		MinVersion: "v19.1.0",
		Run:        runFollowerReadsTest,
	})
}

// runFollowerReadsTest is a basic litmus test that follower reads work.
// The test does the following:
//
//  * Creates a database and table.
//  * Installs a number of rows into that table.
//  * Queries the data immediately with a historical timestamp, expecting an
//    error because the table does not exist in the past immediately following
//    its creation.
//  * Waits until enough time has elapsed that the installed data can be read
//    with a follower read issued using
//    `experimental_follower_read_timestamp()`.
//  * Performs a single `experimental_follower_read_timestamp()` SELECT
//    against a single row on each of the nodes and then observes the counter
//    metric for store-level follower reads, ensuring that they occurred on
//    at least two of the nodes.
//  * Performs reads against the written data on all of the nodes at a steady
//    rate for 3 minutes, ensuring that the 90th percentile SQL latencies
//    during that time stay under 20ms, which implies that no WAN RPCs
//    occurred.
//
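// For reference, the follower reads issued below take the form:
//
//	SELECT v FROM test.test
//	AS OF SYSTEM TIME experimental_follower_read_timestamp()
//	WHERE k = $1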
func runFollowerReadsTest(ctx context.Context, t *test, c *cluster) {
	crdbNodes := c.Range(1, c.spec.NodeCount)
	c.Put(ctx, cockroach, "./cockroach", crdbNodes)
	c.Wipe(ctx, crdbNodes)
	c.Start(ctx, t, crdbNodes)

	var conns []*gosql.DB
	for i := 0; i < c.spec.NodeCount; i++ {
		conns = append(conns, c.Conn(ctx, i+1))
		defer conns[i].Close()
	}
	db := conns[0]

	if _, err := db.ExecContext(ctx, "SET CLUSTER SETTING kv.closed_timestamp.follower_reads_enabled = 'true'"); err != nil {
		t.Fatalf("failed to enable follower reads: %v", err)
	}
	// Disable load-based splitting and range merging because splits and merges
	// interfere with follower reads. This test's workload regularly triggers
	// load-based splitting in the first phase, creating small ranges which are
	// later merged. The merging tends to coincide with the final phase of the
	// test, which attempts to observe low-latency reads, leading to flakiness.
	if _, err := db.ExecContext(ctx, "SET CLUSTER SETTING kv.range_split.by_load_enabled = 'false'"); err != nil {
		t.Fatalf("failed to disable load based splitting: %v", err)
	}
	if _, err := db.ExecContext(ctx, "SET CLUSTER SETTING kv.range_merge.queue_enabled = 'false'"); err != nil {
		t.Fatalf("failed to disable range merging: %v", err)
	}
	if r, err := db.ExecContext(ctx, "CREATE DATABASE test;"); err != nil {
		t.Fatalf("failed to create database: %v %v", err, r)
	}
	if r, err := db.ExecContext(ctx, "CREATE TABLE test.test ( k INT8, v INT8, PRIMARY KEY (k) )"); err != nil {
		t.Fatalf("failed to create table: %v %v", err, r)
	}
	waitForFullReplication(t, db)
	const rows = 100
	const concurrency = 32
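	// sem acts as a counting semaphore, capping the number of INSERT
	// statements in flight at `concurrency`.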
	sem := make(chan struct{}, concurrency)
	data := make(map[int]int64)
	insert := func(ctx context.Context, k int) func() error {
		v := rand.Int63()
		data[k] = v
		return func() error {
			sem <- struct{}{}
			defer func() { <-sem }()
			_, err := db.ExecContext(ctx, "INSERT INTO test.test VALUES ( $1, $2 )", k, v)
			return err
		}
	}
	// chooseKV picks a random key-value pair by exploiting the pseudo-random
	// ordering of keys when traversing a map within a range statement.
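	// Go's runtime deliberately varies map iteration order from one range
	// statement to the next, so successive calls return varying keys, though
	// not uniformly at random.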
	chooseKV := func() (k int, v int64) {
		for k, v = range data {
			return k, v
		}
		panic("data is empty")
	}
	verifySelect := func(ctx context.Context, node, k int, expectError bool, expectedVal int64) func() error {
		return func() error {
			nodeDB := conns[node-1]
			r := nodeDB.QueryRowContext(ctx, "SELECT v FROM test.test AS OF SYSTEM "+
				"TIME experimental_follower_read_timestamp() WHERE k = $1", k)
			var got int64
			if err := r.Scan(&got); err != nil {
				// Ignore errors due to cancellation.
				if ctx.Err() != nil {
					return nil
				}
				if expectError {
					return nil
				}
				return err
			}

			if expectError {
				return errors.Errorf("failed to get expected error on node %d", node)
			}
			if got != expectedVal {
				return errors.Errorf("didn't get expected value on node %d: %v != %v",
					node, got, expectedVal)
			}
			return nil
		}
	}
	doSelects := func(ctx context.Context, node int) func() error {
		return func() error {
			for ctx.Err() == nil {
				k, v := chooseKV()
				err := verifySelect(ctx, node, k, false, v)()
				if err != nil && ctx.Err() == nil {
					return err
				}
			}
			return nil
		}
	}
	// Insert the data.
	g, gCtx := errgroup.WithContext(ctx)
	for i := 0; i < rows; i++ {
		g.Go(insert(gCtx, i))
	}
	if err := g.Wait(); err != nil {
		t.Fatalf("failed to insert data: %v", err)
	}
	// Verify error on immediate read.
	g, gCtx = errgroup.WithContext(ctx)
	for i := 1; i <= c.spec.NodeCount; i++ {
		// Expect an error performing a historical read at first because the
		// table won't yet exist at the historical timestamp.
		g.Go(verifySelect(gCtx, i, 0, true, 0))
	}
	if err := g.Wait(); err != nil {
		t.Fatalf("unexpected error performing historical reads: %v", err)
	}
	// Wait long enough that experimental_follower_read_timestamp() historical
	// reads will see the data.
	followerReadDuration, err := computeFollowerReadDuration(ctx, db)
	if err != nil {
		t.Fatalf("failed to compute follower read duration: %v", err)
	}
	select {
	case <-time.After(followerReadDuration):
	case <-ctx.Done():
		t.Fatalf("context canceled: %v", ctx.Err())
	}
	// Read the follower read counts before issuing the follower reads so we
	// can observe the delta, guarding against follower reads which might have
	// already happened due to system queries.
	followerReadsBefore, err := getFollowerReadCounts(ctx, c)
	if err != nil {
		t.Fatalf("failed to get follower read counts: %v", err)
	}
	// Perform reads at experimental_follower_read_timestamp() and ensure we
	// get the expected value on every node.
	g, gCtx = errgroup.WithContext(ctx)
	k, v := chooseKV()
	for i := 1; i <= c.spec.NodeCount; i++ {
		g.Go(verifySelect(gCtx, i, k, false, v))
	}
	if err := g.Wait(); err != nil {
		t.Fatalf("error verifying node values: %v", err)
	}
	// Verify that the follower read count increments on at least two nodes.
	followerReadsAfter, err := getFollowerReadCounts(ctx, c)
	if err != nil {
		t.Fatalf("failed to get follower read counts: %v", err)
	}
	nodesWhichSawFollowerReads := 0
	for i := 0; i < len(followerReadsAfter); i++ {
		if followerReadsAfter[i] > followerReadsBefore[i] {
			nodesWhichSawFollowerReads++
		}
	}
	if nodesWhichSawFollowerReads < 2 {
		t.Fatalf("fewer than 2 follower reads occurred: saw %v before and %v after",
			followerReadsBefore, followerReadsAfter)
	}
	// Run reads for 3m, which, given the 10s metrics window, guarantees that
	// the most recent SQL latency time series datapoints relate to at least
	// some of these reads.
	timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Minute)
	defer cancel()
	g, gCtx = errgroup.WithContext(timeoutCtx)
	for i := 0; i < concurrency; i++ {
		g.Go(doSelects(gCtx, rand.Intn(c.spec.NodeCount)+1))
	}
	start := timeutil.Now()
	if err := g.Wait(); err != nil && timeoutCtx.Err() == nil {
		t.Fatalf("error reading data: %v", err)
	}
	// Perform a time series query to verify that the SQL latencies were well
	// below the WAN latencies, which should be at least 50ms.
	verifySQLLatency(ctx, c, t, c.Node(1), start, timeutil.Now(), 20*time.Millisecond)
}

func computeFollowerReadDuration(ctx context.Context, db *gosql.DB) (time.Duration, error) {
	var targetDurationStr string
	err := db.QueryRowContext(ctx, "SELECT value FROM crdb_internal.cluster_settings WHERE variable = 'kv.closed_timestamp.target_duration'").Scan(&targetDurationStr)
	if err != nil {
		return 0, err
	}
	targetDuration, err := time.ParseDuration(targetDurationStr)
	if err != nil {
		return 0, err
	}
	var closeFraction float64
	err = db.QueryRowContext(ctx, "SELECT value FROM crdb_internal.cluster_settings WHERE variable = 'kv.closed_timestamp.close_fraction'").Scan(&closeFraction)
	if err != nil {
		return 0, err
	}
	// target_multiple is a hidden setting which cannot be read from
	// crdb_internal, so for now we hard-code its default value.
	const targetMultiple = 3
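	// The computed wait is targetDuration * (1 + targetMultiple *
	// closeFraction). As an illustration with hypothetical values: a
	// target_duration of 30s and a close_fraction of 0.2 yield
	// 30s * (1 + 3*0.2) = 48s.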
	return time.Duration(float64(targetDuration) * (1 + targetMultiple*closeFraction)), nil
}

// verifySQLLatency verifies that the client-facing SQL latencies at the 90th
// percentile remain below the target latency at least 80% of the time between
// start and end, ignoring the first 20s as a ramp-up period.
func verifySQLLatency(
	ctx context.Context,
	c *cluster,
	t *test,
	adminNode nodeListOption,
	start, end time.Time,
	targetLatency time.Duration,
) {
	// Query the needed information over the timespan of the run.
	adminURLs := c.ExternalAdminUIAddr(ctx, adminNode)
	url := "http://" + adminURLs[0] + "/ts/query"
	request := tspb.TimeSeriesQueryRequest{
		StartNanos: start.UnixNano(),
		EndNanos:   end.UnixNano(),
		// Ask for 10s intervals.
		SampleNanos: (10 * time.Second).Nanoseconds(),
		Queries: []tspb.Query{
			{
				Name: "cr.node.sql.service.latency-p90",
			},
		},
	}
	var response tspb.TimeSeriesQueryResponse
	if err := httputil.PostJSON(http.Client{}, url, &request, &response); err != nil {
		t.Fatal(err)
	}
	perTenSeconds := response.Results[0].Datapoints
	// Drop the first 20 seconds of datapoints as a "ramp-up" period.
	if len(perTenSeconds) < 3 {
		t.Fatalf("not enough ts data to verify latency")
	}
	perTenSeconds = perTenSeconds[2:]
	var above []time.Duration
	for _, dp := range perTenSeconds {
		if val := time.Duration(dp.Value); val > targetLatency {
			above = append(above, val)
		}
	}
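	// Allow at most 20% of the remaining datapoints to exceed the target. As
	// an illustration, a 3m run sampled every 10s yields 18 datapoints; after
	// dropping the first two, int(.2*16) = 3 breaches are permitted.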
	if permitted := int(.2 * float64(len(perTenSeconds))); len(above) > permitted {
		t.Fatalf("%d latency values (%v) are above target latency %v, %d permitted",
			len(above), above, targetLatency, permitted)
	}
}

const followerReadsMetric = "follower_reads_success_count"

// getFollowerReadCounts returns a slice mapping each node (by index) to its
// follower read count, according to the followerReadsMetric counter.
func getFollowerReadCounts(ctx context.Context, c *cluster) ([]int, error) {
	followerReadCounts := make([]int, c.spec.NodeCount)
	getFollowerReadCount := func(ctx context.Context, node int) func() error {
		return func() error {
			url := "http://" + c.ExternalAdminUIAddr(ctx, c.Node(node))[0] + "/_status/vars"
			resp, err := httputil.Get(ctx, url)
			if err != nil {
				return err
			}
			defer resp.Body.Close()
			if resp.StatusCode != 200 {
				return errors.Errorf("invalid non-200 status code %v", resp.StatusCode)
			}
			scanner := bufio.NewScanner(resp.Body)
			for scanner.Scan() {
				m, ok := parsePrometheusMetric(scanner.Text())
				if ok {
					if m.metric == followerReadsMetric {
						v, err := strconv.ParseFloat(m.value, 64)
						if err != nil {
							return err
						}
						followerReadCounts[node-1] = int(v)
					}
				}
			}
			return nil
		}
	}
	g, gCtx := errgroup.WithContext(ctx)
	for i := 1; i <= c.spec.NodeCount; i++ {
		g.Go(getFollowerReadCount(gCtx, i))
	}
	if err := g.Wait(); err != nil {
		return nil, err
	}
	return followerReadCounts, nil
}

var prometheusMetricStringPattern = `(?P<metric>\w+)\{` +
	`(?P<labelvalues>(\w+=".*",)*(\w+=".*")?)\}\s+(?P<value>.*)`
var prometheusMetricStringRE = regexp.MustCompile(prometheusMetricStringPattern)

type prometheusMetric struct {
	metric      string
	labelValues string
	value       string
}

// parsePrometheusMetric parses a single line of the Prometheus text
// exposition format served at /_status/vars. For example, an (illustrative)
// line such as
//
//	follower_reads_success_count{store="1"} 27
//
// parses to metric "follower_reads_success_count", labelValues `store="1"`,
// and value "27".
func parsePrometheusMetric(s string) (*prometheusMetric, bool) {
	matches := prometheusMetricStringRE.FindStringSubmatch(s)
	if matches == nil {
		return nil, false
	}
	return &prometheusMetric{
		metric:      matches[1],
		labelValues: matches[2],
		value:       matches[5],
	}, true
}