github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/follower_reads.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"bufio"
	"context"
	gosql "database/sql"
	"math/rand"
	"net/http"
	"regexp"
	"strconv"
	"time"

	"github.com/cockroachdb/cockroach/pkg/ts/tspb"
	"github.com/cockroachdb/cockroach/pkg/util/httputil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
	"golang.org/x/sync/errgroup"
)

func registerFollowerReads(r *testRegistry) {
	r.Add(testSpec{
		Name:       "follower-reads/nodes=3",
		Owner:      OwnerKV,
		Cluster:    makeClusterSpec(3 /* nodeCount */, cpu(2), geo()),
		MinVersion: "v19.1.0",
		Run:        runFollowerReadsTest,
	})
}

// runFollowerReadsTest is a basic litmus test that follower reads work.
// The test does the following:
//
//  * Creates a database and table.
//  * Inserts a number of rows into that table.
//  * Queries the data initially with a recent timestamp, expecting an
//    error because immediately following creation the table does not yet
//    exist as of that past timestamp.
//  * Waits until the required duration has elapsed such that the inserted
//    data can be read with a follower read issued using
//    experimental_follower_read_timestamp().
//  * Performs a single experimental_follower_read_timestamp() select query
//    against a single row on all of the nodes and then observes the counter
//    metric for store-level follower reads, ensuring that they occurred on
//    at least two of the nodes.
//  * Performs reads against the written data on all of the nodes at a steady
//    rate for 3 minutes, ensuring that the 90th percentile SQL latencies
//    during that time are under 20ms, which implies that no WAN RPCs
//    occurred.
//
func runFollowerReadsTest(ctx context.Context, t *test, c *cluster) {
	crdbNodes := c.Range(1, c.spec.NodeCount)
	c.Put(ctx, cockroach, "./cockroach", crdbNodes)
	c.Wipe(ctx, crdbNodes)
	c.Start(ctx, t, crdbNodes)

	var conns []*gosql.DB
	for i := 0; i < c.spec.NodeCount; i++ {
		conns = append(conns, c.Conn(ctx, i+1))
		defer conns[i].Close()
	}
	db := conns[0]

	if _, err := db.ExecContext(ctx, "SET CLUSTER SETTING kv.closed_timestamp.follower_reads_enabled = 'true'"); err != nil {
		t.Fatalf("failed to enable follower reads: %v", err)
	}
	// Disable load-based splitting and range merging because splits and merges
	// interfere with follower reads. This test's workload regularly triggers
	// load-based splitting in the first phase, creating small ranges which are
	// merged later in the test. The merging tends to coincide with the final
	// phase of the test, which attempts to observe low-latency reads, leading
	// to flakiness.
	if _, err := db.ExecContext(ctx, "SET CLUSTER SETTING kv.range_split.by_load_enabled = 'false'"); err != nil {
		t.Fatalf("failed to disable load based splitting: %v", err)
	}
	if _, err := db.ExecContext(ctx, "SET CLUSTER SETTING kv.range_merge.queue_enabled = 'false'"); err != nil {
		t.Fatalf("failed to disable range merging: %v", err)
	}
	if r, err := db.ExecContext(ctx, "CREATE DATABASE test;"); err != nil {
		t.Fatalf("failed to create database: %v %v", err, r)
	}
	if r, err := db.ExecContext(ctx, "CREATE TABLE test.test ( k INT8, v INT8, PRIMARY KEY (k) )"); err != nil {
		t.Fatalf("failed to create table: %v %v", err, r)
	}
	waitForFullReplication(t, db)
	const rows = 100
	const concurrency = 32
	sem := make(chan struct{}, concurrency)
	data := make(map[int]int64)
	insert := func(ctx context.Context, k int) func() error {
		v := rand.Int63()
		data[k] = v
		return func() error {
			sem <- struct{}{}
			defer func() { <-sem }()
			_, err := db.ExecContext(ctx, "INSERT INTO test.test VALUES ( $1, $2 )", k, v)
			return err
		}
	}
	// chooseKV picks a random key-value pair by exploiting the pseudo-random
	// ordering of keys when traversing a map within a range statement.
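	// Go's runtime randomizes the starting point of map iteration, so taking
	// the first pair visited yields an arbitrary element on each call (though
	// not one drawn from a uniform distribution).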
	chooseKV := func() (k int, v int64) {
		for k, v = range data {
			return k, v
		}
		panic("data is empty")
	}
	verifySelect := func(ctx context.Context, node, k int, expectError bool, expectedVal int64) func() error {
		return func() error {
			nodeDB := conns[node-1]
			r := nodeDB.QueryRowContext(ctx, "SELECT v FROM test.test AS OF SYSTEM "+
				"TIME experimental_follower_read_timestamp() WHERE k = $1", k)
			var got int64
			if err := r.Scan(&got); err != nil {
				// Ignore errors due to cancellation.
				if ctx.Err() != nil {
					return nil
				}
				if expectError {
					return nil
				}
				return err
			}

			if expectError {
				return errors.Errorf("failed to get expected error on node %d", node)
			}
			if got != expectedVal {
				return errors.Errorf("didn't get expected val on node %d: %v != %v",
					node, got, expectedVal)
			}
			return nil
		}
	}
	doSelects := func(ctx context.Context, node int) func() error {
		return func() error {
			for ctx.Err() == nil {
				k, v := chooseKV()
				err := verifySelect(ctx, node, k, false, v)()
				if err != nil && ctx.Err() == nil {
					return err
				}
			}
			return nil
		}
	}
	// Insert the data.
	g, gCtx := errgroup.WithContext(ctx)
	for i := 0; i < rows; i++ {
		g.Go(insert(gCtx, i))
	}
	if err := g.Wait(); err != nil {
		t.Fatalf("failed to insert data: %v", err)
	}
	// Verify error on immediate read.
	g, gCtx = errgroup.WithContext(ctx)
	for i := 1; i <= c.spec.NodeCount; i++ {
		// Expect an error performing a historical read at first because the
		// table won't have existed yet at the historical timestamp.
		g.Go(verifySelect(gCtx, i, 0, true, 0))
	}
	if err := g.Wait(); err != nil {
		t.Fatalf("unexpected error performing historical reads: %v", err)
	}
	// Wait long enough that experimental_follower_read_timestamp() historical
	// reads will see the inserted data.
	followerReadDuration, err := computeFollowerReadDuration(ctx, db)
	if err != nil {
		t.Fatalf("failed to compute follower read duration: %v", err)
	}
	select {
	case <-time.After(followerReadDuration):
	case <-ctx.Done():
		t.Fatalf("context canceled: %v", ctx.Err())
	}
	// Read the follower read counts before issuing the follower reads to
	// observe the delta and protect from follower reads which might have
	// happened due to system queries.
	followerReadsBefore, err := getFollowerReadCounts(ctx, c)
	if err != nil {
		t.Fatalf("failed to get follower read counts: %v", err)
	}
	// Perform reads at experimental_follower_read_timestamp() and ensure we
	// get the expected value.
	g, gCtx = errgroup.WithContext(ctx)
	k, v := chooseKV()
	for i := 1; i <= c.spec.NodeCount; i++ {
		g.Go(verifySelect(gCtx, i, k, false, v))
	}
	if err := g.Wait(); err != nil {
		t.Fatalf("error verifying node values: %v", err)
	}
	// Verify that the follower read count increments on at least two nodes.
	followerReadsAfter, err := getFollowerReadCounts(ctx, c)
	if err != nil {
		t.Fatalf("failed to get follower read counts: %v", err)
	}
	nodesWhichSawFollowerReads := 0
	for i := 0; i < len(followerReadsAfter); i++ {
		if followerReadsAfter[i] > followerReadsBefore[i] {
			nodesWhichSawFollowerReads++
		}
	}
	if nodesWhichSawFollowerReads < 2 {
		t.Fatalf("fewer than 2 follower reads occurred: saw %v before and %v after",
			followerReadsBefore, followerReadsAfter)
	}
	// Run reads for 3m, which, given the 10s metrics sampling window, should
	// guarantee that the most recent SQL latency time series data relates to
	// at least some of these reads.
	timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Minute)
	defer cancel()
	g, gCtx = errgroup.WithContext(timeoutCtx)
	for i := 0; i < concurrency; i++ {
		g.Go(doSelects(gCtx, rand.Intn(c.spec.NodeCount)+1))
	}
	start := timeutil.Now()
	if err := g.Wait(); err != nil && timeoutCtx.Err() == nil {
		t.Fatalf("error reading data: %v", err)
	}
	// Perform a ts query to verify that the SQL latencies were well below the
	// WAN latencies, which should be at least 50ms.
	verifySQLLatency(ctx, c, t, c.Node(1), start, timeutil.Now(), 20*time.Millisecond)
}

// computeFollowerReadDuration returns how long to wait before reads at
// experimental_follower_read_timestamp() are expected to succeed, derived
// from the closed timestamp cluster settings.
func computeFollowerReadDuration(ctx context.Context, db *gosql.DB) (time.Duration, error) {
	var targetDurationStr string
	err := db.QueryRowContext(ctx, "SELECT value FROM crdb_internal.cluster_settings WHERE variable = 'kv.closed_timestamp.target_duration'").Scan(&targetDurationStr)
	if err != nil {
		return 0, err
	}
	targetDuration, err := time.ParseDuration(targetDurationStr)
	if err != nil {
		return 0, err
	}
	var closeFraction float64
	err = db.QueryRowContext(ctx, "SELECT value FROM crdb_internal.cluster_settings WHERE variable = 'kv.closed_timestamp.close_fraction'").Scan(&closeFraction)
	if err != nil {
		return 0, err
	}
	// target_multiple is a hidden setting which cannot be read from
	// crdb_internal, so for now hard-code it to the default value.
	const targetMultiple = 3
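	// For illustration (the values are hypothetical, not necessarily the
	// cluster defaults): a 30s target duration with a 0.2 close fraction
	// gives a wait of 30s * (1 + 3*0.2) = 48s.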
	return time.Duration(float64(targetDuration) * (1 + targetMultiple*closeFraction)), nil
}

// verifySQLLatency verifies that the client-facing SQL latencies in the 90th
// percentile remain below the target latency at least 80% of the time between
// start and end, ignoring the first 20s of datapoints.
func verifySQLLatency(
	ctx context.Context,
	c *cluster,
	t *test,
	adminNode nodeListOption,
	start, end time.Time,
	targetLatency time.Duration,
) {
	// Query the needed time series data over the timespan of the test.
	adminURLs := c.ExternalAdminUIAddr(ctx, adminNode)
	url := "http://" + adminURLs[0] + "/ts/query"
	request := tspb.TimeSeriesQueryRequest{
		StartNanos: start.UnixNano(),
		EndNanos:   end.UnixNano(),
		// Ask for 10s intervals.
		SampleNanos: (10 * time.Second).Nanoseconds(),
		Queries: []tspb.Query{
			{
				Name: "cr.node.sql.service.latency-p90",
			},
		},
	}
	var response tspb.TimeSeriesQueryResponse
	if err := httputil.PostJSON(http.Client{}, url, &request, &response); err != nil {
		t.Fatal(err)
	}
	perTenSeconds := response.Results[0].Datapoints
	// Drop the first 20 seconds of datapoints as a "ramp-up" period.
	if len(perTenSeconds) < 3 {
		t.Fatalf("not enough ts data to verify latency")
	}
	perTenSeconds = perTenSeconds[2:]
	var above []time.Duration
	for _, dp := range perTenSeconds {
		if val := time.Duration(dp.Value); val > targetLatency {
			above = append(above, val)
		}
	}
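	// Permit the target latency to be exceeded in at most 20% of the samples.
	// For illustration: a 3m run sampled at 10s intervals yields ~18
	// datapoints; after dropping the first two, int(.2 * 16) = 3 samples may
	// exceed the target.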
	if permitted := int(.2 * float64(len(perTenSeconds))); len(above) > permitted {
		t.Fatalf("%d latency values (%v) are above target latency %v, %d permitted",
			len(above), above, targetLatency, permitted)
	}
}

const followerReadsMetric = "follower_reads_success_count"

// getFollowerReadCounts returns a slice mapping each node (by zero-based
// index) to its follower read count, according to the store-level
// followerReadsMetric counter.
func getFollowerReadCounts(ctx context.Context, c *cluster) ([]int, error) {
	followerReadCounts := make([]int, c.spec.NodeCount)
	getFollowerReadCount := func(ctx context.Context, node int) func() error {
		return func() error {
			url := "http://" + c.ExternalAdminUIAddr(ctx, c.Node(node))[0] + "/_status/vars"
			resp, err := httputil.Get(ctx, url)
			if err != nil {
				return err
			}
			defer resp.Body.Close()
			if resp.StatusCode != 200 {
				return errors.Errorf("invalid non-200 status code %v", resp.StatusCode)
			}
			scanner := bufio.NewScanner(resp.Body)
			for scanner.Scan() {
				m, ok := parsePrometheusMetric(scanner.Text())
				if ok && m.metric == followerReadsMetric {
					v, err := strconv.ParseFloat(m.value, 64)
					if err != nil {
						return err
					}
					followerReadCounts[node-1] = int(v)
				}
			}
			return scanner.Err()
		}
	}
	g, gCtx := errgroup.WithContext(ctx)
	for i := 1; i <= c.spec.NodeCount; i++ {
		g.Go(getFollowerReadCount(gCtx, i))
	}
	if err := g.Wait(); err != nil {
		return nil, err
	}
	return followerReadCounts, nil
}

// prometheusMetricStringPattern matches a single line of Prometheus
// exposition-format output, for example (labels illustrative):
//
//	follower_reads_success_count{store="1"} 27
//
// capturing the metric name, the label-value pairs, and the value.
var prometheusMetricStringPattern = "(?P<metric>\\w+)\\{" +
	"(?P<labelvalues>(\\w+=\".*\",)*(\\w+=\".*\")?)\\}\\s+(?P<value>.*)"
var prometheusMetricStringRE = regexp.MustCompile(prometheusMetricStringPattern)

type prometheusMetric struct {
	metric      string
	labelValues string
	value       string
}

// parsePrometheusMetric parses a single line of metrics output into its
// constituent parts, returning false if the line does not match the expected
// format.
func parsePrometheusMetric(s string) (*prometheusMetric, bool) {
	matches := prometheusMetricStringRE.FindStringSubmatch(s)
	if matches == nil {
		return nil, false
	}
	return &prometheusMetric{
		metric:      matches[1],
		labelValues: matches[2],
		value:       matches[5],
	}, true
}