github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/election.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "time" 16 17 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 18 ) 19 20 func registerElectionAfterRestart(r *testRegistry) { 21 r.Add(testSpec{ 22 Name: "election-after-restart", 23 Owner: OwnerKV, 24 Skip: "https://github.com/cockroachdb/cockroach/issues/35047", 25 Cluster: makeClusterSpec(3), 26 Run: func(ctx context.Context, t *test, c *cluster) { 27 t.Status("starting up") 28 c.Put(ctx, cockroach, "./cockroach") 29 c.Start(ctx, t) 30 31 // If the initial ranges aren't fully replicated by the time we 32 // run our splits, replicating them after the splits will take 33 // longer, so wait for the initial replication before 34 // proceeding. 35 time.Sleep(3 * time.Second) 36 37 t.Status("creating table and splits") 38 c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e " 39 CREATE DATABASE IF NOT EXISTS test; 40 CREATE TABLE test.kv (k INT PRIMARY KEY, v INT); 41 -- Prevent the merge queue from immediately discarding our splits. 42 SET CLUSTER SETTING kv.range_merge.queue_enabled = false; 43 ALTER TABLE test.kv SPLIT AT SELECT generate_series(0, 10000, 100)"`) 44 45 start := timeutil.Now() 46 c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e " 47 SELECT * FROM test.kv"`) 48 duration := timeutil.Since(start) 49 t.l.Printf("pre-restart, query took %s\n", duration) 50 51 // If we restart before all the nodes have applied the splits, 52 // there will be a lot of snapshot attempts (which may fail) 53 // after the restart. This appears to slow down startup enough 54 // to fail the condition below, so wait a bit for the dust to 55 // settle before restarting. 56 time.Sleep(3 * time.Second) 57 58 t.Status("restarting") 59 c.Stop(ctx) 60 c.Start(ctx, t) 61 62 // Each of the 100 ranges in this table must elect a leader for 63 // this query to complete. In naive raft, each of these 64 // elections would require waiting for a 3-second timeout, one 65 // at a time. This test verifies that our mechanisms to speed 66 // this up are working (we trigger elections eagerly, but not so 67 // eagerly that multiple elections conflict with each other). 68 start = timeutil.Now() 69 // Use a large CONNECT_TIMEOUT so that if the initial connection 70 // takes ages (perhaps due to some cli-internal query taking a 71 // very long time), we fail with the duration check below and 72 // not an opaque error from the cli. 73 buf, err := c.RunWithBuffer(ctx, t.l, c.Node(1), `COCKROACH_CONNECT_TIMEOUT=240 ./cockroach sql --insecure -e " 74 SET TRACING = on; 75 SELECT * FROM test.kv; 76 SET TRACING = off; 77 SHOW TRACE FOR SESSION; 78 "`) 79 if err != nil { 80 t.Fatalf("%s\n\n%s", buf, err) 81 } 82 duration = timeutil.Since(start) 83 t.l.Printf("post-restart, query took %s\n", duration) 84 if expected := 15 * time.Second; duration > expected { 85 // In the happy case, this query runs in around 250ms. Prior 86 // to the introduction of this test, a bug caused most 87 // elections to fail and the query would take over 100 88 // seconds. There are still issues that can cause a few 89 // elections to fail (the biggest one as I write this is 90 // #26448), so we must use a generous timeout here. We may be 91 // able to tighten the bounds as we make more improvements. 92 t.l.Printf("%s\n", buf) 93 t.Fatalf("expected query to succeed in less than %s, took %s", expected, duration) 94 } 95 }, 96 }) 97 }