github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/engine_switch.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "fmt" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 19 "github.com/cockroachdb/errors" 20 _ "github.com/lib/pq" 21 "golang.org/x/exp/rand" 22 ) 23 24 func registerEngineSwitch(r *testRegistry) { 25 runEngineSwitch := func(ctx context.Context, t *test, c *cluster, additionalArgs ...string) { 26 roachNodes := c.Range(1, c.spec.NodeCount-1) 27 loadNode := c.Node(c.spec.NodeCount) 28 c.Put(ctx, workload, "./workload", loadNode) 29 c.Put(ctx, cockroach, "./cockroach", roachNodes) 30 pebbleArgs := startArgs(append(additionalArgs, "--args=--storage-engine=pebble")...) 31 rocksdbArgs := startArgs(append(additionalArgs, "--args=--storage-engine=rocksdb")...) 32 c.Start(ctx, t, roachNodes, rocksdbArgs) 33 stageDuration := 1 * time.Minute 34 if local { 35 t.l.Printf("local mode: speeding up test\n") 36 stageDuration = 10 * time.Second 37 } 38 numIters := 5 * len(roachNodes) 39 40 loadDuration := " --duration=" + (time.Duration(numIters) * stageDuration).String() 41 42 workloads := []string{ 43 // Currently tpcc is the only one with CheckConsistency. We can add more later. 44 "./workload run tpcc --tolerate-errors --wait=false --drop --init --warehouses=1 " + loadDuration + " {pgurl:1-%d}", 45 } 46 checkWorkloads := []string{ 47 "./workload check tpcc --warehouses=1 --expensive-checks=true {pgurl:1}", 48 } 49 m := newMonitor(ctx, c, roachNodes) 50 for _, cmd := range workloads { 51 cmd := cmd // loop-local copy 52 m.Go(func(ctx context.Context) error { 53 cmd = fmt.Sprintf(cmd, len(roachNodes)) 54 return c.RunE(ctx, loadNode, cmd) 55 }) 56 } 57 58 usingPebble := make([]bool, len(roachNodes)) 59 rng := rand.New(rand.NewSource(uint64(timeutil.Now().UnixNano()))) 60 m.Go(func(ctx context.Context) error { 61 l, err := t.l.ChildLogger("engine-switcher") 62 if err != nil { 63 return err 64 } 65 // NB: the number of calls to `sleep` needs to be reflected in `loadDuration`. 66 sleepAndCheck := func() error { 67 t.WorkerStatus("sleeping") 68 select { 69 case <-ctx.Done(): 70 return ctx.Err() 71 case <-time.After(stageDuration): 72 } 73 // Make sure everyone is still running. 74 for i := 1; i <= len(roachNodes); i++ { 75 t.WorkerStatus("checking ", i) 76 db := c.Conn(ctx, i) 77 defer db.Close() 78 rows, err := db.Query(`SHOW DATABASES`) 79 if err != nil { 80 return err 81 } 82 if err := rows.Close(); err != nil { 83 return err 84 } 85 if err := c.CheckReplicaDivergenceOnDB(ctx, db); err != nil { 86 return errors.Wrapf(err, "node %d", i) 87 } 88 } 89 return nil 90 } 91 92 for i := 0; i < numIters; i++ { 93 // First let the load generators run in the cluster. 94 if err := sleepAndCheck(); err != nil { 95 return err 96 } 97 98 stop := func(node int) error { 99 m.ExpectDeath() 100 if rng.Intn(2) == 0 { 101 l.Printf("stopping node gracefully %d\n", node) 102 return c.StopCockroachGracefullyOnNode(ctx, node) 103 } 104 l.Printf("stopping node %d\n", node) 105 c.Stop(ctx, c.Node(node)) 106 return nil 107 } 108 109 i := rng.Intn(len(roachNodes)) 110 var args option 111 usingPebble[i] = !usingPebble[i] 112 if usingPebble[i] { 113 args = pebbleArgs 114 } else { 115 args = rocksdbArgs 116 } 117 t.WorkerStatus("switching ", i+1) 118 l.Printf("switching %d\n", i+1) 119 if err := stop(i + 1); err != nil { 120 return err 121 } 122 c.Start(ctx, t, c.Node(i+1), args) 123 } 124 return sleepAndCheck() 125 }) 126 m.Wait() 127 128 for _, cmd := range checkWorkloads { 129 c.Run(ctx, loadNode, cmd) 130 } 131 } 132 133 n := 3 134 r.Add(testSpec{ 135 Name: fmt.Sprintf("engine/switch/nodes=%d", n), 136 Owner: OwnerStorage, 137 MinVersion: "v20.1.0", 138 Cluster: makeClusterSpec(n + 1), 139 Run: func(ctx context.Context, t *test, c *cluster) { 140 runEngineSwitch(ctx, t, c) 141 }, 142 }) 143 r.Add(testSpec{ 144 Name: fmt.Sprintf("engine/switch/encrypted/nodes=%d", n), 145 Owner: OwnerStorage, 146 MinVersion: "v20.1.0", 147 Cluster: makeClusterSpec(n + 1), 148 Run: func(ctx context.Context, t *test, c *cluster) { 149 runEngineSwitch(ctx, t, c, "--encrypt=true") 150 }, 151 }) 152 }