github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/engine_switch.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    19  	"github.com/cockroachdb/errors"
    20  	_ "github.com/lib/pq"
    21  	"golang.org/x/exp/rand"
    22  )
    23  
    24  func registerEngineSwitch(r *testRegistry) {
    25  	runEngineSwitch := func(ctx context.Context, t *test, c *cluster, additionalArgs ...string) {
    26  		roachNodes := c.Range(1, c.spec.NodeCount-1)
    27  		loadNode := c.Node(c.spec.NodeCount)
    28  		c.Put(ctx, workload, "./workload", loadNode)
    29  		c.Put(ctx, cockroach, "./cockroach", roachNodes)
    30  		pebbleArgs := startArgs(append(additionalArgs, "--args=--storage-engine=pebble")...)
    31  		rocksdbArgs := startArgs(append(additionalArgs, "--args=--storage-engine=rocksdb")...)
    32  		c.Start(ctx, t, roachNodes, rocksdbArgs)
    33  		stageDuration := 1 * time.Minute
    34  		if local {
    35  			t.l.Printf("local mode: speeding up test\n")
    36  			stageDuration = 10 * time.Second
    37  		}
    38  		numIters := 5 * len(roachNodes)
    39  
    40  		loadDuration := " --duration=" + (time.Duration(numIters) * stageDuration).String()
    41  
    42  		workloads := []string{
    43  			// Currently tpcc is the only one with CheckConsistency. We can add more later.
    44  			"./workload run tpcc --tolerate-errors --wait=false --drop --init --warehouses=1 " + loadDuration + " {pgurl:1-%d}",
    45  		}
    46  		checkWorkloads := []string{
    47  			"./workload check tpcc --warehouses=1 --expensive-checks=true {pgurl:1}",
    48  		}
    49  		m := newMonitor(ctx, c, roachNodes)
    50  		for _, cmd := range workloads {
    51  			cmd := cmd // loop-local copy
    52  			m.Go(func(ctx context.Context) error {
    53  				cmd = fmt.Sprintf(cmd, len(roachNodes))
    54  				return c.RunE(ctx, loadNode, cmd)
    55  			})
    56  		}
    57  
    58  		usingPebble := make([]bool, len(roachNodes))
    59  		rng := rand.New(rand.NewSource(uint64(timeutil.Now().UnixNano())))
    60  		m.Go(func(ctx context.Context) error {
    61  			l, err := t.l.ChildLogger("engine-switcher")
    62  			if err != nil {
    63  				return err
    64  			}
    65  			// NB: the number of calls to `sleep` needs to be reflected in `loadDuration`.
    66  			sleepAndCheck := func() error {
    67  				t.WorkerStatus("sleeping")
    68  				select {
    69  				case <-ctx.Done():
    70  					return ctx.Err()
    71  				case <-time.After(stageDuration):
    72  				}
    73  				// Make sure everyone is still running.
    74  				for i := 1; i <= len(roachNodes); i++ {
    75  					t.WorkerStatus("checking ", i)
    76  					db := c.Conn(ctx, i)
    77  					defer db.Close()
    78  					rows, err := db.Query(`SHOW DATABASES`)
    79  					if err != nil {
    80  						return err
    81  					}
    82  					if err := rows.Close(); err != nil {
    83  						return err
    84  					}
    85  					if err := c.CheckReplicaDivergenceOnDB(ctx, db); err != nil {
    86  						return errors.Wrapf(err, "node %d", i)
    87  					}
    88  				}
    89  				return nil
    90  			}
    91  
    92  			for i := 0; i < numIters; i++ {
    93  				// First let the load generators run in the cluster.
    94  				if err := sleepAndCheck(); err != nil {
    95  					return err
    96  				}
    97  
    98  				stop := func(node int) error {
    99  					m.ExpectDeath()
   100  					if rng.Intn(2) == 0 {
   101  						l.Printf("stopping node gracefully %d\n", node)
   102  						return c.StopCockroachGracefullyOnNode(ctx, node)
   103  					}
   104  					l.Printf("stopping node %d\n", node)
   105  					c.Stop(ctx, c.Node(node))
   106  					return nil
   107  				}
   108  
   109  				i := rng.Intn(len(roachNodes))
   110  				var args option
   111  				usingPebble[i] = !usingPebble[i]
   112  				if usingPebble[i] {
   113  					args = pebbleArgs
   114  				} else {
   115  					args = rocksdbArgs
   116  				}
   117  				t.WorkerStatus("switching ", i+1)
   118  				l.Printf("switching %d\n", i+1)
   119  				if err := stop(i + 1); err != nil {
   120  					return err
   121  				}
   122  				c.Start(ctx, t, c.Node(i+1), args)
   123  			}
   124  			return sleepAndCheck()
   125  		})
   126  		m.Wait()
   127  
   128  		for _, cmd := range checkWorkloads {
   129  			c.Run(ctx, loadNode, cmd)
   130  		}
   131  	}
   132  
   133  	n := 3
   134  	r.Add(testSpec{
   135  		Name:       fmt.Sprintf("engine/switch/nodes=%d", n),
   136  		Owner:      OwnerStorage,
   137  		MinVersion: "v20.1.0",
   138  		Cluster:    makeClusterSpec(n + 1),
   139  		Run: func(ctx context.Context, t *test, c *cluster) {
   140  			runEngineSwitch(ctx, t, c)
   141  		},
   142  	})
   143  	r.Add(testSpec{
   144  		Name:       fmt.Sprintf("engine/switch/encrypted/nodes=%d", n),
   145  		Owner:      OwnerStorage,
   146  		MinVersion: "v20.1.0",
   147  		Cluster:    makeClusterSpec(n + 1),
   148  		Run: func(ctx context.Context, t *test, c *cluster) {
   149  			runEngineSwitch(ctx, t, c, "--encrypt=true")
   150  		},
   151  	})
   152  }