github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/version.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"runtime"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/util/binfetcher"
    21  	"github.com/cockroachdb/errors"
    22  	_ "github.com/lib/pq"
    23  )
    24  
    25  // TODO(tbg): remove this test. Use the harness in versionupgrade.go
    26  // to make a much better one, much more easily.
    27  func registerVersion(r *testRegistry) {
    28  	runVersion := func(ctx context.Context, t *test, c *cluster, version string) {
    29  		nodes := c.spec.NodeCount - 1
    30  		goos := ifLocal(runtime.GOOS, "linux")
    31  
    32  		b, err := binfetcher.Download(ctx, binfetcher.Options{
    33  			Binary:  "cockroach",
    34  			Version: "v" + version,
    35  			GOOS:    goos,
    36  			GOARCH:  "amd64",
    37  		})
    38  		if err != nil {
    39  			t.Fatal(err)
    40  		}
    41  
    42  		c.Put(ctx, workload, "./workload", c.Node(nodes+1))
    43  
    44  		c.Put(ctx, b, "./cockroach", c.Range(1, nodes))
    45  		// Force disable encryption.
    46  		// TODO(mberhault): allow it once version >= 2.1.
    47  		c.Start(ctx, t, c.Range(1, nodes), startArgsDontEncrypt)
    48  
    49  		stageDuration := 10 * time.Minute
    50  		buffer := 10 * time.Minute
    51  		if local {
    52  			t.l.Printf("local mode: speeding up test\n")
    53  			stageDuration = 10 * time.Second
    54  			buffer = time.Minute
    55  		}
    56  
    57  		loadDuration := " --duration=" + (time.Duration(3*nodes+2)*stageDuration + buffer).String()
    58  
    59  		workloads := []string{
    60  			"./workload run tpcc --tolerate-errors --wait=false --drop --init --warehouses=1 " + loadDuration + " {pgurl:1-%d}",
    61  			"./workload run kv --tolerate-errors --init" + loadDuration + " {pgurl:1-%d}",
    62  		}
    63  
    64  		m := newMonitor(ctx, c, c.Range(1, nodes))
    65  		for _, cmd := range workloads {
    66  			cmd := cmd // loop-local copy
    67  			m.Go(func(ctx context.Context) error {
    68  				cmd = fmt.Sprintf(cmd, nodes)
    69  				return c.RunE(ctx, c.Node(nodes+1), cmd)
    70  			})
    71  		}
    72  
    73  		m.Go(func(ctx context.Context) error {
    74  			l, err := t.l.ChildLogger("upgrader")
    75  			if err != nil {
    76  				return err
    77  			}
    78  			// NB: the number of calls to `sleep` needs to be reflected in `loadDuration`.
    79  			sleepAndCheck := func() error {
    80  				t.WorkerStatus("sleeping")
    81  				select {
    82  				case <-ctx.Done():
    83  					return ctx.Err()
    84  				case <-time.After(stageDuration):
    85  				}
    86  				// Make sure everyone is still running.
    87  				for i := 1; i <= nodes; i++ {
    88  					t.WorkerStatus("checking ", i)
    89  					db := c.Conn(ctx, i)
    90  					defer db.Close()
    91  					rows, err := db.Query(`SHOW DATABASES`)
    92  					if err != nil {
    93  						return err
    94  					}
    95  					if err := rows.Close(); err != nil {
    96  						return err
    97  					}
    98  					// Regression test for #37425. We can't run this in 2.1 because
    99  					// 19.1 changed downstream-of-raft semantics for consistency
   100  					// checks but unfortunately our versioning story for these
   101  					// checks had been broken for a long time. See:
   102  					//
   103  					// https://github.com/cockroachdb/cockroach/issues/37737#issuecomment-496026918
   104  					if !strings.HasPrefix(version, "2.") {
   105  						if err := c.CheckReplicaDivergenceOnDB(ctx, db); err != nil {
   106  							return errors.Wrapf(err, "node %d", i)
   107  						}
   108  					}
   109  				}
   110  				return nil
   111  			}
   112  
   113  			db := c.Conn(ctx, 1)
   114  			defer db.Close()
   115  			// See analogous comment in the upgrade/mixedWith roachtest.
   116  			db.SetMaxIdleConns(0)
   117  
   118  			// First let the load generators run in the cluster at `version`.
   119  			if err := sleepAndCheck(); err != nil {
   120  				return err
   121  			}
   122  
   123  			stop := func(node int) error {
   124  				m.ExpectDeath()
   125  				l.Printf("stopping node %d\n", node)
   126  				return c.StopCockroachGracefullyOnNode(ctx, node)
   127  			}
   128  
   129  			var oldVersion string
   130  			if err := db.QueryRowContext(ctx, `SHOW CLUSTER SETTING version`).Scan(&oldVersion); err != nil {
   131  				return err
   132  			}
   133  			l.Printf("cluster version is %s\n", oldVersion)
   134  
   135  			// Now perform a rolling restart into the new binary.
   136  			for i := 1; i < nodes; i++ {
   137  				t.WorkerStatus("upgrading ", i)
   138  				l.Printf("upgrading %d\n", i)
   139  				if err := stop(i); err != nil {
   140  					return err
   141  				}
   142  				c.Put(ctx, cockroach, "./cockroach", c.Node(i))
   143  				c.Start(ctx, t, c.Node(i), startArgsDontEncrypt)
   144  				if err := sleepAndCheck(); err != nil {
   145  					return err
   146  				}
   147  			}
   148  
   149  			l.Printf("stopping last node\n")
   150  			// Stop the last node.
   151  			if err := stop(nodes); err != nil {
   152  				return err
   153  			}
   154  
   155  			// Set cluster.preserve_downgrade_option to be the old cluster version to
   156  			// prevent upgrade.
   157  			l.Printf("preventing automatic upgrade\n")
   158  			if _, err := db.ExecContext(ctx,
   159  				fmt.Sprintf("SET CLUSTER SETTING cluster.preserve_downgrade_option = '%s';", oldVersion),
   160  			); err != nil {
   161  				return err
   162  			}
   163  
   164  			// Do upgrade for the last node.
   165  			l.Printf("upgrading last node\n")
   166  			c.Put(ctx, cockroach, "./cockroach", c.Node(nodes))
   167  			c.Start(ctx, t, c.Node(nodes), startArgsDontEncrypt)
   168  			if err := sleepAndCheck(); err != nil {
   169  				return err
   170  			}
   171  
   172  			// Changed our mind, let's roll that back.
   173  			for i := 1; i <= nodes; i++ {
   174  				l.Printf("downgrading node %d\n", i)
   175  				t.WorkerStatus("downgrading", i)
   176  				if err := stop(i); err != nil {
   177  					return err
   178  				}
   179  				c.Put(ctx, b, "./cockroach", c.Node(i))
   180  				c.Start(ctx, t, c.Node(i), startArgsDontEncrypt)
   181  				if err := sleepAndCheck(); err != nil {
   182  					return err
   183  				}
   184  			}
   185  
   186  			// OK, let's go forward again.
   187  			for i := 1; i <= nodes; i++ {
   188  				l.Printf("upgrading node %d (again)\n", i)
   189  				t.WorkerStatus("upgrading", i, "(again)")
   190  				if err := stop(i); err != nil {
   191  					return err
   192  				}
   193  				c.Put(ctx, cockroach, "./cockroach", c.Node(i))
   194  				c.Start(ctx, t, c.Node(i), startArgsDontEncrypt)
   195  				if err := sleepAndCheck(); err != nil {
   196  					return err
   197  				}
   198  			}
   199  
   200  			// Reset cluster.preserve_downgrade_option to allow auto upgrade.
   201  			l.Printf("reenabling auto-upgrade\n")
   202  			if _, err := db.ExecContext(ctx,
   203  				"RESET CLUSTER SETTING cluster.preserve_downgrade_option;",
   204  			); err != nil {
   205  				return err
   206  			}
   207  
   208  			return sleepAndCheck()
   209  		})
   210  		m.Wait()
   211  	}
   212  
   213  	for _, n := range []int{3, 5} {
   214  		r.Add(testSpec{
   215  			Name:       fmt.Sprintf("version/mixed/nodes=%d", n),
   216  			Owner:      OwnerKV,
   217  			MinVersion: "v2.1.0",
   218  			Cluster:    makeClusterSpec(n + 1),
   219  			Run: func(ctx context.Context, t *test, c *cluster) {
   220  				pred, err := PredecessorVersion(r.buildVersion)
   221  				if err != nil {
   222  					t.Fatal(err)
   223  				}
   224  				runVersion(ctx, t, c, pred)
   225  			},
   226  		})
   227  	}
   228  }