github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/cdc.go

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"fmt"
    17  	"regexp"
    18  	"sort"
    19  	"strconv"
    20  	"strings"
    21  	"sync/atomic"
    22  	"time"
    23  
    24  	"github.com/Shopify/sarama"
    25  	"github.com/cockroachdb/cockroach/pkg/ccl/changefeedccl/cdctest"
    26  	"github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
    27  	"github.com/cockroachdb/cockroach/pkg/util/protoutil"
    28  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    29  	"github.com/cockroachdb/cockroach/pkg/util/version"
    30  	"github.com/cockroachdb/errors"
    31  	"github.com/codahale/hdrhistogram"
    32  )
    33  
    34  type workloadType string
    35  
    36  const (
    37  	tpccWorkloadType   workloadType = "tpcc"
    38  	ledgerWorkloadType workloadType = "ledger"
    39  )
    40  
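         // cdcTestArgs configures a single cdcBasicTest run: which workload to drive,
         // how the changefeed is set up (rangefeed vs. poller, sink type, chaos), and
         // the latency and throughput targets the run must meet.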
    41  type cdcTestArgs struct {
    42  	workloadType       workloadType
    43  	tpccWarehouseCount int
    44  	workloadDuration   string
    45  	initialScan        bool
    46  	rangefeed          bool
    47  	kafkaChaos         bool
    48  	crdbChaos          bool
    49  	cloudStorageSink   bool
    50  	fixturesImport     bool
    51  
    52  	targetInitialScanLatency time.Duration
    53  	targetSteadyLatency      time.Duration
    54  	targetTxnPerSecond       float64
    55  }
    56  
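         // cdcBasicTest starts a cluster, runs the configured workload against it,
         // creates a changefeed over the workload's tables, and verifies that the
         // changefeed's initial scan and steady-state latencies stay within the
         // configured targets.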
    57  func cdcBasicTest(ctx context.Context, t *test, c *cluster, args cdcTestArgs) {
    58  	// Skip the poller test on v19.2. After 19.2 is out, we should likely delete
    59  	// the test entirely.
    60  	if !args.rangefeed && t.buildVersion.Compare(version.MustParse(`v19.1.0-0`)) > 0 {
    61  		t.Skip("no poller in >= v19.2.0", "")
    62  	}
    63  
    64  	crdbNodes := c.Range(1, c.spec.NodeCount-1)
    65  	workloadNode := c.Node(c.spec.NodeCount)
    66  	kafkaNode := c.Node(c.spec.NodeCount)
    67  	c.Put(ctx, cockroach, "./cockroach")
    68  	c.Put(ctx, workload, "./workload", workloadNode)
    69  	c.Start(ctx, t, crdbNodes)
    70  
    71  	db := c.Conn(ctx, 1)
    72  	defer stopFeeds(db)
    73  	if _, err := db.Exec(
    74  		`SET CLUSTER SETTING kv.rangefeed.enabled = $1`, args.rangefeed,
    75  	); err != nil {
    76  		t.Fatal(err)
    77  	}
     78  	// The 2.1 branch doesn't have this cluster setting, so ignore the error if
     79  	// it's about an unknown cluster setting.
    80  	if _, err := db.Exec(
    81  		`SET CLUSTER SETTING changefeed.push.enabled = $1`, args.rangefeed,
    82  	); err != nil && !strings.Contains(err.Error(), "unknown cluster setting") {
    83  		t.Fatal(err)
    84  	}
    85  	kafka := kafkaManager{
    86  		c:     c,
    87  		nodes: kafkaNode,
    88  	}
    89  
    90  	var sinkURI string
    91  	if args.cloudStorageSink {
    92  		ts := timeutil.Now().Format(`20060102150405`)
    93  		// cockroach-tmp is a multi-region bucket with a TTL to clean up old
    94  		// data.
    95  		sinkURI = `experimental-gs://cockroach-tmp/roachtest/` + ts
    96  	} else {
    97  		t.Status("installing kafka")
    98  		kafka.install(ctx)
    99  		kafka.start(ctx)
   100  		sinkURI = kafka.sinkURL(ctx)
   101  	}
   102  
   103  	m := newMonitor(ctx, c, crdbNodes)
   104  	workloadCompleteCh := make(chan struct{}, 1)
   105  
   106  	workloadStart := timeutil.Now()
   107  	if args.workloadType == tpccWorkloadType {
   108  		t.Status("installing TPCC")
   109  		tpcc := tpccWorkload{
   110  			sqlNodes:           crdbNodes,
   111  			workloadNodes:      workloadNode,
   112  			tpccWarehouseCount: args.tpccWarehouseCount,
    113  			// Tolerate errors when crdbChaos is enabled; otherwise, the workload
    114  			// would fail as soon as it tried to use the node brought down by chaos.
   115  			tolerateErrors: args.crdbChaos,
   116  		}
   117  		// TODO(dan): Remove this when we fix whatever is causing the "duplicate key
   118  		// value" errors #34025.
   119  		tpcc.tolerateErrors = true
   120  
   121  		tpcc.install(ctx, c, args.fixturesImport)
   122  		// TODO(dan,ajwerner): sleeping momentarily before running the workload
   123  		// mitigates errors like "error in newOrder: missing stock row" from tpcc.
   124  		time.Sleep(2 * time.Second)
   125  		t.Status("initiating workload")
   126  		m.Go(func(ctx context.Context) error {
   127  			defer func() { close(workloadCompleteCh) }()
   128  			tpcc.run(ctx, c, args.workloadDuration)
   129  			return nil
   130  		})
   131  	} else {
   132  		t.Status("installing Ledger Workload")
   133  		lw := ledgerWorkload{
   134  			sqlNodes:      crdbNodes,
   135  			workloadNodes: workloadNode,
   136  		}
   137  		lw.install(ctx, c)
   138  
   139  		t.Status("initiating workload")
   140  		m.Go(func(ctx context.Context) error {
   141  			defer func() { close(workloadCompleteCh) }()
   142  			lw.run(ctx, c, args.workloadDuration)
   143  			return nil
   144  		})
   145  	}
   146  
   147  	changefeedLogger, err := t.l.ChildLogger("changefeed")
   148  	if err != nil {
   149  		t.Fatal(err)
   150  	}
   151  	defer changefeedLogger.close()
   152  	verifier := makeLatencyVerifier(
   153  		args.targetInitialScanLatency,
   154  		args.targetSteadyLatency,
   155  		changefeedLogger,
   156  		args.crdbChaos,
   157  	)
   158  	defer verifier.maybeLogLatencyHist()
   159  
   160  	m.Go(func(ctx context.Context) error {
   161  		// Some of the tests have a tight enough bound on targetSteadyLatency
   162  		// that the default for kv.closed_timestamp.target_duration means the
   163  		// changefeed is never considered sufficiently caught up. We could
   164  		// instead make targetSteadyLatency less aggressive, but it'd be nice to
   165  		// keep it where it is.
   166  		if _, err := db.Exec(
   167  			`SET CLUSTER SETTING kv.closed_timestamp.target_duration='10s'`,
   168  		); err != nil {
   169  			t.Fatal(err)
   170  		}
   171  
   172  		var targets string
   173  		if args.workloadType == tpccWorkloadType {
   174  			targets = `tpcc.warehouse, tpcc.district, tpcc.customer, tpcc.history,
   175  			tpcc.order, tpcc.new_order, tpcc.item, tpcc.stock,
   176  			tpcc.order_line`
   177  		} else {
   178  			targets = `ledger.customer, ledger.transaction, ledger.entry, ledger.session`
   179  		}
   180  
   181  		jobID, err := createChangefeed(db, targets, sinkURI, args)
   182  		if err != nil {
   183  			return err
   184  		}
   185  
   186  		info, err := getChangefeedInfo(db, jobID)
   187  		if err != nil {
   188  			return err
   189  		}
   190  		verifier.statementTime = info.statementTime
   191  		changefeedLogger.Printf("started changefeed at (%d) %s\n",
   192  			verifier.statementTime.UnixNano(), verifier.statementTime)
   193  		t.Status("watching changefeed")
   194  		return verifier.pollLatency(ctx, db, jobID, time.Second, workloadCompleteCh)
   195  	})
   196  
   197  	if args.kafkaChaos {
   198  		m.Go(func(ctx context.Context) error {
   199  			period, downTime := 2*time.Minute, 20*time.Second
   200  			return kafka.chaosLoop(ctx, period, downTime, workloadCompleteCh)
   201  		})
   202  	}
   203  
   204  	if args.crdbChaos {
   205  		chaosDuration, err := time.ParseDuration(args.workloadDuration)
   206  		if err != nil {
   207  			t.Fatal(err)
   208  		}
   209  		ch := Chaos{
   210  			Timer:   Periodic{Period: 2 * time.Minute, DownTime: 20 * time.Second},
   211  			Target:  crdbNodes.randNode,
   212  			Stopper: time.After(chaosDuration),
   213  		}
   214  		m.Go(ch.Runner(c, m))
   215  	}
   216  	m.Wait()
   217  
   218  	verifier.assertValid(t)
   219  	workloadEnd := timeutil.Now()
   220  	if args.targetTxnPerSecond > 0.0 {
   221  		verifyTxnPerSecond(
   222  			ctx, c, t, crdbNodes.randNode(), workloadStart, workloadEnd, args.targetTxnPerSecond, 0.05,
   223  		)
   224  	}
   225  }
   226  
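         // runCDCBank runs the bank workload against a changefeed emitting to Kafka
         // and uses the cdctest validators to check the ordering and fingerprint
         // correctness of the emitted rows.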
   227  func runCDCBank(ctx context.Context, t *test, c *cluster) {
   228  	// Make the logs dir on every node to work around the `roachprod get logs`
   229  	// spam.
   230  	c.Run(ctx, c.All(), `mkdir -p logs`)
   231  
   232  	crdbNodes, workloadNode, kafkaNode := c.Range(1, c.spec.NodeCount-1), c.Node(c.spec.NodeCount), c.Node(c.spec.NodeCount)
   233  	c.Put(ctx, cockroach, "./cockroach", crdbNodes)
   234  	c.Put(ctx, workload, "./workload", workloadNode)
   235  	c.Start(ctx, t, crdbNodes)
   236  	kafka := kafkaManager{
   237  		c:     c,
   238  		nodes: kafkaNode,
   239  	}
   240  	kafka.install(ctx)
   241  	if !c.isLocal() {
   242  		// TODO(dan): This test currently connects to kafka from the test
   243  		// runner, so kafka needs to advertise the external address. Better
   244  		// would be a binary we could run on one of the roachprod machines.
   245  		c.Run(ctx, kafka.nodes, `echo "advertised.listeners=PLAINTEXT://`+kafka.consumerURL(ctx)+`" >> `+
   246  			kafka.basePath()+`/confluent-4.0.0/etc/kafka/server.properties`)
   247  	}
   248  	kafka.start(ctx)
   249  	defer kafka.stop(ctx)
   250  
   251  	c.Run(ctx, workloadNode, `./workload init bank {pgurl:1}`)
   252  	db := c.Conn(ctx, 1)
   253  	defer stopFeeds(db)
   254  
   255  	if _, err := db.Exec(
   256  		`SET CLUSTER SETTING kv.rangefeed.enabled = true`,
   257  	); err != nil {
   258  		t.Fatal(err)
   259  	}
   260  	if _, err := db.Exec(
   261  		`SET CLUSTER SETTING changefeed.experimental_poll_interval = '10ms'`,
   262  	); err != nil {
   263  		t.Fatal(err)
   264  	}
   265  	if _, err := db.Exec(
   266  		`SET CLUSTER SETTING kv.closed_timestamp.target_duration = '1s'`,
   267  	); err != nil {
   268  		t.Fatal(err)
   269  	}
   270  
   271  	// NB: the WITH diff option was not supported until v20.1.
   272  	withDiff := t.IsBuildVersion("v20.1.0")
   273  	var opts = []string{`updated`, `resolved`}
   274  	if withDiff {
   275  		opts = append(opts, `diff`)
   276  	}
   277  	var jobID string
   278  	if err := db.QueryRow(
   279  		`CREATE CHANGEFEED FOR bank.bank INTO $1 WITH `+strings.Join(opts, `, `), kafka.sinkURL(ctx),
   280  	).Scan(&jobID); err != nil {
   281  		t.Fatal(err)
   282  	}
   283  
   284  	t.Status("running workload")
   285  	workloadCtx, workloadCancel := context.WithCancel(ctx)
   286  	m := newMonitor(workloadCtx, c, crdbNodes)
   287  	var doneAtomic int64
   288  	m.Go(func(ctx context.Context) error {
   289  		err := c.RunE(ctx, workloadNode, `./workload run bank {pgurl:1} --max-rate=10`)
   290  		if atomic.LoadInt64(&doneAtomic) > 0 {
   291  			return nil
   292  		}
   293  		return errors.Wrap(err, "workload failed")
   294  	})
   295  	m.Go(func(ctx context.Context) (_err error) {
   296  		defer workloadCancel()
   297  
   298  		defer func() {
   299  			_err = errors.Wrap(_err, "CDC failed")
   300  		}()
   301  
   302  		l, err := t.l.ChildLogger(`changefeed`)
   303  		if err != nil {
   304  			return err
   305  		}
   306  		defer l.close()
   307  
   308  		tc, err := kafka.consumer(ctx, `bank`)
   309  		if err != nil {
   310  			return err
   311  		}
   312  		defer tc.Close()
   313  
   314  		if _, err := db.Exec(
   315  			`CREATE TABLE fprint (id INT PRIMARY KEY, balance INT, payload STRING)`,
   316  		); err != nil {
   317  			return errors.Wrap(err, "CREATE TABLE failed")
   318  		}
   319  
   320  		const requestedResolved = 100
   321  		fprintV, err := cdctest.NewFingerprintValidator(db, `bank.bank`, `fprint`, tc.partitions, 0)
   322  		if err != nil {
   323  			return errors.Wrap(err, "error creating validator")
   324  		}
   325  		validators := cdctest.Validators{
   326  			cdctest.NewOrderValidator(`bank`),
   327  			fprintV,
   328  		}
   329  		if withDiff {
   330  			baV, err := cdctest.NewBeforeAfterValidator(db, `bank.bank`)
   331  			if err != nil {
   332  				return err
   333  			}
   334  			validators = append(validators, baV)
   335  		}
   336  		v := cdctest.MakeCountValidator(validators)
   337  
   338  		for {
   339  			m := tc.Next(ctx)
   340  			if m == nil {
   341  				return fmt.Errorf("unexpected end of changefeed")
   342  			}
   343  			updated, resolved, err := cdctest.ParseJSONValueTimestamps(m.Value)
   344  			if err != nil {
   345  				return err
   346  			}
   347  
   348  			partitionStr := strconv.Itoa(int(m.Partition))
   349  			if len(m.Key) > 0 {
   350  				if err := v.NoteRow(partitionStr, string(m.Key), string(m.Value), updated); err != nil {
   351  					return err
   352  				}
   353  			} else {
   354  				if err := v.NoteResolved(partitionStr, resolved); err != nil {
   355  					return err
   356  				}
   357  				l.Printf("%d of %d resolved timestamps, latest is %s behind realtime",
   358  					v.NumResolvedWithRows, requestedResolved, timeutil.Since(resolved.GoTime()))
   359  				if v.NumResolvedWithRows >= requestedResolved {
   360  					atomic.StoreInt64(&doneAtomic, 1)
   361  					break
   362  				}
   363  			}
   364  		}
   365  		if failures := v.Failures(); len(failures) > 0 {
   366  			return errors.Newf("validator failures:\n%s", strings.Join(failures, "\n"))
   367  		}
   368  		return nil
   369  	})
   370  	m.Wait()
   371  }
   372  
    373  // runCDCSchemaRegistry verifies that a changefeed in avro format works
    374  // end-to-end with a Confluent schema registry (including the registry's
    375  // default of requiring backward compatibility within a topic).
   376  func runCDCSchemaRegistry(ctx context.Context, t *test, c *cluster) {
   377  	crdbNodes, kafkaNode := c.Node(1), c.Node(1)
   378  	c.Put(ctx, cockroach, "./cockroach", crdbNodes)
   379  	c.Start(ctx, t, crdbNodes)
   380  	kafka := kafkaManager{
   381  		c:     c,
   382  		nodes: kafkaNode,
   383  	}
   384  	kafka.install(ctx)
   385  	kafka.start(ctx)
   386  	defer kafka.stop(ctx)
   387  
   388  	db := c.Conn(ctx, 1)
   389  	defer stopFeeds(db)
   390  
   391  	if _, err := db.Exec(`SET CLUSTER SETTING kv.rangefeed.enabled = $1`, true); err != nil {
   392  		t.Fatal(err)
   393  	}
   394  	if _, err := db.Exec(`CREATE TABLE foo (a INT PRIMARY KEY)`); err != nil {
   395  		t.Fatal(err)
   396  	}
   397  
   398  	// NB: the WITH diff option was not supported until v20.1.
   399  	withDiff := t.IsBuildVersion("v20.1.0")
   400  	var opts = []string{`updated`, `resolved`, `format=experimental_avro`, `confluent_schema_registry=$2`}
   401  	if withDiff {
   402  		opts = append(opts, `diff`)
   403  	}
   404  	var jobID string
   405  	if err := db.QueryRow(
   406  		`CREATE CHANGEFEED FOR foo INTO $1 WITH `+strings.Join(opts, `, `),
   407  		kafka.sinkURL(ctx), kafka.schemaRegistryURL(ctx),
   408  	).Scan(&jobID); err != nil {
   409  		t.Fatal(err)
   410  	}
   411  
   412  	if _, err := db.Exec(`INSERT INTO foo VALUES (1)`); err != nil {
   413  		t.Fatal(err)
   414  	}
   415  	if _, err := db.Exec(`ALTER TABLE foo ADD COLUMN b STRING`); err != nil {
   416  		t.Fatal(err)
   417  	}
   418  	if _, err := db.Exec(`INSERT INTO foo VALUES (2, '2')`); err != nil {
   419  		t.Fatal(err)
   420  	}
   421  	if _, err := db.Exec(`ALTER TABLE foo ADD COLUMN c INT`); err != nil {
   422  		t.Fatal(err)
   423  	}
   424  	if _, err := db.Exec(`INSERT INTO foo VALUES (3, '3', 3)`); err != nil {
   425  		t.Fatal(err)
   426  	}
   427  	if _, err := db.Exec(`ALTER TABLE foo DROP COLUMN b`); err != nil {
   428  		t.Fatal(err)
   429  	}
   430  	if _, err := db.Exec(`INSERT INTO foo VALUES (4, 4)`); err != nil {
   431  		t.Fatal(err)
   432  	}
   433  
   434  	folder := kafka.basePath()
   435  	output, err := c.RunWithBuffer(ctx, t.l, kafkaNode,
   436  		`CONFLUENT_CURRENT=`+folder+` `+folder+`/confluent-4.0.0/bin/kafka-avro-console-consumer `+
   437  			`--from-beginning --topic=foo --max-messages=14 --bootstrap-server=localhost:9092`)
   438  	if err != nil {
   439  		t.Fatal(err)
   440  	}
   441  	t.l.Printf("\n%s\n", output)
   442  
   443  	updatedRE := regexp.MustCompile(`"updated":\{"string":"[^"]+"\}`)
   444  	updatedMap := make(map[string]struct{})
   445  	var resolved []string
   446  	for _, line := range strings.Split(string(output), "\n") {
   447  		if strings.Contains(line, `"updated"`) {
   448  			line = updatedRE.ReplaceAllString(line, `"updated":{"string":""}`)
   449  			updatedMap[line] = struct{}{}
   450  		} else if strings.Contains(line, `"resolved"`) {
   451  			resolved = append(resolved, line)
   452  		}
   453  	}
   454  	// There are various internal races and retries in changefeeds that can
   455  	// produce duplicates. This test is really only to verify that the confluent
   456  	// schema registry works end-to-end, so do the simplest thing and sort +
   457  	// unique the output.
   458  	updated := make([]string, 0, len(updatedMap))
   459  	for u := range updatedMap {
   460  		updated = append(updated, u)
   461  	}
   462  	sort.Strings(updated)
   463  
   464  	var expected []string
   465  	if withDiff {
   466  		expected = []string{
   467  			`{"before":null,"after":{"foo":{"a":{"long":1}}},"updated":{"string":""}}`,
   468  			`{"before":null,"after":{"foo":{"a":{"long":2},"b":{"string":"2"}}},"updated":{"string":""}}`,
   469  			`{"before":null,"after":{"foo":{"a":{"long":3},"b":{"string":"3"},"c":{"long":3}}},"updated":{"string":""}}`,
   470  			`{"before":null,"after":{"foo":{"a":{"long":4},"c":{"long":4}}},"updated":{"string":""}}`,
   471  			`{"before":{"foo_before":{"a":{"long":1},"b":null,"c":null}},"after":{"foo":{"a":{"long":1},"c":null}},"updated":{"string":""}}`,
   472  			`{"before":{"foo_before":{"a":{"long":1},"c":null}},"after":{"foo":{"a":{"long":1},"c":null}},"updated":{"string":""}}`,
   473  			`{"before":{"foo_before":{"a":{"long":2},"b":{"string":"2"},"c":null}},"after":{"foo":{"a":{"long":2},"c":null}},"updated":{"string":""}}`,
   474  			`{"before":{"foo_before":{"a":{"long":2},"c":null}},"after":{"foo":{"a":{"long":2},"c":null}},"updated":{"string":""}}`,
   475  			`{"before":{"foo_before":{"a":{"long":3},"b":{"string":"3"},"c":{"long":3}}},"after":{"foo":{"a":{"long":3},"c":{"long":3}}},"updated":{"string":""}}`,
   476  			`{"before":{"foo_before":{"a":{"long":3},"c":{"long":3}}},"after":{"foo":{"a":{"long":3},"c":{"long":3}}},"updated":{"string":""}}`,
   477  		}
   478  	} else {
   479  		expected = []string{
   480  			`{"updated":{"string":""},"after":{"foo":{"a":{"long":1},"c":null}}}`,
   481  			`{"updated":{"string":""},"after":{"foo":{"a":{"long":1}}}}`,
   482  			`{"updated":{"string":""},"after":{"foo":{"a":{"long":2},"b":{"string":"2"}}}}`,
   483  			`{"updated":{"string":""},"after":{"foo":{"a":{"long":2},"c":null}}}`,
   484  			`{"updated":{"string":""},"after":{"foo":{"a":{"long":3},"b":{"string":"3"},"c":{"long":3}}}}`,
   485  			`{"updated":{"string":""},"after":{"foo":{"a":{"long":3},"c":{"long":3}}}}`,
   486  			`{"updated":{"string":""},"after":{"foo":{"a":{"long":4},"c":{"long":4}}}}`,
   487  		}
   488  	}
   489  	if strings.Join(expected, "\n") != strings.Join(updated, "\n") {
   490  		t.Fatalf("expected\n%s\n\ngot\n%s\n\n",
   491  			strings.Join(expected, "\n"), strings.Join(updated, "\n"))
   492  	}
   493  
   494  	if len(resolved) == 0 {
   495  		t.Fatal(`expected at least 1 resolved timestamp`)
   496  	}
   497  }
   498  
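         // registerCDC registers all of the cdc/* roachtests.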
   499  func registerCDC(r *testRegistry) {
   500  	useRangeFeed := true
   501  	if r.buildVersion.Compare(version.MustParse(`v19.1.0-0`)) < 0 {
   502  		// RangeFeed is not production ready in 2.1, so run the tests with the
   503  		// poller.
   504  		useRangeFeed = false
   505  	}
   506  
   507  	r.Add(testSpec{
   508  		Name:       fmt.Sprintf("cdc/tpcc-1000/rangefeed=%t", useRangeFeed),
   509  		Owner:      `cdc`,
   510  		MinVersion: "v2.1.0",
   511  		Cluster:    makeClusterSpec(4, cpu(16)),
   512  		Run: func(ctx context.Context, t *test, c *cluster) {
   513  			cdcBasicTest(ctx, t, c, cdcTestArgs{
   514  				workloadType:             tpccWorkloadType,
   515  				tpccWarehouseCount:       1000,
   516  				workloadDuration:         "120m",
   517  				rangefeed:                useRangeFeed,
   518  				targetInitialScanLatency: 3 * time.Minute,
   519  				targetSteadyLatency:      10 * time.Minute,
   520  			})
   521  		},
   522  	})
   523  	r.Add(testSpec{
   524  		Name:       fmt.Sprintf("cdc/initial-scan/rangefeed=%t", useRangeFeed),
   525  		Owner:      `cdc`,
   526  		MinVersion: "v2.1.0",
   527  		Cluster:    makeClusterSpec(4, cpu(16)),
   528  		Run: func(ctx context.Context, t *test, c *cluster) {
   529  			cdcBasicTest(ctx, t, c, cdcTestArgs{
   530  				workloadType:             tpccWorkloadType,
   531  				tpccWarehouseCount:       100,
   532  				workloadDuration:         "30m",
   533  				initialScan:              true,
   534  				rangefeed:                useRangeFeed,
   535  				targetInitialScanLatency: 30 * time.Minute,
   536  				targetSteadyLatency:      time.Minute,
   537  			})
   538  		},
   539  	})
   540  	r.Add(testSpec{
   541  		Name:  "cdc/poller/rangefeed=false",
   542  		Owner: `cdc`,
    543  		// When testing a 2.1 binary, we use the poller for all the other tests,
    544  		// and this one is close enough to the cdc/tpcc-1000 test to be redundant,
    545  		// so skip it.
   546  		MinVersion: "v19.1.0",
   547  		Cluster:    makeClusterSpec(4, cpu(16)),
   548  		Run: func(ctx context.Context, t *test, c *cluster) {
   549  			cdcBasicTest(ctx, t, c, cdcTestArgs{
   550  				workloadType:             tpccWorkloadType,
   551  				tpccWarehouseCount:       1000,
   552  				workloadDuration:         "30m",
   553  				rangefeed:                false,
   554  				targetInitialScanLatency: 30 * time.Minute,
   555  				targetSteadyLatency:      2 * time.Minute,
   556  			})
   557  		},
   558  	})
   559  	r.Add(testSpec{
   560  		Name:  fmt.Sprintf("cdc/sink-chaos/rangefeed=%t", useRangeFeed),
   561  		Owner: `cdc`,
   562  		// TODO(dan): Re-enable this test on 2.1 if we decide to backport #36852.
   563  		MinVersion: "v19.1.0",
   564  		Cluster:    makeClusterSpec(4, cpu(16)),
   565  		Run: func(ctx context.Context, t *test, c *cluster) {
   566  			cdcBasicTest(ctx, t, c, cdcTestArgs{
   567  				workloadType:             tpccWorkloadType,
   568  				tpccWarehouseCount:       100,
   569  				workloadDuration:         "30m",
   570  				rangefeed:                useRangeFeed,
   571  				kafkaChaos:               true,
   572  				targetInitialScanLatency: 3 * time.Minute,
   573  				targetSteadyLatency:      5 * time.Minute,
   574  			})
   575  		},
   576  	})
   577  	r.Add(testSpec{
   578  		Name:  fmt.Sprintf("cdc/crdb-chaos/rangefeed=%t", useRangeFeed),
   579  		Owner: `cdc`,
   580  		Skip:  "#37716",
   581  		// TODO(dan): Re-enable this test on 2.1 if we decide to backport #36852.
   582  		MinVersion: "v19.1.0",
   583  		Cluster:    makeClusterSpec(4, cpu(16)),
   584  		Run: func(ctx context.Context, t *test, c *cluster) {
   585  			cdcBasicTest(ctx, t, c, cdcTestArgs{
   586  				workloadType:             tpccWorkloadType,
   587  				tpccWarehouseCount:       100,
   588  				workloadDuration:         "30m",
   589  				rangefeed:                useRangeFeed,
   590  				crdbChaos:                true,
   591  				targetInitialScanLatency: 3 * time.Minute,
   592  				// TODO(dan): It should be okay to drop this as low as 2 to 3 minutes,
   593  				// but we're occasionally seeing it take between 11 and 12 minutes to
   594  				// get everything running again after a chaos event. There's definitely
   595  				// a thread worth pulling on here. See #36879.
   596  				targetSteadyLatency: 15 * time.Minute,
   597  			})
   598  		},
   599  	})
   600  	r.Add(testSpec{
   601  		Name:       fmt.Sprintf("cdc/ledger/rangefeed=%t", useRangeFeed),
   602  		Owner:      `cdc`,
   603  		MinVersion: "v2.1.0",
    604  		// TODO(mrtracy): This workload is designed to run on 20-CPU nodes, but
    605  		// those cannot be allocated without some sort of configuration outside
    606  		// of this test. Look into it.
   607  		Cluster: makeClusterSpec(4, cpu(16)),
   608  		Run: func(ctx context.Context, t *test, c *cluster) {
   609  			cdcBasicTest(ctx, t, c, cdcTestArgs{
   610  				workloadType:             ledgerWorkloadType,
   611  				workloadDuration:         "30m",
   612  				initialScan:              true,
   613  				rangefeed:                useRangeFeed,
   614  				targetInitialScanLatency: 10 * time.Minute,
   615  				targetSteadyLatency:      time.Minute,
   616  				targetTxnPerSecond:       575,
   617  			})
   618  		},
   619  	})
   620  	r.Add(testSpec{
   621  		Name:       "cdc/cloud-sink-gcs/rangefeed=true",
   622  		Owner:      `cdc`,
   623  		MinVersion: "v19.1.0",
   624  		Cluster:    makeClusterSpec(4, cpu(16)),
   625  		Run: func(ctx context.Context, t *test, c *cluster) {
   626  			cdcBasicTest(ctx, t, c, cdcTestArgs{
   627  				workloadType: tpccWorkloadType,
   628  				// Sending data to Google Cloud Storage is a bit slower than sending to
   629  				// Kafka on an adjacent machine, so use half the data of the
   630  				// initial-scan test. Consider adding a test that writes to nodelocal,
   631  				// which should be much faster, with a larger warehouse count.
   632  				tpccWarehouseCount:       50,
   633  				workloadDuration:         "30m",
   634  				initialScan:              true,
   635  				rangefeed:                true,
   636  				cloudStorageSink:         true,
   637  				fixturesImport:           true,
   638  				targetInitialScanLatency: 30 * time.Minute,
   639  				targetSteadyLatency:      time.Minute,
   640  			})
   641  		},
   642  	})
   643  	r.Add(testSpec{
   644  		Name:       "cdc/bank",
   645  		Owner:      `cdc`,
   646  		MinVersion: "v2.1.0",
   647  		Cluster:    makeClusterSpec(4),
   648  		Run: func(ctx context.Context, t *test, c *cluster) {
   649  			runCDCBank(ctx, t, c)
   650  		},
   651  	})
   652  	r.Add(testSpec{
   653  		Name:       "cdc/schemareg",
   654  		Owner:      `cdc`,
   655  		MinVersion: "v19.1.0",
   656  		Cluster:    makeClusterSpec(1),
   657  		Run: func(ctx context.Context, t *test, c *cluster) {
   658  			runCDCSchemaRegistry(ctx, t, c)
   659  		},
   660  	})
   661  }
   662  
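         // kafkaManager installs and controls a Confluent/Kafka deployment on the
         // given cluster nodes.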
   663  type kafkaManager struct {
   664  	c     *cluster
   665  	nodes nodeListOption
   666  }
   667  
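         // basePath returns the directory the Confluent distribution is installed
         // into, which differs between local and roachprod clusters.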
   668  func (k kafkaManager) basePath() string {
   669  	if k.c.isLocal() {
   670  		return `/tmp/confluent`
   671  	}
   672  	return `/mnt/data1/confluent`
   673  }
   674  
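         // install downloads the Confluent OSS 4.0.0 distribution onto the Kafka
         // nodes and, on remote clusters, installs the JRE it needs.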
   675  func (k kafkaManager) install(ctx context.Context) {
   676  	k.c.status("installing kafka")
   677  	folder := k.basePath()
   678  	k.c.Run(ctx, k.nodes, `mkdir -p `+folder)
   679  	k.c.Run(ctx, k.nodes, `curl -s https://packages.confluent.io/archive/4.0/confluent-oss-4.0.0-2.11.tar.gz | tar -xz -C `+folder)
   680  	if !k.c.isLocal() {
   681  		k.c.Run(ctx, k.nodes, `mkdir -p logs`)
    682  		k.c.Run(ctx, k.nodes, `sudo apt-get -q update > logs/apt-get-update.log 2>&1`)
    683  		k.c.Run(ctx, k.nodes, `yes | sudo apt-get -q install default-jre > logs/apt-get-install.log 2>&1`)
   684  	}
   685  }
   686  
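         // start clears out any state left over from a previous Confluent instance
         // and then (re)starts the Confluent services used by the tests.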
   687  func (k kafkaManager) start(ctx context.Context) {
   688  	folder := k.basePath()
   689  	// This isn't necessary for the nightly tests, but it's nice for iteration.
   690  	k.c.Run(ctx, k.nodes, `CONFLUENT_CURRENT=`+folder+` `+folder+`/confluent-4.0.0/bin/confluent destroy || true`)
   691  	k.restart(ctx)
   692  }
   693  
   694  func (k kafkaManager) restart(ctx context.Context) {
   695  	folder := k.basePath()
   696  	k.c.Run(ctx, k.nodes, `CONFLUENT_CURRENT=`+folder+` `+folder+`/confluent-4.0.0/bin/confluent start schema-registry`)
   697  }
   698  
   699  func (k kafkaManager) stop(ctx context.Context) {
   700  	folder := k.basePath()
   701  	k.c.Run(ctx, k.nodes, `CONFLUENT_CURRENT=`+folder+` `+folder+`/confluent-4.0.0/bin/confluent stop`)
   702  }
   703  
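         // chaosLoop stops Kafka for downTime once every period and then restarts
         // it, until the stopper channel is closed or the context is canceled.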
   704  func (k kafkaManager) chaosLoop(
   705  	ctx context.Context, period, downTime time.Duration, stopper chan struct{},
   706  ) error {
    707  	t := time.NewTicker(period)
         	defer t.Stop()
    708  	for {
   709  		select {
   710  		case <-stopper:
   711  			return nil
   712  		case <-ctx.Done():
   713  			return ctx.Err()
   714  		case <-t.C:
   715  		}
   716  
   717  		k.stop(ctx)
   718  
   719  		select {
   720  		case <-stopper:
   721  			return nil
   722  		case <-ctx.Done():
   723  			return ctx.Err()
   724  		case <-time.After(downTime):
   725  		}
   726  
   727  		k.restart(ctx)
   728  	}
   729  }
   730  
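         // sinkURL returns the kafka:// URI, addressed by internal IP, that
         // changefeeds should emit into.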
   731  func (k kafkaManager) sinkURL(ctx context.Context) string {
   732  	return `kafka://` + k.c.InternalIP(ctx, k.nodes)[0] + `:9092`
   733  }
   734  
   735  func (k kafkaManager) consumerURL(ctx context.Context) string {
   736  	return k.c.ExternalIP(ctx, k.nodes)[0] + `:9092`
   737  }
   738  
   739  func (k kafkaManager) schemaRegistryURL(ctx context.Context) string {
   740  	return `http://` + k.c.InternalIP(ctx, k.nodes)[0] + `:8081`
   741  }
   742  
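         // consumer returns a topicConsumer that reads every partition of the given
         // topic from the oldest available offset.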
   743  func (k kafkaManager) consumer(ctx context.Context, topic string) (*topicConsumer, error) {
   744  	kafkaAddrs := []string{k.consumerURL(ctx)}
   745  	config := sarama.NewConfig()
   746  	// I was seeing "error processing FetchRequest: kafka: error decoding
   747  	// packet: unknown magic byte (2)" errors which
   748  	// https://github.com/Shopify/sarama/issues/962 identifies as the
   749  	// consumer's fetch size being less than the "max.message.bytes" that
   750  	// kafka is configured with. Kafka notes that this is required in
   751  	// https://kafka.apache.org/documentation.html#upgrade_11_message_format
   752  	config.Consumer.Fetch.Default = 1000012
   753  	consumer, err := sarama.NewConsumer(kafkaAddrs, config)
   754  	if err != nil {
   755  		return nil, err
   756  	}
    757  	tc, err := makeTopicConsumer(consumer, topic)
   758  	if err != nil {
   759  		_ = consumer.Close()
   760  		return nil, err
   761  	}
   762  	return tc, nil
   763  }
   764  
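         // tpccWorkload describes a TPC-C workload: the nodes that run the workload
         // client, the SQL nodes it talks to, the warehouse count, and whether
         // errors are tolerated.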
   765  type tpccWorkload struct {
   766  	workloadNodes      nodeListOption
   767  	sqlNodes           nodeListOption
   768  	tpccWarehouseCount int
   769  	tolerateErrors     bool
   770  }
   771  
   772  func (tw *tpccWorkload) install(ctx context.Context, c *cluster, fixturesImport bool) {
   773  	command := `./workload fixtures load`
   774  	if fixturesImport {
    775  		// For fixtures import, use the version built into the cockroach binary so
    776  		// the tpcc workload versions match on release branches.
   777  		command = `./cockroach workload fixtures import`
   778  	}
   779  	c.Run(ctx, tw.workloadNodes, fmt.Sprintf(
   780  		`%s tpcc --warehouses=%d --checks=false {pgurl%s}`,
   781  		command,
   782  		tw.tpccWarehouseCount,
   783  		tw.sqlNodes.randNode(),
   784  	))
   785  }
   786  
   787  func (tw *tpccWorkload) run(ctx context.Context, c *cluster, workloadDuration string) {
   788  	tolerateErrors := ""
   789  	if tw.tolerateErrors {
   790  		tolerateErrors = "--tolerate-errors"
   791  	}
   792  	c.Run(ctx, tw.workloadNodes, fmt.Sprintf(
   793  		`./workload run tpcc --warehouses=%d --duration=%s %s {pgurl%s} `,
   794  		tw.tpccWarehouseCount, workloadDuration, tolerateErrors, tw.sqlNodes,
   795  	))
   796  }
   797  
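         // ledgerWorkload describes a ledger workload run from the workload nodes
         // against the SQL nodes.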
   798  type ledgerWorkload struct {
   799  	workloadNodes nodeListOption
   800  	sqlNodes      nodeListOption
   801  }
   802  
   803  func (lw *ledgerWorkload) install(ctx context.Context, c *cluster) {
   804  	c.Run(ctx, lw.workloadNodes.randNode(), fmt.Sprintf(
   805  		`./workload init ledger {pgurl%s}`,
   806  		lw.sqlNodes.randNode(),
   807  	))
   808  }
   809  
   810  func (lw *ledgerWorkload) run(ctx context.Context, c *cluster, workloadDuration string) {
   811  	c.Run(ctx, lw.workloadNodes, fmt.Sprintf(
   812  		`./workload run ledger --mix=balance=0,withdrawal=50,deposit=50,reversal=0 {pgurl%s} --duration=%s`,
   813  		lw.sqlNodes,
   814  		workloadDuration,
   815  	))
   816  }
   817  
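         // latencyVerifier tracks a changefeed's end-to-end latency via its
         // highwater timestamp. It records the initial scan latency first and then
         // records steady-state latencies in a histogram once the observed latency
         // has dropped below half the steady-state target.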
   818  type latencyVerifier struct {
   819  	statementTime            time.Time
   820  	targetSteadyLatency      time.Duration
   821  	targetInitialScanLatency time.Duration
   822  	tolerateErrors           bool
   823  	logger                   *logger
   824  
   825  	initialScanLatency   time.Duration
   826  	maxSeenSteadyLatency time.Duration
   827  	latencyBecameSteady  bool
   828  
   829  	latencyHist *hdrhistogram.Histogram
   830  }
   831  
   832  func makeLatencyVerifier(
   833  	targetInitialScanLatency time.Duration,
   834  	targetSteadyLatency time.Duration,
   835  	l *logger,
   836  	tolerateErrors bool,
   837  ) *latencyVerifier {
   838  	const sigFigs, minLatency, maxLatency = 1, 100 * time.Microsecond, 100 * time.Second
   839  	hist := hdrhistogram.New(minLatency.Nanoseconds(), maxLatency.Nanoseconds(), sigFigs)
   840  	return &latencyVerifier{
   841  		targetInitialScanLatency: targetInitialScanLatency,
   842  		targetSteadyLatency:      targetSteadyLatency,
   843  		logger:                   l,
   844  		latencyHist:              hist,
   845  		tolerateErrors:           tolerateErrors,
   846  	}
   847  }
   848  
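         // noteHighwater records the latency implied by a newly observed highwater
         // timestamp, treating the first highwater past the statement time as the
         // completion of the initial scan.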
   849  func (lv *latencyVerifier) noteHighwater(highwaterTime time.Time) {
   850  	if highwaterTime.Before(lv.statementTime) {
   851  		return
   852  	}
   853  	if lv.initialScanLatency == 0 {
   854  		lv.initialScanLatency = timeutil.Since(lv.statementTime)
   855  		lv.logger.Printf("initial scan completed: latency %s\n", lv.initialScanLatency)
   856  		return
   857  	}
   858  
   859  	latency := timeutil.Since(highwaterTime)
   860  	if latency < lv.targetSteadyLatency/2 {
   861  		lv.latencyBecameSteady = true
   862  	}
   863  	if !lv.latencyBecameSteady {
   864  		// Before we have RangeFeed, the polls just get
   865  		// progressively smaller after the initial one. Start
    866  		// tracking the max latency once we've seen a latency
   867  		// that's less than the max allowed. Verify at the end
   868  		// of the test that this happens at some point.
   869  		lv.logger.Printf("end-to-end latency %s not yet below target steady latency %s\n",
   870  			latency, lv.targetSteadyLatency)
   871  		return
   872  	}
   873  	if err := lv.latencyHist.RecordValue(latency.Nanoseconds()); err != nil {
   874  		lv.logger.Printf("could not record value %s: %s\n", latency, err)
   875  	}
   876  	if latency > lv.maxSeenSteadyLatency {
   877  		lv.maxSeenSteadyLatency = latency
   878  	}
   879  	lv.logger.Printf("end-to-end steady latency %s; max steady latency so far %s\n",
   880  		latency, lv.maxSeenSteadyLatency)
   881  }
   882  
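         // pollLatency periodically reads the changefeed job's progress and feeds
         // its highwater timestamp to noteHighwater until the stopper channel is
         // closed, the context is canceled, or the job stops running.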
   883  func (lv *latencyVerifier) pollLatency(
   884  	ctx context.Context, db *gosql.DB, jobID int, interval time.Duration, stopper chan struct{},
   885  ) error {
   886  	for {
   887  		select {
   888  		case <-ctx.Done():
   889  			return ctx.Err()
   890  		case <-stopper:
   891  			return nil
    892  		case <-time.After(interval):
   893  		}
   894  
   895  		info, err := getChangefeedInfo(db, jobID)
   896  		if err != nil {
   897  			if lv.tolerateErrors {
   898  				lv.logger.Printf("error getting changefeed info: %s", err)
   899  				continue
   900  			}
   901  			return err
   902  		}
   903  		if info.status != `running` {
   904  			lv.logger.Printf("unexpected status: %s, error: %s", info.status, info.errMsg)
   905  			return errors.Errorf(`unexpected status: %s`, info.status)
   906  		}
   907  		lv.noteHighwater(info.highwaterTime)
   908  	}
   909  }
   910  
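         // assertValid fails the test if the initial scan never completed or took
         // longer than its target, or if the steady-state latency never reached (or
         // later exceeded) its target.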
   911  func (lv *latencyVerifier) assertValid(t *test) {
   912  	if lv.initialScanLatency == 0 {
   913  		t.Fatalf("initial scan did not complete")
   914  	}
   915  	if lv.initialScanLatency > lv.targetInitialScanLatency {
   916  		t.Fatalf("initial scan latency was more than target: %s vs %s",
   917  			lv.initialScanLatency, lv.targetInitialScanLatency)
   918  	}
   919  	if !lv.latencyBecameSteady {
   920  		t.Fatalf("latency never dropped to acceptable steady level: %s", lv.targetSteadyLatency)
   921  	}
   922  	if lv.maxSeenSteadyLatency > lv.targetSteadyLatency {
   923  		t.Fatalf("max latency was more than allowed: %s vs %s",
   924  			lv.maxSeenSteadyLatency, lv.targetSteadyLatency)
   925  	}
   926  }
   927  
   928  func (lv *latencyVerifier) maybeLogLatencyHist() {
   929  	if lv.latencyHist == nil {
   930  		return
   931  	}
   932  	lv.logger.Printf(
   933  		"changefeed end-to-end __avg(ms)__p50(ms)__p75(ms)__p90(ms)__p95(ms)__p99(ms)_pMax(ms)\n")
   934  	lv.logger.Printf("changefeed end-to-end  %8.1f %8.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n",
   935  		time.Duration(lv.latencyHist.Mean()).Seconds()*1000,
   936  		time.Duration(lv.latencyHist.ValueAtQuantile(50)).Seconds()*1000,
   937  		time.Duration(lv.latencyHist.ValueAtQuantile(75)).Seconds()*1000,
   938  		time.Duration(lv.latencyHist.ValueAtQuantile(90)).Seconds()*1000,
   939  		time.Duration(lv.latencyHist.ValueAtQuantile(95)).Seconds()*1000,
   940  		time.Duration(lv.latencyHist.ValueAtQuantile(99)).Seconds()*1000,
   941  		time.Duration(lv.latencyHist.ValueAtQuantile(100)).Seconds()*1000,
   942  	)
   943  }
   944  
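         // createChangefeed creates a changefeed over the given targets into sinkURL
         // and returns the resulting job ID.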
   945  func createChangefeed(db *gosql.DB, targets, sinkURL string, args cdcTestArgs) (int, error) {
   946  	var jobID int
   947  	createStmt := fmt.Sprintf(`CREATE CHANGEFEED FOR %s INTO $1`, targets)
   948  	extraArgs := []interface{}{sinkURL}
   949  	if args.cloudStorageSink {
   950  		createStmt += ` WITH resolved='10s', envelope=wrapped`
   951  	} else {
   952  		createStmt += ` WITH resolved`
   953  	}
   954  	if !args.initialScan {
   955  		createStmt += `, cursor='-1s'`
   956  	}
   957  	if err := db.QueryRow(createStmt, extraArgs...).Scan(&jobID); err != nil {
   958  		return 0, err
   959  	}
   960  	return jobID, nil
   961  }
   962  
   963  type changefeedInfo struct {
   964  	status        string
   965  	errMsg        string
   966  	statementTime time.Time
   967  	highwaterTime time.Time
   968  }
   969  
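         // getChangefeedInfo reads the given changefeed job's status, error,
         // statement time, and highwater timestamp from system.jobs.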
   970  func getChangefeedInfo(db *gosql.DB, jobID int) (changefeedInfo, error) {
   971  	var status string
   972  	var payloadBytes []byte
   973  	var progressBytes []byte
   974  	if err := db.QueryRow(
   975  		`SELECT status, payload, progress FROM system.jobs WHERE id = $1`, jobID,
   976  	).Scan(&status, &payloadBytes, &progressBytes); err != nil {
   977  		return changefeedInfo{}, err
   978  	}
   979  	var payload jobspb.Payload
   980  	if err := protoutil.Unmarshal(payloadBytes, &payload); err != nil {
   981  		return changefeedInfo{}, err
   982  	}
   983  	var progress jobspb.Progress
   984  	if err := protoutil.Unmarshal(progressBytes, &progress); err != nil {
   985  		return changefeedInfo{}, err
   986  	}
   987  	var highwaterTime time.Time
   988  	highwater := progress.GetHighWater()
   989  	if highwater != nil {
   990  		highwaterTime = highwater.GoTime()
   991  	}
   992  	return changefeedInfo{
   993  		status:        status,
   994  		errMsg:        payload.Error,
   995  		statementTime: payload.GetChangefeed().StatementTime.GoTime(),
   996  		highwaterTime: highwaterTime,
   997  	}, nil
   998  }
   999  
  1000  // stopFeeds cancels any running feeds on the cluster. Not necessary for the
  1001  // nightly, but nice for development.
  1002  func stopFeeds(db *gosql.DB) {
  1003  	_, _ = db.Exec(`CANCEL JOBS (
  1004  			SELECT job_id FROM [SHOW JOBS] WHERE status = 'running'
  1005  		)`)
  1006  }
  1007  
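         // topicConsumer wraps a sarama.Consumer with per-partition consumers for a
         // single topic.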
  1008  type topicConsumer struct {
  1009  	sarama.Consumer
  1010  
  1011  	topic              string
  1012  	partitions         []string
  1013  	partitionConsumers []sarama.PartitionConsumer
  1014  }
  1015  
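         // makeTopicConsumer opens a partition consumer for every partition of the
         // topic, starting from the oldest available offset.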
  1016  func makeTopicConsumer(c sarama.Consumer, topic string) (*topicConsumer, error) {
  1017  	t := &topicConsumer{Consumer: c, topic: topic}
  1018  	partitions, err := t.Partitions(t.topic)
  1019  	if err != nil {
  1020  		return nil, err
  1021  	}
  1022  	for _, partition := range partitions {
  1023  		t.partitions = append(t.partitions, strconv.Itoa(int(partition)))
  1024  		pc, err := t.ConsumePartition(topic, partition, sarama.OffsetOldest)
  1025  		if err != nil {
  1026  			return nil, err
  1027  		}
  1028  		t.partitionConsumers = append(t.partitionConsumers, pc)
  1029  	}
  1030  	return t, nil
  1031  }
  1032  
  1033  func (c *topicConsumer) tryNextMessage(ctx context.Context) *sarama.ConsumerMessage {
  1034  	for _, pc := range c.partitionConsumers {
  1035  		select {
  1036  		case <-ctx.Done():
  1037  			return nil
  1038  		case m := <-pc.Messages():
  1039  			return m
  1040  		default:
  1041  		}
  1042  	}
  1043  	return nil
  1044  }
  1045  
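         // Next blocks until a message is available on some partition, or returns
         // nil once the context is canceled.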
  1046  func (c *topicConsumer) Next(ctx context.Context) *sarama.ConsumerMessage {
  1047  	m := c.tryNextMessage(ctx)
  1048  	for ; m == nil; m = c.tryNextMessage(ctx) {
  1049  		if ctx.Err() != nil {
  1050  			return nil
  1051  		}
  1052  	}
  1053  	return m
  1054  }
  1055  
  1056  func (c *topicConsumer) Close() {
  1057  	for _, pc := range c.partitionConsumers {
  1058  		pc.AsyncClose()
  1059  		// Drain the messages and errors as required by AsyncClose.
  1060  		for range pc.Messages() {
  1061  		}
  1062  		for range pc.Errors() {
  1063  		}
  1064  	}
  1065  	_ = c.Consumer.Close()
  1066  }