github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/queue.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    20  )
    21  
    22  func registerQueue(r *testRegistry) {
    23  	// One node runs the workload generator, all other nodes host CockroachDB.
    24  	const numNodes = 2
    25  	r.Add(testSpec{
    26  		Skip:    "https://github.com/cockroachdb/cockroach/issues/17229",
    27  		Name:    fmt.Sprintf("queue/nodes=%d", numNodes-1),
    28  		Owner:   OwnerKV,
    29  		Cluster: makeClusterSpec(numNodes),
    30  		Run: func(ctx context.Context, t *test, c *cluster) {
    31  			runQueue(ctx, t, c)
    32  		},
    33  	})
    34  }
    35  
    36  func runQueue(ctx context.Context, t *test, c *cluster) {
    37  	dbNodeCount := c.spec.NodeCount - 1
    38  	workloadNode := c.spec.NodeCount
    39  
    40  	// Distribute programs to the correct nodes and start CockroachDB.
    41  	c.Put(ctx, cockroach, "./cockroach", c.Range(1, dbNodeCount))
    42  	c.Put(ctx, workload, "./workload", c.Node(workloadNode))
    43  	c.Start(ctx, t, c.Range(1, dbNodeCount))
    44  
    45  	runQueueWorkload := func(duration time.Duration, initTables bool) {
    46  		m := newMonitor(ctx, c, c.Range(1, dbNodeCount))
    47  		m.Go(func(ctx context.Context) error {
    48  			concurrency := ifLocal("", " --concurrency="+fmt.Sprint(dbNodeCount*64))
    49  			duration := fmt.Sprintf(" --duration=%s", duration.String())
    50  			batch := " --batch 100"
    51  			init := ""
    52  			if initTables {
    53  				init = " --init"
    54  			}
    55  			cmd := fmt.Sprintf(
    56  				"./workload run queue --histograms="+perfArtifactsDir+"/stats.json"+
    57  					init+
    58  					concurrency+
    59  					duration+
    60  					batch+
    61  					" {pgurl:1-%d}",
    62  				dbNodeCount,
    63  			)
    64  			c.Run(ctx, c.Node(workloadNode), cmd)
    65  			return nil
    66  		})
    67  		m.Wait()
    68  	}
    69  
    70  	// getQueueScanTime samples the time to run a statement that scans the queue
    71  	// table.
    72  	getQueueScanTime := func() time.Duration {
    73  		db := c.Conn(ctx, 1)
    74  		sampleCount := 5
    75  		samples := make([]time.Duration, sampleCount)
    76  		for i := 0; i < sampleCount; i++ {
    77  			startTime := timeutil.Now()
    78  			var queueCount int
    79  			row := db.QueryRow("SELECT count(*) FROM queue.queue WHERE ts < 1000")
    80  			if err := row.Scan(&queueCount); err != nil {
    81  				t.Fatalf("error running delete statement on queue: %s", err)
    82  			}
    83  			endTime := timeutil.Now()
    84  			samples[i] = endTime.Sub(startTime)
    85  		}
    86  		var sum time.Duration
    87  		for _, sample := range samples {
    88  			sum += sample
    89  		}
    90  		return sum / time.Duration(sampleCount)
    91  	}
    92  
    93  	// Run an initial short workload to populate the queue table and get a baseline
    94  	// performance for the queue scan time.
    95  	t.Status("running initial workload")
    96  	runQueueWorkload(10*time.Second, true)
    97  	scanTimeBefore := getQueueScanTime()
    98  
    99  	// Set TTL on table queue.queue to 0, so that rows are deleted immediately
   100  	db := c.Conn(ctx, 1)
   101  	_, err := db.ExecContext(ctx, `ALTER TABLE queue.queue CONFIGURE ZONE USING gc.ttlseconds = 30`)
   102  	if err != nil && strings.Contains(err.Error(), "syntax error") {
   103  		// Pre-2.1 was EXPERIMENTAL.
   104  		// TODO(knz): Remove this in 2.2.
   105  		_, err = db.ExecContext(ctx, `ALTER TABLE queue.queue EXPERIMENTAL CONFIGURE ZONE 'gc: {ttlseconds: 30}'`)
   106  	}
   107  	if err != nil {
   108  		t.Fatalf("error setting zone config TTL: %s", err)
   109  	}
   110  	// Truncate table to avoid duplicate key constraints.
   111  	if _, err := db.Exec("DELETE FROM queue.queue"); err != nil {
   112  		t.Fatalf("error deleting rows after initial insertion: %s", err)
   113  	}
   114  
   115  	t.Status("running primary workload")
   116  	runQueueWorkload(10*time.Minute, false)
   117  
   118  	// Sanity Check: ensure that the queue has actually been deleting rows. There
   119  	// may be some entries left over from the end of the workflow, but the number
   120  	// should not exceed the computed maxRows.
   121  
   122  	row := db.QueryRow("SELECT count(*) FROM queue.queue")
   123  	var queueCount int
   124  	if err := row.Scan(&queueCount); err != nil {
   125  		t.Fatalf("error selecting queueCount from queue: %s", err)
   126  	}
   127  	maxRows := 100
   128  	if local {
   129  		maxRows *= dbNodeCount * 64
   130  	}
   131  	if queueCount > maxRows {
   132  		t.Fatalf("resulting table had %d entries, expected %d or fewer", queueCount, maxRows)
   133  	}
   134  
   135  	// Sample the scan time after the primary workload. We expect this to be
   136  	// similar to the baseline time; if time needed has increased by a factor
   137  	// of five or more, we consider the test to have failed.
   138  	scanTimeAfter := getQueueScanTime()
   139  	fmt.Printf("scan time before load: %s, scan time after: %s", scanTimeBefore, scanTimeAfter)
   140  	fmt.Printf("scan time increase: %f (%f/%f)", float64(scanTimeAfter)/float64(scanTimeBefore), float64(scanTimeAfter), float64(scanTimeBefore))
   141  	if scanTimeAfter > scanTimeBefore*30 {
   142  		t.Fatalf(
   143  			"scan time increased by factor of %f after queue workload",
   144  			float64(scanTimeAfter)/float64(scanTimeBefore),
   145  		)
   146  	}
   147  }