github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/queue.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "fmt" 16 "strings" 17 "time" 18 19 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 20 ) 21 22 func registerQueue(r *testRegistry) { 23 // One node runs the workload generator, all other nodes host CockroachDB. 24 const numNodes = 2 25 r.Add(testSpec{ 26 Skip: "https://github.com/cockroachdb/cockroach/issues/17229", 27 Name: fmt.Sprintf("queue/nodes=%d", numNodes-1), 28 Owner: OwnerKV, 29 Cluster: makeClusterSpec(numNodes), 30 Run: func(ctx context.Context, t *test, c *cluster) { 31 runQueue(ctx, t, c) 32 }, 33 }) 34 } 35 36 func runQueue(ctx context.Context, t *test, c *cluster) { 37 dbNodeCount := c.spec.NodeCount - 1 38 workloadNode := c.spec.NodeCount 39 40 // Distribute programs to the correct nodes and start CockroachDB. 41 c.Put(ctx, cockroach, "./cockroach", c.Range(1, dbNodeCount)) 42 c.Put(ctx, workload, "./workload", c.Node(workloadNode)) 43 c.Start(ctx, t, c.Range(1, dbNodeCount)) 44 45 runQueueWorkload := func(duration time.Duration, initTables bool) { 46 m := newMonitor(ctx, c, c.Range(1, dbNodeCount)) 47 m.Go(func(ctx context.Context) error { 48 concurrency := ifLocal("", " --concurrency="+fmt.Sprint(dbNodeCount*64)) 49 duration := fmt.Sprintf(" --duration=%s", duration.String()) 50 batch := " --batch 100" 51 init := "" 52 if initTables { 53 init = " --init" 54 } 55 cmd := fmt.Sprintf( 56 "./workload run queue --histograms="+perfArtifactsDir+"/stats.json"+ 57 init+ 58 concurrency+ 59 duration+ 60 batch+ 61 " {pgurl:1-%d}", 62 dbNodeCount, 63 ) 64 c.Run(ctx, c.Node(workloadNode), cmd) 65 return nil 66 }) 67 m.Wait() 68 } 69 70 // getQueueScanTime samples the time to run a statement that scans the queue 71 // table. 72 getQueueScanTime := func() time.Duration { 73 db := c.Conn(ctx, 1) 74 sampleCount := 5 75 samples := make([]time.Duration, sampleCount) 76 for i := 0; i < sampleCount; i++ { 77 startTime := timeutil.Now() 78 var queueCount int 79 row := db.QueryRow("SELECT count(*) FROM queue.queue WHERE ts < 1000") 80 if err := row.Scan(&queueCount); err != nil { 81 t.Fatalf("error running delete statement on queue: %s", err) 82 } 83 endTime := timeutil.Now() 84 samples[i] = endTime.Sub(startTime) 85 } 86 var sum time.Duration 87 for _, sample := range samples { 88 sum += sample 89 } 90 return sum / time.Duration(sampleCount) 91 } 92 93 // Run an initial short workload to populate the queue table and get a baseline 94 // performance for the queue scan time. 95 t.Status("running initial workload") 96 runQueueWorkload(10*time.Second, true) 97 scanTimeBefore := getQueueScanTime() 98 99 // Set TTL on table queue.queue to 0, so that rows are deleted immediately 100 db := c.Conn(ctx, 1) 101 _, err := db.ExecContext(ctx, `ALTER TABLE queue.queue CONFIGURE ZONE USING gc.ttlseconds = 30`) 102 if err != nil && strings.Contains(err.Error(), "syntax error") { 103 // Pre-2.1 was EXPERIMENTAL. 104 // TODO(knz): Remove this in 2.2. 105 _, err = db.ExecContext(ctx, `ALTER TABLE queue.queue EXPERIMENTAL CONFIGURE ZONE 'gc: {ttlseconds: 30}'`) 106 } 107 if err != nil { 108 t.Fatalf("error setting zone config TTL: %s", err) 109 } 110 // Truncate table to avoid duplicate key constraints. 111 if _, err := db.Exec("DELETE FROM queue.queue"); err != nil { 112 t.Fatalf("error deleting rows after initial insertion: %s", err) 113 } 114 115 t.Status("running primary workload") 116 runQueueWorkload(10*time.Minute, false) 117 118 // Sanity Check: ensure that the queue has actually been deleting rows. There 119 // may be some entries left over from the end of the workflow, but the number 120 // should not exceed the computed maxRows. 121 122 row := db.QueryRow("SELECT count(*) FROM queue.queue") 123 var queueCount int 124 if err := row.Scan(&queueCount); err != nil { 125 t.Fatalf("error selecting queueCount from queue: %s", err) 126 } 127 maxRows := 100 128 if local { 129 maxRows *= dbNodeCount * 64 130 } 131 if queueCount > maxRows { 132 t.Fatalf("resulting table had %d entries, expected %d or fewer", queueCount, maxRows) 133 } 134 135 // Sample the scan time after the primary workload. We expect this to be 136 // similar to the baseline time; if time needed has increased by a factor 137 // of five or more, we consider the test to have failed. 138 scanTimeAfter := getQueueScanTime() 139 fmt.Printf("scan time before load: %s, scan time after: %s", scanTimeBefore, scanTimeAfter) 140 fmt.Printf("scan time increase: %f (%f/%f)", float64(scanTimeAfter)/float64(scanTimeBefore), float64(scanTimeAfter), float64(scanTimeBefore)) 141 if scanTimeAfter > scanTimeBefore*30 { 142 t.Fatalf( 143 "scan time increased by factor of %f after queue workload", 144 float64(scanTimeAfter)/float64(scanTimeBefore), 145 ) 146 } 147 }