github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/cancel.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "fmt" 16 "time" 17 18 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 19 ) 20 21 // Motivation: 22 // Although there are unit tests that test query cancellation, they have been 23 // insufficient in detecting problems with canceling long-running, multi-node 24 // DistSQL queries. This is because, unlike local queries which only need to 25 // cancel the transaction's context, DistSQL queries must cancel flow contexts 26 // on each node involved in the query. Typical strategies for local execution 27 // testing involve using a builtin like generate_series to create artificially 28 // long-running queries, but these approaches don't create multi-node DistSQL 29 // queries; the only way to do so is by querying a large dataset split across 30 // multiple nodes. Due to the high cost of loading the prerequisite data, these 31 // tests are best suited as nightlies. 32 // 33 // Once DistSQL queries provide more testing knobs, these tests can likely be 34 // replaced with unit tests. 
35 func registerCancel(r *testRegistry) { 36 runCancel := func(ctx context.Context, t *test, c *cluster, 37 queries []string, warehouses int, useDistsql bool) { 38 c.Put(ctx, cockroach, "./cockroach", c.All()) 39 c.Put(ctx, workload, "./workload", c.All()) 40 c.Start(ctx, t, c.All()) 41 42 m := newMonitor(ctx, c, c.All()) 43 m.Go(func(ctx context.Context) error { 44 t.Status("importing TPCC fixture") 45 c.Run(ctx, c.Node(1), fmt.Sprintf( 46 "./workload fixtures load tpcc --warehouses=%d {pgurl:1}", warehouses)) 47 48 conn := c.Conn(ctx, 1) 49 defer conn.Close() 50 51 var queryPrefix string 52 if !useDistsql { 53 queryPrefix = "SET distsql = off;" 54 } 55 56 t.Status("running queries to cancel") 57 for _, q := range queries { 58 sem := make(chan struct{}, 1) 59 go func(q string) { 60 t.l.Printf("executing \"%s\"\n", q) 61 sem <- struct{}{} 62 _, err := conn.Exec(queryPrefix + q) 63 if err == nil { 64 close(sem) 65 t.Fatal("query completed before it could be canceled") 66 } else { 67 fmt.Printf("query failed with error: %s\n", err) 68 } 69 sem <- struct{}{} 70 }(q) 71 72 <-sem 73 74 // The cancel query races with the execution of the query it's trying to 75 // cancel, which may result in attempting to cancel the query before it 76 // has started. To be more confident that the query is executing, wait 77 // a bit before attempting to cancel it. 
78 time.Sleep(100 * time.Millisecond) 79 80 const cancelQuery = `CANCEL QUERIES 81 SELECT query_id FROM [SHOW CLUSTER QUERIES] WHERE query not like '%SHOW CLUSTER QUERIES%'` 82 c.Run(ctx, c.Node(1), `./cockroach sql --insecure -e "`+cancelQuery+`"`) 83 cancelStartTime := timeutil.Now() 84 85 select { 86 case _, ok := <-sem: 87 if !ok { 88 t.Fatal("query could not be canceled") 89 } 90 timeToCancel := timeutil.Now().Sub(cancelStartTime) 91 fmt.Printf("canceling \"%s\" took %s\n", q, timeToCancel) 92 93 case <-time.After(5 * time.Second): 94 t.Fatal("query took too long to respond to cancellation") 95 } 96 } 97 98 return nil 99 }) 100 m.Wait() 101 } 102 103 const warehouses = 10 104 const numNodes = 3 105 queries := []string{ 106 `SELECT * FROM tpcc.stock`, 107 `SELECT * FROM tpcc.stock WHERE s_quantity > 100`, 108 `SELECT s_i_id, sum(s_quantity) FROM tpcc.stock GROUP BY s_i_id`, 109 `SELECT * FROM tpcc.stock ORDER BY s_quantity`, 110 `SELECT * FROM tpcc.order_line JOIN tpcc.stock ON s_i_id=ol_i_id`, 111 `SELECT ol_number, sum(s_quantity) FROM tpcc.stock JOIN tpcc.order_line ON s_i_id=ol_i_id WHERE ol_number > 10 GROUP BY ol_number ORDER BY ol_number`, 112 } 113 114 r.Add(testSpec{ 115 Name: fmt.Sprintf("cancel/tpcc/distsql/w=%d,nodes=%d", warehouses, numNodes), 116 Owner: OwnerSQLExec, 117 Cluster: makeClusterSpec(numNodes), 118 Run: func(ctx context.Context, t *test, c *cluster) { 119 runCancel(ctx, t, c, queries, warehouses, true /* useDistsql */) 120 }, 121 }) 122 123 r.Add(testSpec{ 124 Name: fmt.Sprintf("cancel/tpcc/local/w=%d,nodes=%d", warehouses, numNodes), 125 Owner: OwnerSQLExec, 126 Cluster: makeClusterSpec(numNodes), 127 Run: func(ctx context.Context, t *test, c *cluster) { 128 runCancel(ctx, t, c, queries, warehouses, false /* useDistsql */) 129 }, 130 }) 131 }