github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/copy.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 gosql "database/sql" 16 "fmt" 17 "strings" 18 19 "github.com/cockroachdb/cockroach-go/crdb" 20 "github.com/cockroachdb/errors" 21 _ "github.com/lib/pq" 22 ) 23 24 func registerCopy(r *testRegistry) { 25 // This test imports a fully-populated Bank table. It then creates an empty 26 // Bank schema. Finally, it performs a series of `INSERT ... SELECT ...` 27 // statements to copy all data from the first table into the second table. 28 runCopy := func(ctx context.Context, t *test, c *cluster, rows int, inTxn bool) { 29 // payload is the size of the payload column for each row in the Bank 30 // table. If this is adjusted, a new fixture may need to be generated. 31 const payload = 100 32 // rowOverheadEstimate is an estimate of the overhead of a single 33 // row in the Bank table, not including the size of the payload 34 // itself. This overhead includes the size of the other two columns 35 // in the table along with the size of each row's associated KV key. 36 const rowOverheadEstimate = 160 37 const rowEstimate = rowOverheadEstimate + payload 38 39 c.Put(ctx, cockroach, "./cockroach", c.All()) 40 c.Put(ctx, workload, "./workload", c.All()) 41 c.Start(ctx, t, c.All()) 42 43 m := newMonitor(ctx, c, c.All()) 44 m.Go(func(ctx context.Context) error { 45 db := c.Conn(ctx, 1) 46 defer db.Close() 47 48 t.Status("importing Bank fixture") 49 c.Run(ctx, c.Node(1), fmt.Sprintf( 50 "./workload fixtures load bank --rows=%d --payload-bytes=%d {pgurl:1}", 51 rows, payload)) 52 if _, err := db.Exec("ALTER TABLE bank.bank RENAME TO bank.bank_orig"); err != nil { 53 t.Fatalf("failed to rename table: %v", err) 54 } 55 56 t.Status("create copy of Bank schema") 57 c.Run(ctx, c.Node(1), "./workload init bank --rows=0 --ranges=0 {pgurl:1}") 58 59 rangeCount := func() int { 60 var count int 61 const q = "SELECT count(*) FROM [SHOW RANGES FROM TABLE bank.bank]" 62 if err := db.QueryRow(q).Scan(&count); err != nil { 63 // TODO(rafi): Remove experimental_ranges query once we stop testing 64 // 19.1 or earlier. 65 if strings.Contains(err.Error(), "syntax error at or near \"ranges\"") { 66 err = db.QueryRow("SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE bank.bank]").Scan(&count) 67 } 68 if err != nil { 69 t.Fatalf("failed to get range count: %v", err) 70 } 71 } 72 return count 73 } 74 if rc := rangeCount(); rc != 1 { 75 return errors.Errorf("empty bank table split over multiple ranges") 76 } 77 78 // Copy batches of rows from bank_orig to bank. Each batch needs to 79 // be under kv.raft.command.max_size=64MB or we'll hit a "command is 80 // too large" error. We play it safe and chose batches whose rows 81 // add up to well less than this limit. 82 rowsPerInsert := (60 << 20 /* 60MB */) / rowEstimate 83 t.Status("copying from bank_orig to bank") 84 85 // querier is a common interface shared by sql.DB and sql.Tx. It 86 // can be replaced by https://github.com/golang/go/issues/14468 if 87 // that is ever resolved. 88 type querier interface { 89 QueryRow(query string, args ...interface{}) *gosql.Row 90 } 91 runCopy := func(qu querier) error { 92 for lastID := -1; lastID+1 < rows; { 93 if lastID > 0 { 94 t.Progress(float64(lastID+1) / float64(rows)) 95 } 96 q := fmt.Sprintf(` 97 SELECT id FROM [ 98 INSERT INTO bank.bank 99 SELECT * FROM bank.bank_orig 100 WHERE id > %d 101 ORDER BY id ASC 102 LIMIT %d 103 RETURNING ID 104 ] 105 ORDER BY id DESC 106 LIMIT 1`, 107 lastID, rowsPerInsert) 108 if err := qu.QueryRow(q).Scan(&lastID); err != nil { 109 return err 110 } 111 } 112 return nil 113 } 114 115 var err error 116 if inTxn { 117 err = crdb.ExecuteTx(ctx, db, nil, func(tx *gosql.Tx) error { return runCopy(tx) }) 118 } else { 119 err = runCopy(db) 120 } 121 if err != nil { 122 t.Fatalf("failed to copy rows: %s", err) 123 } 124 rangeMinBytes, rangeMaxBytes, err := getDefaultRangeSize(ctx, db) 125 if err != nil { 126 t.Fatalf("failed to get default range size: %v", err) 127 } 128 rc := rangeCount() 129 t.l.Printf("range count after copy = %d\n", rc) 130 highExp := (rows * rowEstimate) / rangeMinBytes 131 lowExp := (rows * rowEstimate) / rangeMaxBytes 132 if rc > highExp || rc < lowExp { 133 return errors.Errorf("expected range count for table between %d and %d, found %d", 134 lowExp, highExp, rc) 135 } 136 return nil 137 }) 138 m.Wait() 139 } 140 141 const rows = int(1e7) 142 const numNodes = 9 143 144 for _, inTxn := range []bool{true, false} { 145 inTxn := inTxn 146 r.Add(testSpec{ 147 Name: fmt.Sprintf("copy/bank/rows=%d,nodes=%d,txn=%t", rows, numNodes, inTxn), 148 Owner: OwnerKV, 149 Cluster: makeClusterSpec(numNodes), 150 Run: func(ctx context.Context, t *test, c *cluster) { 151 runCopy(ctx, t, c, rows, inTxn) 152 }, 153 }) 154 } 155 } 156 157 func getDefaultRangeSize( 158 ctx context.Context, db *gosql.DB, 159 ) (rangeMinBytes, rangeMaxBytes int, err error) { 160 err = db.QueryRow(`SELECT 161 regexp_extract(regexp_extract(raw_config_sql, e'range_min_bytes = \\d+'), e'\\d+')::INT8 162 AS range_min_bytes, 163 regexp_extract(regexp_extract(raw_config_sql, e'range_max_bytes = \\d+'), e'\\d+')::INT8 164 AS range_max_bytes 165 FROM 166 [SHOW ZONE CONFIGURATION FOR RANGE default];`).Scan(&rangeMinBytes, &rangeMaxBytes) 167 // Older cluster versions do not contain this column. Use the old default. 168 if err != nil && strings.Contains(err.Error(), `column "raw_config_sql" does not exist`) { 169 rangeMinBytes, rangeMaxBytes, err = 32<<20 /* 32MB */, 64<<20 /* 64MB */, nil 170 } 171 return rangeMinBytes, rangeMaxBytes, err 172 }