github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/copy.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	gosql "database/sql"
    16  	"fmt"
    17  	"strings"
    18  
    19  	"github.com/cockroachdb/cockroach-go/crdb"
    20  	"github.com/cockroachdb/errors"
    21  	_ "github.com/lib/pq"
    22  )
    23  
    24  func registerCopy(r *testRegistry) {
    25  	// This test imports a fully-populated Bank table. It then creates an empty
    26  	// Bank schema. Finally, it performs a series of `INSERT ... SELECT ...`
    27  	// statements to copy all data from the first table into the second table.
    28  	runCopy := func(ctx context.Context, t *test, c *cluster, rows int, inTxn bool) {
    29  		// payload is the size of the payload column for each row in the Bank
    30  		// table. If this is adjusted, a new fixture may need to be generated.
    31  		const payload = 100
    32  		// rowOverheadEstimate is an estimate of the overhead of a single
    33  		// row in the Bank table, not including the size of the payload
    34  		// itself. This overhead includes the size of the other two columns
    35  		// in the table along with the size of each row's associated KV key.
    36  		const rowOverheadEstimate = 160
    37  		const rowEstimate = rowOverheadEstimate + payload
    38  
    39  		c.Put(ctx, cockroach, "./cockroach", c.All())
    40  		c.Put(ctx, workload, "./workload", c.All())
    41  		c.Start(ctx, t, c.All())
    42  
    43  		m := newMonitor(ctx, c, c.All())
    44  		m.Go(func(ctx context.Context) error {
    45  			db := c.Conn(ctx, 1)
    46  			defer db.Close()
    47  
    48  			t.Status("importing Bank fixture")
    49  			c.Run(ctx, c.Node(1), fmt.Sprintf(
    50  				"./workload fixtures load bank --rows=%d --payload-bytes=%d {pgurl:1}",
    51  				rows, payload))
    52  			if _, err := db.Exec("ALTER TABLE bank.bank RENAME TO bank.bank_orig"); err != nil {
    53  				t.Fatalf("failed to rename table: %v", err)
    54  			}
    55  
    56  			t.Status("create copy of Bank schema")
    57  			c.Run(ctx, c.Node(1), "./workload init bank --rows=0 --ranges=0 {pgurl:1}")
    58  
    59  			rangeCount := func() int {
    60  				var count int
    61  				const q = "SELECT count(*) FROM [SHOW RANGES FROM TABLE bank.bank]"
    62  				if err := db.QueryRow(q).Scan(&count); err != nil {
    63  					// TODO(rafi): Remove experimental_ranges query once we stop testing
    64  					// 19.1 or earlier.
    65  					if strings.Contains(err.Error(), "syntax error at or near \"ranges\"") {
    66  						err = db.QueryRow("SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE bank.bank]").Scan(&count)
    67  					}
    68  					if err != nil {
    69  						t.Fatalf("failed to get range count: %v", err)
    70  					}
    71  				}
    72  				return count
    73  			}
    74  			if rc := rangeCount(); rc != 1 {
    75  				return errors.Errorf("empty bank table split over multiple ranges")
    76  			}
    77  
    78  			// Copy batches of rows from bank_orig to bank. Each batch needs to
    79  			// be under kv.raft.command.max_size=64MB or we'll hit a "command is
    80  			// too large" error. We play it safe and chose batches whose rows
    81  			// add up to well less than this limit.
    82  			rowsPerInsert := (60 << 20 /* 60MB */) / rowEstimate
    83  			t.Status("copying from bank_orig to bank")
    84  
    85  			// querier is a common interface shared by sql.DB and sql.Tx. It
    86  			// can be replaced by https://github.com/golang/go/issues/14468 if
    87  			// that is ever resolved.
    88  			type querier interface {
    89  				QueryRow(query string, args ...interface{}) *gosql.Row
    90  			}
    91  			runCopy := func(qu querier) error {
    92  				for lastID := -1; lastID+1 < rows; {
    93  					if lastID > 0 {
    94  						t.Progress(float64(lastID+1) / float64(rows))
    95  					}
    96  					q := fmt.Sprintf(`
    97  						SELECT id FROM [
    98  							INSERT INTO bank.bank
    99  							SELECT * FROM bank.bank_orig
   100  							WHERE id > %d
   101  							ORDER BY id ASC
   102  							LIMIT %d
   103  							RETURNING ID
   104  						]
   105  						ORDER BY id DESC
   106  						LIMIT 1`,
   107  						lastID, rowsPerInsert)
   108  					if err := qu.QueryRow(q).Scan(&lastID); err != nil {
   109  						return err
   110  					}
   111  				}
   112  				return nil
   113  			}
   114  
   115  			var err error
   116  			if inTxn {
   117  				err = crdb.ExecuteTx(ctx, db, nil, func(tx *gosql.Tx) error { return runCopy(tx) })
   118  			} else {
   119  				err = runCopy(db)
   120  			}
   121  			if err != nil {
   122  				t.Fatalf("failed to copy rows: %s", err)
   123  			}
   124  			rangeMinBytes, rangeMaxBytes, err := getDefaultRangeSize(ctx, db)
   125  			if err != nil {
   126  				t.Fatalf("failed to get default range size: %v", err)
   127  			}
   128  			rc := rangeCount()
   129  			t.l.Printf("range count after copy = %d\n", rc)
   130  			highExp := (rows * rowEstimate) / rangeMinBytes
   131  			lowExp := (rows * rowEstimate) / rangeMaxBytes
   132  			if rc > highExp || rc < lowExp {
   133  				return errors.Errorf("expected range count for table between %d and %d, found %d",
   134  					lowExp, highExp, rc)
   135  			}
   136  			return nil
   137  		})
   138  		m.Wait()
   139  	}
   140  
   141  	const rows = int(1e7)
   142  	const numNodes = 9
   143  
   144  	for _, inTxn := range []bool{true, false} {
   145  		inTxn := inTxn
   146  		r.Add(testSpec{
   147  			Name:    fmt.Sprintf("copy/bank/rows=%d,nodes=%d,txn=%t", rows, numNodes, inTxn),
   148  			Owner:   OwnerKV,
   149  			Cluster: makeClusterSpec(numNodes),
   150  			Run: func(ctx context.Context, t *test, c *cluster) {
   151  				runCopy(ctx, t, c, rows, inTxn)
   152  			},
   153  		})
   154  	}
   155  }
   156  
   157  func getDefaultRangeSize(
   158  	ctx context.Context, db *gosql.DB,
   159  ) (rangeMinBytes, rangeMaxBytes int, err error) {
   160  	err = db.QueryRow(`SELECT
   161      regexp_extract(regexp_extract(raw_config_sql, e'range_min_bytes = \\d+'), e'\\d+')::INT8
   162          AS range_min_bytes,
   163      regexp_extract(regexp_extract(raw_config_sql, e'range_max_bytes = \\d+'), e'\\d+')::INT8
   164          AS range_max_bytes
   165  FROM
   166      [SHOW ZONE CONFIGURATION FOR RANGE default];`).Scan(&rangeMinBytes, &rangeMaxBytes)
   167  	// Older cluster versions do not contain this column. Use the old default.
   168  	if err != nil && strings.Contains(err.Error(), `column "raw_config_sql" does not exist`) {
   169  		rangeMinBytes, rangeMaxBytes, err = 32<<20 /* 32MB */, 64<<20 /* 64MB */, nil
   170  	}
   171  	return rangeMinBytes, rangeMaxBytes, err
   172  }