github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/split.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"context"
	gosql "database/sql"
	"fmt"
	"math"
	"strings"
	"time"

	"github.com/cockroachdb/cockroach/pkg/util/retry"
	"github.com/cockroachdb/errors"
	humanize "github.com/dustin/go-humanize"
	_ "github.com/lib/pq"
)

type splitParams struct {
	maxSize       int           // The maximum size a range is allowed to be.
	concurrency   int           // Number of concurrent workers.
	readPercent   int           // % of queries that are read queries.
	spanPercent   int           // % of queries that query all the rows.
	qpsThreshold  int           // QPS threshold for load-based splitting.
	minimumRanges int           // Minimum number of ranges expected at the end.
	maximumRanges int           // Maximum number of ranges expected at the end.
	sequential    bool          // Sequential distribution.
	waitDuration  time.Duration // Duration the workload should run for.
}

func registerLoadSplits(r *testRegistry) {
	const numNodes = 3

	r.Add(testSpec{
		Name:       fmt.Sprintf("splits/load/uniform/nodes=%d", numNodes),
		Owner:      OwnerKV,
		MinVersion: "v19.1.0",
		Cluster:    makeClusterSpec(numNodes),
		Run: func(ctx context.Context, t *test, c *cluster) {
			// This number was determined experimentally. Often, but not always,
			// more splits will happen.
			expSplits := 10
			runLoadSplits(ctx, t, c, splitParams{
				maxSize:       10 << 30,      // 10 GB
				concurrency:   64,            // 64 concurrent workers
				readPercent:   95,            // 95% reads
				qpsThreshold:  100,           // 100 queries per second
				minimumRanges: expSplits + 1, // Expected Splits + 1
				maximumRanges: math.MaxInt32, // We're only checking for minimum.
				// The calculation of the wait duration is as follows:
				//
				// Each split requires at least `split.RecordDurationThreshold` seconds to record
				// keys in a range. So in the kv default distribution, if we assume that all load
				// will be uniform across the splits AND that the QPS threshold is still exceeded
				// for all the splits (since the number of splits we're targeting is "low"), we
				// expect that `expSplits` splits will require:
				//
				// Minimum Duration For a Split * log2(expSplits) seconds
				//
				// We also add an extra second per split for the overhead of the splitting
				// process itself. If the number of expected splits is increased, this calculation
				// will hold for a uniform distribution as long as the QPS threshold is continually
				// exceeded even with the expected number of splits. This puts a bound on how high
				// the `expSplits` value can go.
				// waitDuration: time.Duration(int64(math.Ceil(math.Ceil(math.Log2(float64(expSplits)))*
				// 	float64((split.RecordDurationThreshold/time.Second))))+int64(expSplits)) * time.Second,
				//
				// NB: the above has proven flaky. Just use a fixed duration that we think should
				// be good enough. For example, for five expected splits the formula gives ~35s,
				// for ten ~50s, and for twenty ~1m10s. These are all pretty short, so any random
				// abnormality would mess things up; hence the much more generous duration below.
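				//
				// As a sanity check on those numbers (they imply split.RecordDurationThreshold
				// is 10s): for expSplits = 10 the formula works out to
				// ceil(log2(10))*10s + 10*1s = 4*10s + 10s = 50s, i.e. the ~50s quoted above,
				// which the 10 minute wait below covers with a wide margin.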
				waitDuration: 10 * time.Minute,
			})
		},
	})
	r.Add(testSpec{
		Name:       fmt.Sprintf("splits/load/sequential/nodes=%d", numNodes),
		Owner:      OwnerKV,
		MinVersion: "v19.1.0",
		Cluster:    makeClusterSpec(numNodes),
		Run: func(ctx context.Context, t *test, c *cluster) {
			runLoadSplits(ctx, t, c, splitParams{
				maxSize:       10 << 30, // 10 GB
				concurrency:   64,       // 64 concurrent workers
				readPercent:   0,        // 0% reads
				qpsThreshold:  100,      // 100 queries per second
				// We expect no splits so require only 1 range. However, in practice we
				// sometimes see a split or two early on, presumably when the sampling
				// gets lucky.
				minimumRanges: 1,
				maximumRanges: 3,
				sequential:    true,
				waitDuration:  60 * time.Second,
			})
		},
	})
	r.Add(testSpec{
		Name:       fmt.Sprintf("splits/load/spanning/nodes=%d", numNodes),
		Owner:      OwnerKV,
		MinVersion: "v19.1.0",
		Cluster:    makeClusterSpec(numNodes),
		Run: func(ctx context.Context, t *test, c *cluster) {
			runLoadSplits(ctx, t, c, splitParams{
				maxSize:       10 << 30, // 10 GB
				concurrency:   64,       // 64 concurrent workers
				readPercent:   0,        // 0% reads
				spanPercent:   95,       // 95% spanning queries
				qpsThreshold:  100,      // 100 queries per second
				minimumRanges: 1,        // We expect no splits so require only 1 range.
				maximumRanges: 1,        // We expect no splits so require only 1 range.
				waitDuration:  60 * time.Second,
			})
		},
	})
}

// runLoadSplits tests the behavior of load-based splitting under conditions
// defined by params. It checks whether a certain number of splits occurs in
// different workload scenarios.
func runLoadSplits(ctx context.Context, t *test, c *cluster, params splitParams) {
	c.Put(ctx, cockroach, "./cockroach", c.All())
	c.Put(ctx, workload, "./workload", c.Node(1))
	c.Start(ctx, t, c.All())

	m := newMonitor(ctx, c, c.All())
	m.Go(func(ctx context.Context) error {
		db := c.Conn(ctx, 1)
		defer db.Close()

		t.Status("disable load based splitting")
		if err := disableLoadBasedSplitting(ctx, db); err != nil {
			return err
		}

		t.Status("increasing range_max_bytes")
		minBytes := 16 << 20 // 16 MB
		setRangeMaxBytes := func(maxBytes int) {
			stmtZone := fmt.Sprintf(
				"ALTER RANGE default CONFIGURE ZONE USING range_max_bytes = %d, range_min_bytes = %d",
				maxBytes, minBytes)
			if _, err := db.Exec(stmtZone); err != nil {
				t.Fatalf("failed to set range_max_bytes: %v", err)
			}
		}
		// Set the range size to a huge size so we don't get splits that occur
		// as a result of size thresholds. The kv table will thus be in a single
		// range unless split by load.
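		//
		// For illustration, with the uniform test's maxSize of 10 << 30 the statement
		// built above comes out roughly as:
		//
		//   ALTER RANGE default CONFIGURE ZONE USING
		//     range_max_bytes = 10737418240, range_min_bytes = 16777216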
		setRangeMaxBytes(params.maxSize)

		t.Status("running uniform kv workload")
		c.Run(ctx, c.Node(1), fmt.Sprintf("./workload init kv {pgurl:1-%d}", c.spec.NodeCount))

		t.Status("checking initial range count")
		rangeCount := func() int {
			var ranges int
			const q = "SELECT count(*) FROM [SHOW RANGES FROM TABLE kv.kv]"
			if err := db.QueryRow(q).Scan(&ranges); err != nil {
				// TODO(rafi): Remove experimental_ranges query once we stop testing
				// 19.1 or earlier.
				if strings.Contains(err.Error(), "syntax error at or near \"ranges\"") {
					err = db.QueryRow("SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE kv.kv]").Scan(&ranges)
				}
				if err != nil {
					t.Fatalf("failed to get range count: %v", err)
				}
			}
			return ranges
		}
		if rc := rangeCount(); rc != 1 {
			return errors.Errorf("kv.kv table split over multiple ranges")
		}

		// Set the QPS threshold for load based splitting before turning it on.
		if _, err := db.ExecContext(ctx, fmt.Sprintf("SET CLUSTER SETTING kv.range_split.load_qps_threshold = %d",
			params.qpsThreshold)); err != nil {
			return err
		}
		t.Status("enable load based splitting")
		if _, err := db.ExecContext(ctx, `SET CLUSTER SETTING kv.range_split.by_load_enabled = true`); err != nil {
			return err
		}
		var extraFlags string
		if params.sequential {
			extraFlags += "--sequential"
		}
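		// For illustration, the uniform test above (64 workers, 95% reads, no spanning
		// queries, 10m wait) on a 3-node cluster expands the command below to roughly:
		//
		//   ./workload run kv --init --concurrency=64 --read-percent=95 \
		//     --span-percent=0  {pgurl:1-3} --duration='10m0s'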
		c.Run(ctx, c.Node(1), fmt.Sprintf("./workload run kv "+
			"--init --concurrency=%d --read-percent=%d --span-percent=%d %s {pgurl:1-%d} --duration='%s'",
			params.concurrency, params.readPercent, params.spanPercent, extraFlags, c.spec.NodeCount,
			params.waitDuration.String()))

		t.Status("waiting for splits")
		if rc := rangeCount(); rc < params.minimumRanges || rc > params.maximumRanges {
			return errors.Errorf("kv.kv has %d ranges, expected between %d and %d",
				rc, params.minimumRanges, params.maximumRanges)
		}
		return nil
	})
	m.Wait()
}

func registerLargeRange(r *testRegistry) {
	const size = 10 << 30 // 10 GB
	// TODO(nvanbenschoten): Snapshots currently hold the entirety of a range in
	// memory on the receiving side. This is dangerous when we grow a range to
	// such large sizes because it means that a snapshot could trigger an OOM.
	// Because of this, we stick to 3 nodes to avoid rebalancing-related
	// snapshots. Once #16954 is addressed, we can increase this count so that
	// splitting the single large range also triggers rebalancing.
	const numNodes = 3

	r.Add(testSpec{
		Name:    fmt.Sprintf("splits/largerange/size=%s,nodes=%d", bytesStr(size), numNodes),
		Owner:   OwnerKV,
		Cluster: makeClusterSpec(numNodes),
		Timeout: 5 * time.Hour,
		Run: func(ctx context.Context, t *test, c *cluster) {
			runLargeRangeSplits(ctx, t, c, size)
		},
	})
}

func bytesStr(size uint64) string {
	return strings.Replace(humanize.IBytes(size), " ", "", -1)
}

// This test generates a large Bank table all within a single range. It does
// so by setting the max range size to a huge number before populating the
// table. It then drops the range size back down to normal and watches as
// the large range splits apart.
func runLargeRangeSplits(ctx context.Context, t *test, c *cluster, size int) {
	// payload is the size of the payload column for each row in the Bank
	// table.
	const payload = 100
	// rowOverheadEstimate is an estimate of the overhead of a single
	// row in the Bank table, not including the size of the payload
	// itself. This overhead includes the size of the other two columns
	// in the table along with the size of each row's associated KV key.
	const rowOverheadEstimate = 160
	const rowEstimate = rowOverheadEstimate + payload
	// rows is the number of rows we'll need to insert into the bank table
	// to produce a range of roughly the right size.
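	// For the default 10 GiB test size this works out to 10<<30 / 260, i.e.
	// roughly 41.3 million rows.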
	rows := size / rowEstimate

	c.Put(ctx, cockroach, "./cockroach", c.All())
	c.Put(ctx, workload, "./workload", c.All())
	c.Start(ctx, t, c.All())

	m := newMonitor(ctx, c, c.All())
	m.Go(func(ctx context.Context) error {
		db := c.Conn(ctx, 1)
		defer db.Close()

		// We don't want load-based splitting to split the range before it's
		// ready to be split.
		t.Status("disable load based splitting")
		if err := disableLoadBasedSplitting(ctx, db); err != nil {
			return err
		}

		t.Status("increasing range_max_bytes")
		minBytes := 16 << 20 // 16 MB
		setRangeMaxBytes := func(maxBytes int) {
			stmtZone := fmt.Sprintf(
				"ALTER RANGE default CONFIGURE ZONE USING range_max_bytes = %d, range_min_bytes = %d",
				maxBytes, minBytes)
			_, err := db.Exec(stmtZone)
			if err != nil && strings.Contains(err.Error(), "syntax error") {
				// Pre-2.1 was EXPERIMENTAL.
				// TODO(knz): Remove this in 2.2.
				stmtZone = fmt.Sprintf("ALTER RANGE default EXPERIMENTAL CONFIGURE ZONE '\nrange_max_bytes: %d\n'", maxBytes)
				_, err = db.Exec(stmtZone)
			}
			if err != nil {
				t.Fatalf("failed to set range_max_bytes: %v", err)
			}
		}
		// Set the range size to double what we expect the size of the
		// bank table to be. This should result in the table fitting
		// inside a single range.
		setRangeMaxBytes(2 * size)

		t.Status("populating bank table")
		// NB: workload init does not wait for upreplication between creating the
		// schema and populating it. This is ok because upreplication occurs much
		// faster than we can actually create a large range.
		c.Run(ctx, c.Node(1), fmt.Sprintf("./workload init bank "+
			"--rows=%d --payload-bytes=%d --ranges=1 {pgurl:1-%d}", rows, payload, c.spec.NodeCount))

		t.Status("checking for single range")
		rangeCount := func() int {
			var ranges int
			const q = "SELECT count(*) FROM [SHOW RANGES FROM TABLE bank.bank]"
			if err := db.QueryRow(q).Scan(&ranges); err != nil {
				// TODO(rafi): Remove experimental_ranges query once we stop testing
				// 19.1 or earlier.
				if strings.Contains(err.Error(), "syntax error at or near \"ranges\"") {
					err = db.QueryRow("SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE bank.bank]").Scan(&ranges)
				}
				if err != nil {
					t.Fatalf("failed to get range count: %v", err)
				}
			}
			return ranges
		}
		if rc := rangeCount(); rc != 1 {
			return errors.Errorf("bank table split over multiple ranges")
		}

		t.Status("decreasing range_max_bytes")
		rangeSize := 64 << 20 // 64MB
		setRangeMaxBytes(rangeSize)

		expRC := size / rangeSize
		expSplits := expRC - 1
		t.Status(fmt.Sprintf("waiting for %d splits", expSplits))
		waitDuration := time.Duration(expSplits) * time.Second // 1 second per split
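		// For the default parameters (10 GiB table, 64 MiB ranges) this works out to
		// expRC = 160 and expSplits = 159, i.e. a wait of roughly 2m39s for the range
		// count to reach at least expRC.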
		return retry.ForDuration(waitDuration, func() error {
			if rc := rangeCount(); rc < expRC {
				return errors.Errorf("bank table split over %d ranges, expected at least %d",
					rc, expRC)
			}
			return nil
		})
	})
	m.Wait()
}

func disableLoadBasedSplitting(ctx context.Context, db *gosql.DB) error {
	_, err := db.ExecContext(ctx, `SET CLUSTER SETTING kv.range_split.by_load_enabled = false`)
	if err != nil {
		// If the cluster setting doesn't exist, the cluster version is < 2.2.0, so
		// load-based splitting doesn't apply anyway and the error can be ignored.
		if !strings.Contains(err.Error(), "unknown cluster setting") {
			return err
		}
	}
	return nil
}