github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/split.go

// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"context"
	gosql "database/sql"
	"fmt"
	"math"
	"strings"
	"time"

	"github.com/cockroachdb/cockroach/pkg/util/retry"
	"github.com/cockroachdb/errors"
	humanize "github.com/dustin/go-humanize"
	_ "github.com/lib/pq"
)

type splitParams struct {
	maxSize       int           // The maximum size a range is allowed to be.
	concurrency   int           // Number of concurrent workers.
	readPercent   int           // % of queries that are read queries.
	spanPercent   int           // % of queries that query all the rows.
	qpsThreshold  int           // QPS threshold for load based splitting.
	minimumRanges int           // Minimum number of ranges expected at the end.
	maximumRanges int           // Maximum number of ranges expected at the end.
	sequential    bool          // Sequential distribution.
	waitDuration  time.Duration // Duration the workload should run for.
}

func registerLoadSplits(r *testRegistry) {
	const numNodes = 3

	r.Add(testSpec{
		Name:       fmt.Sprintf("splits/load/uniform/nodes=%d", numNodes),
		Owner:      OwnerKV,
		MinVersion: "v19.1.0",
		Cluster:    makeClusterSpec(numNodes),
		Run: func(ctx context.Context, t *test, c *cluster) {
			// This number was determined experimentally. Often, but not always,
			// more splits will happen.
			expSplits := 10
			runLoadSplits(ctx, t, c, splitParams{
				maxSize:       10 << 30,      // 10 GB
				concurrency:   64,            // 64 concurrent workers
				readPercent:   95,            // 95% reads
				qpsThreshold:  100,           // 100 queries per second
				minimumRanges: expSplits + 1, // Expected splits + 1
				maximumRanges: math.MaxInt32, // We're only checking for minimum.
				// The calculation of the wait duration is as follows:
				//
				// Each split requires at least `split.RecordDurationThreshold` seconds to record
				// keys in a range. So in the kv default distribution, if we make the assumption
				// that all load will be uniform across the splits AND that the QPS threshold is
				// still exceeded for all the splits as the number of splits we're targeting is
				// "low" - we expect that for `expSplits` splits, it will require:
				//
				//   Minimum Duration For a Split * log2(expSplits) seconds
				//
				// We also add an extra expSplits second(s) for the overhead of creating each one.
				// If the number of expected splits is increased, this calculation will hold
				// for uniform distribution as long as the QPS threshold is continually exceeded
				// even with the expected number of splits. This puts a bound on how high the
				// `expSplits` value can go.
				// Add 1s for each split for the overhead of the splitting process.
				// waitDuration: time.Duration(int64(math.Ceil(math.Ceil(math.Log2(float64(expSplits)))*
				// 	float64((split.RecordDurationThreshold/time.Second))))+int64(expSplits)) * time.Second,
				//
				// NB: the above has proven flaky. Just use a fixed duration
				// that we think should be good enough. For example, for five
				// expected splits we get ~35s, for ten ~50s, and for 20 ~1m10s.
				// These are all pretty short, so any random abnormality will mess
				// things up.
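				//
				// As a worked example of that formula (illustrative arithmetic only,
				// assuming split.RecordDurationThreshold is roughly 10s): for
				// expSplits = 10, ceil(log2(10)) = 4, so the detection phase needs
				// about 4 * 10s = 40s, plus ~10s of per-split overhead, which is the
				// ~50s figure quoted above. The fixed 10 minutes below is deliberately
				// far above any of these estimates.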
				waitDuration: 10 * time.Minute,
			})
		},
	})
	r.Add(testSpec{
		Name:       fmt.Sprintf("splits/load/sequential/nodes=%d", numNodes),
		Owner:      OwnerKV,
		MinVersion: "v19.1.0",
		Cluster:    makeClusterSpec(numNodes),
		Run: func(ctx context.Context, t *test, c *cluster) {
			runLoadSplits(ctx, t, c, splitParams{
				maxSize:       10 << 30, // 10 GB
				concurrency:   64,       // 64 concurrent workers
				readPercent:   0,        // 0% reads
				qpsThreshold:  100,      // 100 queries per second
				minimumRanges: 1,        // We expect no splits so require only 1 range.
				// In practice, however, we sometimes see a split or two early on,
				// presumably when the sampling gets lucky.
				maximumRanges: 3,
				sequential:    true,
				waitDuration:  60 * time.Second,
			})
		},
	})
	r.Add(testSpec{
		Name:       fmt.Sprintf("splits/load/spanning/nodes=%d", numNodes),
		Owner:      OwnerKV,
		MinVersion: "v19.1.0",
		Cluster:    makeClusterSpec(numNodes),
		Run: func(ctx context.Context, t *test, c *cluster) {
			runLoadSplits(ctx, t, c, splitParams{
				maxSize:       10 << 30, // 10 GB
				concurrency:   64,       // 64 concurrent workers
				readPercent:   0,        // 0% reads
				spanPercent:   95,       // 95% spanning queries
				qpsThreshold:  100,      // 100 queries per second
				minimumRanges: 1,        // We expect no splits so require only 1 range.
				maximumRanges: 1,        // We expect no splits so require only 1 range.
				waitDuration:  60 * time.Second,
			})
		},
	})
}

// runLoadSplits tests the behavior of load based splitting under the
// conditions defined by the params. It checks whether a certain number of
// splits occurs in different workload scenarios.
func runLoadSplits(ctx context.Context, t *test, c *cluster, params splitParams) {
	c.Put(ctx, cockroach, "./cockroach", c.All())
	c.Put(ctx, workload, "./workload", c.Node(1))
	c.Start(ctx, t, c.All())

	m := newMonitor(ctx, c, c.All())
	m.Go(func(ctx context.Context) error {
		db := c.Conn(ctx, 1)
		defer db.Close()

		t.Status("disable load based splitting")
		if err := disableLoadBasedSplitting(ctx, db); err != nil {
			return err
		}

		t.Status("increasing range_max_bytes")
		minBytes := 16 << 20 // 16 MB
		setRangeMaxBytes := func(maxBytes int) {
			stmtZone := fmt.Sprintf(
				"ALTER RANGE default CONFIGURE ZONE USING range_max_bytes = %d, range_min_bytes = %d",
				maxBytes, minBytes)
			if _, err := db.Exec(stmtZone); err != nil {
				t.Fatalf("failed to set range_max_bytes: %v", err)
			}
		}
		// Set the range size to a huge size so we don't get splits that occur
		// as a result of size thresholds. The kv table will thus be in a single
		// range unless split by load.
		setRangeMaxBytes(params.maxSize)

		t.Status("running uniform kv workload")
		c.Run(ctx, c.Node(1), fmt.Sprintf("./workload init kv {pgurl:1-%d}", c.spec.NodeCount))

		t.Status("checking initial range count")
		rangeCount := func() int {
			var ranges int
			const q = "SELECT count(*) FROM [SHOW RANGES FROM TABLE kv.kv]"
			if err := db.QueryRow(q).Scan(&ranges); err != nil {
				// TODO(rafi): Remove experimental_ranges query once we stop testing
				// 19.1 or earlier.
				if strings.Contains(err.Error(), "syntax error at or near \"ranges\"") {
					err = db.QueryRow("SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE kv.kv]").Scan(&ranges)
				}
				if err != nil {
					t.Fatalf("failed to get range count: %v", err)
				}
			}
			return ranges
		}
		if rc := rangeCount(); rc != 1 {
			return errors.Errorf("kv.kv table split over multiple ranges")
		}

		// Set the QPS threshold for load based splitting before turning it on.
		if _, err := db.ExecContext(ctx, fmt.Sprintf("SET CLUSTER SETTING kv.range_split.load_qps_threshold = %d",
			params.qpsThreshold)); err != nil {
			return err
		}
		t.Status("enable load based splitting")
		if _, err := db.ExecContext(ctx, `SET CLUSTER SETTING kv.range_split.by_load_enabled = true`); err != nil {
			return err
		}
		var extraFlags string
		if params.sequential {
			extraFlags += "--sequential"
		}
		c.Run(ctx, c.Node(1), fmt.Sprintf("./workload run kv "+
			"--init --concurrency=%d --read-percent=%d --span-percent=%d %s {pgurl:1-%d} --duration='%s'",
			params.concurrency, params.readPercent, params.spanPercent, extraFlags, c.spec.NodeCount,
			params.waitDuration.String()))

		t.Status("waiting for splits")
		if rc := rangeCount(); rc < params.minimumRanges || rc > params.maximumRanges {
			return errors.Errorf("kv.kv has %d ranges, expected between %d and %d ranges",
				rc, params.minimumRanges, params.maximumRanges)
		}
		return nil
	})
	m.Wait()
}

func registerLargeRange(r *testRegistry) {
	const size = 10 << 30 // 10 GB
	// TODO(nvanbenschoten): Snapshots currently hold the entirety of a range in
	// memory on the receiving side. This is dangerous when we grow a range to
	// such large sizes because it means that a snapshot could trigger an OOM.
	// Because of this, we stick to 3 nodes to avoid rebalancing-related
	// snapshots. Once #16954 is addressed, we can increase this count so that
	// splitting the single large range also triggers rebalancing.
	const numNodes = 3

	r.Add(testSpec{
		Name:    fmt.Sprintf("splits/largerange/size=%s,nodes=%d", bytesStr(size), numNodes),
		Owner:   OwnerKV,
		Cluster: makeClusterSpec(numNodes),
		Timeout: 5 * time.Hour,
		Run: func(ctx context.Context, t *test, c *cluster) {
			runLargeRangeSplits(ctx, t, c, size)
		},
	})
}

func bytesStr(size uint64) string {
	return strings.Replace(humanize.IBytes(size), " ", "", -1)
}

// This test generates a large Bank table all within a single range. It does
// so by setting the max range size to a huge number before populating the
// table. It then drops the range size back down to normal and watches as
// the large range splits apart.
func runLargeRangeSplits(ctx context.Context, t *test, c *cluster, size int) {
	// payload is the size of the payload column for each row in the Bank
	// table.
	const payload = 100
	// rowOverheadEstimate is an estimate of the overhead of a single
	// row in the Bank table, not including the size of the payload
	// itself. This overhead includes the size of the other two columns
	// in the table along with the size of each row's associated KV key.
	const rowOverheadEstimate = 160
	const rowEstimate = rowOverheadEstimate + payload
	// rows is the number of rows we'll need to insert into the bank table
	// to produce a range of roughly the right size.
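	// As an illustrative rough estimate only: with the 10 GB size used by
	// registerLargeRange and rowEstimate = 260 bytes, this works out to
	// (10 << 30) / 260, i.e. roughly 41 million rows.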
	rows := size / rowEstimate

	c.Put(ctx, cockroach, "./cockroach", c.All())
	c.Put(ctx, workload, "./workload", c.All())
	c.Start(ctx, t, c.All())

	m := newMonitor(ctx, c, c.All())
	m.Go(func(ctx context.Context) error {
		db := c.Conn(ctx, 1)
		defer db.Close()

		// We don't want load based splitting to split the range before it's
		// ready to be split.
		t.Status("disable load based splitting")
		if err := disableLoadBasedSplitting(ctx, db); err != nil {
			return err
		}

		t.Status("increasing range_max_bytes")
		minBytes := 16 << 20 // 16 MB
		setRangeMaxBytes := func(maxBytes int) {
			stmtZone := fmt.Sprintf(
				"ALTER RANGE default CONFIGURE ZONE USING range_max_bytes = %d, range_min_bytes = %d",
				maxBytes, minBytes)
			_, err := db.Exec(stmtZone)
			if err != nil && strings.Contains(err.Error(), "syntax error") {
				// Pre-2.1 was EXPERIMENTAL.
				// TODO(knz): Remove this in 2.2.
				stmtZone = fmt.Sprintf("ALTER RANGE default EXPERIMENTAL CONFIGURE ZONE '\nrange_max_bytes: %d\n'", maxBytes)
				_, err = db.Exec(stmtZone)
			}
			if err != nil {
				t.Fatalf("failed to set range_max_bytes: %v", err)
			}
		}
		// Set the range size to double what we expect the size of the
		// bank table to be. This should result in the table fitting
		// inside a single range.
		setRangeMaxBytes(2 * size)

		t.Status("populating bank table")
		// NB: workload init does not wait for upreplication after creating the
		// schema but before populating it. This is ok because upreplication
		// occurs much faster than we can actually create a large range.
		c.Run(ctx, c.Node(1), fmt.Sprintf("./workload init bank "+
			"--rows=%d --payload-bytes=%d --ranges=1 {pgurl:1-%d}", rows, payload, c.spec.NodeCount))

		t.Status("checking for single range")
		rangeCount := func() int {
			var ranges int
			const q = "SELECT count(*) FROM [SHOW RANGES FROM TABLE bank.bank]"
			if err := db.QueryRow(q).Scan(&ranges); err != nil {
				// TODO(rafi): Remove experimental_ranges query once we stop testing
				// 19.1 or earlier.
				if strings.Contains(err.Error(), "syntax error at or near \"ranges\"") {
					err = db.QueryRow("SELECT count(*) FROM [SHOW EXPERIMENTAL_RANGES FROM TABLE bank.bank]").Scan(&ranges)
				}
				if err != nil {
					t.Fatalf("failed to get range count: %v", err)
				}
			}
			return ranges
		}
		if rc := rangeCount(); rc != 1 {
			return errors.Errorf("bank table split over multiple ranges")
		}

		t.Status("decreasing range_max_bytes")
		rangeSize := 64 << 20 // 64 MB
		setRangeMaxBytes(rangeSize)

		// e.g. (10 << 30) / (64 << 20) = 160 expected ranges, so 159 splits.
		expRC := size / rangeSize
		expSplits := expRC - 1
		t.Status(fmt.Sprintf("waiting for %d splits", expSplits))
		waitDuration := time.Duration(expSplits) * time.Second // 1 second per split
		return retry.ForDuration(waitDuration, func() error {
			if rc := rangeCount(); rc < expRC {
				return errors.Errorf("bank table split over %d ranges, expected at least %d",
					rc, expRC)
			}
			return nil
		})
	})
	m.Wait()
}

func disableLoadBasedSplitting(ctx context.Context, db *gosql.DB) error {
	_, err := db.ExecContext(ctx, `SET CLUSTER SETTING kv.range_split.by_load_enabled = false`)
	if err != nil {
		// If the cluster setting doesn't exist, the cluster version is < 2.2.0 and
		// so load based splitting doesn't apply anyway and the error should be ignored.
		if !strings.Contains(err.Error(), "unknown cluster setting") {
			return err
		}
	}
	return nil
}
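
// expectedSplitWaitDuration is an illustrative sketch of the commented-out
// wait-duration formula in registerLoadSplits above, kept only as
// documentation of the arithmetic; it is not used by any of the tests.
// recordDurationThreshold is taken as a parameter because the real constant
// (split.RecordDurationThreshold) lives in the split package and is not
// imported here.
func expectedSplitWaitDuration(expSplits int, recordDurationThreshold time.Duration) time.Duration {
	// Detection phase: each "generation" of splits needs roughly one
	// recordDurationThreshold to be observed, and reaching expSplits splits
	// takes about log2(expSplits) generations under uniform load.
	detection := time.Duration(math.Ceil(math.Log2(float64(expSplits)))) * recordDurationThreshold
	// Overhead: roughly one extra second per split for the splitting process
	// itself.
	overhead := time.Duration(expSplits) * time.Second
	return detection + overhead
}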