github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/allocsim/main.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"math"
	"os"
	"os/signal"
	"runtime"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/cockroachdb/cockroach/pkg/acceptance/localcluster"
	"github.com/cockroachdb/cockroach/pkg/acceptance/localcluster/tc"
	"github.com/cockroachdb/cockroach/pkg/cli"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

var workers = flag.Int("w", 1, "number of workers; the i'th worker talks to node i%numNodes")
var numNodes = flag.Int("n", 4, "number of nodes")
var duration = flag.Duration("duration", math.MaxInt64, "how long to run the simulation for")
var blockSize = flag.Int("b", 1000, "block size")
var configFile = flag.String("f", "", "config file that specifies an allocsim workload (overrides -n)")

// Configuration provides a way to configure allocsim via a JSON file.
// TODO(a-robinson): Consider moving all the above options into the config file.
type Configuration struct {
	NumWorkers int        `json:"NumWorkers"`
	Localities []Locality `json:"Localities"`
}

// Locality defines the properties of a single locality as part of a Configuration.
type Locality struct {
	Name              string `json:"Name"`
	LocalityStr       string `json:"LocalityStr"`
	NumNodes          int    `json:"NumNodes"`
	NumWorkers        int    `json:"NumWorkers"`
	OutgoingLatencies []*struct {
		Name    string       `json:"Name"`
		Latency jsonDuration `json:"Latency"`
	} `json:"OutgoingLatencies"`
}

type jsonDuration time.Duration

func (j *jsonDuration) UnmarshalJSON(b []byte) error {
	var s string
	if err := json.Unmarshal(b, &s); err != nil {
		return err
	}
	dur, err := time.ParseDuration(s)
	if err != nil {
		return err
	}
	*j = jsonDuration(dur)
	return nil
}

func loadConfig(file string) (Configuration, error) {
	fileHandle, err := os.Open(file)
	if err != nil {
		return Configuration{}, errors.Wrapf(err, "failed to open config file %q", file)
	}
	defer fileHandle.Close()

	var config Configuration
	jsonParser := json.NewDecoder(fileHandle)
	if err := jsonParser.Decode(&config); err != nil {
		return Configuration{}, errors.Wrapf(err, "failed to decode %q as json", file)
	}

	*numNodes = 0
	*workers = config.NumWorkers
	for _, locality := range config.Localities {
		*numNodes += locality.NumNodes
		*workers += locality.NumWorkers
	}
	return config, nil
}
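// The following is a hypothetical example of a config file (not taken from
// the repository) in the shape that loadConfig decodes into Configuration
// and Locality above. The locality names, node counts, and latency values
// are made up; Latency strings are parsed by time.ParseDuration via
// jsonDuration:
//
//	{
//	  "NumWorkers": 16,
//	  "Localities": [
//	    {
//	      "Name": "us-east",
//	      "NumNodes": 3,
//	      "NumWorkers": 8,
//	      "OutgoingLatencies": [{"Name": "us-west", "Latency": "60ms"}]
//	    },
//	    {
//	      "Name": "us-west",
//	      "NumNodes": 3,
//	      "NumWorkers": 8,
//	      "OutgoingLatencies": [{"Name": "us-east", "Latency": "60ms"}]
//	    }
//	  ]
//	}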
// allocSim allows investigation of allocation/rebalancing heuristics. A
// pool of workers generates block_writer-style load where the i'th worker
// talks to node i%numNodes. Every second a monitor goroutine outputs status
// such as the per-node replica and leaseholder counts.
//
// TODO(peter/a-robinson): Allow configuration of zone-config constraints.
type allocSim struct {
	*localcluster.Cluster
	stats struct {
		ops               uint64
		totalLatencyNanos uint64
		errors            uint64
	}
	ranges struct {
		syncutil.Mutex
		stats allocStats
	}
	localities []Locality
}

type allocStats struct {
	count          int
	replicas       []int
	leases         []int
	replicaAdds    []int
	leaseTransfers []int
}

func newAllocSim(c *localcluster.Cluster) *allocSim {
	return &allocSim{
		Cluster: c,
	}
}

func (a *allocSim) run(workers int) {
	a.setup()
	for i := 0; i < workers; i++ {
		go a.roundRobinWorker(i, workers)
	}
	go a.rangeStats(time.Second)
	a.monitor(time.Second)
}

func (a *allocSim) runWithConfig(config Configuration) {
	a.setup()

	numWorkers := config.NumWorkers
	for _, locality := range config.Localities {
		numWorkers += locality.NumWorkers
	}

	firstNodeInLocality := 0
	for _, locality := range config.Localities {
		for i := 0; i < locality.NumWorkers; i++ {
			node := firstNodeInLocality + (i % locality.NumNodes)
			startNum := firstNodeInLocality + i
			go a.worker(node, startNum, numWorkers)
		}
		firstNodeInLocality += locality.NumNodes
	}
	for i := 0; i < config.NumWorkers; i++ {
		go a.roundRobinWorker(firstNodeInLocality+i, numWorkers)
	}

	go a.rangeStats(time.Second)
	a.monitor(time.Second)
}

func (a *allocSim) setup() {
	db := a.Nodes[0].DB()
	if _, err := db.Exec("CREATE DATABASE IF NOT EXISTS allocsim"); err != nil {
		log.Fatalf(context.Background(), "%v", err)
	}

	blocks := `
	CREATE TABLE IF NOT EXISTS blocks (
	  id INT NOT NULL,
	  num INT NOT NULL,
	  data BYTES NOT NULL,
	  PRIMARY KEY (id, num)
	)
	`
	if _, err := db.Exec(blocks); err != nil {
		log.Fatalf(context.Background(), "%v", err)
	}
}

func (a *allocSim) maybeLogError(err error) {
	if localcluster.IsUnavailableError(err) {
		return
	}
	log.Errorf(context.Background(), "%v", err)
	atomic.AddUint64(&a.stats.errors, 1)
}

const insertStmt = `INSERT INTO allocsim.blocks (id, num, data) VALUES ($1, $2, repeat('a', $3)::bytes)`

func (a *allocSim) worker(dbIdx, startNum, workers int) {
	r, _ := randutil.NewPseudoRand()
	db := a.Nodes[dbIdx%len(a.Nodes)].DB()
	for num := startNum; true; num += workers {
		now := timeutil.Now()
		if _, err := db.Exec(insertStmt, r.Int63(), num, *blockSize); err != nil {
			a.maybeLogError(err)
		} else {
			atomic.AddUint64(&a.stats.ops, 1)
			atomic.AddUint64(&a.stats.totalLatencyNanos, uint64(timeutil.Since(now).Nanoseconds()))
		}
	}
}

func (a *allocSim) roundRobinWorker(startNum, workers int) {
	r, _ := randutil.NewPseudoRand()
	for i := 0; ; i++ {
		now := timeutil.Now()
		db := a.Nodes[i%len(a.Nodes)].DB()
		if db == nil {
			continue // nodes are shutting down
		}
		if _, err := db.Exec(insertStmt, r.Int63(), startNum+i*workers, *blockSize); err != nil {
			a.maybeLogError(err)
		} else {
			atomic.AddUint64(&a.stats.ops, 1)
			atomic.AddUint64(&a.stats.totalLatencyNanos, uint64(timeutil.Since(now).Nanoseconds()))
		}
	}
}
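// For orientation: the Metrics response parsed by rangeInfo is a JSON
// object whose "stores" field maps store IDs to flat metric maps. An
// abridged, illustrative shape is sketched here (the store ID and values
// are made up; the metric names are the ones read below):
//
//	{
//	  "stores": {
//	    "1": {
//	      "replicas": 24,
//	      "replicas.leaseholders": 8,
//	      "range.adds": 3,
//	      "leases.transfers.success": 1
//	    }
//	  }
//	}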
func (a *allocSim) rangeInfo() allocStats {
	stats := allocStats{
		replicas:       make([]int, len(a.Nodes)),
		replicaAdds:    make([]int, len(a.Nodes)),
		leases:         make([]int, len(a.Nodes)),
		leaseTransfers: make([]int, len(a.Nodes)),
	}

	// Retrieve the metrics for each node and extract the replica and leaseholder
	// counts.
	var wg sync.WaitGroup
	wg.Add(len(a.Nodes))
	for i := 0; i < len(a.Nodes); i++ {
		go func(i int) {
			defer wg.Done()
			status := a.Nodes[i].StatusClient()
			if status == nil {
				// Cluster is shutting down.
				return
			}
			resp, err := status.Metrics(context.Background(), &serverpb.MetricsRequest{
				NodeId: "local",
			})
			if err != nil {
				log.Fatalf(context.Background(), "%v", err)
			}
			var metrics map[string]interface{}
			if err := json.Unmarshal(resp.Data, &metrics); err != nil {
				log.Fatalf(context.Background(), "%v", err)
			}
			stores := metrics["stores"].(map[string]interface{})
			for _, v := range stores {
				storeMetrics := v.(map[string]interface{})
				if v, ok := storeMetrics["replicas"]; ok {
					stats.replicas[i] += int(v.(float64))
				}
				if v, ok := storeMetrics["replicas.leaseholders"]; ok {
					stats.leases[i] += int(v.(float64))
				}
				if v, ok := storeMetrics["range.adds"]; ok {
					stats.replicaAdds[i] += int(v.(float64))
				}
				if v, ok := storeMetrics["leases.transfers.success"]; ok {
					stats.leaseTransfers[i] += int(v.(float64))
				}
			}
		}(i)
	}
	wg.Wait()

	for _, v := range stats.replicas {
		stats.count += v
	}
	return stats
}

func (a *allocSim) rangeStats(d time.Duration) {
	for {
		stats := a.rangeInfo()
		a.ranges.Lock()
		a.ranges.stats = stats
		a.ranges.Unlock()

		time.Sleep(d)
	}
}

const padding = "__________________"

func formatHeader(header string, numberNodes int, localities []Locality) string {
	var buf bytes.Buffer
	_, _ = buf.WriteString(header)
	for i := 1; i <= numberNodes; i++ {
		node := fmt.Sprintf("%d", i)
		if localities != nil {
			node += fmt.Sprintf(":%s", localities[i-1].Name)
		}
		fmt.Fprintf(&buf, "%s%s", padding[:len(padding)-len(node)], node)
	}
	return buf.String()
}

func (a *allocSim) monitor(d time.Duration) {
	formatNodes := func(stats allocStats) string {
		var buf bytes.Buffer
		for i := range stats.replicas {
			alive := a.Nodes[i].Alive()
			if !alive {
				_, _ = buf.WriteString("\033[0;31;49m")
			}
			fmt.Fprintf(&buf, "%*s", len(padding), fmt.Sprintf("%d/%d/%d/%d",
				stats.replicas[i], stats.leases[i], stats.replicaAdds[i], stats.leaseTransfers[i]))
			if !alive {
				_, _ = buf.WriteString("\033[0m")
			}
		}
		return buf.String()
	}

	start := timeutil.Now()
	lastTime := start
	var numReplicas int
	var lastOps uint64

	for ticks := 0; true; ticks++ {
		time.Sleep(d)

		now := timeutil.Now()
		elapsed := now.Sub(lastTime).Seconds()
		ops := atomic.LoadUint64(&a.stats.ops)
		totalLatencyNanos := atomic.LoadUint64(&a.stats.totalLatencyNanos)

		a.ranges.Lock()
		rangeStats := a.ranges.stats
		a.ranges.Unlock()

		if ticks%20 == 0 || numReplicas != len(rangeStats.replicas) {
			numReplicas = len(rangeStats.replicas)
			fmt.Println(formatHeader("_elapsed__ops/sec__average__latency___errors_replicas", numReplicas, a.localities))
		}

		var avgLatency float64
		if ops > 0 {
			avgLatency = float64(totalLatencyNanos/ops) / float64(time.Millisecond)
		}
		fmt.Printf("%8s %8.1f %8.1f %6.1fms %8d %8d%s\n",
			time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
			float64(ops-lastOps)/elapsed, float64(ops)/now.Sub(start).Seconds(), avgLatency,
			atomic.LoadUint64(&a.stats.errors), rangeStats.count, formatNodes(rangeStats))
		lastTime = now
		lastOps = ops
	}
}
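// For illustration only: the periodic monitor output looks roughly like the
// following (all numbers are made up). The header repeats every 20 ticks,
// and each trailing per-node column shows
// replicas/leases/replicaAdds/leaseTransfers, rendered in red for nodes
// that are not alive:
//
//	_elapsed__ops/sec__average__latency___errors_replicas_________________1_________________2
//	      2s     98.0     95.5    9.8ms        0       14           7/2/0/0           7/2/1/1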
func (a *allocSim) finalStatus() {
	a.ranges.Lock()
	defer a.ranges.Unlock()

	// TODO(bram): With the addition of localities, these stats will have to be
	// updated.

	fmt.Println(formatHeader("___stats___________________________", len(a.ranges.stats.replicas), a.localities))

	genStats := func(name string, counts []int) {
		var total float64
		for _, count := range counts {
			total += float64(count)
		}
		mean := total / float64(len(counts))
		var buf bytes.Buffer
		fmt.Fprintf(&buf, "%8s (total%% / diff%%) ", name)
		for _, count := range counts {
			var percent, fromMean float64
			if total != 0 {
				percent = float64(count) / total * 100
				fromMean = (float64(count) - mean) / total * 100
			}
			fmt.Fprintf(&buf, " %9.9s", fmt.Sprintf("%.0f/%.0f", percent, fromMean))
		}
		fmt.Println(buf.String())
	}
	genStats("replicas", a.ranges.stats.replicas)
	genStats("leases", a.ranges.stats.leases)
}

// handleStart handles the case where allocsim is re-executed with the
// "start" argument: the localcluster config in main uses this same binary
// (os.Args[0]) as the cockroach binary, so each simulated node is served by
// the embedded cockroach CLI via cli.Main().
func handleStart() bool {
	if len(os.Args) < 2 || os.Args[1] != "start" {
		return false
	}

	// Speed up lease transfer decisions by not requiring quite as much data
	// before beginning to make them. Without this, the rapid splitting of ranges
	// in the few minutes after allocsim starts up causes it to take a long time
	// for leases to settle onto other nodes even when requests are skewed heavily
	// onto them.
	kvserver.MinLeaseTransferStatsDuration = 10 * time.Second

	cli.Main()
	return true
}
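// Hypothetical invocations, for illustration only (the flag names are the
// ones defined at the top of this file; the config file path is made up):
//
//	allocsim -n 5 -w 8 -b 1000 -duration 10m
//	allocsim -f allocsim-config.json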
func main() {
	if handleStart() {
		return
	}

	flag.Parse()

	var config Configuration
	if *configFile != "" {
		var err error
		config, err = loadConfig(*configFile)
		if err != nil {
			log.Fatalf(context.Background(), "%v", err)
		}
	}

	perNodeCfg := localcluster.MakePerNodeFixedPortsCfg(*numNodes)

	// TODO(a-robinson): Automatically run github.com/tylertreat/comcast for
	// simpler configs that just have a single latency between all nodes.
	var separateAddrs bool
	for _, locality := range config.Localities {
		if len(locality.OutgoingLatencies) != 0 {
			separateAddrs = true
			if runtime.GOOS != "linux" {
				log.Fatal(context.Background(),
					"configs that set per-locality outgoing latencies are only supported on linux")
			}
			break
		}
	}

	if separateAddrs {
		for i := range perNodeCfg {
			s := perNodeCfg[i]
			s.Addr = fmt.Sprintf("127.0.0.%d", i)
			perNodeCfg[i] = s
		}
	}

	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	localities := make([]Locality, *numNodes)
	if len(config.Localities) != 0 {
		nodesPerLocality := make(map[string][]int)
		var nodeIdx int
		for _, locality := range config.Localities {
			for i := 0; i < locality.NumNodes; i++ {
				s := perNodeCfg[nodeIdx] // avoid map assignment problems
				if locality.LocalityStr != "" {
					s.ExtraArgs = []string{fmt.Sprintf("--locality=%s", locality.LocalityStr)}
				} else {
					s.ExtraArgs = []string{fmt.Sprintf("--locality=l=%s", locality.Name)}
				}
				if separateAddrs {
					s.ExtraEnv = []string{fmt.Sprintf("COCKROACH_SOURCE_IP_ADDRESS=%s", s.Addr)}
				}
				localities[nodeIdx] = locality
				nodesPerLocality[locality.Name] = append(nodesPerLocality[locality.Name], nodeIdx)

				perNodeCfg[nodeIdx] = s
				nodeIdx++
			}
		}
		var tcController *tc.Controller
		if separateAddrs {
			// Since localcluster only uses loopback IPs for the nodes, we only need to
			// set up tc rules on the loopback device.
			tcController = tc.NewController("lo")
			if err := tcController.Init(); err != nil {
				log.Fatalf(context.Background(), "%v", err)
			}
			defer func() {
				if err := tcController.CleanUp(); err != nil {
					log.Errorf(context.Background(), "%v", err)
				}
			}()
		}
		for _, locality := range localities {
			for _, outgoing := range locality.OutgoingLatencies {
				if outgoing.Latency > 0 {
					for _, srcNodeIdx := range nodesPerLocality[locality.Name] {
						for _, dstNodeIdx := range nodesPerLocality[outgoing.Name] {
							// Half of the configured latency is applied to each direction,
							// so a symmetric config yields the configured value round trip.
							if err := tcController.AddLatency(
								perNodeCfg[srcNodeIdx].Addr, perNodeCfg[dstNodeIdx].Addr, time.Duration(outgoing.Latency/2),
							); err != nil {
								log.Fatalf(context.Background(), "%v", err)
							}
						}
					}
				}
			}
		}
	}

	cfg := localcluster.ClusterConfig{
		AllNodeArgs: append(flag.Args(), "--vmodule=allocator=3,allocator_scorer=3,replicate_queue=3"),
		Binary:      os.Args[0],
		NumNodes:    *numNodes,
		DB:          "allocsim",
		NumWorkers:  *workers,
		PerNodeCfg:  perNodeCfg,
		DataDir:     "cockroach-data-allocsim",
	}

	c := localcluster.New(cfg)
	a := newAllocSim(c)
	a.localities = localities

	log.SetExitFunc(false /* hideStack */, func(code int) {
		c.Close()
		os.Exit(code)
	})

	go func() {
		var exitStatus int
		select {
		case s := <-signalCh:
			log.Infof(context.Background(), "signal received: %v", s)
			exitStatus = 1
		case <-time.After(*duration):
			log.Infof(context.Background(), "finished run of: %s", *duration)
		}
		c.Close()
		a.finalStatus()
		os.Exit(exitStatus)
	}()

	c.Start(context.Background())
	defer c.Close()
	c.UpdateZoneConfig(1, 1<<20)
	_, err := c.Nodes[0].DB().Exec("SET CLUSTER SETTING kv.raft_log.disable_synchronization_unsafe = true")
	if err != nil {
		log.Fatalf(context.Background(), "%v", err)
	}
	if len(config.Localities) != 0 {
		a.runWithConfig(config)
	} else {
		a.run(*workers)
	}
}