github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/allocsim/main.go

// Copyright 2016 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"math"
	"os"
	"os/signal"
	"runtime"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/cockroachdb/cockroach/pkg/acceptance/localcluster"
	"github.com/cockroachdb/cockroach/pkg/acceptance/localcluster/tc"
	"github.com/cockroachdb/cockroach/pkg/cli"
	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/cockroachdb/errors"
)

var workers = flag.Int("w", 1, "number of workers; the i'th worker talks to node i%numNodes")
var numNodes = flag.Int("n", 4, "number of nodes")
var duration = flag.Duration("duration", math.MaxInt64, "how long to run the simulation for")
var blockSize = flag.Int("b", 1000, "block size")
var configFile = flag.String("f", "", "config file that specifies an allocsim workload (overrides -n)")

// Configuration provides a way to configure allocsim via a JSON file.
// TODO(a-robinson): Consider moving all the above options into the config file.
type Configuration struct {
	NumWorkers int        `json:"NumWorkers"`
	Localities []Locality `json:"Localities"`
}

// Locality defines the properties of a single locality as part of a Configuration.
type Locality struct {
	Name              string `json:"Name"`
	LocalityStr       string `json:"LocalityStr"`
	NumNodes          int    `json:"NumNodes"`
	NumWorkers        int    `json:"NumWorkers"`
	OutgoingLatencies []*struct {
		Name    string       `json:"Name"`
		Latency jsonDuration `json:"Latency"`
	} `json:"OutgoingLatencies"`
}

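// An illustrative config file (the values here are made up; the field names
// and types follow the structs above, with latencies given as
// time.ParseDuration strings):
//
//	{
//	  "NumWorkers": 4,
//	  "Localities": [
//	    {"Name": "us-east", "NumNodes": 3, "NumWorkers": 4,
//	     "OutgoingLatencies": [{"Name": "us-west", "Latency": "70ms"}]},
//	    {"Name": "us-west", "NumNodes": 3, "NumWorkers": 4,
//	     "OutgoingLatencies": [{"Name": "us-east", "Latency": "70ms"}]}
//	  ]
//	}

// jsonDuration is a time.Duration that unmarshals from a JSON string in
// time.ParseDuration format, e.g. "100ms" or "1s".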
type jsonDuration time.Duration

func (j *jsonDuration) UnmarshalJSON(b []byte) error {
	var s string
	if err := json.Unmarshal(b, &s); err != nil {
		return err
	}
	dur, err := time.ParseDuration(s)
	if err != nil {
		return err
	}
	*j = jsonDuration(dur)
	return nil
}

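// loadConfig parses the JSON Configuration in file and overrides the -n and
// -w flag values with the node and worker totals that it implies.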
func loadConfig(file string) (Configuration, error) {
	fileHandle, err := os.Open(file)
	if err != nil {
		return Configuration{}, errors.Wrapf(err, "failed to open config file %q", file)
	}
	defer fileHandle.Close()

	var config Configuration
	jsonParser := json.NewDecoder(fileHandle)
	if err := jsonParser.Decode(&config); err != nil {
		return Configuration{}, errors.Wrapf(err, "failed to decode %q as json", file)
	}

	*numNodes = 0
	*workers = config.NumWorkers
	for _, locality := range config.Localities {
		*numNodes += locality.NumNodes
		*workers += locality.NumWorkers
	}
	return config, nil
}

// allocSim allows investigation of allocation/rebalancing heuristics. A
// pool of workers generates block_writer-style load where the i'th worker
// talks to node i%numNodes. Every second a monitor goroutine outputs status
// such as the per-node replica and leaseholder counts.
//
// TODO(peter/a-robinson): Allow configuration of zone-config constraints.
type allocSim struct {
	*localcluster.Cluster
	stats struct {
		ops               uint64
		totalLatencyNanos uint64
		errors            uint64
	}
	ranges struct {
		syncutil.Mutex
		stats allocStats
	}
	localities []Locality
}

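// allocStats is a snapshot of the per-node replica and leaseholder counts,
// along with the cumulative numbers of replica additions and lease transfers.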
type allocStats struct {
	count          int
	replicas       []int
	leases         []int
	replicaAdds    []int
	leaseTransfers []int
}

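// newAllocSim returns an allocSim that runs its load against the given cluster.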
func newAllocSim(c *localcluster.Cluster) *allocSim {
	return &allocSim{
		Cluster: c,
	}
}

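// run starts the given number of round-robin workers plus the range-stats
// loop, then blocks in the monitor loop; it never returns.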
func (a *allocSim) run(workers int) {
	a.setup()
	for i := 0; i < workers; i++ {
		go a.roundRobinWorker(i, workers)
	}
	go a.rangeStats(time.Second)
	a.monitor(time.Second)
}

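// runWithConfig starts the workers described by config: each locality's
// workers write to a fixed node within that locality, while
// config.NumWorkers additional workers spread their writes across all nodes.
// Like run, it never returns.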
func (a *allocSim) runWithConfig(config Configuration) {
	a.setup()

	numWorkers := config.NumWorkers
	for _, locality := range config.Localities {
		numWorkers += locality.NumWorkers
	}

	firstNodeInLocality := 0
	for _, locality := range config.Localities {
		for i := 0; i < locality.NumWorkers; i++ {
			node := firstNodeInLocality + (i % locality.NumNodes)
			startNum := firstNodeInLocality + i
			go a.worker(node, startNum, numWorkers)
		}
		firstNodeInLocality += locality.NumNodes
	}
	for i := 0; i < config.NumWorkers; i++ {
		go a.roundRobinWorker(firstNodeInLocality+i, numWorkers)
	}

	go a.rangeStats(time.Second)
	a.monitor(time.Second)
}

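// setup creates the allocsim database and the blocks table that the workers
// write to.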
func (a *allocSim) setup() {
	db := a.Nodes[0].DB()
	if _, err := db.Exec("CREATE DATABASE IF NOT EXISTS allocsim"); err != nil {
		log.Fatalf(context.Background(), "%v", err)
	}

	blocks := `
CREATE TABLE IF NOT EXISTS blocks (
  id INT NOT NULL,
  num INT NOT NULL,
  data BYTES NOT NULL,
  PRIMARY KEY (id, num)
)
`
	if _, err := db.Exec(blocks); err != nil {
		log.Fatalf(context.Background(), "%v", err)
	}
}

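// maybeLogError logs err and bumps the error count, unless the error is an
// unavailability error that is expected while nodes are shutting down.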
func (a *allocSim) maybeLogError(err error) {
	if localcluster.IsUnavailableError(err) {
		return
	}
	log.Errorf(context.Background(), "%v", err)
	atomic.AddUint64(&a.stats.errors, 1)
}

const insertStmt = `INSERT INTO allocsim.blocks (id, num, data) VALUES ($1, $2, repeat('a', $3)::bytes)`

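// worker repeatedly inserts blocks into a single node. The startNum/workers
// pair staggers the num values so that concurrent workers write disjoint keys.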
func (a *allocSim) worker(dbIdx, startNum, workers int) {
	r, _ := randutil.NewPseudoRand()
	db := a.Nodes[dbIdx%len(a.Nodes)].DB()
	for num := startNum; ; num += workers {
		now := timeutil.Now()
		if _, err := db.Exec(insertStmt, r.Int63(), num, *blockSize); err != nil {
			a.maybeLogError(err)
		} else {
			atomic.AddUint64(&a.stats.ops, 1)
			atomic.AddUint64(&a.stats.totalLatencyNanos, uint64(timeutil.Since(now).Nanoseconds()))
		}
	}
}

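// roundRobinWorker is like worker, except that each successive insert goes to
// the next node in the cluster.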
func (a *allocSim) roundRobinWorker(startNum, workers int) {
	r, _ := randutil.NewPseudoRand()
	for i := 0; ; i++ {
		now := timeutil.Now()
		db := a.Nodes[i%len(a.Nodes)].DB()
		if db == nil {
			continue // nodes are shutting down
		}
		if _, err := db.Exec(insertStmt, r.Int63(), startNum+i*workers, *blockSize); err != nil {
			a.maybeLogError(err)
		} else {
			atomic.AddUint64(&a.stats.ops, 1)
			atomic.AddUint64(&a.stats.totalLatencyNanos, uint64(timeutil.Since(now).Nanoseconds()))
		}
	}
}

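// rangeInfo queries each node's local metrics in parallel and sums, per node,
// the per-store replica counts, leaseholder counts, replica additions, and
// successful lease transfers.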
func (a *allocSim) rangeInfo() allocStats {
	stats := allocStats{
		replicas:       make([]int, len(a.Nodes)),
		replicaAdds:    make([]int, len(a.Nodes)),
		leases:         make([]int, len(a.Nodes)),
		leaseTransfers: make([]int, len(a.Nodes)),
	}

	// Retrieve the metrics for each node and extract the replica and leaseholder
	// counts.
	var wg sync.WaitGroup
	wg.Add(len(a.Nodes))
	for i := 0; i < len(a.Nodes); i++ {
		go func(i int) {
			defer wg.Done()
			status := a.Nodes[i].StatusClient()
			if status == nil {
				// Cluster is shutting down.
				return
			}
			resp, err := status.Metrics(context.Background(), &serverpb.MetricsRequest{
				NodeId: "local",
			})
			if err != nil {
				log.Fatalf(context.Background(), "%v", err)
			}
			var metrics map[string]interface{}
			if err := json.Unmarshal(resp.Data, &metrics); err != nil {
				log.Fatalf(context.Background(), "%v", err)
			}
			stores := metrics["stores"].(map[string]interface{})
			for _, v := range stores {
				storeMetrics := v.(map[string]interface{})
				if v, ok := storeMetrics["replicas"]; ok {
					stats.replicas[i] += int(v.(float64))
				}
				if v, ok := storeMetrics["replicas.leaseholders"]; ok {
					stats.leases[i] += int(v.(float64))
				}
				if v, ok := storeMetrics["range.adds"]; ok {
					stats.replicaAdds[i] += int(v.(float64))
				}
				if v, ok := storeMetrics["leases.transfers.success"]; ok {
					stats.leaseTransfers[i] += int(v.(float64))
				}
			}
		}(i)
	}
	wg.Wait()

	for _, v := range stats.replicas {
		stats.count += v
	}
	return stats
}

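// rangeStats refreshes a.ranges.stats with a new rangeInfo snapshot every d.
// It is meant to run in its own goroutine.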
func (a *allocSim) rangeStats(d time.Duration) {
	for {
		stats := a.rangeInfo()
		a.ranges.Lock()
		a.ranges.stats = stats
		a.ranges.Unlock()

		time.Sleep(d)
	}
}

const padding = "__________________"

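// formatHeader appends one right-aligned column (len(padding) wide) per node
// to the given header string, tagging each node with its locality name when
// localities are configured.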
func formatHeader(header string, numberNodes int, localities []Locality) string {
	var buf bytes.Buffer
	_, _ = buf.WriteString(header)
	for i := 1; i <= numberNodes; i++ {
		node := fmt.Sprintf("%d", i)
		if localities != nil {
			node += fmt.Sprintf(":%s", localities[i-1].Name)
		}
		fmt.Fprintf(&buf, "%s%s", padding[:len(padding)-len(node)], node)
	}
	return buf.String()
}

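// monitor prints a status line every d: elapsed time, instantaneous and
// cumulative throughput, average latency, error count, and one
// replicas/leases/replica-adds/lease-transfers column per node. Columns for
// dead nodes are rendered in red.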
func (a *allocSim) monitor(d time.Duration) {
	formatNodes := func(stats allocStats) string {
		var buf bytes.Buffer
		for i := range stats.replicas {
			alive := a.Nodes[i].Alive()
			if !alive {
				_, _ = buf.WriteString("\033[0;31;49m")
			}
			fmt.Fprintf(&buf, "%*s", len(padding), fmt.Sprintf("%d/%d/%d/%d",
				stats.replicas[i], stats.leases[i], stats.replicaAdds[i], stats.leaseTransfers[i]))
			if !alive {
				_, _ = buf.WriteString("\033[0m")
			}
		}
		return buf.String()
	}

	start := timeutil.Now()
	lastTime := start
	var numReplicas int
	var lastOps uint64

	for ticks := 0; ; ticks++ {
		time.Sleep(d)

		now := timeutil.Now()
		elapsed := now.Sub(lastTime).Seconds()
		ops := atomic.LoadUint64(&a.stats.ops)
		totalLatencyNanos := atomic.LoadUint64(&a.stats.totalLatencyNanos)

		a.ranges.Lock()
		rangeStats := a.ranges.stats
		a.ranges.Unlock()

		if ticks%20 == 0 || numReplicas != len(rangeStats.replicas) {
			numReplicas = len(rangeStats.replicas)
			fmt.Println(formatHeader("_elapsed__ops/sec__average__latency___errors_replicas", numReplicas, a.localities))
		}

		var avgLatency float64
		if ops > 0 {
			avgLatency = float64(totalLatencyNanos/ops) / float64(time.Millisecond)
		}
		fmt.Printf("%8s %8.1f %8.1f %6.1fms %8d %8d%s\n",
			time.Duration(now.Sub(start).Seconds()+0.5)*time.Second,
			float64(ops-lastOps)/elapsed, float64(ops)/now.Sub(start).Seconds(), avgLatency,
			atomic.LoadUint64(&a.stats.errors), rangeStats.count, formatNodes(rangeStats))
		lastTime = now
		lastOps = ops
	}
}

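// finalStatus prints how the replicas and leases ended up distributed across
// the nodes, both as a percentage of the total and as the deviation from the
// mean.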
func (a *allocSim) finalStatus() {
	a.ranges.Lock()
	defer a.ranges.Unlock()

	// TODO(bram): With the addition of localities, these stats will have to be
	// updated.

	fmt.Println(formatHeader("___stats___________________________", len(a.ranges.stats.replicas), a.localities))

	genStats := func(name string, counts []int) {
		var total float64
		for _, count := range counts {
			total += float64(count)
		}
		mean := total / float64(len(counts))
		var buf bytes.Buffer
		fmt.Fprintf(&buf, "%8s  (total%% / diff%%)         ", name)
		for _, count := range counts {
			var percent, fromMean float64
			if total != 0 {
				percent = float64(count) / total * 100
				fromMean = (float64(count) - mean) / total * 100
			}
			fmt.Fprintf(&buf, " %9.9s", fmt.Sprintf("%.0f/%.0f", percent, fromMean))
		}
		fmt.Println(buf.String())
	}
	genStats("replicas", a.ranges.stats.replicas)
	genStats("leases", a.ranges.stats.leases)
}

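// handleStart intercepts the `start` subcommand and hands off to the regular
// CockroachDB CLI. localcluster launches each node by re-executing the
// allocsim binary itself (see ClusterConfig.Binary in main) with `start`
// arguments.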
func handleStart() bool {
	if len(os.Args) < 2 || os.Args[1] != "start" {
		return false
	}

	// Speed up lease transfer decisions by not requiring quite as much data
	// before beginning to make them. Without this, the rapid splitting of ranges
	// in the few minutes after allocsim starts up causes it to take a long time
	// for leases to settle onto other nodes even when requests are skewed heavily
	// onto them.
	kvserver.MinLeaseTransferStatsDuration = 10 * time.Second

	cli.Main()
	return true
}

func main() {
	if handleStart() {
		return
	}

	flag.Parse()

	var config Configuration
	if *configFile != "" {
		var err error
		config, err = loadConfig(*configFile)
		if err != nil {
			log.Fatalf(context.Background(), "%v", err)
		}
	}

	perNodeCfg := localcluster.MakePerNodeFixedPortsCfg(*numNodes)

	// TODO(a-robinson): Automatically run github.com/tylertreat/comcast for
	// simpler configs that just have a single latency between all nodes.
	var separateAddrs bool
	for _, locality := range config.Localities {
		if len(locality.OutgoingLatencies) != 0 {
			separateAddrs = true
			if runtime.GOOS != "linux" {
				log.Fatal(context.Background(),
					"configs that set per-locality outgoing latencies are only supported on linux")
			}
			break
		}
	}

	if separateAddrs {
		for i := range perNodeCfg {
			s := perNodeCfg[i]
			s.Addr = fmt.Sprintf("127.0.0.%d", i)
			perNodeCfg[i] = s
		}
	}

	signalCh := make(chan os.Signal, 1)
	signal.Notify(signalCh, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

	localities := make([]Locality, *numNodes)
	if len(config.Localities) != 0 {
		nodesPerLocality := make(map[string][]int)
		var nodeIdx int
		for _, locality := range config.Localities {
			for i := 0; i < locality.NumNodes; i++ {
				s := perNodeCfg[nodeIdx] // avoid map assignment problems
				if locality.LocalityStr != "" {
					s.ExtraArgs = []string{fmt.Sprintf("--locality=%s", locality.LocalityStr)}
				} else {
					s.ExtraArgs = []string{fmt.Sprintf("--locality=l=%s", locality.Name)}
				}
				if separateAddrs {
					s.ExtraEnv = []string{fmt.Sprintf("COCKROACH_SOURCE_IP_ADDRESS=%s", s.Addr)}
				}
				localities[nodeIdx] = locality
				nodesPerLocality[locality.Name] = append(nodesPerLocality[locality.Name], nodeIdx)

				perNodeCfg[nodeIdx] = s
				nodeIdx++
			}
		}
		var tcController *tc.Controller
		if separateAddrs {
			// Since localcluster only uses loopback IPs for the nodes, we only need to
			// set up tc rules on the loopback device.
			tcController = tc.NewController("lo")
			if err := tcController.Init(); err != nil {
				log.Fatalf(context.Background(), "%v", err)
			}
			defer func() {
				if err := tcController.CleanUp(); err != nil {
					log.Errorf(context.Background(), "%v", err)
				}
			}()
		}
		for _, locality := range localities {
			for _, outgoing := range locality.OutgoingLatencies {
				if outgoing.Latency > 0 {
					for _, srcNodeIdx := range nodesPerLocality[locality.Name] {
						for _, dstNodeIdx := range nodesPerLocality[outgoing.Name] {
							if err := tcController.AddLatency(
								perNodeCfg[srcNodeIdx].Addr, perNodeCfg[dstNodeIdx].Addr, time.Duration(outgoing.Latency/2),
							); err != nil {
								log.Fatalf(context.Background(), "%v", err)
							}
						}
					}
				}
			}
		}
	}

	cfg := localcluster.ClusterConfig{
		AllNodeArgs: append(flag.Args(), "--vmodule=allocator=3,allocator_scorer=3,replicate_queue=3"),
		Binary:      os.Args[0],
		NumNodes:    *numNodes,
		DB:          "allocsim",
		NumWorkers:  *workers,
		PerNodeCfg:  perNodeCfg,
		DataDir:     "cockroach-data-allocsim",
	}

	c := localcluster.New(cfg)
	a := newAllocSim(c)
	a.localities = localities

	log.SetExitFunc(false /* hideStack */, func(code int) {
		c.Close()
		os.Exit(code)
	})

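	// Once we receive a signal or the configured duration elapses, shut the
	// cluster down and report the final stats.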
	go func() {
		var exitStatus int
		select {
		case s := <-signalCh:
			log.Infof(context.Background(), "signal received: %v", s)
			exitStatus = 1
		case <-time.After(*duration):
			log.Infof(context.Background(), "finished run of: %s", *duration)
		}
		c.Close()
		a.finalStatus()
		os.Exit(exitStatus)
	}()

	c.Start(context.Background())
	defer c.Close()
	c.UpdateZoneConfig(1, 1<<20)
	_, err := c.Nodes[0].DB().Exec("SET CLUSTER SETTING kv.raft_log.disable_synchronization_unsafe = true")
	if err != nil {
		log.Fatalf(context.Background(), "%v", err)
	}
	if len(config.Localities) != 0 {
		a.runWithConfig(config)
	} else {
		a.run(*workers)
	}
}