github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvnemesis/kvnemesis.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package kvnemesis
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math/rand"
    17  	"strings"
    18  	"sync/atomic"
    19  
    20  	"github.com/cockroachdb/cockroach/pkg/kv"
    21  	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
    22  	"github.com/cockroachdb/cockroach/pkg/util/log"
    23  	"github.com/cockroachdb/cockroach/pkg/util/tracing"
    24  )
    25  
    26  // RunNemesis generates and applies a series of Operations to exercise the KV
    27  // api. It returns a slice of the logical failures encountered.
    28  //
    29  // Ideas for conditions to be added to KV nemesis:
    30  // - Transactions being abandoned by their coordinator.
    31  // - CPuts, and continuing after CPut errors (generally continuing after errors
    32  // is not allowed, but it is allowed after ConditionFailedError as a special
    33  // case).
    34  func RunNemesis(
    35  	ctx context.Context,
    36  	rng *rand.Rand,
    37  	ct ClosedTimestampTargetInterval,
    38  	config GeneratorConfig,
    39  	dbs ...*kv.DB,
    40  ) ([]error, error) {
    41  	const concurrency, numSteps = 5, 30
    42  
    43  	g, err := MakeGenerator(config, newGetReplicasFn(dbs...))
    44  	if err != nil {
    45  		return nil, err
    46  	}
    47  	a := MakeApplier(dbs...)
    48  	w, err := Watch(ctx, dbs, ct, GeneratorDataSpan())
    49  	if err != nil {
    50  		return nil, err
    51  	}
    52  	defer func() { _ = w.Finish() }()
    53  
    54  	var stepsStartedAtomic int64
    55  	stepsByWorker := make([][]Step, concurrency)
    56  
    57  	workerFn := func(ctx context.Context, workerIdx int) error {
    58  		workerName := fmt.Sprintf(`%d`, workerIdx)
    59  		var buf strings.Builder
    60  		for atomic.AddInt64(&stepsStartedAtomic, 1) <= numSteps {
    61  			step := g.RandStep(rng)
    62  
    63  			recCtx, collect, cancel := tracing.ContextWithRecordingSpan(ctx, "txn step")
    64  			err := a.Apply(recCtx, &step)
    65  			log.VEventf(recCtx, 2, "step: %v", step)
    66  			step.Trace = collect().String()
    67  			cancel()
    68  			if err != nil {
    69  				buf.Reset()
    70  				step.format(&buf, formatCtx{indent: `  ` + workerName + ` ERR `})
    71  				log.Infof(ctx, "error: %+v\n\n%s", err, buf.String())
    72  				return err
    73  			}
    74  			buf.Reset()
    75  			fmt.Fprintf(&buf, "\n  before: %s", step.Before)
    76  			step.format(&buf, formatCtx{indent: `  ` + workerName + ` OP  `})
    77  			fmt.Fprintf(&buf, "\n  after: %s", step.After)
    78  			log.Infof(ctx, "%v", buf.String())
    79  			stepsByWorker[workerIdx] = append(stepsByWorker[workerIdx], step)
    80  		}
    81  		return nil
    82  	}
    83  	if err := ctxgroup.GroupWorkers(ctx, concurrency, workerFn); err != nil {
    84  		return nil, err
    85  	}
    86  
    87  	allSteps := make(steps, 0, numSteps)
    88  	for _, steps := range stepsByWorker {
    89  		allSteps = append(allSteps, steps...)
    90  	}
    91  
    92  	// TODO(dan): Also slurp the splits. The meta ranges use expiration based
    93  	// leases, so we can't use RangeFeed/Watcher to do it. Maybe ExportRequest?
    94  	if err := w.WaitForFrontier(ctx, allSteps.After()); err != nil {
    95  		return nil, err
    96  	}
    97  	kvs := w.Finish()
    98  	defer kvs.Close()
    99  	failures := Validate(allSteps, kvs)
   100  
   101  	if len(failures) > 0 {
   102  		log.Infof(ctx, "reproduction steps:\n%s", printRepro(stepsByWorker))
   103  		log.Infof(ctx, "kvs (recorded from rangefeed):\n%s", kvs.DebugPrint("  "))
   104  
   105  		span := GeneratorDataSpan()
   106  		scanKVs, err := dbs[0].Scan(ctx, span.Key, span.EndKey, -1)
   107  		if err != nil {
   108  			log.Infof(ctx, "could not scan actual latest values: %+v", err)
   109  		} else {
   110  			var kvsBuf strings.Builder
   111  			for _, kv := range scanKVs {
   112  				fmt.Fprintf(&kvsBuf, "  %s %s -> %s\n", kv.Key, kv.Value.Timestamp, kv.Value.PrettyPrint())
   113  			}
   114  			log.Infof(ctx, "kvs (scan of latest values according to crdb):\n%s", kvsBuf.String())
   115  		}
   116  	}
   117  
   118  	return failures, nil
   119  }
   120  
   121  func printRepro(stepsByWorker [][]Step) string {
   122  	// TODO(dan): Make this more copy and paste, especially the error handling.
   123  	var buf strings.Builder
   124  	buf.WriteString("g := ctxgroup.WithContext(ctx)\n")
   125  	for _, steps := range stepsByWorker {
   126  		buf.WriteString("g.GoCtx(func(ctx context.Context) error {")
   127  		for _, step := range steps {
   128  			fctx := formatCtx{receiver: fmt.Sprintf(`db%d`, step.DBID), indent: "  "}
   129  			buf.WriteString("\n")
   130  			buf.WriteString(fctx.indent)
   131  			step.Op.format(&buf, fctx)
   132  			buf.WriteString(step.Trace)
   133  			buf.WriteString("\n")
   134  		}
   135  		buf.WriteString("\n  return nil\n")
   136  		buf.WriteString("})\n")
   137  	}
   138  	buf.WriteString("g.Wait()\n")
   139  	return buf.String()
   140  }