github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/kv/kvnemesis/kvnemesis.go (about) 1 // Copyright 2020 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package kvnemesis 12 13 import ( 14 "context" 15 "fmt" 16 "math/rand" 17 "strings" 18 "sync/atomic" 19 20 "github.com/cockroachdb/cockroach/pkg/kv" 21 "github.com/cockroachdb/cockroach/pkg/util/ctxgroup" 22 "github.com/cockroachdb/cockroach/pkg/util/log" 23 "github.com/cockroachdb/cockroach/pkg/util/tracing" 24 ) 25 26 // RunNemesis generates and applies a series of Operations to exercise the KV 27 // api. It returns a slice of the logical failures encountered. 28 // 29 // Ideas for conditions to be added to KV nemesis: 30 // - Transactions being abandoned by their coordinator. 31 // - CPuts, and continuing after CPut errors (generally continuing after errors 32 // is not allowed, but it is allowed after ConditionFailedError as a special 33 // case). 34 func RunNemesis( 35 ctx context.Context, 36 rng *rand.Rand, 37 ct ClosedTimestampTargetInterval, 38 config GeneratorConfig, 39 dbs ...*kv.DB, 40 ) ([]error, error) { 41 const concurrency, numSteps = 5, 30 42 43 g, err := MakeGenerator(config, newGetReplicasFn(dbs...)) 44 if err != nil { 45 return nil, err 46 } 47 a := MakeApplier(dbs...) 48 w, err := Watch(ctx, dbs, ct, GeneratorDataSpan()) 49 if err != nil { 50 return nil, err 51 } 52 defer func() { _ = w.Finish() }() 53 54 var stepsStartedAtomic int64 55 stepsByWorker := make([][]Step, concurrency) 56 57 workerFn := func(ctx context.Context, workerIdx int) error { 58 workerName := fmt.Sprintf(`%d`, workerIdx) 59 var buf strings.Builder 60 for atomic.AddInt64(&stepsStartedAtomic, 1) <= numSteps { 61 step := g.RandStep(rng) 62 63 recCtx, collect, cancel := tracing.ContextWithRecordingSpan(ctx, "txn step") 64 err := a.Apply(recCtx, &step) 65 log.VEventf(recCtx, 2, "step: %v", step) 66 step.Trace = collect().String() 67 cancel() 68 if err != nil { 69 buf.Reset() 70 step.format(&buf, formatCtx{indent: ` ` + workerName + ` ERR `}) 71 log.Infof(ctx, "error: %+v\n\n%s", err, buf.String()) 72 return err 73 } 74 buf.Reset() 75 fmt.Fprintf(&buf, "\n before: %s", step.Before) 76 step.format(&buf, formatCtx{indent: ` ` + workerName + ` OP `}) 77 fmt.Fprintf(&buf, "\n after: %s", step.After) 78 log.Infof(ctx, "%v", buf.String()) 79 stepsByWorker[workerIdx] = append(stepsByWorker[workerIdx], step) 80 } 81 return nil 82 } 83 if err := ctxgroup.GroupWorkers(ctx, concurrency, workerFn); err != nil { 84 return nil, err 85 } 86 87 allSteps := make(steps, 0, numSteps) 88 for _, steps := range stepsByWorker { 89 allSteps = append(allSteps, steps...) 90 } 91 92 // TODO(dan): Also slurp the splits. The meta ranges use expiration based 93 // leases, so we can't use RangeFeed/Watcher to do it. Maybe ExportRequest? 94 if err := w.WaitForFrontier(ctx, allSteps.After()); err != nil { 95 return nil, err 96 } 97 kvs := w.Finish() 98 defer kvs.Close() 99 failures := Validate(allSteps, kvs) 100 101 if len(failures) > 0 { 102 log.Infof(ctx, "reproduction steps:\n%s", printRepro(stepsByWorker)) 103 log.Infof(ctx, "kvs (recorded from rangefeed):\n%s", kvs.DebugPrint(" ")) 104 105 span := GeneratorDataSpan() 106 scanKVs, err := dbs[0].Scan(ctx, span.Key, span.EndKey, -1) 107 if err != nil { 108 log.Infof(ctx, "could not scan actual latest values: %+v", err) 109 } else { 110 var kvsBuf strings.Builder 111 for _, kv := range scanKVs { 112 fmt.Fprintf(&kvsBuf, " %s %s -> %s\n", kv.Key, kv.Value.Timestamp, kv.Value.PrettyPrint()) 113 } 114 log.Infof(ctx, "kvs (scan of latest values according to crdb):\n%s", kvsBuf.String()) 115 } 116 } 117 118 return failures, nil 119 } 120 121 func printRepro(stepsByWorker [][]Step) string { 122 // TODO(dan): Make this more copy and paste, especially the error handling. 123 var buf strings.Builder 124 buf.WriteString("g := ctxgroup.WithContext(ctx)\n") 125 for _, steps := range stepsByWorker { 126 buf.WriteString("g.GoCtx(func(ctx context.Context) error {") 127 for _, step := range steps { 128 fctx := formatCtx{receiver: fmt.Sprintf(`db%d`, step.DBID), indent: " "} 129 buf.WriteString("\n") 130 buf.WriteString(fctx.indent) 131 step.Op.format(&buf, fctx) 132 buf.WriteString(step.Trace) 133 buf.WriteString("\n") 134 } 135 buf.WriteString("\n return nil\n") 136 buf.WriteString("})\n") 137 } 138 buf.WriteString("g.Wait()\n") 139 return buf.String() 140 }