github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/disk_stall.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package main 12 13 import ( 14 "context" 15 "fmt" 16 "math/rand" 17 "strings" 18 "time" 19 20 "github.com/cockroachdb/cockroach/pkg/util/timeutil" 21 ) 22 23 func registerDiskStalledDetection(r *testRegistry) { 24 for _, affectsLogDir := range []bool{false, true} { 25 for _, affectsDataDir := range []bool{false, true} { 26 // Grab copies of the args because we'll pass them into a closure. 27 // Everyone's favorite bug to write in Go. 28 affectsLogDir := affectsLogDir 29 affectsDataDir := affectsDataDir 30 r.Add(testSpec{ 31 Name: fmt.Sprintf( 32 "disk-stalled/log=%t,data=%t", 33 affectsLogDir, affectsDataDir, 34 ), 35 Owner: OwnerKV, 36 MinVersion: "v19.1.0", 37 Cluster: makeClusterSpec(1), 38 Run: func(ctx context.Context, t *test, c *cluster) { 39 runDiskStalledDetection(ctx, t, c, affectsLogDir, affectsDataDir) 40 }, 41 }) 42 } 43 } 44 } 45 46 func runDiskStalledDetection( 47 ctx context.Context, t *test, c *cluster, affectsLogDir bool, affectsDataDir bool, 48 ) { 49 n := c.Node(1) 50 51 c.Put(ctx, cockroach, "./cockroach") 52 c.Run(ctx, n, "sudo umount -f {store-dir}/faulty || true") 53 c.Run(ctx, n, "mkdir -p {store-dir}/{real,faulty} || true") 54 // Make sure the actual logs are downloaded as artifacts. 55 c.Run(ctx, n, "rm -f logs && ln -s {store-dir}/real/logs logs || true") 56 57 t.Status("setting up charybdefs") 58 59 if err := execCmd(ctx, t.l, roachprod, "install", c.makeNodes(n), "charybdefs"); err != nil { 60 t.Fatal(err) 61 } 62 c.Run(ctx, n, "sudo charybdefs {store-dir}/faulty -oallow_other,modules=subdir,subdir={store-dir}/real") 63 c.Run(ctx, n, "sudo mkdir -p {store-dir}/real/logs") 64 c.Run(ctx, n, "sudo chmod -R 777 {store-dir}/{real,faulty}") 65 l, err := t.l.ChildLogger("cockroach") 66 if err != nil { 67 t.Fatal(err) 68 } 69 type result struct { 70 err error 71 out string 72 } 73 errCh := make(chan result) 74 75 // NB: charybdefs' delay nemesis introduces 50ms per syscall. It would 76 // be nicer to introduce a longer delay, but this works. 77 tooShortSync := 40 * time.Millisecond 78 79 maxLogSync := time.Hour 80 logDir := "real/logs" 81 if affectsLogDir { 82 logDir = "faulty/logs" 83 maxLogSync = tooShortSync 84 } 85 maxDataSync := time.Hour 86 dataDir := "real" 87 if affectsDataDir { 88 maxDataSync = tooShortSync 89 dataDir = "faulty" 90 } 91 92 tStarted := timeutil.Now() 93 dur := 10 * time.Minute 94 if !affectsDataDir && !affectsLogDir { 95 dur = 30 * time.Second 96 } 97 98 go func() { 99 t.WorkerStatus("running server") 100 out, err := c.RunWithBuffer(ctx, l, n, 101 fmt.Sprintf("timeout --signal 9 %ds env COCKROACH_ENGINE_MAX_SYNC_DURATION_FATAL=true "+ 102 "COCKROACH_ENGINE_MAX_SYNC_DURATION=%s COCKROACH_LOG_MAX_SYNC_DURATION=%s "+ 103 "./cockroach start --insecure --logtostderr=INFO --store {store-dir}/%s --log-dir {store-dir}/%s", 104 int(dur.Seconds()), maxDataSync, maxLogSync, dataDir, logDir, 105 ), 106 ) 107 errCh <- result{err, string(out)} 108 }() 109 110 time.Sleep(time.Duration(rand.Intn(5)) * time.Second) 111 112 t.Status("blocking storage") 113 c.Run(ctx, n, "charybdefs-nemesis --delay") 114 115 res := <-errCh 116 if res.err == nil { 117 t.Fatalf("expected an error: %s", res.out) 118 } 119 120 // This test can also run in sanity check mode to make sure it doesn't fail 121 // due to the aggressive env vars above. 122 expectMsg := affectsDataDir || affectsLogDir 123 124 if expectMsg != strings.Contains(res.out, "disk stall detected") { 125 t.Fatalf("unexpected output: %v %s", res.err, res.out) 126 } else if elapsed := timeutil.Since(tStarted); !expectMsg && elapsed < dur { 127 t.Fatalf("no disk stall injected, but process terminated too early after %s (expected >= %s)", elapsed, dur) 128 } 129 130 c.Run(ctx, n, "charybdefs-nemesis --clear") 131 c.Run(ctx, n, "sudo umount {store-dir}/faulty") 132 }