github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/quit.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package main

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"
	"time"

	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/util/contextutil"
	"github.com/cockroachdb/cockroach/pkg/util/retry"
	"github.com/cockroachdb/errors"
	"github.com/kr/pretty"
)

type quitTest struct {
	t    *test
	c    *cluster
	args option
}

// runQuitTransfersLeases performs rolling restarts on a
// 3-node cluster and ascertains that each node shutting down
// transfers all its leases reliably to other nodes prior to
// terminating.
func runQuitTransfersLeases(
	ctx context.Context,
	t *test,
	c *cluster,
	methodName string,
	method func(ctx context.Context, t *test, c *cluster, nodeID int),
) {
	q := quitTest{t: t, c: c}
	q.init(ctx)
	q.runTest(ctx, method)
}

func (q *quitTest) init(ctx context.Context) {
	q.args = startArgs(
		"--env=COCKROACH_SCAN_MAX_IDLE_TIME=5ms", // iterate fast for rebalancing
		"-a", "--vmodule=store=1,replica=1,replica_proposal=1", // verbosity to troubleshoot drains
	)
	q.c.Put(ctx, cockroach, "./cockroach")
	q.c.Start(ctx, q.t, q.args)
}

func (q *quitTest) Fatal(args ...interface{}) {
	q.t.Fatal(args...)
}

func (q *quitTest) Fatalf(format string, args ...interface{}) {
	q.t.Fatalf(format, args...)
}

func (q *quitTest) runTest(
	ctx context.Context, method func(ctx context.Context, t *test, c *cluster, nodeID int),
) {
	q.waitForUpReplication(ctx)
	q.createRanges(ctx)
	q.setupIncrementalDrain(ctx)

	// runTest iterates through the cluster three times and restarts each
	// node in turn. After each node shutdown it verifies that there are
	// no leases held by the down node. (See the comments inside
	// checkNoLeases() for details.)
	//
	// The shutdown method is passed in via the 'method' parameter, used
	// below.
	q.t.l.Printf("now running restart loop\n")
	for i := 0; i < 3; i++ {
		q.t.l.Printf("iteration %d\n", i)
		for nodeID := 1; nodeID <= q.c.spec.NodeCount; nodeID++ {
			q.t.l.Printf("stopping node %d\n", nodeID)
			q.runWithTimeout(ctx, func(ctx context.Context) { method(ctx, q.t, q.c, nodeID) })
			q.runWithTimeout(ctx, func(ctx context.Context) { q.checkNoLeases(ctx, nodeID) })
			q.t.l.Printf("restarting node %d\n", nodeID)
			q.runWithTimeout(ctx, func(ctx context.Context) { q.restartNode(ctx, nodeID) })
		}
	}
}

// restartNode restarts one node and waits until it's up and ready to
// accept clients.
func (q *quitTest) restartNode(ctx context.Context, nodeID int) {
	q.c.Start(ctx, q.t, q.args, q.c.Node(nodeID))

	q.t.l.Printf("waiting for readiness of node %d\n", nodeID)
	// Now perform a SQL query. This achieves two goals:
	// - it waits until the server is ready.
	// - the particular query forces a cluster-wide RPC, which
	//   forces any circuit breaker to trip and re-establish
	//   the RPC connection if needed.
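	// (A note on the query below: 'TABLE x' is SQL shorthand for
	// 'SELECT * FROM x', and listing crdb_internal.cluster_sessions
	// requires contacting every node, which is what makes this a
	// cluster-wide RPC.)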
	db := q.c.Conn(ctx, nodeID)
	defer db.Close()
	if _, err := db.ExecContext(ctx, `TABLE crdb_internal.cluster_sessions`); err != nil {
		q.Fatal(err)
	}
}

func (q *quitTest) waitForUpReplication(ctx context.Context) {
	db := q.c.Conn(ctx, 1)
	defer db.Close()

	// We'll want rebalancing to be a bit faster than normal, so
	// that the up-replication does not take ages.
	if _, err := db.ExecContext(ctx, `SET CLUSTER SETTING kv.snapshot_rebalance.max_rate = '128MiB'`); err != nil {
		q.Fatal(err)
	}

	err := retry.ForDuration(30*time.Second, func() error {
		q.t.l.Printf("waiting for up-replication\n")
		row := db.QueryRowContext(ctx, `SELECT min(array_length(replicas, 1)) FROM crdb_internal.ranges_no_leases`)
		minReplicas := 0
		if err := row.Scan(&minReplicas); err != nil {
			q.Fatal(err)
		}
		if minReplicas < 3 {
			time.Sleep(time.Second)
			return errors.Newf("some ranges not up-replicated yet")
		}
		return nil
	})
	if err != nil {
		q.Fatalf("cluster did not up-replicate: %v", err)
	}
}

// runWithTimeout runs a command with a 1-minute timeout.
func (q *quitTest) runWithTimeout(ctx context.Context, fn func(ctx context.Context)) {
	if err := contextutil.RunWithTimeout(ctx, "do", time.Minute, func(ctx context.Context) error {
		fn(ctx)
		return nil
	}); err != nil {
		q.Fatal(err)
	}
}

// setupIncrementalDrain simulates requiring more than one Drain round
// to transfer all leases. This way, we exercise the iterating code in
// quit/node drain.
func (q *quitTest) setupIncrementalDrain(ctx context.Context) {
	db := q.c.Conn(ctx, 1)
	defer db.Close()
	if _, err := db.ExecContext(ctx, `
SET CLUSTER SETTING server.shutdown.lease_transfer_wait = '10ms'`); err != nil {
		if strings.Contains(err.Error(), "unknown cluster setting") {
			// old version; ok
		} else {
			q.Fatal(err)
		}
	}
}

// createRanges creates a bunch of ranges on the test cluster.
func (q *quitTest) createRanges(ctx context.Context) {
	const numRanges = 500

	db := q.c.Conn(ctx, 1)
	defer db.Close()
	if _, err := db.ExecContext(ctx, fmt.Sprintf(`
CREATE TABLE t(x, y, PRIMARY KEY(x)) AS SELECT @1, 1 FROM generate_series(1,%[1]d)`,
		numRanges)); err != nil {
		q.Fatal(err)
	}
	// We split them from right-to-left so we're peeling at most 1
	// row each time on the right.
	//
	// Also we do it a hundred at a time, so as to be able to see the
	// progress when watching the roachtest progress interactively.
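	//
	// (For example, with numRanges=500 the first iteration below splits
	// at keys 500 down to 401, the next at 400 down to 301, and so on,
	// ending with 100 down to 1.)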
	for i := numRanges; i > 1; i -= 100 {
		q.t.l.Printf("creating %d ranges (%d-%d)...\n", numRanges, i, i-99)
		if _, err := db.ExecContext(ctx, fmt.Sprintf(`
ALTER TABLE t SPLIT AT TABLE generate_series(%[1]d,%[1]d-99,-1)`, i)); err != nil {
			q.Fatal(err)
		}
	}
}

// checkNoLeases verifies that no range has a lease on the node
// that's just been shut down.
func (q *quitTest) checkNoLeases(ctx context.Context, nodeID int) {
	// We need to use SQL against a node that's not the one we're
	// shutting down.
	otherNodeID := 1 + nodeID%q.c.spec.NodeCount

	// Now we're going to check two things:
	//
	// 1) *immediately*, that every range in the cluster has a lease
	//    somewhere other than on nodeID.
	//
	//    Note that with this condition, it is possible that _some_
	//    replica of any given range still thinks that the leaseholder is
	//    nodeID, even though _another_ replica has become leaseholder
	//    already. That's because followers can lag behind and
	//    drain does not wait for followers to catch up.
	//    https://github.com/cockroachdb/cockroach/issues/47100
	//
	// 2) *eventually*, that every node other than nodeID has no range
	//    replica whose lease refers to nodeID, i.e. the followers
	//    have all caught up.
	//    Note: when issue #47100 is fixed, this 2nd condition
	//    must be true immediately -- drain is then able to wait
	//    for all followers to learn who the new leaseholder is.

	if err := testutils.SucceedsSoonError(func() error {
		// To check this, we first ask each node in turn for its range
		// report.
		//
		// For condition (1) we accumulate all the known ranges in
		// knownRanges, and assign them the node ID of their leaseholder
		// whenever it is not nodeID. Then at the end we check that every
		// entry in the map has a non-zero value.
		knownRanges := map[string]int{}
		//
		// For condition (2) we accumulate the unwanted leases in
		// invLeaseMap, then check at the end that the map is empty.
		invLeaseMap := map[int][]string{}
		for i := 1; i <= q.c.spec.NodeCount; i++ {
			if i == nodeID {
				// Can't request this node. Ignore.
				continue
			}

			q.t.l.Printf("retrieving ranges for node %d\n", i)
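			// Note: the HTTP request below is sent to otherNodeID's admin
			// UI address (that node is known to be up), but the URL path
			// names node i, so the response describes the ranges with a
			// replica on node i and each such replica's view of the lease.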
			// Get the report via HTTP.
			// Flag -s is to remove progress on stderr, so that the buffer
			// contains the JSON of the response and nothing else.
			buf, err := q.c.RunWithBuffer(ctx, q.t.l, q.c.Node(otherNodeID),
				"curl", "-s", fmt.Sprintf("http://%s/_status/ranges/%d",
					q.c.InternalAdminUIAddr(ctx, q.c.Node(otherNodeID))[0], i))
			if err != nil {
				q.Fatal(err)
			}
			// We need just a subset of the response. Make an ad-hoc
			// struct with just the bits of interest.
			type jsonOutput struct {
				Ranges []struct {
					State struct {
						State struct {
							Desc struct {
								RangeID string `json:"rangeId"`
							} `json:"desc"`
							Lease struct {
								Replica struct {
									NodeID int `json:"nodeId"`
								} `json:"replica"`
							} `json:"lease"`
						} `json:"state"`
					} `json:"state"`
				} `json:"ranges"`
			}
			var details jsonOutput
			if err := json.Unmarshal(buf, &details); err != nil {
				q.Fatal(err)
			}
			// A sanity check.
			if len(details.Ranges) == 0 {
				q.Fatal("expected some ranges from RPC, got none")
			}
			// Is there any range whose lease refers to nodeID?
			var invalidLeases []string
			for _, r := range details.Ranges {
				// Another sanity check.
				if r.State.State.Lease.Replica.NodeID == 0 {
					q.Fatalf("expected a valid lease state, got %# v", pretty.Formatter(r))
				}
				curLeaseHolder := knownRanges[r.State.State.Desc.RangeID]
				if r.State.State.Lease.Replica.NodeID == nodeID {
					// As per condition (2) above we want to know which ranges
					// have an unexpected left-over lease on nodeID.
					invalidLeases = append(invalidLeases, r.State.State.Desc.RangeID)
				} else {
					// As per condition (1) above we track in knownRanges whether
					// there is at least one replica other than on nodeID that
					// thinks that the lease has been transferred.
					curLeaseHolder = r.State.State.Lease.Replica.NodeID
				}
				knownRanges[r.State.State.Desc.RangeID] = curLeaseHolder
			}
			if len(invalidLeases) > 0 {
				invLeaseMap[i] = invalidLeases
			}
		}
		// (1): is there a range with no lease outside of nodeID?
		var leftOver []string
		for r, n := range knownRanges {
			if n == 0 {
				leftOver = append(leftOver, r)
			}
		}
		if len(leftOver) > 0 {
			q.Fatalf("(1) ranges with no lease outside of node %d: %# v", nodeID, pretty.Formatter(leftOver))
		}
		// (2): is there a range with a left-over lease on nodeID?
		//
		// TODO(knz): Eventually we want this condition to be always
		// true, i.e. fail the test immediately if found to be false
		// instead of waiting. (#47100)
		if len(invLeaseMap) > 0 {
			err := errors.Newf(
				"(2) ranges with remaining leases on node %d, per node: %# v",
				nodeID, pretty.Formatter(invLeaseMap))
			q.t.l.Printf("condition failed: %v\n", err)
			q.t.l.Printf("retrying until SucceedsSoon has enough...\n")
			return err
		}
		return nil
	}); err != nil {
		q.Fatal(err)
	}

	db := q.c.Conn(ctx, otherNodeID)
	defer db.Close()
	// For good measure, also write to the table. This ensures it
	// remains available.
	if _, err := db.ExecContext(ctx, `UPDATE t SET y = y + 1`); err != nil {
		q.Fatal(err)
	}
}

func registerQuitTransfersLeases(r *testRegistry) {
	registerTest := func(name, minver string, method func(context.Context, *test, *cluster, int)) {
		r.Add(testSpec{
			Name:       fmt.Sprintf("transfer-leases/%s", name),
			Owner:      OwnerKV,
			Cluster:    makeClusterSpec(3),
			MinVersion: minver,
			Run: func(ctx context.Context, t *test, c *cluster) {
				runQuitTransfersLeases(ctx, t, c, name, method)
			},
		})
	}

	// Uses 'roachprod stop --sig 15 --wait', i.e. send SIGTERM and wait
	// until the process exits.
	registerTest("signal", "v19.2.0", func(ctx context.Context, t *test, c *cluster, nodeID int) {
		c.Stop(ctx, c.Node(nodeID),
			roachprodArgOption{"--sig", "15", "--wait"}, // graceful shutdown
		)
	})

	// Uses 'cockroach quit', which should drain and then request a
	// shutdown. It then waits for the process to self-exit.
	registerTest("quit", "v19.2.0", func(ctx context.Context, t *test, c *cluster, nodeID int) {
		_ = runQuit(ctx, t, c, nodeID)
	})

	// Uses 'cockroach node drain', followed by a non-graceful process
	// kill. If the drain is successful, the leases are transferred
	// successfully even if the process terminates non-gracefully.
	registerTest("drain", "v20.1.0", func(ctx context.Context, t *test, c *cluster, nodeID int) {
		buf, err := c.RunWithBuffer(ctx, t.l, c.Node(nodeID),
			"./cockroach", "node", "drain", "--insecure", "--logtostderr=INFO",
			fmt.Sprintf("--port={pgport:%d}", nodeID),
		)
		t.l.Printf("cockroach node drain:\n%s\n", buf)
		if err != nil {
			t.Fatal(err)
		}
		// First send SIGHUP to the process to force it to flush its logs
		// before terminating. Otherwise the SIGKILL below would truncate
		// the log.
		c.Stop(ctx, c.Node(nodeID),
			roachprodArgOption{"--sig", "1"},
		)
		// We use SIGKILL to terminate nodes here. Of course, an operator
		// should not do this and instead terminate with SIGTERM even
		// after a complete graceful drain. However, what this test is
		// asserting is that a graceful drain is *sufficient* to make
		// everything look smooth from the perspective of other nodes,
		// even if the node goes "kaput" after the drain.
		//
		// (This also ensures that the test exercises separate code; if we
		// used SIGTERM here we'd be combining the graceful drain by 'node
		// drain' with the graceful drain by the signal handler. If either
		// becomes broken, the test wouldn't help identify which one needs
		// attention.)
		c.Stop(ctx, c.Node(nodeID),
			roachprodArgOption{"--sig", "9", "--wait"})
	})
}

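// runQuit runs 'cockroach quit' against the given node, waits for the
// process to exit, and returns the command's output.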
func runQuit(ctx context.Context, t *test, c *cluster, nodeID int, extraArgs ...string) []byte {
	args := append([]string{
		"./cockroach", "quit", "--insecure", "--logtostderr=INFO",
		fmt.Sprintf("--port={pgport:%d}", nodeID)},
		extraArgs...)
	buf, err := c.RunWithBuffer(ctx, t.l, c.Node(nodeID), args...)
	t.l.Printf("cockroach quit:\n%s\n", buf)
	if err != nil {
		t.Fatal(err)
	}
	c.Stop(ctx, c.Node(nodeID),
		roachprodArgOption{"--sig", "0", "--wait"}, // no shutdown, just wait for exit
	)
	return buf
}

func registerQuitAllNodes(r *testRegistry) {
	// This test verifies that 'cockroach quit' can terminate all nodes
	// in the cluster: normally as long as there's quorum, then with a
	// short --drain-wait for the remaining nodes under quorum.
	r.Add(testSpec{
		Name:       "quit-all-nodes",
		Owner:      OwnerKV,
		Cluster:    makeClusterSpec(5),
		MinVersion: "v20.1.0",
		Run: func(ctx context.Context, t *test, c *cluster) {
			q := quitTest{t: t, c: c}

			// Start the cluster.
			q.init(ctx)
			// Wait for up-replication so that every range, including the
			// system ranges, has at least 3 replicas.
			q.waitForUpReplication(ctx)

			// Shut one node down gracefully with a very long wait (longer
			// than the test timeout). This is guaranteed to work - we still
			// have quorum at that point.
			q.runWithTimeout(ctx, func(ctx context.Context) { _ = runQuit(ctx, q.t, q.c, 5, "--drain-wait=1h") })

			// Now shut down the remaining 4 nodes less gracefully, with a
			// short wait.

			// For the next two nodes, we may or may not observe that
			// the graceful shutdown succeeds. It may succeed if every
			// range has enough quorum on the last 2 nodes (shut down later below).
			// It may fail if some ranges have a quorum composed of n3, n4, n5.
			// See: https://github.com/cockroachdb/cockroach/issues/48339
			q.runWithTimeout(ctx, func(ctx context.Context) { _ = runQuit(ctx, q.t, q.c, 4, "--drain-wait=4s") })
			q.runWithTimeout(ctx, func(ctx context.Context) { _ = runQuit(ctx, q.t, q.c, 3, "--drain-wait=4s") })

			// For the last two nodes, we are always under quorum. In this
			// case we can expect `quit` to always report that a hard shutdown
			// was required.
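			// (For example, the system ranges are replicated 5 ways by
			// default on this 5-node cluster, so with only n1 and n2 left
			// they are down to 2 of 5 replicas and have lost quorum; their
			// leases cannot be transferred, the drain gives up after
			// --drain-wait, and quit falls back to a hard shutdown.)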
			q.runWithTimeout(ctx, func(ctx context.Context) { expectHardShutdown(ctx, q.t, runQuit(ctx, q.t, q.c, 2, "--drain-wait=4s")) })
			q.runWithTimeout(ctx, func(ctx context.Context) { expectHardShutdown(ctx, q.t, runQuit(ctx, q.t, q.c, 1, "--drain-wait=4s")) })

			// At the end, restart all nodes. We do this to check that
			// the cluster can indeed restart, and also to please
			// the dead node detection check at the end of each test.
			q.c.Start(ctx, q.t, q.args)
		},
	})
}

// expectHardShutdown expects a "drain did not complete successfully" message.
func expectHardShutdown(ctx context.Context, t *test, cmdOut []byte) {
	if !strings.Contains(string(cmdOut), "drain did not complete successfully") {
		t.Fatalf("expected 'drain did not complete successfully' in quit output, got:\n%s", cmdOut)
	}
}