github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/roachtest/inconsistency.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package main
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"time"
    17  
    18  	_ "github.com/lib/pq"
    19  )
    20  
    21  func registerInconsistency(r *testRegistry) {
    22  	r.Add(testSpec{
    23  		Name:       fmt.Sprintf("inconsistency"),
    24  		Owner:      OwnerKV,
    25  		MinVersion: "v19.2.2", // https://github.com/cockroachdb/cockroach/pull/42149 is new in 19.2.2
    26  		Cluster:    makeClusterSpec(3),
    27  		Run:        runInconsistency,
    28  	})
    29  }
    30  
    31  func runInconsistency(ctx context.Context, t *test, c *cluster) {
    32  	// With encryption on, our attempt below to manually introduce an inconsistency
    33  	// will fail.
    34  	c.encryptDefault = false
    35  
    36  	nodes := c.Range(1, 3)
    37  	c.Put(ctx, cockroach, "./cockroach", nodes)
    38  	c.Start(ctx, t, nodes)
    39  
    40  	{
    41  		db := c.Conn(ctx, 1)
    42  		_, err := db.ExecContext(ctx, `SET CLUSTER SETTING server.consistency_check.interval = '10ms'`)
    43  		if err != nil {
    44  			t.Fatal(err)
    45  		}
    46  		waitForFullReplication(t, db)
    47  		_, db = db.Close(), nil
    48  	}
    49  
    50  	c.Stop(ctx, nodes)
    51  
    52  	// KV pair created via:
    53  	//
    54  	// t.Errorf("0x%x", EncodeKey(MVCCKey{
    55  	// 	Key: keys.TransactionKey(keys.LocalMax, uuid.Nil),
    56  	// }))
    57  	// for i := 0; i < 3; i++ {
    58  	// 	var m enginepb.MVCCMetadata
    59  	// 	var txn enginepb.TxnMeta
    60  	// 	txn.Key = []byte(fmt.Sprintf("fake transaction %d", i))
    61  	// 	var err error
    62  	// 	m.RawBytes, err = protoutil.Marshal(&txn)
    63  	// 	require.NoError(t, err)
    64  	// 	data, err := protoutil.Marshal(&m)
    65  	// 	require.NoError(t, err)
    66  	// 	t.Error(fmt.Sprintf("0x%x", data))
    67  	// }
    68  	//
    69  	// Output:
    70  	// 0x016b1202000174786e2d0000000000000000000000000000000000
    71  	// 0x120408001000180020002800322a0a10000000000000000000000000000000001a1266616b65207472616e73616374696f6e20302a004a00
    72  	// 0x120408001000180020002800322a0a10000000000000000000000000000000001a1266616b65207472616e73616374696f6e20312a004a00
    73  	// 0x120408001000180020002800322a0a10000000000000000000000000000000001a1266616b65207472616e73616374696f6e20322a004a00
    74  
    75  	c.Run(ctx, c.Node(1), "./cockroach debug rocksdb put --hex --db={store-dir} "+
    76  		"0x016b1202000174786e2d0000000000000000000000000000000000 "+
    77  		"0x12040800100018002000280032280a10000000000000000000000000000000001a1066616b65207472616e73616374696f6e2a004a00")
    78  
    79  	m := newMonitor(ctx, c)
    80  	c.Start(ctx, t, nodes)
    81  	m.Go(func(ctx context.Context) error {
    82  		select {
    83  		case <-time.After(5 * time.Minute):
    84  		case <-ctx.Done():
    85  		}
    86  		return nil
    87  	})
    88  	if err := m.WaitE(); err == nil {
    89  		t.Fatal("expected a node to crash")
    90  	}
    91  
    92  	time.Sleep(20 * time.Second) // wait for liveness to time out for dead nodes
    93  
    94  	db := c.Conn(ctx, 2)
    95  	rows, err := db.Query(`SELECT node_id FROM crdb_internal.gossip_nodes WHERE is_live = false;`)
    96  	if err != nil {
    97  		t.Fatal(err)
    98  	}
    99  	var ids []int
   100  	for rows.Next() {
   101  		var id int
   102  		if err := rows.Scan(&id); err != nil {
   103  			t.Fatal(err)
   104  		}
   105  		ids = append(ids, id)
   106  	}
   107  	if err := rows.Err(); err != nil {
   108  		t.Fatal(err)
   109  	}
   110  	if len(ids) != 1 {
   111  		t.Fatalf("expected one dead NodeID, got %v", ids)
   112  	}
   113  	const expr = "this.node.is.terminating.because.a.replica.inconsistency.was.detected"
   114  	c.Run(ctx, c.Node(1), "grep "+
   115  		expr+" "+"{log-dir}/cockroach.log")
   116  
   117  	if err := c.StartE(ctx, c.Node(1)); err == nil {
   118  		// NB: we can't easily verify the error because there's a lot of output
   119  		// which isn't fully included in the error returned from StartE.
   120  		t.Fatalf("node restart should have failed")
   121  	}
   122  
   123  	// roachtest checks that no nodes are down when the test finishes, but in this
   124  	// case we have a down node that we can't restart. Remove the data dir, which
   125  	// tells roachtest to ignore this node.
   126  	c.Wipe(ctx, c.Node(1))
   127  }