github.phpd.cn/hashicorp/consul@v1.4.5/agent/consul/snapshot_endpoint_test.go

package consul

import (
	"bytes"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/testrpc"
	"github.com/hashicorp/consul/testutil/retry"
	"github.com/hashicorp/net-rpc-msgpackrpc"
)

// verifySnapshot is a helper that does a snapshot and restore.
func verifySnapshot(t *testing.T, s *Server, dc, token string) {
	codec := rpcClient(t, s)
	defer codec.Close()

	// Set a key to a before value.
	{
		args := structs.KVSRequest{
			Datacenter: dc,
			Op:         api.KVSet,
			DirEnt: structs.DirEntry{
				Key:   "test",
				Value: []byte("hello"),
			},
			WriteRequest: structs.WriteRequest{
				Token: token,
			},
		}
		var out bool
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &args, &out); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Take a snapshot.
	args := structs.SnapshotRequest{
		Datacenter: dc,
		Token:      token,
		Op:         structs.SnapshotSave,
	}
	var reply structs.SnapshotResponse
	snap, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
		&args, bytes.NewReader([]byte("")), &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer snap.Close()

	// Read back the before value.
	{
		getR := structs.KeyRequest{
			Datacenter: dc,
			Key:        "test",
			QueryOptions: structs.QueryOptions{
				Token: token,
			},
		}
		var dirent structs.IndexedDirEntries
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Get", &getR, &dirent); err != nil {
			t.Fatalf("err: %v", err)
		}
		if len(dirent.Entries) != 1 {
			t.Fatalf("Bad: %v", dirent)
		}
		d := dirent.Entries[0]
		if string(d.Value) != "hello" {
			t.Fatalf("bad: %v", d)
		}
	}

	// Set a key to an after value.
	{
		args := structs.KVSRequest{
			Datacenter: dc,
			Op:         api.KVSet,
			DirEnt: structs.DirEntry{
				Key:   "test",
				Value: []byte("goodbye"),
			},
			WriteRequest: structs.WriteRequest{
				Token: token,
			},
		}
		var out bool
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &args, &out); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Read back the after value. We do this with a retry and stale mode so
	// we can query the server we are working with, which might not be the
	// leader.
	retry.Run(t, func(r *retry.R) {
		getR := structs.KeyRequest{
			Datacenter: dc,
			Key:        "test",
			QueryOptions: structs.QueryOptions{
				Token:      token,
				AllowStale: true,
			},
		}
		var dirent structs.IndexedDirEntries
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Get", &getR, &dirent); err != nil {
			r.Fatalf("err: %v", err)
		}
		if len(dirent.Entries) != 1 {
			r.Fatalf("Bad: %v", dirent)
		}
		d := dirent.Entries[0]
		if string(d.Value) != "goodbye" {
			r.Fatalf("bad: %v", d)
		}
	})

	// Restore the snapshot.
	args.Op = structs.SnapshotRestore
	restore, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
		&args, snap, &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer restore.Close()

	// Read back the before value post-restore. Similar rationale here; use
	// stale to query the server we are working with.
	retry.Run(t, func(r *retry.R) {
		getR := structs.KeyRequest{
			Datacenter: dc,
			Key:        "test",
			QueryOptions: structs.QueryOptions{
				Token:      token,
				AllowStale: true,
			},
		}
		var dirent structs.IndexedDirEntries
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Get", &getR, &dirent); err != nil {
			r.Fatalf("err: %v", err)
		}
		if len(dirent.Entries) != 1 {
			r.Fatalf("Bad: %v", dirent)
		}
		d := dirent.Entries[0]
		if string(d.Value) != "hello" {
			r.Fatalf("bad: %v", d)
		}
	})
}
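
// snapshotRoundTrip is an illustrative sketch (not part of the original
// test suite; the helper name is hypothetical) of the same save/restore
// flow that verifySnapshot exercises, shown in isolation: the snapshot
// stream is drained into a buffer (a real client would more likely stream
// it to a file) and then fed back in as the body of a restore request. It
// assumes the same server plumbing that verifySnapshot uses.
func snapshotRoundTrip(t *testing.T, s *Server, dc, token string) {
	// Save: the request carries no body, so an empty reader is passed in.
	args := structs.SnapshotRequest{
		Datacenter: dc,
		Token:      token,
		Op:         structs.SnapshotSave,
	}
	var reply structs.SnapshotResponse
	snap, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
		&args, bytes.NewReader([]byte("")), &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer snap.Close()

	// Capture the snapshot contents in memory.
	var buf bytes.Buffer
	if _, err := buf.ReadFrom(snap); err != nil {
		t.Fatalf("err: %v", err)
	}

	// Restore: the buffered snapshot becomes the body of the request.
	args.Op = structs.SnapshotRestore
	restore, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
		&args, &buf, &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer restore.Close()
}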

func TestSnapshot(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	verifySnapshot(t, s1, "dc1", "")
}

func TestSnapshot_LeaderState(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	codec := rpcClient(t, s1)
	defer codec.Close()

	// Make a before session.
	var before string
	{
		args := structs.SessionRequest{
			Datacenter: s1.config.Datacenter,
			Op:         structs.SessionCreate,
			Session: structs.Session{
				Node: s1.config.NodeName,
				TTL:  "60s",
			},
		}
		if err := msgpackrpc.CallWithCodec(codec, "Session.Apply", &args, &before); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Take a snapshot.
	args := structs.SnapshotRequest{
		Datacenter: s1.config.Datacenter,
		Op:         structs.SnapshotSave,
	}
	var reply structs.SnapshotResponse
	snap, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
		&args, bytes.NewReader([]byte("")), &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer snap.Close()

	// Make an after session.
	var after string
	{
		args := structs.SessionRequest{
			Datacenter: s1.config.Datacenter,
			Op:         structs.SessionCreate,
			Session: structs.Session{
				Node: s1.config.NodeName,
				TTL:  "60s",
			},
		}
		if err := msgpackrpc.CallWithCodec(codec, "Session.Apply", &args, &after); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Make sure the leader has timers set up.
	if s1.sessionTimers.Get(before) == nil {
		t.Fatalf("missing session timer")
	}
	if s1.sessionTimers.Get(after) == nil {
		t.Fatalf("missing session timer")
	}

	// Restore the snapshot.
	args.Op = structs.SnapshotRestore
	restore, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
		&args, snap, &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer restore.Close()

	// Make sure the before timer is still there, and that the after timer
	// got reverted. This proves we fully cycled the leader state.
	if s1.sessionTimers.Get(before) == nil {
		t.Fatalf("missing session timer")
	}
	if s1.sessionTimers.Get(after) != nil {
		t.Fatalf("unexpected session timer")
	}
}

func TestSnapshot_ACLDeny(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Take a snapshot.
	func() {
		args := structs.SnapshotRequest{
			Datacenter: "dc1",
			Op:         structs.SnapshotSave,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if !acl.IsErrPermissionDenied(err) {
			t.Fatalf("err: %v", err)
		}
	}()

	// Restore a snapshot.
	func() {
		args := structs.SnapshotRequest{
			Datacenter: "dc1",
			Op:         structs.SnapshotRestore,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if !acl.IsErrPermissionDenied(err) {
			t.Fatalf("err: %v", err)
		}
	}()

	// With the token in place, everything should go through.
	verifySnapshot(t, s1, "dc1", "root")
}

func TestSnapshot_Forward_Leader(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = true

		// Since we are doing multiple restores to the same leader,
		// the default reconcile interval is short enough that a
		// reconcile can get aborted by our snapshot restore. By
		// setting it much longer than the test, we avoid that case.
		c.ReconcileInterval = 60 * time.Second
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()
	testrpc.WaitForTestAgent(t, s1.RPC, "dc1")

	// Try to join.
	joinLAN(t, s2, s1)
	testrpc.WaitForTestAgent(t, s2.RPC, "dc1")

	// Run against the leader and the follower to ensure we forward. Since
	// we moved to Raft protocol version 3, the second server isn't a voter
	// (we only have two servers), so the snapshot API doesn't wait for the
	// restore to replicate to it before returning success. To compensate,
	// verifySnapshot() polls the server we are working with in stale mode
	// to verify that the snapshot contents are there. Previously, with Raft
	// protocol version 2, the snapshot API would wait until the follower
	// got the data as well, since it was required to meet the quorum
	// (2/2 servers), so things stayed synchronized without any special
	// logic.
	verifySnapshot(t, s1, "dc1", "")
	verifySnapshot(t, s2, "dc1", "")
}

func TestSnapshot_Forward_Datacenter(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerDC(t, "dc1")
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDC(t, "dc2")
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
	testrpc.WaitForTestAgent(t, s2.RPC, "dc2")

	// Try to WAN join.
	joinWAN(t, s2, s1)
	retry.Run(t, func(r *retry.R) {
		if got, want := len(s1.WANMembers()), 2; got < want {
			r.Fatalf("got %d WAN members want at least %d", got, want)
		}
	})

	// Run a snapshot from each server locally and remotely to ensure we
	// forward.
	for _, s := range []*Server{s1, s2} {
		verifySnapshot(t, s, "dc1", "")
		verifySnapshot(t, s, "dc2", "")
	}
}

func TestSnapshot_AllowStale(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	// Run against the servers, which haven't been set up to establish a
	// leader, and make sure we get a no leader error.
	for _, s := range []*Server{s1, s2} {
		// Take a snapshot.
		args := structs.SnapshotRequest{
			Datacenter: s.config.Datacenter,
			Op:         structs.SnapshotSave,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if err == nil || !strings.Contains(err.Error(), structs.ErrNoLeader.Error()) {
			t.Fatalf("err: %v", err)
		}
	}

	// Run in stale mode and make sure we get an error from Raft (snapshot
	// was attempted), and not a no leader error.
	for _, s := range []*Server{s1, s2} {
		// Take a snapshot.
		args := structs.SnapshotRequest{
			Datacenter: s.config.Datacenter,
			AllowStale: true,
			Op:         structs.SnapshotSave,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if err == nil || !strings.Contains(err.Error(), "Raft error when taking snapshot") {
			t.Fatalf("err: %v", err)
		}
	}
}
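
// takeStaleSnapshot is an illustrative sketch (not part of the original
// test suite; the helper name is hypothetical) of the request shape
// exercised by TestSnapshot_AllowStale: with AllowStale set, the contacted
// server attempts the snapshot itself instead of returning a no-leader
// error, so any failure surfaces from the local Raft snapshot attempt. The
// helper simply reports whether the attempt succeeded and discards the
// snapshot stream.
func takeStaleSnapshot(s *Server, dc, token string) error {
	args := structs.SnapshotRequest{
		Datacenter: dc,
		Token:      token,
		AllowStale: true,
		Op:         structs.SnapshotSave,
	}
	var reply structs.SnapshotResponse
	snap, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
		&args, bytes.NewReader([]byte("")), &reply)
	if err != nil {
		return err
	}
	return snap.Close()
}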