github.com/clly/consul@v1.4.5/agent/consul/snapshot_endpoint_test.go

package consul

import (
	"bytes"
	"os"
	"strings"
	"testing"
	"time"

	"github.com/hashicorp/consul/acl"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/consul/testrpc"
	"github.com/hashicorp/consul/testutil/retry"
	"github.com/hashicorp/net-rpc-msgpackrpc"
)

// verifySnapshot is a helper that does a snapshot and restore.
func verifySnapshot(t *testing.T, s *Server, dc, token string) {
	codec := rpcClient(t, s)
	defer codec.Close()

	// Set a key to a before value.
	{
		args := structs.KVSRequest{
			Datacenter: dc,
			Op:         api.KVSet,
			DirEnt: structs.DirEntry{
				Key:   "test",
				Value: []byte("hello"),
			},
			WriteRequest: structs.WriteRequest{
				Token: token,
			},
		}
		var out bool
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &args, &out); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Take a snapshot.
	args := structs.SnapshotRequest{
		Datacenter: dc,
		Token:      token,
		Op:         structs.SnapshotSave,
	}
	var reply structs.SnapshotResponse
	snap, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
		&args, bytes.NewReader([]byte("")), &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer snap.Close()

	// Read back the before value.
	{
		getR := structs.KeyRequest{
			Datacenter: dc,
			Key:        "test",
			QueryOptions: structs.QueryOptions{
				Token: token,
			},
		}
		var dirent structs.IndexedDirEntries
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Get", &getR, &dirent); err != nil {
			t.Fatalf("err: %v", err)
		}
		if len(dirent.Entries) != 1 {
			t.Fatalf("Bad: %v", dirent)
		}
		d := dirent.Entries[0]
		if string(d.Value) != "hello" {
			t.Fatalf("bad: %v", d)
		}
	}

	// Set a key to an after value.
	{
		args := structs.KVSRequest{
			Datacenter: dc,
			Op:         api.KVSet,
			DirEnt: structs.DirEntry{
				Key:   "test",
				Value: []byte("goodbye"),
			},
			WriteRequest: structs.WriteRequest{
				Token: token,
			},
		}
		var out bool
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Apply", &args, &out); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Read back the after value. We do this with a retry and stale mode so
	// we can query the server we are working with, which might not be the
	// leader.
	retry.Run(t, func(r *retry.R) {
		getR := structs.KeyRequest{
			Datacenter: dc,
			Key:        "test",
			QueryOptions: structs.QueryOptions{
				Token:      token,
				AllowStale: true,
			},
		}
		var dirent structs.IndexedDirEntries
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Get", &getR, &dirent); err != nil {
			r.Fatalf("err: %v", err)
		}
		if len(dirent.Entries) != 1 {
			r.Fatalf("Bad: %v", dirent)
		}
		d := dirent.Entries[0]
		if string(d.Value) != "goodbye" {
			r.Fatalf("bad: %v", d)
		}
	})

	// Restore the snapshot.
	args.Op = structs.SnapshotRestore
	restore, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
		&args, snap, &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer restore.Close()

	// Read back the before value post-restore. Similar rationale here; use
	// stale mode to query the server we are working with.
	retry.Run(t, func(r *retry.R) {
		getR := structs.KeyRequest{
			Datacenter: dc,
			Key:        "test",
			QueryOptions: structs.QueryOptions{
				Token:      token,
				AllowStale: true,
			},
		}
		var dirent structs.IndexedDirEntries
		if err := msgpackrpc.CallWithCodec(codec, "KVS.Get", &getR, &dirent); err != nil {
			r.Fatalf("err: %v", err)
		}
		if len(dirent.Entries) != 1 {
			r.Fatalf("Bad: %v", dirent)
		}
		d := dirent.Entries[0]
		if string(d.Value) != "hello" {
			r.Fatalf("bad: %v", d)
		}
	})
}

func TestSnapshot(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")
	verifySnapshot(t, s1, "dc1", "")
}

func TestSnapshot_LeaderState(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServer(t)
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	codec := rpcClient(t, s1)
	defer codec.Close()

	// Make a before session.
	var before string
	{
		args := structs.SessionRequest{
			Datacenter: s1.config.Datacenter,
			Op:         structs.SessionCreate,
			Session: structs.Session{
				Node: s1.config.NodeName,
				TTL:  "60s",
			},
		}
		if err := msgpackrpc.CallWithCodec(codec, "Session.Apply", &args, &before); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Take a snapshot.
	args := structs.SnapshotRequest{
		Datacenter: s1.config.Datacenter,
		Op:         structs.SnapshotSave,
	}
	var reply structs.SnapshotResponse
	snap, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
		&args, bytes.NewReader([]byte("")), &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer snap.Close()

	// Make an after session.
	var after string
	{
		args := structs.SessionRequest{
			Datacenter: s1.config.Datacenter,
			Op:         structs.SessionCreate,
			Session: structs.Session{
				Node: s1.config.NodeName,
				TTL:  "60s",
			},
		}
		if err := msgpackrpc.CallWithCodec(codec, "Session.Apply", &args, &after); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Make sure the leader has timers set up.
	if s1.sessionTimers.Get(before) == nil {
		t.Fatalf("missing session timer")
	}
	if s1.sessionTimers.Get(after) == nil {
		t.Fatalf("missing session timer")
	}

	// Restore the snapshot.
	args.Op = structs.SnapshotRestore
	restore, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
		&args, snap, &reply)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	defer restore.Close()

	// Make sure the before timer is still there, and that the after timer
	// got reverted. This proves we fully cycled the leader state.
	if s1.sessionTimers.Get(before) == nil {
		t.Fatalf("missing session timer")
	}
	if s1.sessionTimers.Get(after) != nil {
		t.Fatalf("unexpected session timer")
	}
}

func TestSnapshot_ACLDeny(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.ACLDatacenter = "dc1"
		c.ACLsEnabled = true
		c.ACLMasterToken = "root"
		c.ACLDefaultPolicy = "deny"
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()
	codec := rpcClient(t, s1)
	defer codec.Close()

	testrpc.WaitForLeader(t, s1.RPC, "dc1")

	// Take a snapshot.
	func() {
		args := structs.SnapshotRequest{
			Datacenter: "dc1",
			Op:         structs.SnapshotSave,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if !acl.IsErrPermissionDenied(err) {
			t.Fatalf("err: %v", err)
		}
	}()

	// Restore a snapshot.
	func() {
		args := structs.SnapshotRequest{
			Datacenter: "dc1",
			Op:         structs.SnapshotRestore,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s1.connPool, s1.config.Datacenter, s1.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if !acl.IsErrPermissionDenied(err) {
			t.Fatalf("err: %v", err)
		}
	}()

	// With the token in place, everything should go through.
	verifySnapshot(t, s1, "dc1", "root")
}

func TestSnapshot_Forward_Leader(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = true

		// Since we are doing multiple restores to the same leader, the
		// default short reconcile interval can cause a reconcile to get
		// aborted by our snapshot restore. By setting it much longer than
		// the test, we avoid this case.
		c.ReconcileInterval = 60 * time.Second
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()
	testrpc.WaitForTestAgent(t, s1.RPC, "dc1")

	// Try to join.
	joinLAN(t, s2, s1)
	testrpc.WaitForTestAgent(t, s2.RPC, "dc1")

	// Run against the leader and the follower to ensure we forward. When
	// we changed to Raft protocol version 3, since we only have two servers,
	// the second one isn't a voter, so the snapshot API doesn't wait for
	// that to replicate before returning success. We added some logic to
	// verifySnapshot() to poll the server we are working with in stale mode
	// in order to verify that the snapshot contents are there. Previously,
	// with Raft protocol version 2, the snapshot API would wait until the
	// follower got the information as well since it was required to meet
	// the quorum (2/2 servers), so things were synchronized properly with
	// no special logic.
	verifySnapshot(t, s1, "dc1", "")
	verifySnapshot(t, s2, "dc1", "")
}

func TestSnapshot_Forward_Datacenter(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerDC(t, "dc1")
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerDC(t, "dc2")
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	testrpc.WaitForTestAgent(t, s1.RPC, "dc1")
	testrpc.WaitForTestAgent(t, s2.RPC, "dc2")

	// Try to WAN join.
	joinWAN(t, s2, s1)
	retry.Run(t, func(r *retry.R) {
		if got, want := len(s1.WANMembers()), 2; got < want {
			r.Fatalf("got %d WAN members want at least %d", got, want)
		}
	})

	// Run a snapshot from each server locally and remotely to ensure we
	// forward.
	for _, s := range []*Server{s1, s2} {
		verifySnapshot(t, s, "dc1", "")
		verifySnapshot(t, s, "dc2", "")
	}
}

func TestSnapshot_AllowStale(t *testing.T) {
	t.Parallel()
	dir1, s1 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir1)
	defer s1.Shutdown()

	dir2, s2 := testServerWithConfig(t, func(c *Config) {
		c.Bootstrap = false
	})
	defer os.RemoveAll(dir2)
	defer s2.Shutdown()

	// Run against the servers, which haven't been set up to establish a
	// leader, and make sure we get a no leader error.
	for _, s := range []*Server{s1, s2} {
		// Take a snapshot.
		args := structs.SnapshotRequest{
			Datacenter: s.config.Datacenter,
			Op:         structs.SnapshotSave,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if err == nil || !strings.Contains(err.Error(), structs.ErrNoLeader.Error()) {
			t.Fatalf("err: %v", err)
		}
	}

	// Run in stale mode and make sure we get an error from Raft (the
	// snapshot was attempted), and not a no leader error.
	for _, s := range []*Server{s1, s2} {
		// Take a snapshot.
		args := structs.SnapshotRequest{
			Datacenter: s.config.Datacenter,
			AllowStale: true,
			Op:         structs.SnapshotSave,
		}
		var reply structs.SnapshotResponse
		_, err := SnapshotRPC(s.connPool, s.config.Datacenter, s.config.RPCAddr, false,
			&args, bytes.NewReader([]byte("")), &reply)
		if err == nil || !strings.Contains(err.Error(), "Raft error when taking snapshot") {
			t.Fatalf("err: %v", err)
		}
	}
}