github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/networkdb/networkdb_test.go (about) 1 package networkdb 2 3 import ( 4 "context" 5 "fmt" 6 "net" 7 "os" 8 "strconv" 9 "sync/atomic" 10 "testing" 11 "time" 12 13 "github.com/containerd/log" 14 "github.com/Prakhar-Agarwal-byte/moby/pkg/stringid" 15 "github.com/docker/go-events" 16 "github.com/hashicorp/memberlist" 17 "gotest.tools/v3/assert" 18 is "gotest.tools/v3/assert/cmp" 19 "gotest.tools/v3/poll" 20 ) 21 22 var dbPort int32 = 10000 23 24 func TestMain(m *testing.M) { 25 os.WriteFile("/proc/sys/net/ipv6/conf/lo/disable_ipv6", []byte{'0', '\n'}, 0o644) 26 log.SetLevel("error") 27 os.Exit(m.Run()) 28 } 29 30 func launchNode(t *testing.T, conf Config) *NetworkDB { 31 t.Helper() 32 db, err := New(&conf) 33 assert.NilError(t, err) 34 return db 35 } 36 37 func createNetworkDBInstances(t *testing.T, num int, namePrefix string, conf *Config) []*NetworkDB { 38 t.Helper() 39 var dbs []*NetworkDB 40 for i := 0; i < num; i++ { 41 localConfig := *conf 42 localConfig.Hostname = fmt.Sprintf("%s%d", namePrefix, i+1) 43 localConfig.NodeID = stringid.TruncateID(stringid.GenerateRandomID()) 44 localConfig.BindPort = int(atomic.AddInt32(&dbPort, 1)) 45 db := launchNode(t, localConfig) 46 if i != 0 { 47 assert.Check(t, db.Join([]string{fmt.Sprintf("localhost:%d", db.config.BindPort-1)})) 48 } 49 50 dbs = append(dbs, db) 51 } 52 53 // Wait till the cluster creation is successful 54 check := func(t poll.LogT) poll.Result { 55 // Check that the cluster is properly created 56 for i := 0; i < num; i++ { 57 if num != len(dbs[i].ClusterPeers()) { 58 return poll.Continue("%s:Waiting for cluster peers to be established", dbs[i].config.Hostname) 59 } 60 } 61 return poll.Success() 62 } 63 poll.WaitOn(t, check, poll.WithDelay(2*time.Second), poll.WithTimeout(20*time.Second)) 64 65 return dbs 66 } 67 68 func closeNetworkDBInstances(t *testing.T, dbs []*NetworkDB) { 69 t.Helper() 70 log.G(context.TODO()).Print("Closing DB instances...") 71 for _, db := range dbs { 72 db.Close() 73 } 74 } 75 76 func (db *NetworkDB) verifyNodeExistence(t *testing.T, node string, present bool) { 77 t.Helper() 78 for i := 0; i < 80; i++ { 79 db.RLock() 80 _, ok := db.nodes[node] 81 db.RUnlock() 82 if present && ok { 83 return 84 } 85 86 if !present && !ok { 87 return 88 } 89 90 time.Sleep(50 * time.Millisecond) 91 } 92 93 t.Errorf("%v(%v): Node existence verification for node %s failed", db.config.Hostname, db.config.NodeID, node) 94 } 95 96 func (db *NetworkDB) verifyNetworkExistence(t *testing.T, node string, id string, present bool) { 97 t.Helper() 98 99 const sleepInterval = 50 * time.Millisecond 100 var maxRetries int64 101 if dl, ok := t.Deadline(); ok { 102 maxRetries = int64(time.Until(dl) / sleepInterval) 103 } else { 104 maxRetries = 80 105 } 106 for i := int64(0); i < maxRetries; i++ { 107 db.RLock() 108 nn, nnok := db.networks[node] 109 if nnok { 110 n, ok := nn[id] 111 var leaving bool 112 if ok { 113 leaving = n.leaving 114 } 115 db.RUnlock() 116 if present && ok { 117 return 118 } 119 120 if !present && 121 ((ok && leaving) || 122 !ok) { 123 return 124 } 125 } else { 126 db.RUnlock() 127 } 128 129 time.Sleep(sleepInterval) 130 } 131 132 t.Error("Network existence verification failed") 133 } 134 135 func (db *NetworkDB) verifyEntryExistence(t *testing.T, tname, nid, key, value string, present bool) { 136 t.Helper() 137 n := 80 138 for i := 0; i < n; i++ { 139 v, err := db.GetEntry(tname, nid, key) 140 if present && err == nil && string(v) == value { 141 return 142 } 143 if err != nil && !present { 144 return 145 } 146 147 time.Sleep(50 * time.Millisecond) 148 } 149 150 t.Errorf("Entry existence verification test failed for %v(%v)", db.config.Hostname, db.config.NodeID) 151 } 152 153 func testWatch(t *testing.T, ch chan events.Event, ev interface{}, tname, nid, key, value string) { 154 t.Helper() 155 select { 156 case rcvdEv := <-ch: 157 assert.Check(t, is.Equal(fmt.Sprintf("%T", rcvdEv), fmt.Sprintf("%T", ev))) 158 switch typ := rcvdEv.(type) { 159 case CreateEvent: 160 assert.Check(t, is.Equal(tname, typ.Table)) 161 assert.Check(t, is.Equal(nid, typ.NetworkID)) 162 assert.Check(t, is.Equal(key, typ.Key)) 163 assert.Check(t, is.Equal(value, string(typ.Value))) 164 case UpdateEvent: 165 assert.Check(t, is.Equal(tname, typ.Table)) 166 assert.Check(t, is.Equal(nid, typ.NetworkID)) 167 assert.Check(t, is.Equal(key, typ.Key)) 168 assert.Check(t, is.Equal(value, string(typ.Value))) 169 case DeleteEvent: 170 assert.Check(t, is.Equal(tname, typ.Table)) 171 assert.Check(t, is.Equal(nid, typ.NetworkID)) 172 assert.Check(t, is.Equal(key, typ.Key)) 173 } 174 case <-time.After(time.Second): 175 t.Fail() 176 return 177 } 178 } 179 180 func TestNetworkDBSimple(t *testing.T) { 181 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 182 closeNetworkDBInstances(t, dbs) 183 } 184 185 func TestNetworkDBJoinLeaveNetwork(t *testing.T) { 186 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 187 188 err := dbs[0].JoinNetwork("network1") 189 assert.NilError(t, err) 190 191 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 192 193 err = dbs[0].LeaveNetwork("network1") 194 assert.NilError(t, err) 195 196 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", false) 197 closeNetworkDBInstances(t, dbs) 198 } 199 200 func TestNetworkDBJoinLeaveNetworks(t *testing.T) { 201 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 202 203 n := 10 204 for i := 1; i <= n; i++ { 205 err := dbs[0].JoinNetwork(fmt.Sprintf("network0%d", i)) 206 assert.NilError(t, err) 207 } 208 209 for i := 1; i <= n; i++ { 210 err := dbs[1].JoinNetwork(fmt.Sprintf("network1%d", i)) 211 assert.NilError(t, err) 212 } 213 214 for i := 1; i <= n; i++ { 215 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, fmt.Sprintf("network0%d", i), true) 216 } 217 218 for i := 1; i <= n; i++ { 219 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, fmt.Sprintf("network1%d", i), true) 220 } 221 222 for i := 1; i <= n; i++ { 223 err := dbs[0].LeaveNetwork(fmt.Sprintf("network0%d", i)) 224 assert.NilError(t, err) 225 } 226 227 for i := 1; i <= n; i++ { 228 err := dbs[1].LeaveNetwork(fmt.Sprintf("network1%d", i)) 229 assert.NilError(t, err) 230 } 231 232 for i := 1; i <= n; i++ { 233 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, fmt.Sprintf("network0%d", i), false) 234 } 235 236 for i := 1; i <= n; i++ { 237 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, fmt.Sprintf("network1%d", i), false) 238 } 239 240 closeNetworkDBInstances(t, dbs) 241 } 242 243 func TestNetworkDBCRUDTableEntry(t *testing.T) { 244 dbs := createNetworkDBInstances(t, 3, "node", DefaultConfig()) 245 246 err := dbs[0].JoinNetwork("network1") 247 assert.NilError(t, err) 248 249 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 250 251 err = dbs[1].JoinNetwork("network1") 252 assert.NilError(t, err) 253 254 err = dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 255 assert.NilError(t, err) 256 257 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", true) 258 dbs[2].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", false) 259 260 err = dbs[0].UpdateEntry("test_table", "network1", "test_key", []byte("test_updated_value")) 261 assert.NilError(t, err) 262 263 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_updated_value", true) 264 265 err = dbs[0].DeleteEntry("test_table", "network1", "test_key") 266 assert.NilError(t, err) 267 268 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "", false) 269 270 closeNetworkDBInstances(t, dbs) 271 } 272 273 func TestNetworkDBCRUDTableEntries(t *testing.T) { 274 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 275 276 err := dbs[0].JoinNetwork("network1") 277 assert.NilError(t, err) 278 279 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 280 281 err = dbs[1].JoinNetwork("network1") 282 assert.NilError(t, err) 283 284 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 285 286 n := 10 287 for i := 1; i <= n; i++ { 288 err = dbs[0].CreateEntry("test_table", "network1", 289 fmt.Sprintf("test_key0%d", i), 290 []byte(fmt.Sprintf("test_value0%d", i))) 291 assert.NilError(t, err) 292 } 293 294 for i := 1; i <= n; i++ { 295 err = dbs[1].CreateEntry("test_table", "network1", 296 fmt.Sprintf("test_key1%d", i), 297 []byte(fmt.Sprintf("test_value1%d", i))) 298 assert.NilError(t, err) 299 } 300 301 for i := 1; i <= n; i++ { 302 dbs[0].verifyEntryExistence(t, "test_table", "network1", 303 fmt.Sprintf("test_key1%d", i), 304 fmt.Sprintf("test_value1%d", i), true) 305 assert.NilError(t, err) 306 } 307 308 for i := 1; i <= n; i++ { 309 dbs[1].verifyEntryExistence(t, "test_table", "network1", 310 fmt.Sprintf("test_key0%d", i), 311 fmt.Sprintf("test_value0%d", i), true) 312 assert.NilError(t, err) 313 } 314 315 // Verify deletes 316 for i := 1; i <= n; i++ { 317 err = dbs[0].DeleteEntry("test_table", "network1", 318 fmt.Sprintf("test_key0%d", i)) 319 assert.NilError(t, err) 320 } 321 322 for i := 1; i <= n; i++ { 323 err = dbs[1].DeleteEntry("test_table", "network1", 324 fmt.Sprintf("test_key1%d", i)) 325 assert.NilError(t, err) 326 } 327 328 for i := 1; i <= n; i++ { 329 dbs[0].verifyEntryExistence(t, "test_table", "network1", 330 fmt.Sprintf("test_key1%d", i), "", false) 331 assert.NilError(t, err) 332 } 333 334 for i := 1; i <= n; i++ { 335 dbs[1].verifyEntryExistence(t, "test_table", "network1", 336 fmt.Sprintf("test_key0%d", i), "", false) 337 assert.NilError(t, err) 338 } 339 340 closeNetworkDBInstances(t, dbs) 341 } 342 343 func TestNetworkDBNodeLeave(t *testing.T) { 344 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 345 346 err := dbs[0].JoinNetwork("network1") 347 assert.NilError(t, err) 348 349 err = dbs[1].JoinNetwork("network1") 350 assert.NilError(t, err) 351 352 err = dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 353 assert.NilError(t, err) 354 355 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", true) 356 357 dbs[0].Close() 358 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", false) 359 dbs[1].Close() 360 } 361 362 func TestNetworkDBWatch(t *testing.T) { 363 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 364 err := dbs[0].JoinNetwork("network1") 365 assert.NilError(t, err) 366 367 err = dbs[1].JoinNetwork("network1") 368 assert.NilError(t, err) 369 370 ch, cancel := dbs[1].Watch("", "") 371 372 err = dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 373 assert.NilError(t, err) 374 375 testWatch(t, ch.C, CreateEvent{}, "test_table", "network1", "test_key", "test_value") 376 377 err = dbs[0].UpdateEntry("test_table", "network1", "test_key", []byte("test_updated_value")) 378 assert.NilError(t, err) 379 380 testWatch(t, ch.C, UpdateEvent{}, "test_table", "network1", "test_key", "test_updated_value") 381 382 err = dbs[0].DeleteEntry("test_table", "network1", "test_key") 383 assert.NilError(t, err) 384 385 testWatch(t, ch.C, DeleteEvent{}, "test_table", "network1", "test_key", "") 386 387 cancel() 388 closeNetworkDBInstances(t, dbs) 389 } 390 391 func TestNetworkDBBulkSync(t *testing.T) { 392 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 393 394 err := dbs[0].JoinNetwork("network1") 395 assert.NilError(t, err) 396 397 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 398 399 n := 1000 400 for i := 1; i <= n; i++ { 401 err = dbs[0].CreateEntry("test_table", "network1", 402 fmt.Sprintf("test_key0%d", i), 403 []byte(fmt.Sprintf("test_value0%d", i))) 404 assert.NilError(t, err) 405 } 406 407 err = dbs[1].JoinNetwork("network1") 408 assert.NilError(t, err) 409 410 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 411 412 for i := 1; i <= n; i++ { 413 dbs[1].verifyEntryExistence(t, "test_table", "network1", 414 fmt.Sprintf("test_key0%d", i), 415 fmt.Sprintf("test_value0%d", i), true) 416 assert.NilError(t, err) 417 } 418 419 closeNetworkDBInstances(t, dbs) 420 } 421 422 func TestNetworkDBCRUDMediumCluster(t *testing.T) { 423 n := 5 424 425 dbs := createNetworkDBInstances(t, n, "node", DefaultConfig()) 426 427 for i := 0; i < n; i++ { 428 for j := 0; j < n; j++ { 429 if i == j { 430 continue 431 } 432 433 dbs[i].verifyNodeExistence(t, dbs[j].config.NodeID, true) 434 } 435 } 436 437 for i := 0; i < n; i++ { 438 err := dbs[i].JoinNetwork("network1") 439 assert.NilError(t, err) 440 } 441 442 for i := 0; i < n; i++ { 443 for j := 0; j < n; j++ { 444 dbs[i].verifyNetworkExistence(t, dbs[j].config.NodeID, "network1", true) 445 } 446 } 447 448 err := dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 449 assert.NilError(t, err) 450 451 for i := 1; i < n; i++ { 452 dbs[i].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", true) 453 } 454 455 err = dbs[0].UpdateEntry("test_table", "network1", "test_key", []byte("test_updated_value")) 456 assert.NilError(t, err) 457 458 for i := 1; i < n; i++ { 459 dbs[i].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_updated_value", true) 460 } 461 462 err = dbs[0].DeleteEntry("test_table", "network1", "test_key") 463 assert.NilError(t, err) 464 465 for i := 1; i < n; i++ { 466 dbs[i].verifyEntryExistence(t, "test_table", "network1", "test_key", "", false) 467 } 468 469 for i := 1; i < n; i++ { 470 _, err = dbs[i].GetEntry("test_table", "network1", "test_key") 471 assert.Check(t, is.ErrorContains(err, "")) 472 assert.Check(t, is.Contains(err.Error(), "deleted and pending garbage collection"), err) 473 } 474 475 closeNetworkDBInstances(t, dbs) 476 } 477 478 func TestNetworkDBNodeJoinLeaveIteration(t *testing.T) { 479 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 480 481 dbChangeWitness := func(db *NetworkDB) func(network string, expectNodeCount int) { 482 staleNetworkTime := db.networkClock.Time() 483 return func(network string, expectNodeCount int) { 484 check := func(t poll.LogT) poll.Result { 485 networkTime := db.networkClock.Time() 486 if networkTime <= staleNetworkTime { 487 return poll.Continue("network time is stale, no change registered yet.") 488 } 489 count := -1 490 db.Lock() 491 if nodes, ok := db.networkNodes[network]; ok { 492 count = len(nodes) 493 } 494 db.Unlock() 495 if count != expectNodeCount { 496 return poll.Continue("current number of nodes is %d, expect %d.", count, expectNodeCount) 497 } 498 return poll.Success() 499 } 500 t.Helper() 501 poll.WaitOn(t, check, poll.WithTimeout(3*time.Second), poll.WithDelay(5*time.Millisecond)) 502 } 503 } 504 505 // Single node Join/Leave 506 witness0 := dbChangeWitness(dbs[0]) 507 err := dbs[0].JoinNetwork("network1") 508 assert.NilError(t, err) 509 witness0("network1", 1) 510 511 witness0 = dbChangeWitness(dbs[0]) 512 err = dbs[0].LeaveNetwork("network1") 513 assert.NilError(t, err) 514 witness0("network1", 0) 515 516 // Multiple nodes Join/Leave 517 witness0, witness1 := dbChangeWitness(dbs[0]), dbChangeWitness(dbs[1]) 518 err = dbs[0].JoinNetwork("network1") 519 assert.NilError(t, err) 520 521 err = dbs[1].JoinNetwork("network1") 522 assert.NilError(t, err) 523 524 // Wait for the propagation on db[0] 525 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 526 witness0("network1", 2) 527 if n, ok := dbs[0].networks[dbs[0].config.NodeID]["network1"]; !ok || n.leaving { 528 t.Fatalf("The network should not be marked as leaving:%t", n.leaving) 529 } 530 531 // Wait for the propagation on db[1] 532 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 533 witness1("network1", 2) 534 if n, ok := dbs[1].networks[dbs[1].config.NodeID]["network1"]; !ok || n.leaving { 535 t.Fatalf("The network should not be marked as leaving:%t", n.leaving) 536 } 537 538 // Try a quick leave/join 539 witness0, witness1 = dbChangeWitness(dbs[0]), dbChangeWitness(dbs[1]) 540 err = dbs[0].LeaveNetwork("network1") 541 assert.NilError(t, err) 542 err = dbs[0].JoinNetwork("network1") 543 assert.NilError(t, err) 544 545 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 546 witness0("network1", 2) 547 548 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 549 witness1("network1", 2) 550 551 closeNetworkDBInstances(t, dbs) 552 } 553 554 func TestNetworkDBGarbageCollection(t *testing.T) { 555 keysWriteDelete := 5 556 config := DefaultConfig() 557 config.reapEntryInterval = 30 * time.Second 558 config.StatsPrintPeriod = 15 * time.Second 559 560 dbs := createNetworkDBInstances(t, 3, "node", config) 561 562 // 2 Nodes join network 563 err := dbs[0].JoinNetwork("network1") 564 assert.NilError(t, err) 565 566 err = dbs[1].JoinNetwork("network1") 567 assert.NilError(t, err) 568 569 for i := 0; i < keysWriteDelete; i++ { 570 err = dbs[i%2].CreateEntry("testTable", "network1", "key-"+strconv.Itoa(i), []byte("value")) 571 assert.NilError(t, err) 572 } 573 time.Sleep(time.Second) 574 for i := 0; i < keysWriteDelete; i++ { 575 err = dbs[i%2].DeleteEntry("testTable", "network1", "key-"+strconv.Itoa(i)) 576 assert.NilError(t, err) 577 } 578 for i := 0; i < 2; i++ { 579 dbs[i].Lock() 580 assert.Check(t, is.Equal(int64(keysWriteDelete), dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber.Load()), "entries number should match") 581 dbs[i].Unlock() 582 } 583 584 // from this point the timer for the garbage collection started, wait 5 seconds and then join a new node 585 time.Sleep(5 * time.Second) 586 587 err = dbs[2].JoinNetwork("network1") 588 assert.NilError(t, err) 589 for i := 0; i < 3; i++ { 590 dbs[i].Lock() 591 assert.Check(t, is.Equal(int64(keysWriteDelete), dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber.Load()), "entries number should match") 592 dbs[i].Unlock() 593 } 594 // at this point the entries should had been all deleted 595 time.Sleep(30 * time.Second) 596 for i := 0; i < 3; i++ { 597 dbs[i].Lock() 598 assert.Check(t, is.Equal(int64(0), dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber.Load()), "entries should had been garbage collected") 599 dbs[i].Unlock() 600 } 601 602 // make sure that entries are not coming back 603 time.Sleep(15 * time.Second) 604 for i := 0; i < 3; i++ { 605 dbs[i].Lock() 606 assert.Check(t, is.Equal(int64(0), dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber.Load()), "entries should had been garbage collected") 607 dbs[i].Unlock() 608 } 609 610 closeNetworkDBInstances(t, dbs) 611 } 612 613 func TestFindNode(t *testing.T) { 614 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 615 616 dbs[0].nodes["active"] = &node{Node: memberlist.Node{Name: "active"}} 617 dbs[0].failedNodes["failed"] = &node{Node: memberlist.Node{Name: "failed"}} 618 dbs[0].leftNodes["left"] = &node{Node: memberlist.Node{Name: "left"}} 619 620 // active nodes is 2 because the testing node is in the list 621 assert.Check(t, is.Len(dbs[0].nodes, 2)) 622 assert.Check(t, is.Len(dbs[0].failedNodes, 1)) 623 assert.Check(t, is.Len(dbs[0].leftNodes, 1)) 624 625 n, currState, m := dbs[0].findNode("active") 626 assert.Check(t, n != nil) 627 assert.Check(t, is.Equal("active", n.Name)) 628 assert.Check(t, is.Equal(nodeActiveState, currState)) 629 assert.Check(t, m != nil) 630 // delete the entry manually 631 delete(m, "active") 632 633 // test if can be still find 634 n, currState, m = dbs[0].findNode("active") 635 assert.Check(t, is.Nil(n)) 636 assert.Check(t, is.Equal(nodeNotFound, currState)) 637 assert.Check(t, is.Nil(m)) 638 639 n, currState, m = dbs[0].findNode("failed") 640 assert.Check(t, n != nil) 641 assert.Check(t, is.Equal("failed", n.Name)) 642 assert.Check(t, is.Equal(nodeFailedState, currState)) 643 assert.Check(t, m != nil) 644 645 // find and remove 646 n, currState, m = dbs[0].findNode("left") 647 assert.Check(t, n != nil) 648 assert.Check(t, is.Equal("left", n.Name)) 649 assert.Check(t, is.Equal(nodeLeftState, currState)) 650 assert.Check(t, m != nil) 651 delete(m, "left") 652 653 n, currState, m = dbs[0].findNode("left") 654 assert.Check(t, is.Nil(n)) 655 assert.Check(t, is.Equal(nodeNotFound, currState)) 656 assert.Check(t, is.Nil(m)) 657 658 closeNetworkDBInstances(t, dbs) 659 } 660 661 func TestChangeNodeState(t *testing.T) { 662 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 663 664 dbs[0].nodes["node1"] = &node{Node: memberlist.Node{Name: "node1"}} 665 dbs[0].nodes["node2"] = &node{Node: memberlist.Node{Name: "node2"}} 666 dbs[0].nodes["node3"] = &node{Node: memberlist.Node{Name: "node3"}} 667 668 // active nodes is 4 because the testing node is in the list 669 assert.Check(t, is.Len(dbs[0].nodes, 4)) 670 671 n, currState, m := dbs[0].findNode("node1") 672 assert.Check(t, n != nil) 673 assert.Check(t, is.Equal(nodeActiveState, currState)) 674 assert.Check(t, is.Equal("node1", n.Name)) 675 assert.Check(t, m != nil) 676 677 // node1 to failed 678 dbs[0].changeNodeState("node1", nodeFailedState) 679 680 n, currState, m = dbs[0].findNode("node1") 681 assert.Check(t, n != nil) 682 assert.Check(t, is.Equal(nodeFailedState, currState)) 683 assert.Check(t, is.Equal("node1", n.Name)) 684 assert.Check(t, m != nil) 685 assert.Check(t, time.Duration(0) != n.reapTime) 686 687 // node1 back to active 688 dbs[0].changeNodeState("node1", nodeActiveState) 689 690 n, currState, m = dbs[0].findNode("node1") 691 assert.Check(t, n != nil) 692 assert.Check(t, is.Equal(nodeActiveState, currState)) 693 assert.Check(t, is.Equal("node1", n.Name)) 694 assert.Check(t, m != nil) 695 assert.Check(t, is.Equal(time.Duration(0), n.reapTime)) 696 697 // node1 to left 698 dbs[0].changeNodeState("node1", nodeLeftState) 699 dbs[0].changeNodeState("node2", nodeLeftState) 700 dbs[0].changeNodeState("node3", nodeLeftState) 701 702 n, currState, m = dbs[0].findNode("node1") 703 assert.Check(t, n != nil) 704 assert.Check(t, is.Equal(nodeLeftState, currState)) 705 assert.Check(t, is.Equal("node1", n.Name)) 706 assert.Check(t, m != nil) 707 assert.Check(t, time.Duration(0) != n.reapTime) 708 709 n, currState, m = dbs[0].findNode("node2") 710 assert.Check(t, n != nil) 711 assert.Check(t, is.Equal(nodeLeftState, currState)) 712 assert.Check(t, is.Equal("node2", n.Name)) 713 assert.Check(t, m != nil) 714 assert.Check(t, time.Duration(0) != n.reapTime) 715 716 n, currState, m = dbs[0].findNode("node3") 717 assert.Check(t, n != nil) 718 assert.Check(t, is.Equal(nodeLeftState, currState)) 719 assert.Check(t, is.Equal("node3", n.Name)) 720 assert.Check(t, m != nil) 721 assert.Check(t, time.Duration(0) != n.reapTime) 722 723 // active nodes is 1 because the testing node is in the list 724 assert.Check(t, is.Len(dbs[0].nodes, 1)) 725 assert.Check(t, is.Len(dbs[0].failedNodes, 0)) 726 assert.Check(t, is.Len(dbs[0].leftNodes, 3)) 727 728 closeNetworkDBInstances(t, dbs) 729 } 730 731 func TestNodeReincarnation(t *testing.T) { 732 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 733 734 dbs[0].nodes["node1"] = &node{Node: memberlist.Node{Name: "node1", Addr: net.ParseIP("192.168.1.1")}} 735 dbs[0].leftNodes["node2"] = &node{Node: memberlist.Node{Name: "node2", Addr: net.ParseIP("192.168.1.2")}} 736 dbs[0].failedNodes["node3"] = &node{Node: memberlist.Node{Name: "node3", Addr: net.ParseIP("192.168.1.3")}} 737 738 // active nodes is 2 because the testing node is in the list 739 assert.Check(t, is.Len(dbs[0].nodes, 2)) 740 assert.Check(t, is.Len(dbs[0].failedNodes, 1)) 741 assert.Check(t, is.Len(dbs[0].leftNodes, 1)) 742 743 dbs[0].Lock() 744 b := dbs[0].purgeReincarnation(&memberlist.Node{Name: "node4", Addr: net.ParseIP("192.168.1.1")}) 745 assert.Check(t, b) 746 dbs[0].nodes["node4"] = &node{Node: memberlist.Node{Name: "node4", Addr: net.ParseIP("192.168.1.1")}} 747 748 b = dbs[0].purgeReincarnation(&memberlist.Node{Name: "node5", Addr: net.ParseIP("192.168.1.2")}) 749 assert.Check(t, b) 750 dbs[0].nodes["node5"] = &node{Node: memberlist.Node{Name: "node5", Addr: net.ParseIP("192.168.1.1")}} 751 752 b = dbs[0].purgeReincarnation(&memberlist.Node{Name: "node6", Addr: net.ParseIP("192.168.1.3")}) 753 assert.Check(t, b) 754 dbs[0].nodes["node6"] = &node{Node: memberlist.Node{Name: "node6", Addr: net.ParseIP("192.168.1.1")}} 755 756 b = dbs[0].purgeReincarnation(&memberlist.Node{Name: "node6", Addr: net.ParseIP("192.168.1.10")}) 757 assert.Check(t, !b) 758 759 // active nodes is 1 because the testing node is in the list 760 assert.Check(t, is.Len(dbs[0].nodes, 4)) 761 assert.Check(t, is.Len(dbs[0].failedNodes, 0)) 762 assert.Check(t, is.Len(dbs[0].leftNodes, 3)) 763 764 dbs[0].Unlock() 765 closeNetworkDBInstances(t, dbs) 766 } 767 768 func TestParallelCreate(t *testing.T) { 769 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 770 771 startCh := make(chan int) 772 doneCh := make(chan error) 773 var success int32 774 for i := 0; i < 20; i++ { 775 go func() { 776 <-startCh 777 err := dbs[0].CreateEntry("testTable", "testNetwork", "key", []byte("value")) 778 if err == nil { 779 atomic.AddInt32(&success, 1) 780 } 781 doneCh <- err 782 }() 783 } 784 785 close(startCh) 786 787 for i := 0; i < 20; i++ { 788 <-doneCh 789 } 790 close(doneCh) 791 // Only 1 write should have succeeded 792 assert.Check(t, is.Equal(int32(1), success)) 793 794 closeNetworkDBInstances(t, dbs) 795 } 796 797 func TestParallelDelete(t *testing.T) { 798 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 799 800 err := dbs[0].CreateEntry("testTable", "testNetwork", "key", []byte("value")) 801 assert.NilError(t, err) 802 803 startCh := make(chan int) 804 doneCh := make(chan error) 805 var success int32 806 for i := 0; i < 20; i++ { 807 go func() { 808 <-startCh 809 err := dbs[0].DeleteEntry("testTable", "testNetwork", "key") 810 if err == nil { 811 atomic.AddInt32(&success, 1) 812 } 813 doneCh <- err 814 }() 815 } 816 817 close(startCh) 818 819 for i := 0; i < 20; i++ { 820 <-doneCh 821 } 822 close(doneCh) 823 // Only 1 write should have succeeded 824 assert.Check(t, is.Equal(int32(1), success)) 825 826 closeNetworkDBInstances(t, dbs) 827 } 828 829 func TestNetworkDBIslands(t *testing.T) { 830 pollTimeout := func() time.Duration { 831 const defaultTimeout = 120 * time.Second 832 dl, ok := t.Deadline() 833 if !ok { 834 return defaultTimeout 835 } 836 if d := time.Until(dl); d <= defaultTimeout { 837 return d 838 } 839 return defaultTimeout 840 } 841 842 _ = log.SetLevel("debug") 843 conf := DefaultConfig() 844 // Shorten durations to speed up test execution. 845 conf.rejoinClusterDuration = conf.rejoinClusterDuration / 10 846 conf.rejoinClusterInterval = conf.rejoinClusterInterval / 10 847 dbs := createNetworkDBInstances(t, 5, "node", conf) 848 849 // Get the node IP used currently 850 node := dbs[0].nodes[dbs[0].config.NodeID] 851 baseIPStr := node.Addr.String() 852 // Node 0,1,2 are going to be the 3 bootstrap nodes 853 members := []string{ 854 fmt.Sprintf("%s:%d", baseIPStr, dbs[0].config.BindPort), 855 fmt.Sprintf("%s:%d", baseIPStr, dbs[1].config.BindPort), 856 fmt.Sprintf("%s:%d", baseIPStr, dbs[2].config.BindPort), 857 } 858 // Rejoining will update the list of the bootstrap members 859 for i := 3; i < 5; i++ { 860 t.Logf("Re-joining: %d", i) 861 assert.Check(t, dbs[i].Join(members)) 862 } 863 864 // Now the 3 bootstrap nodes will cleanly leave, and will be properly removed from the other 2 nodes 865 for i := 0; i < 3; i++ { 866 log.G(context.TODO()).Infof("node %d leaving", i) 867 dbs[i].Close() 868 } 869 870 checkDBs := make(map[string]*NetworkDB) 871 for i := 3; i < 5; i++ { 872 db := dbs[i] 873 checkDBs[db.config.Hostname] = db 874 } 875 876 // Give some time to let the system propagate the messages and free up the ports 877 check := func(t poll.LogT) poll.Result { 878 // Verify that the nodes are actually all gone and marked appropiately 879 for name, db := range checkDBs { 880 db.RLock() 881 if (len(db.leftNodes) != 3) || (len(db.failedNodes) != 0) { 882 for name := range db.leftNodes { 883 t.Logf("%s: Node %s left", db.config.Hostname, name) 884 } 885 for name := range db.failedNodes { 886 t.Logf("%s: Node %s failed", db.config.Hostname, name) 887 } 888 db.RUnlock() 889 return poll.Continue("%s:Waiting for all nodes to cleanly leave, left: %d, failed nodes: %d", name, len(db.leftNodes), len(db.failedNodes)) 890 } 891 db.RUnlock() 892 t.Logf("%s: OK", name) 893 delete(checkDBs, name) 894 } 895 return poll.Success() 896 } 897 poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout())) 898 899 // Spawn again the first 3 nodes with different names but same IP:port 900 for i := 0; i < 3; i++ { 901 log.G(context.TODO()).Infof("node %d coming back", i) 902 conf := *dbs[i].config 903 conf.NodeID = stringid.TruncateID(stringid.GenerateRandomID()) 904 dbs[i] = launchNode(t, conf) 905 } 906 907 // Give some time for the reconnect routine to run, it runs every 6s. 908 check = func(t poll.LogT) poll.Result { 909 // Verify that the cluster is again all connected. Note that the 3 previous node did not do any join 910 for i := 0; i < 5; i++ { 911 db := dbs[i] 912 db.RLock() 913 if len(db.nodes) != 5 { 914 db.RUnlock() 915 return poll.Continue("%s:Waiting to connect to all nodes", dbs[i].config.Hostname) 916 } 917 if len(db.failedNodes) != 0 { 918 db.RUnlock() 919 return poll.Continue("%s:Waiting for 0 failedNodes", dbs[i].config.Hostname) 920 } 921 if i < 3 { 922 // nodes from 0 to 3 has no left nodes 923 if len(db.leftNodes) != 0 { 924 db.RUnlock() 925 return poll.Continue("%s:Waiting to have no leftNodes", dbs[i].config.Hostname) 926 } 927 } else { 928 // nodes from 4 to 5 has the 3 previous left nodes 929 if len(db.leftNodes) != 3 { 930 db.RUnlock() 931 return poll.Continue("%s:Waiting to have 3 leftNodes", dbs[i].config.Hostname) 932 } 933 } 934 db.RUnlock() 935 } 936 return poll.Success() 937 } 938 poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout())) 939 closeNetworkDBInstances(t, dbs) 940 }