github.com/rumpl/bof@v23.0.0-rc.2+incompatible/libnetwork/networkdb/networkdb_test.go (about) 1 package networkdb 2 3 import ( 4 "fmt" 5 "log" 6 "net" 7 "os" 8 "strconv" 9 "sync/atomic" 10 "testing" 11 "time" 12 13 "github.com/docker/docker/pkg/stringid" 14 "github.com/docker/go-events" 15 "github.com/hashicorp/memberlist" 16 "github.com/hashicorp/serf/serf" 17 "github.com/sirupsen/logrus" 18 "gotest.tools/v3/assert" 19 is "gotest.tools/v3/assert/cmp" 20 "gotest.tools/v3/poll" 21 ) 22 23 var dbPort int32 = 10000 24 25 func TestMain(m *testing.M) { 26 os.WriteFile("/proc/sys/net/ipv6/conf/lo/disable_ipv6", []byte{'0', '\n'}, 0644) 27 logrus.SetLevel(logrus.ErrorLevel) 28 os.Exit(m.Run()) 29 } 30 31 func launchNode(t *testing.T, conf Config) *NetworkDB { 32 t.Helper() 33 db, err := New(&conf) 34 assert.NilError(t, err) 35 return db 36 } 37 38 func createNetworkDBInstances(t *testing.T, num int, namePrefix string, conf *Config) []*NetworkDB { 39 t.Helper() 40 var dbs []*NetworkDB 41 for i := 0; i < num; i++ { 42 localConfig := *conf 43 localConfig.Hostname = fmt.Sprintf("%s%d", namePrefix, i+1) 44 localConfig.NodeID = stringid.TruncateID(stringid.GenerateRandomID()) 45 localConfig.BindPort = int(atomic.AddInt32(&dbPort, 1)) 46 db := launchNode(t, localConfig) 47 if i != 0 { 48 assert.Check(t, db.Join([]string{fmt.Sprintf("localhost:%d", db.config.BindPort-1)})) 49 } 50 51 dbs = append(dbs, db) 52 } 53 54 // Wait till the cluster creation is successful 55 check := func(t poll.LogT) poll.Result { 56 // Check that the cluster is properly created 57 for i := 0; i < num; i++ { 58 if num != len(dbs[i].ClusterPeers()) { 59 return poll.Continue("%s:Waiting for cluster peers to be established", dbs[i].config.Hostname) 60 } 61 } 62 return poll.Success() 63 } 64 poll.WaitOn(t, check, poll.WithDelay(2*time.Second), poll.WithTimeout(20*time.Second)) 65 66 return dbs 67 } 68 69 func closeNetworkDBInstances(t *testing.T, dbs []*NetworkDB) { 70 t.Helper() 71 log.Print("Closing DB instances...") 72 for _, db := range dbs { 73 db.Close() 74 } 75 } 76 77 func (db *NetworkDB) verifyNodeExistence(t *testing.T, node string, present bool) { 78 t.Helper() 79 for i := 0; i < 80; i++ { 80 db.RLock() 81 _, ok := db.nodes[node] 82 db.RUnlock() 83 if present && ok { 84 return 85 } 86 87 if !present && !ok { 88 return 89 } 90 91 time.Sleep(50 * time.Millisecond) 92 } 93 94 t.Errorf("%v(%v): Node existence verification for node %s failed", db.config.Hostname, db.config.NodeID, node) 95 } 96 97 func (db *NetworkDB) verifyNetworkExistence(t *testing.T, node string, id string, present bool) { 98 t.Helper() 99 100 const sleepInterval = 50 * time.Millisecond 101 var maxRetries int64 102 if dl, ok := t.Deadline(); ok { 103 maxRetries = int64(time.Until(dl) / sleepInterval) 104 } else { 105 maxRetries = 80 106 } 107 for i := int64(0); i < maxRetries; i++ { 108 db.RLock() 109 nn, nnok := db.networks[node] 110 db.RUnlock() 111 if nnok { 112 n, ok := nn[id] 113 if present && ok { 114 return 115 } 116 117 if !present && 118 ((ok && n.leaving) || 119 !ok) { 120 return 121 } 122 } 123 124 time.Sleep(sleepInterval) 125 } 126 127 t.Error("Network existence verification failed") 128 } 129 130 func (db *NetworkDB) verifyEntryExistence(t *testing.T, tname, nid, key, value string, present bool) { 131 t.Helper() 132 n := 80 133 for i := 0; i < n; i++ { 134 entry, err := db.getEntry(tname, nid, key) 135 if present && err == nil && string(entry.value) == value { 136 return 137 } 138 139 if !present && 140 ((err == nil && entry.deleting) || 141 (err != nil)) { 142 return 143 } 144 145 if i == n-1 && !present && err != nil { 146 return 147 } 148 149 time.Sleep(50 * time.Millisecond) 150 } 151 152 t.Errorf("Entry existence verification test failed for %v(%v)", db.config.Hostname, db.config.NodeID) 153 } 154 155 func testWatch(t *testing.T, ch chan events.Event, ev interface{}, tname, nid, key, value string) { 156 t.Helper() 157 select { 158 case rcvdEv := <-ch: 159 assert.Check(t, is.Equal(fmt.Sprintf("%T", rcvdEv), fmt.Sprintf("%T", ev))) 160 switch typ := rcvdEv.(type) { 161 case CreateEvent: 162 assert.Check(t, is.Equal(tname, typ.Table)) 163 assert.Check(t, is.Equal(nid, typ.NetworkID)) 164 assert.Check(t, is.Equal(key, typ.Key)) 165 assert.Check(t, is.Equal(value, string(typ.Value))) 166 case UpdateEvent: 167 assert.Check(t, is.Equal(tname, typ.Table)) 168 assert.Check(t, is.Equal(nid, typ.NetworkID)) 169 assert.Check(t, is.Equal(key, typ.Key)) 170 assert.Check(t, is.Equal(value, string(typ.Value))) 171 case DeleteEvent: 172 assert.Check(t, is.Equal(tname, typ.Table)) 173 assert.Check(t, is.Equal(nid, typ.NetworkID)) 174 assert.Check(t, is.Equal(key, typ.Key)) 175 } 176 case <-time.After(time.Second): 177 t.Fail() 178 return 179 } 180 } 181 182 func TestNetworkDBSimple(t *testing.T) { 183 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 184 closeNetworkDBInstances(t, dbs) 185 } 186 187 func TestNetworkDBJoinLeaveNetwork(t *testing.T) { 188 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 189 190 err := dbs[0].JoinNetwork("network1") 191 assert.NilError(t, err) 192 193 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 194 195 err = dbs[0].LeaveNetwork("network1") 196 assert.NilError(t, err) 197 198 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", false) 199 closeNetworkDBInstances(t, dbs) 200 } 201 202 func TestNetworkDBJoinLeaveNetworks(t *testing.T) { 203 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 204 205 n := 10 206 for i := 1; i <= n; i++ { 207 err := dbs[0].JoinNetwork(fmt.Sprintf("network0%d", i)) 208 assert.NilError(t, err) 209 } 210 211 for i := 1; i <= n; i++ { 212 err := dbs[1].JoinNetwork(fmt.Sprintf("network1%d", i)) 213 assert.NilError(t, err) 214 } 215 216 for i := 1; i <= n; i++ { 217 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, fmt.Sprintf("network0%d", i), true) 218 } 219 220 for i := 1; i <= n; i++ { 221 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, fmt.Sprintf("network1%d", i), true) 222 } 223 224 for i := 1; i <= n; i++ { 225 err := dbs[0].LeaveNetwork(fmt.Sprintf("network0%d", i)) 226 assert.NilError(t, err) 227 } 228 229 for i := 1; i <= n; i++ { 230 err := dbs[1].LeaveNetwork(fmt.Sprintf("network1%d", i)) 231 assert.NilError(t, err) 232 } 233 234 for i := 1; i <= n; i++ { 235 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, fmt.Sprintf("network0%d", i), false) 236 } 237 238 for i := 1; i <= n; i++ { 239 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, fmt.Sprintf("network1%d", i), false) 240 } 241 242 closeNetworkDBInstances(t, dbs) 243 } 244 245 func TestNetworkDBCRUDTableEntry(t *testing.T) { 246 dbs := createNetworkDBInstances(t, 3, "node", DefaultConfig()) 247 248 err := dbs[0].JoinNetwork("network1") 249 assert.NilError(t, err) 250 251 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 252 253 err = dbs[1].JoinNetwork("network1") 254 assert.NilError(t, err) 255 256 err = dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 257 assert.NilError(t, err) 258 259 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", true) 260 dbs[2].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", false) 261 262 err = dbs[0].UpdateEntry("test_table", "network1", "test_key", []byte("test_updated_value")) 263 assert.NilError(t, err) 264 265 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_updated_value", true) 266 267 err = dbs[0].DeleteEntry("test_table", "network1", "test_key") 268 assert.NilError(t, err) 269 270 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "", false) 271 272 closeNetworkDBInstances(t, dbs) 273 } 274 275 func TestNetworkDBCRUDTableEntries(t *testing.T) { 276 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 277 278 err := dbs[0].JoinNetwork("network1") 279 assert.NilError(t, err) 280 281 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 282 283 err = dbs[1].JoinNetwork("network1") 284 assert.NilError(t, err) 285 286 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 287 288 n := 10 289 for i := 1; i <= n; i++ { 290 err = dbs[0].CreateEntry("test_table", "network1", 291 fmt.Sprintf("test_key0%d", i), 292 []byte(fmt.Sprintf("test_value0%d", i))) 293 assert.NilError(t, err) 294 } 295 296 for i := 1; i <= n; i++ { 297 err = dbs[1].CreateEntry("test_table", "network1", 298 fmt.Sprintf("test_key1%d", i), 299 []byte(fmt.Sprintf("test_value1%d", i))) 300 assert.NilError(t, err) 301 } 302 303 for i := 1; i <= n; i++ { 304 dbs[0].verifyEntryExistence(t, "test_table", "network1", 305 fmt.Sprintf("test_key1%d", i), 306 fmt.Sprintf("test_value1%d", i), true) 307 assert.NilError(t, err) 308 } 309 310 for i := 1; i <= n; i++ { 311 dbs[1].verifyEntryExistence(t, "test_table", "network1", 312 fmt.Sprintf("test_key0%d", i), 313 fmt.Sprintf("test_value0%d", i), true) 314 assert.NilError(t, err) 315 } 316 317 // Verify deletes 318 for i := 1; i <= n; i++ { 319 err = dbs[0].DeleteEntry("test_table", "network1", 320 fmt.Sprintf("test_key0%d", i)) 321 assert.NilError(t, err) 322 } 323 324 for i := 1; i <= n; i++ { 325 err = dbs[1].DeleteEntry("test_table", "network1", 326 fmt.Sprintf("test_key1%d", i)) 327 assert.NilError(t, err) 328 } 329 330 for i := 1; i <= n; i++ { 331 dbs[0].verifyEntryExistence(t, "test_table", "network1", 332 fmt.Sprintf("test_key1%d", i), "", false) 333 assert.NilError(t, err) 334 } 335 336 for i := 1; i <= n; i++ { 337 dbs[1].verifyEntryExistence(t, "test_table", "network1", 338 fmt.Sprintf("test_key0%d", i), "", false) 339 assert.NilError(t, err) 340 } 341 342 closeNetworkDBInstances(t, dbs) 343 } 344 345 func TestNetworkDBNodeLeave(t *testing.T) { 346 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 347 348 err := dbs[0].JoinNetwork("network1") 349 assert.NilError(t, err) 350 351 err = dbs[1].JoinNetwork("network1") 352 assert.NilError(t, err) 353 354 err = dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 355 assert.NilError(t, err) 356 357 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", true) 358 359 dbs[0].Close() 360 dbs[1].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", false) 361 dbs[1].Close() 362 } 363 364 func TestNetworkDBWatch(t *testing.T) { 365 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 366 err := dbs[0].JoinNetwork("network1") 367 assert.NilError(t, err) 368 369 err = dbs[1].JoinNetwork("network1") 370 assert.NilError(t, err) 371 372 ch, cancel := dbs[1].Watch("", "", "") 373 374 err = dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 375 assert.NilError(t, err) 376 377 testWatch(t, ch.C, CreateEvent{}, "test_table", "network1", "test_key", "test_value") 378 379 err = dbs[0].UpdateEntry("test_table", "network1", "test_key", []byte("test_updated_value")) 380 assert.NilError(t, err) 381 382 testWatch(t, ch.C, UpdateEvent{}, "test_table", "network1", "test_key", "test_updated_value") 383 384 err = dbs[0].DeleteEntry("test_table", "network1", "test_key") 385 assert.NilError(t, err) 386 387 testWatch(t, ch.C, DeleteEvent{}, "test_table", "network1", "test_key", "") 388 389 cancel() 390 closeNetworkDBInstances(t, dbs) 391 } 392 393 func TestNetworkDBBulkSync(t *testing.T) { 394 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 395 396 err := dbs[0].JoinNetwork("network1") 397 assert.NilError(t, err) 398 399 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 400 401 n := 1000 402 for i := 1; i <= n; i++ { 403 err = dbs[0].CreateEntry("test_table", "network1", 404 fmt.Sprintf("test_key0%d", i), 405 []byte(fmt.Sprintf("test_value0%d", i))) 406 assert.NilError(t, err) 407 } 408 409 err = dbs[1].JoinNetwork("network1") 410 assert.NilError(t, err) 411 412 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 413 414 for i := 1; i <= n; i++ { 415 dbs[1].verifyEntryExistence(t, "test_table", "network1", 416 fmt.Sprintf("test_key0%d", i), 417 fmt.Sprintf("test_value0%d", i), true) 418 assert.NilError(t, err) 419 } 420 421 closeNetworkDBInstances(t, dbs) 422 } 423 424 func TestNetworkDBCRUDMediumCluster(t *testing.T) { 425 n := 5 426 427 dbs := createNetworkDBInstances(t, n, "node", DefaultConfig()) 428 429 for i := 0; i < n; i++ { 430 for j := 0; j < n; j++ { 431 if i == j { 432 continue 433 } 434 435 dbs[i].verifyNodeExistence(t, dbs[j].config.NodeID, true) 436 } 437 } 438 439 for i := 0; i < n; i++ { 440 err := dbs[i].JoinNetwork("network1") 441 assert.NilError(t, err) 442 } 443 444 for i := 0; i < n; i++ { 445 for j := 0; j < n; j++ { 446 dbs[i].verifyNetworkExistence(t, dbs[j].config.NodeID, "network1", true) 447 } 448 } 449 450 err := dbs[0].CreateEntry("test_table", "network1", "test_key", []byte("test_value")) 451 assert.NilError(t, err) 452 453 for i := 1; i < n; i++ { 454 dbs[i].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_value", true) 455 } 456 457 err = dbs[0].UpdateEntry("test_table", "network1", "test_key", []byte("test_updated_value")) 458 assert.NilError(t, err) 459 460 for i := 1; i < n; i++ { 461 dbs[i].verifyEntryExistence(t, "test_table", "network1", "test_key", "test_updated_value", true) 462 } 463 464 err = dbs[0].DeleteEntry("test_table", "network1", "test_key") 465 assert.NilError(t, err) 466 467 for i := 1; i < n; i++ { 468 dbs[i].verifyEntryExistence(t, "test_table", "network1", "test_key", "", false) 469 } 470 471 for i := 1; i < n; i++ { 472 _, err = dbs[i].GetEntry("test_table", "network1", "test_key") 473 assert.Check(t, is.ErrorContains(err, "")) 474 assert.Check(t, is.Contains(err.Error(), "deleted and pending garbage collection"), err) 475 } 476 477 closeNetworkDBInstances(t, dbs) 478 } 479 480 func TestNetworkDBNodeJoinLeaveIteration(t *testing.T) { 481 dbs := createNetworkDBInstances(t, 2, "node", DefaultConfig()) 482 483 var ( 484 dbIndex int32 485 staleNetworkTime [2]serf.LamportTime 486 expectNodeCount int 487 network = "network1" 488 ) 489 dbChangeWitness := func(t poll.LogT) poll.Result { 490 db := dbs[dbIndex] 491 networkTime := db.networkClock.Time() 492 if networkTime <= staleNetworkTime[dbIndex] { 493 return poll.Continue("network time is stale, no change registered yet.") 494 } 495 count := -1 496 db.Lock() 497 if nodes, ok := db.networkNodes[network]; ok { 498 count = len(nodes) 499 } 500 db.Unlock() 501 if count != expectNodeCount { 502 return poll.Continue("current number of nodes is %d, expect %d.", count, expectNodeCount) 503 } 504 return poll.Success() 505 } 506 507 // Single node Join/Leave 508 staleNetworkTime[0], staleNetworkTime[1] = dbs[0].networkClock.Time(), dbs[1].networkClock.Time() 509 err := dbs[0].JoinNetwork("network1") 510 assert.NilError(t, err) 511 512 dbIndex, expectNodeCount = 0, 1 513 poll.WaitOn(t, dbChangeWitness, poll.WithTimeout(3*time.Second), poll.WithDelay(5*time.Millisecond)) 514 515 staleNetworkTime[0], staleNetworkTime[1] = dbs[0].networkClock.Time(), dbs[1].networkClock.Time() 516 err = dbs[0].LeaveNetwork("network1") 517 assert.NilError(t, err) 518 519 dbIndex, expectNodeCount = 0, 0 520 poll.WaitOn(t, dbChangeWitness, poll.WithTimeout(3*time.Second), poll.WithDelay(5*time.Millisecond)) 521 522 // Multiple nodes Join/Leave 523 staleNetworkTime[0], staleNetworkTime[1] = dbs[0].networkClock.Time(), dbs[1].networkClock.Time() 524 err = dbs[0].JoinNetwork("network1") 525 assert.NilError(t, err) 526 527 err = dbs[1].JoinNetwork("network1") 528 assert.NilError(t, err) 529 530 // Wait for the propagation on db[0] 531 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 532 dbIndex, expectNodeCount = 0, 2 533 poll.WaitOn(t, dbChangeWitness, poll.WithTimeout(3*time.Second), poll.WithDelay(5*time.Millisecond)) 534 if n, ok := dbs[0].networks[dbs[0].config.NodeID]["network1"]; !ok || n.leaving { 535 t.Fatalf("The network should not be marked as leaving:%t", n.leaving) 536 } 537 538 // Wait for the propagation on db[1] 539 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 540 dbIndex, expectNodeCount = 1, 2 541 poll.WaitOn(t, dbChangeWitness, poll.WithTimeout(3*time.Second), poll.WithDelay(5*time.Millisecond)) 542 if n, ok := dbs[1].networks[dbs[1].config.NodeID]["network1"]; !ok || n.leaving { 543 t.Fatalf("The network should not be marked as leaving:%t", n.leaving) 544 } 545 546 // Try a quick leave/join 547 staleNetworkTime[0], staleNetworkTime[1] = dbs[0].networkClock.Time(), dbs[1].networkClock.Time() 548 err = dbs[0].LeaveNetwork("network1") 549 assert.NilError(t, err) 550 err = dbs[0].JoinNetwork("network1") 551 assert.NilError(t, err) 552 553 dbs[0].verifyNetworkExistence(t, dbs[1].config.NodeID, "network1", true) 554 dbIndex, expectNodeCount = 0, 2 555 poll.WaitOn(t, dbChangeWitness, poll.WithTimeout(3*time.Second), poll.WithDelay(5*time.Millisecond)) 556 557 dbs[1].verifyNetworkExistence(t, dbs[0].config.NodeID, "network1", true) 558 dbIndex, expectNodeCount = 1, 2 559 poll.WaitOn(t, dbChangeWitness, poll.WithTimeout(3*time.Second), poll.WithDelay(5*time.Millisecond)) 560 561 closeNetworkDBInstances(t, dbs) 562 } 563 564 func TestNetworkDBGarbageCollection(t *testing.T) { 565 keysWriteDelete := 5 566 config := DefaultConfig() 567 config.reapEntryInterval = 30 * time.Second 568 config.StatsPrintPeriod = 15 * time.Second 569 570 dbs := createNetworkDBInstances(t, 3, "node", config) 571 572 // 2 Nodes join network 573 err := dbs[0].JoinNetwork("network1") 574 assert.NilError(t, err) 575 576 err = dbs[1].JoinNetwork("network1") 577 assert.NilError(t, err) 578 579 for i := 0; i < keysWriteDelete; i++ { 580 err = dbs[i%2].CreateEntry("testTable", "network1", "key-"+strconv.Itoa(i), []byte("value")) 581 assert.NilError(t, err) 582 } 583 time.Sleep(time.Second) 584 for i := 0; i < keysWriteDelete; i++ { 585 err = dbs[i%2].DeleteEntry("testTable", "network1", "key-"+strconv.Itoa(i)) 586 assert.NilError(t, err) 587 } 588 for i := 0; i < 2; i++ { 589 assert.Check(t, is.Equal(keysWriteDelete, dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber), "entries number should match") 590 } 591 592 // from this point the timer for the garbage collection started, wait 5 seconds and then join a new node 593 time.Sleep(5 * time.Second) 594 595 err = dbs[2].JoinNetwork("network1") 596 assert.NilError(t, err) 597 for i := 0; i < 3; i++ { 598 assert.Check(t, is.Equal(keysWriteDelete, dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber), "entries number should match") 599 } 600 // at this point the entries should had been all deleted 601 time.Sleep(30 * time.Second) 602 for i := 0; i < 3; i++ { 603 assert.Check(t, is.Equal(0, dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber), "entries should had been garbage collected") 604 } 605 606 // make sure that entries are not coming back 607 time.Sleep(15 * time.Second) 608 for i := 0; i < 3; i++ { 609 assert.Check(t, is.Equal(0, dbs[i].networks[dbs[i].config.NodeID]["network1"].entriesNumber), "entries should had been garbage collected") 610 } 611 612 closeNetworkDBInstances(t, dbs) 613 } 614 615 func TestFindNode(t *testing.T) { 616 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 617 618 dbs[0].nodes["active"] = &node{Node: memberlist.Node{Name: "active"}} 619 dbs[0].failedNodes["failed"] = &node{Node: memberlist.Node{Name: "failed"}} 620 dbs[0].leftNodes["left"] = &node{Node: memberlist.Node{Name: "left"}} 621 622 // active nodes is 2 because the testing node is in the list 623 assert.Check(t, is.Len(dbs[0].nodes, 2)) 624 assert.Check(t, is.Len(dbs[0].failedNodes, 1)) 625 assert.Check(t, is.Len(dbs[0].leftNodes, 1)) 626 627 n, currState, m := dbs[0].findNode("active") 628 assert.Check(t, n != nil) 629 assert.Check(t, is.Equal("active", n.Name)) 630 assert.Check(t, is.Equal(nodeActiveState, currState)) 631 assert.Check(t, m != nil) 632 // delete the entry manually 633 delete(m, "active") 634 635 // test if can be still find 636 n, currState, m = dbs[0].findNode("active") 637 assert.Check(t, is.Nil(n)) 638 assert.Check(t, is.Equal(nodeNotFound, currState)) 639 assert.Check(t, is.Nil(m)) 640 641 n, currState, m = dbs[0].findNode("failed") 642 assert.Check(t, n != nil) 643 assert.Check(t, is.Equal("failed", n.Name)) 644 assert.Check(t, is.Equal(nodeFailedState, currState)) 645 assert.Check(t, m != nil) 646 647 // find and remove 648 n, currState, m = dbs[0].findNode("left") 649 assert.Check(t, n != nil) 650 assert.Check(t, is.Equal("left", n.Name)) 651 assert.Check(t, is.Equal(nodeLeftState, currState)) 652 assert.Check(t, m != nil) 653 delete(m, "left") 654 655 n, currState, m = dbs[0].findNode("left") 656 assert.Check(t, is.Nil(n)) 657 assert.Check(t, is.Equal(nodeNotFound, currState)) 658 assert.Check(t, is.Nil(m)) 659 660 closeNetworkDBInstances(t, dbs) 661 } 662 663 func TestChangeNodeState(t *testing.T) { 664 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 665 666 dbs[0].nodes["node1"] = &node{Node: memberlist.Node{Name: "node1"}} 667 dbs[0].nodes["node2"] = &node{Node: memberlist.Node{Name: "node2"}} 668 dbs[0].nodes["node3"] = &node{Node: memberlist.Node{Name: "node3"}} 669 670 // active nodes is 4 because the testing node is in the list 671 assert.Check(t, is.Len(dbs[0].nodes, 4)) 672 673 n, currState, m := dbs[0].findNode("node1") 674 assert.Check(t, n != nil) 675 assert.Check(t, is.Equal(nodeActiveState, currState)) 676 assert.Check(t, is.Equal("node1", n.Name)) 677 assert.Check(t, m != nil) 678 679 // node1 to failed 680 dbs[0].changeNodeState("node1", nodeFailedState) 681 682 n, currState, m = dbs[0].findNode("node1") 683 assert.Check(t, n != nil) 684 assert.Check(t, is.Equal(nodeFailedState, currState)) 685 assert.Check(t, is.Equal("node1", n.Name)) 686 assert.Check(t, m != nil) 687 assert.Check(t, time.Duration(0) != n.reapTime) 688 689 // node1 back to active 690 dbs[0].changeNodeState("node1", nodeActiveState) 691 692 n, currState, m = dbs[0].findNode("node1") 693 assert.Check(t, n != nil) 694 assert.Check(t, is.Equal(nodeActiveState, currState)) 695 assert.Check(t, is.Equal("node1", n.Name)) 696 assert.Check(t, m != nil) 697 assert.Check(t, is.Equal(time.Duration(0), n.reapTime)) 698 699 // node1 to left 700 dbs[0].changeNodeState("node1", nodeLeftState) 701 dbs[0].changeNodeState("node2", nodeLeftState) 702 dbs[0].changeNodeState("node3", nodeLeftState) 703 704 n, currState, m = dbs[0].findNode("node1") 705 assert.Check(t, n != nil) 706 assert.Check(t, is.Equal(nodeLeftState, currState)) 707 assert.Check(t, is.Equal("node1", n.Name)) 708 assert.Check(t, m != nil) 709 assert.Check(t, time.Duration(0) != n.reapTime) 710 711 n, currState, m = dbs[0].findNode("node2") 712 assert.Check(t, n != nil) 713 assert.Check(t, is.Equal(nodeLeftState, currState)) 714 assert.Check(t, is.Equal("node2", n.Name)) 715 assert.Check(t, m != nil) 716 assert.Check(t, time.Duration(0) != n.reapTime) 717 718 n, currState, m = dbs[0].findNode("node3") 719 assert.Check(t, n != nil) 720 assert.Check(t, is.Equal(nodeLeftState, currState)) 721 assert.Check(t, is.Equal("node3", n.Name)) 722 assert.Check(t, m != nil) 723 assert.Check(t, time.Duration(0) != n.reapTime) 724 725 // active nodes is 1 because the testing node is in the list 726 assert.Check(t, is.Len(dbs[0].nodes, 1)) 727 assert.Check(t, is.Len(dbs[0].failedNodes, 0)) 728 assert.Check(t, is.Len(dbs[0].leftNodes, 3)) 729 730 closeNetworkDBInstances(t, dbs) 731 } 732 733 func TestNodeReincarnation(t *testing.T) { 734 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 735 736 dbs[0].nodes["node1"] = &node{Node: memberlist.Node{Name: "node1", Addr: net.ParseIP("192.168.1.1")}} 737 dbs[0].leftNodes["node2"] = &node{Node: memberlist.Node{Name: "node2", Addr: net.ParseIP("192.168.1.2")}} 738 dbs[0].failedNodes["node3"] = &node{Node: memberlist.Node{Name: "node3", Addr: net.ParseIP("192.168.1.3")}} 739 740 // active nodes is 2 because the testing node is in the list 741 assert.Check(t, is.Len(dbs[0].nodes, 2)) 742 assert.Check(t, is.Len(dbs[0].failedNodes, 1)) 743 assert.Check(t, is.Len(dbs[0].leftNodes, 1)) 744 745 b := dbs[0].purgeReincarnation(&memberlist.Node{Name: "node4", Addr: net.ParseIP("192.168.1.1")}) 746 assert.Check(t, b) 747 dbs[0].nodes["node4"] = &node{Node: memberlist.Node{Name: "node4", Addr: net.ParseIP("192.168.1.1")}} 748 749 b = dbs[0].purgeReincarnation(&memberlist.Node{Name: "node5", Addr: net.ParseIP("192.168.1.2")}) 750 assert.Check(t, b) 751 dbs[0].nodes["node5"] = &node{Node: memberlist.Node{Name: "node5", Addr: net.ParseIP("192.168.1.1")}} 752 753 b = dbs[0].purgeReincarnation(&memberlist.Node{Name: "node6", Addr: net.ParseIP("192.168.1.3")}) 754 assert.Check(t, b) 755 dbs[0].nodes["node6"] = &node{Node: memberlist.Node{Name: "node6", Addr: net.ParseIP("192.168.1.1")}} 756 757 b = dbs[0].purgeReincarnation(&memberlist.Node{Name: "node6", Addr: net.ParseIP("192.168.1.10")}) 758 assert.Check(t, !b) 759 760 // active nodes is 1 because the testing node is in the list 761 assert.Check(t, is.Len(dbs[0].nodes, 4)) 762 assert.Check(t, is.Len(dbs[0].failedNodes, 0)) 763 assert.Check(t, is.Len(dbs[0].leftNodes, 3)) 764 765 closeNetworkDBInstances(t, dbs) 766 } 767 768 func TestParallelCreate(t *testing.T) { 769 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 770 771 startCh := make(chan int) 772 doneCh := make(chan error) 773 var success int32 774 for i := 0; i < 20; i++ { 775 go func() { 776 <-startCh 777 err := dbs[0].CreateEntry("testTable", "testNetwork", "key", []byte("value")) 778 if err == nil { 779 atomic.AddInt32(&success, 1) 780 } 781 doneCh <- err 782 }() 783 } 784 785 close(startCh) 786 787 for i := 0; i < 20; i++ { 788 <-doneCh 789 } 790 close(doneCh) 791 // Only 1 write should have succeeded 792 assert.Check(t, is.Equal(int32(1), success)) 793 794 closeNetworkDBInstances(t, dbs) 795 } 796 797 func TestParallelDelete(t *testing.T) { 798 dbs := createNetworkDBInstances(t, 1, "node", DefaultConfig()) 799 800 err := dbs[0].CreateEntry("testTable", "testNetwork", "key", []byte("value")) 801 assert.NilError(t, err) 802 803 startCh := make(chan int) 804 doneCh := make(chan error) 805 var success int32 806 for i := 0; i < 20; i++ { 807 go func() { 808 <-startCh 809 err := dbs[0].DeleteEntry("testTable", "testNetwork", "key") 810 if err == nil { 811 atomic.AddInt32(&success, 1) 812 } 813 doneCh <- err 814 }() 815 } 816 817 close(startCh) 818 819 for i := 0; i < 20; i++ { 820 <-doneCh 821 } 822 close(doneCh) 823 // Only 1 write should have succeeded 824 assert.Check(t, is.Equal(int32(1), success)) 825 826 closeNetworkDBInstances(t, dbs) 827 } 828 829 func TestNetworkDBIslands(t *testing.T) { 830 pollTimeout := func() time.Duration { 831 const defaultTimeout = 120 * time.Second 832 dl, ok := t.Deadline() 833 if !ok { 834 return defaultTimeout 835 } 836 if d := time.Until(dl); d <= defaultTimeout { 837 return d 838 } 839 return defaultTimeout 840 } 841 842 logrus.SetLevel(logrus.DebugLevel) 843 conf := DefaultConfig() 844 // Shorten durations to speed up test execution. 845 conf.rejoinClusterDuration = conf.rejoinClusterDuration / 10 846 conf.rejoinClusterInterval = conf.rejoinClusterInterval / 10 847 dbs := createNetworkDBInstances(t, 5, "node", conf) 848 849 // Get the node IP used currently 850 node := dbs[0].nodes[dbs[0].config.NodeID] 851 baseIPStr := node.Addr.String() 852 // Node 0,1,2 are going to be the 3 bootstrap nodes 853 members := []string{fmt.Sprintf("%s:%d", baseIPStr, dbs[0].config.BindPort), 854 fmt.Sprintf("%s:%d", baseIPStr, dbs[1].config.BindPort), 855 fmt.Sprintf("%s:%d", baseIPStr, dbs[2].config.BindPort)} 856 // Rejoining will update the list of the bootstrap members 857 for i := 3; i < 5; i++ { 858 t.Logf("Re-joining: %d", i) 859 assert.Check(t, dbs[i].Join(members)) 860 } 861 862 // Now the 3 bootstrap nodes will cleanly leave, and will be properly removed from the other 2 nodes 863 for i := 0; i < 3; i++ { 864 logrus.Infof("node %d leaving", i) 865 dbs[i].Close() 866 } 867 868 checkDBs := make(map[string]*NetworkDB) 869 for i := 3; i < 5; i++ { 870 db := dbs[i] 871 checkDBs[db.config.Hostname] = db 872 } 873 874 // Give some time to let the system propagate the messages and free up the ports 875 check := func(t poll.LogT) poll.Result { 876 // Verify that the nodes are actually all gone and marked appropiately 877 for name, db := range checkDBs { 878 db.RLock() 879 if (len(db.leftNodes) != 3) || (len(db.failedNodes) != 0) { 880 for name := range db.leftNodes { 881 t.Logf("%s: Node %s left", db.config.Hostname, name) 882 } 883 for name := range db.failedNodes { 884 t.Logf("%s: Node %s failed", db.config.Hostname, name) 885 } 886 db.RUnlock() 887 return poll.Continue("%s:Waiting for all nodes to cleanly leave, left: %d, failed nodes: %d", name, len(db.leftNodes), len(db.failedNodes)) 888 } 889 db.RUnlock() 890 t.Logf("%s: OK", name) 891 delete(checkDBs, name) 892 } 893 return poll.Success() 894 } 895 poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout())) 896 897 // Spawn again the first 3 nodes with different names but same IP:port 898 for i := 0; i < 3; i++ { 899 logrus.Infof("node %d coming back", i) 900 dbs[i].config.NodeID = stringid.TruncateID(stringid.GenerateRandomID()) 901 dbs[i] = launchNode(t, *dbs[i].config) 902 } 903 904 // Give some time for the reconnect routine to run, it runs every 6s. 905 check = func(t poll.LogT) poll.Result { 906 // Verify that the cluster is again all connected. Note that the 3 previous node did not do any join 907 for i := 0; i < 5; i++ { 908 db := dbs[i] 909 db.RLock() 910 if len(db.nodes) != 5 { 911 db.RUnlock() 912 return poll.Continue("%s:Waiting to connect to all nodes", dbs[i].config.Hostname) 913 } 914 if len(db.failedNodes) != 0 { 915 db.RUnlock() 916 return poll.Continue("%s:Waiting for 0 failedNodes", dbs[i].config.Hostname) 917 } 918 if i < 3 { 919 // nodes from 0 to 3 has no left nodes 920 if len(db.leftNodes) != 0 { 921 db.RUnlock() 922 return poll.Continue("%s:Waiting to have no leftNodes", dbs[i].config.Hostname) 923 } 924 } else { 925 // nodes from 4 to 5 has the 3 previous left nodes 926 if len(db.leftNodes) != 3 { 927 db.RUnlock() 928 return poll.Continue("%s:Waiting to have 3 leftNodes", dbs[i].config.Hostname) 929 } 930 } 931 db.RUnlock() 932 } 933 return poll.Success() 934 } 935 poll.WaitOn(t, check, poll.WithDelay(time.Second), poll.WithTimeout(pollTimeout())) 936 closeNetworkDBInstances(t, dbs) 937 }