github.com/portworx/kvdb@v0.0.0-20241107215734-a185a966f535/test/kv_controller.go (about) 1 package test 2 3 import ( 4 "fmt" 5 "io/ioutil" 6 "os" 7 "os/exec" 8 "strconv" 9 "strings" 10 "sync" 11 "testing" 12 "time" 13 14 "github.com/portworx/kvdb" 15 "github.com/stretchr/testify/require" 16 ) 17 18 const ( 19 urlPrefix = "http://" 20 localhost = "localhost" 21 ) 22 23 var ( 24 names = []string{"infra0", "infra1", "infra2", "infra3", "infra4"} 25 clientUrls = []string{"http://127.0.0.1:20379", "http://127.0.0.2:21379", "http://127.0.0.3:22379", "http://127.0.0.4:23379", "http://127.0.0.5:24379"} 26 peerPorts = []string{"20380", "21380", "22380", "23380", "24380"} 27 dataDirs = []string{"/tmp/node0", "/tmp/node1", "/tmp/node2", "/tmp/node3", "/tmp/node4"} 28 cmds map[int]*exec.Cmd 29 firstMemberID uint64 30 ) 31 32 // RunControllerTests is a test suite for kvdb controller APIs 33 func RunControllerTests(datastoreInit kvdb.DatastoreInit, t *testing.T) { 34 cleanup() 35 // Initialize node 0 36 cmds = make(map[int]*exec.Cmd) 37 index := 0 38 initCluster := make(map[string][]string) 39 peerURL := urlPrefix + localhost + ":" + peerPorts[index] 40 initCluster[names[index]] = []string{peerURL} 41 cmd, err := startEtcd(index, initCluster, "new") 42 if err != nil { 43 t.Fatalf(err.Error()) 44 } 45 cmds[index] = cmd 46 kv, err := datastoreInit("pwx/test", clientUrls, nil, fatalErrorCb()) 47 if err != nil { 48 cmd.Process.Kill() 49 t.Fatalf(err.Error()) 50 } 51 52 memberList, err := kv.ListMembers() 53 require.NoError(t, err, "error on ListMembers") 54 require.Equal(t, 1, len(memberList), "incorrect number of members") 55 for id := range memberList { 56 firstMemberID = id 57 break 58 } 59 testAddMember(kv, t) 60 testRemoveMember(kv, t) 61 testReAddAndRemoveByMemberID(kv, t) 62 testUpdateMember(kv, t) 63 testMemberStatus(kv, t) 64 testDefrag(kv, t) 65 testGetSetEndpoints(kv, t) 66 controllerLog("Stopping all etcd processes") 67 for _, cmd := range cmds { 68 cmd.Process.Kill() 69 } 70 } 71 72 func testAddMember(kv kvdb.Kvdb, t *testing.T) { 73 controllerLog("testAddMember") 74 // Add node 1 75 index := 1 76 controllerLog("Adding node 1") 77 initCluster, err := kv.AddMember(localhost, peerPorts[index], names[index]) 78 require.NoError(t, err, "Error on AddMember") 79 require.Equal(t, 2, len(initCluster), "Init Cluster length does not match") 80 81 // Check for unstarted members 82 memberList, err := kv.ListMembers() 83 require.NoError(t, err, "error on ListMembers") 84 require.Equal(t, len(memberList), 2, "incorrect number of members") 85 86 for memberID, m := range memberList { 87 if memberID != firstMemberID { 88 require.Equal(t, len(m.ClientUrls), 0, "Unexpected no. of client urls on unstarted member") 89 require.False(t, m.IsHealthy, "Unexpected health of unstarted member") 90 require.Empty(t, m.Name, "expected name to be empty") 91 require.False(t, m.HasStarted, "expected member to be unstarted") 92 require.Equal(t, len(m.PeerUrls), 1, "peerURLs should be set for unstarted members") 93 require.Equal(t, m.DbSize, int64(0), "db size should be 0") 94 } else { 95 require.Equal(t, len(m.ClientUrls), 1, "clientURLs should be set for started members") 96 require.True(t, m.IsHealthy, "expected member to be healthy") 97 require.NotEmpty(t, m.Name, "expected name") 98 require.True(t, m.HasStarted, "expected member to be started") 99 require.Equal(t, len(m.PeerUrls), 1, "peerURLs should be set for started members") 100 require.NotEqual(t, m.DbSize, int64(0), "db size should not be 0") 101 } 102 } 103 104 cmd, err := startEtcd(index, initCluster, "existing") 105 require.NoError(t, err, "Error on start etcd") 106 cmds[index] = cmd 107 108 // Check the list again after starting the second member. 109 memberList, err = kv.ListMembers() 110 require.NoError(t, err, "Error on ListMembers") 111 require.Equal(t, 2, len(memberList), "List returned different length of cluster") 112 113 for _, m := range memberList { 114 require.True(t, m.IsHealthy, "expected member to be healthy") 115 require.NotEmpty(t, m.Name, "expected name") 116 require.True(t, m.HasStarted, "expected member to be started") 117 require.Equal(t, len(m.PeerUrls), 1, "peerURLs should be set for started members") 118 require.Equal(t, len(m.ClientUrls), 1, "clientURLs should be set for started members") 119 require.NotEqual(t, m.DbSize, 0, "db size should not be 0") 120 } 121 } 122 123 func testRemoveMember(kv kvdb.Kvdb, t *testing.T) { 124 controllerLog("testRemoveMember") 125 // Add node 2 126 index := 2 127 controllerLog("Adding node 2") 128 initCluster, err := kv.AddMember(localhost, peerPorts[index], names[index]) 129 require.NoError(t, err, "Error on AddMember") 130 require.Equal(t, 3, len(initCluster), "Init Cluster length does not match") 131 cmd, err := startEtcd(index, initCluster, "existing") 132 require.NoError(t, err, "Error on start etcd") 133 cmds[index] = cmd 134 // Check the list returned 135 list, err := kv.ListMembers() 136 require.NoError(t, err, "Error on ListMembers") 137 require.Equal(t, 3, len(list), "List returned different length of cluster") 138 139 // Before removing all endpoints should be set 140 require.Equal(t, len(clientUrls), len(kv.GetEndpoints()), "unexpected endpoints") 141 142 // Remove node 1 143 index = 1 144 controllerLog("Removing node 1") 145 err = kv.RemoveMember(names[index], localhost) 146 require.NoError(t, err, "Error on RemoveMember") 147 148 // Only 2 endpoints should be set and the third one should have been removed 149 require.Equal(t, 2, len(kv.GetEndpoints()), "unexpected endpoints") 150 for _, actualEndpoint := range kv.GetEndpoints() { 151 require.NotEqual(t, actualEndpoint, clientUrls[index], "removed member should not be present") 152 } 153 154 cmd, _ = cmds[index] 155 cmd.Process.Kill() 156 delete(cmds, index) 157 // Check the list returned 158 list, err = kv.ListMembers() 159 require.NoError(t, err, "Error on ListMembers") 160 require.Equal(t, 2, len(list), "List returned different length of cluster") 161 162 // Remove an already removed node 163 index = 1 164 controllerLog("Removing node 1") 165 err = kv.RemoveMember(names[index], localhost) 166 require.NoError(t, err, "Error on RemoveMember") 167 } 168 169 func testReAddAndRemoveByMemberID(kv kvdb.Kvdb, t *testing.T) { 170 controllerLog("testReAddAndRemoveByMemberID") 171 172 // Add node 1 back 173 node1Index := 1 174 controllerLog("Re-adding node 1") 175 // For re-adding we need to delete the data-dir of this member 176 os.RemoveAll(dataDirs[node1Index]) 177 initCluster, err := kv.AddMember(localhost, peerPorts[node1Index], names[node1Index]) 178 require.NoError(t, err, "Error on AddMember") 179 require.Equal(t, 3, len(initCluster), "Init Cluster length does not match") 180 cmd, err := startEtcd(node1Index, initCluster, "existing") 181 require.NoError(t, err, "Error on start etcd") 182 cmds[node1Index] = cmd 183 184 // Check the list returned 185 list, err := kv.ListMembers() 186 require.NoError(t, err, "Error on ListMembers") 187 require.Equal(t, 3, len(list), "List returned different length of cluster") 188 189 // Remove node 1 190 var removeMemberID uint64 191 for memberID, member := range list { 192 if member.Name == names[node1Index] { 193 removeMemberID = memberID 194 break 195 } 196 } 197 require.NotEqual(t, removeMemberID, 0, "unexpected memberID") 198 199 // Remove on non-existent member should succeed 200 err = kv.RemoveMemberByID(12345) 201 require.NoError(t, err, "unexpected error on removing a non-existent member") 202 203 err = kv.RemoveMemberByID(removeMemberID) 204 require.NoError(t, err, "unexpected error on remove") 205 206 // Only 2 endpoints should be set and the third one should have been removed 207 require.Equal(t, 2, len(kv.GetEndpoints()), "unexpected endpoints") 208 for _, actualEndpoint := range kv.GetEndpoints() { 209 require.NotEqual(t, actualEndpoint, clientUrls[node1Index], "removed member should not be present") 210 } 211 212 // Kill the old etcd process 213 cmd, _ = cmds[node1Index] 214 cmd.Process.Kill() 215 delete(cmds, node1Index) 216 217 // Add node 1 back 218 controllerLog("Re-adding node 1 again") 219 // For re-adding we need to delete the data-dir of this member 220 os.RemoveAll(dataDirs[node1Index]) 221 initCluster, err = kv.AddMember(localhost, peerPorts[node1Index], names[node1Index]) 222 require.NoError(t, err, "Error on AddMember") 223 require.Equal(t, 3, len(initCluster), "Init Cluster length does not match") 224 cmd, err = startEtcd(node1Index, initCluster, "existing") 225 require.NoError(t, err, "Error on start etcd") 226 cmds[node1Index] = cmd 227 228 } 229 230 func testUpdateMember(kv kvdb.Kvdb, t *testing.T) { 231 controllerLog("testUpdateMember") 232 233 // Stop node 1 234 index := 1 235 cmd, _ := cmds[index] 236 cmd.Process.Kill() 237 delete(cmds, index) 238 // Change the port 239 peerPorts[index] = "33380" 240 241 // Update the member 242 initCluster, err := kv.UpdateMember(localhost, peerPorts[index], names[index]) 243 require.NoError(t, err, "Error on UpdateMember") 244 require.Equal(t, 3, len(initCluster), "Initial cluster length does not match") 245 cmd, err = startEtcd(index, initCluster, "existing") 246 require.NoError(t, err, "Error on start etcd") 247 cmds[index] = cmd 248 249 list, err := kv.ListMembers() 250 require.NoError(t, err, "Error on ListMembers") 251 require.Equal(t, 3, len(list), "List returned different length of cluster") 252 253 // Update an invalid member 254 _, err = kv.UpdateMember(localhost, peerPorts[index], "foobar") 255 require.EqualError(t, kvdb.ErrMemberDoesNotExist, err.Error(), "Unexpected error on UpdateMember") 256 } 257 258 func testDefrag(kv kvdb.Kvdb, t *testing.T) { 259 controllerLog("testDefrag") 260 261 // Run defrag with 0 timeout 262 index := 1 263 err := kv.Defragment(clientUrls[index], 0) 264 require.NoError(t, err, "Unexpected error on Defragment") 265 266 // Run defrag with 60 timeout 267 index = 4 268 err = kv.Defragment(clientUrls[index], 60) 269 require.NoError(t, err, "Unexpected error on Defragment") 270 } 271 272 func testMemberStatus(kv kvdb.Kvdb, t *testing.T) { 273 controllerLog("testMemberStatus") 274 275 index := 3 276 controllerLog("Adding node 3") 277 initCluster, err := kv.AddMember(localhost, peerPorts[index], names[index]) 278 require.NoError(t, err, "Error on AddMember") 279 cmd, err := startEtcd(index, initCluster, "existing") 280 require.NoError(t, err, "Error on start etcd") 281 cmds[index] = cmd 282 283 // Wait for some time for etcd to detect a node offline 284 time.Sleep(5 * time.Second) 285 286 index = 4 287 controllerLog("Adding node 4") 288 initCluster, err = kv.AddMember(localhost, peerPorts[index], names[index]) 289 require.NoError(t, err, "Error on AddMember") 290 cmd, err = startEtcd(index, initCluster, "existing") 291 require.NoError(t, err, "Error on start etcd") 292 cmds[index] = cmd 293 294 // Wait for some time for etcd to detect a node offline 295 time.Sleep(5 * time.Second) 296 297 // Stop node 2 298 stoppedIndex := 2 299 cmd, _ = cmds[stoppedIndex] 300 cmd.Process.Kill() 301 delete(cmds, stoppedIndex) 302 303 // Stop node 3 304 stoppedIndex2 := 3 305 cmd, _ = cmds[stoppedIndex2] 306 cmd.Process.Kill() 307 delete(cmds, stoppedIndex2) 308 309 // Wait for some time for etcd to detect a node offline 310 time.Sleep(5 * time.Second) 311 312 numOfGoroutines := 10 313 var wg sync.WaitGroup 314 wg.Add(numOfGoroutines) 315 316 checkMembers := func(index string, wait int) { 317 defer wg.Done() 318 // Add a sleep so that all go routines run just around the same time 319 time.Sleep(time.Duration(wait) * time.Second) 320 controllerLog("Listing Members for goroutine no. " + index) 321 list, err := kv.ListMembers() 322 require.NoError(t, err, "%v: Error on ListMembers", index) 323 require.Equal(t, 5, len(list), "%v: List returned different length of cluster", index) 324 325 for _, m := range list { 326 if m.Name == names[stoppedIndex] || m.Name == names[stoppedIndex2] { 327 require.Equal(t, len(m.ClientUrls), 0, "%v: Unexpected no. of client urls on down member", index) 328 require.False(t, m.IsHealthy, "%v: Unexpected health of down member", index) 329 } else { 330 require.True(t, m.IsHealthy, "%v: Expected member %v to be healthy", index, m.Name) 331 } 332 } 333 fmt.Println("checkMembers done for ", index) 334 } 335 for i := 0; i < numOfGoroutines; i++ { 336 go checkMembers(strconv.Itoa(i), numOfGoroutines-1) 337 } 338 c := make(chan struct{}) 339 go func() { 340 wg.Wait() 341 close(c) 342 }() 343 344 select { 345 case <-c: 346 return 347 case <-time.After(10 * time.Minute): 348 t.Fatalf("testMemberStatus timeout") 349 } 350 } 351 352 func testGetSetEndpoints(kv kvdb.Kvdb, t *testing.T) { 353 err := kv.SetEndpoints(clientUrls) 354 require.NoError(t, err, "Unexpected error on SetEndpoints") 355 endpoints := kv.GetEndpoints() 356 require.Equal(t, len(endpoints), len(clientUrls), "Unexpected no. of endpoints") 357 358 subsetUrls := clientUrls[1:] 359 360 err = kv.SetEndpoints(subsetUrls) 361 require.NoError(t, err, "Unexpected error on SetEndpoints") 362 endpoints = kv.GetEndpoints() 363 require.Equal(t, len(endpoints), len(subsetUrls), "Unexpected no. of endpoints") 364 365 } 366 367 func startEtcd(index int, initCluster map[string][]string, initState string) (*exec.Cmd, error) { 368 peerURL := urlPrefix + localhost + ":" + peerPorts[index] 369 clientURL := clientUrls[index] 370 initialCluster := "" 371 for name, ip := range initCluster { 372 initialCluster = initialCluster + name + "=" + ip[0] + "," 373 } 374 fmt.Println("Starting etcd for node ", index, "with initial cluster: ", initialCluster) 375 initialCluster = strings.TrimSuffix(initialCluster, ",") 376 etcdArgs := []string{ 377 "--name=" + 378 names[index], 379 "--initial-advertise-peer-urls=" + 380 peerURL, 381 "--listen-peer-urls=" + 382 peerURL, 383 "--listen-client-urls=" + 384 clientURL, 385 "--advertise-client-urls=" + 386 clientURL, 387 "--initial-cluster=" + 388 initialCluster, 389 "--data-dir=" + 390 dataDirs[index], 391 "--initial-cluster-state=" + 392 initState, 393 } 394 395 // unset env that can prevent etcd startup 396 os.Unsetenv("ETCD_LISTEN_CLIENT_URLS") 397 os.Unsetenv("ETCDCTL_API") 398 399 cmd := exec.Command("/tmp/test-etcd/etcd", etcdArgs...) 400 cmd.Stdout = ioutil.Discard 401 cmd.Stderr = ioutil.Discard 402 if err := cmd.Start(); err != nil { 403 return nil, fmt.Errorf("Failed to run %v(%v) : %v", 404 names[index], etcdArgs, err.Error()) 405 } 406 // XXX: Replace with check for etcd is up 407 time.Sleep(10 * time.Second) 408 return cmd, nil 409 } 410 411 func cleanup() { 412 for _, dir := range dataDirs { 413 os.RemoveAll(dir) 414 os.MkdirAll(dir, 0777) 415 } 416 } 417 418 func controllerLog(log string) { 419 fmt.Println("--------------------") 420 fmt.Println(log) 421 fmt.Println("--------------------") 422 }