github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/metasync_internal_test.go

// Package ais provides core functionality for the AIStore object storage.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package ais

import (
	"bytes"
	"errors"
	"net"
	"net/http"
	"net/http/httptest"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/core/mock"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/tools"
	"github.com/NVIDIA/aistore/tools/tassert"
	jsoniter "github.com/json-iterator/go"
)

type (
	// syncf is the sync function this test uses to control what to do when a metasync
	// call is received, e.g., accept or reject the request.
	syncf func(w http.ResponseWriter, r *http.Request, cnt int) (int, error)

	// metaSyncServer represents one test metasync server (proxy or target)
	metaSyncServer struct {
		id      string
		isProxy bool
		sf      syncf
		failCnt []int
	}

	// transportData records information about metasync calls: which server was called
	// and how many times
	transportData struct {
		isProxy bool
		id      string
		cnt     int
	}

	// helper for sorting []transportData
	msgSortHelper []transportData
)

// serverTCPAddr takes a string in the format "http://ip:port" and returns its IP and port
// (test-only: the ResolveTCPAddr error is intentionally ignored)
func serverTCPAddr(u string) (ni meta.NetInfo) {
	s := strings.TrimPrefix(u, "http://")
	addr, _ := net.ResolveTCPAddr("tcp", s)
	ni.Init("http", addr.IP.String(), strconv.Itoa(addr.Port))
	return
}

// newPrimary returns a proxy runner after initializing the fields that are needed by this test
func newPrimary() *proxy {
	var (
		p       = &proxy{}
		tracker = mock.NewStatsTracker()
		smap    = newSmap()
	)

	p.owner.smap = newSmapOwner(cmn.GCO.Get())
	p.si = newSnode("primary", apc.Proxy, meta.NetInfo{}, meta.NetInfo{}, meta.NetInfo{})

	smap.addProxy(p.si)
	smap.Primary = p.si
	p.owner.smap.put(smap)

	config := cmn.GCO.BeginUpdate()
	config.ConfigDir = "/tmp/ais-tests"
	config.Periodic.RetrySyncTime = cos.Duration(time.Millisecond * 100)
	config.Keepalive.Proxy.Name = "heartbeat"
	config.Keepalive.Proxy.Interval = cos.Duration(3 * time.Second)
	config.Timeout.CplaneOperation = cos.Duration(2 * time.Second)
	config.Timeout.MaxKeepalive = cos.Duration(4 * time.Second)
	config.Client.Timeout = cos.Duration(10 * time.Second)
	config.Client.TimeoutLong = cos.Duration(10 * time.Second)
	config.Cksum.Type = cos.ChecksumXXHash
	cmn.GCO.CommitUpdate(config)
	cmn.GCO.SetInitialGconfPath("/tmp/ais-tests/ais.config")

	g.client.data = &http.Client{}
	g.client.control = &http.Client{}

	p.keepalive = newPalive(p, tracker, atomic.NewBool(true))

	o := newBMDOwnerPrx(config)
	o.put(newBucketMD())
	p.owner.bmd = o

	e := newEtlMDOwnerPrx(config)
	e.put(newEtlMD())
	p.owner.etl = e

	p.gmm = memsys.PageMM()
	return p
}

// newSecondary returns a non-primary proxy runner with just enough state for these tests
func newSecondary(name string) *proxy {
	p := &proxy{}
	p.si = newSnode(name, apc.Proxy, meta.NetInfo{}, meta.NetInfo{}, meta.NetInfo{})
	p.owner.smap = newSmapOwner(cmn.GCO.Get())
	p.owner.smap.put(newSmap())

	g.client.data = &http.Client{}
	g.client.control = &http.Client{}

	config := cmn.GCO.BeginUpdate()
	config.Periodic.RetrySyncTime = cos.Duration(100 * time.Millisecond)
	config.Keepalive.Proxy.Name = "heartbeat"
	config.Keepalive.Proxy.Interval = cos.Duration(3 * time.Second)
	config.Timeout.CplaneOperation = cos.Duration(2 * time.Second)
	config.Timeout.MaxKeepalive = cos.Duration(4 * time.Second)
	config.Cksum.Type = cos.ChecksumXXHash
	cmn.GCO.CommitUpdate(config)

	o := newBMDOwnerPrx(cmn.GCO.Get())
	o.put(newBucketMD())
	p.owner.bmd = o
	return p
}
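
// Typical usage in the tests below (a sketch, not itself a test):
//
//	primary := newPrimary()
//	syncer := testSyncer(primary)
//	go syncer.Run()
//	// ... create httptest servers, call syncer.sync(...), verify ...
//	syncer.Stop(nil)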

// newTransportServer creates an http test server to simulate a proxy or a target, and is used
// to test metasync's transport: making sync calls, retrying failed calls, etc.
// newTransportServer's http handler calls the sync function, which decides how to respond to
// the sync call, counts the number of times the sync call is received, sends the result to the
// result channel on each sync (error or no error), and completes the http request with the
// status returned by the sync function.
func newTransportServer(primary *proxy, s *metaSyncServer, ch chan<- transportData) *httptest.Server {
	cnt := 0
	// note: copy these out of 's'; otherwise the closure 'f' would capture 's' itself,
	// whose contents change from call to call
	isProxy := s.isProxy
	id := s.id
	sf := s.sf

	// entry point for metasyncer's sync call
	f := func(w http.ResponseWriter, r *http.Request) {
		cnt++
		status, err := sf(w, r, cnt)
		ch <- transportData{isProxy, id, cnt}
		if err == nil {
			return
		}
		http.Error(w, err.Error(), status)
	}

	// create the test proxy/target server and add it to the primary proxy's smap
	ts := httptest.NewServer(http.HandlerFunc(f))
	addrInfo := serverTCPAddr(ts.URL)
	clone := primary.owner.smap.get().clone()
	if s.isProxy {
		clone.Pmap[id] = newSnode(id, apc.Proxy, addrInfo, addrInfo, addrInfo)
	} else {
		clone.Tmap[id] = newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
	}
	clone.Version++
	primary.owner.smap.put(clone)

	return ts
}

func TestMetasyncDeepCopy(t *testing.T) {
	bmd := newBucketMD()
	bmd.add(meta.NewBck("bucket1", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket2", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket3", apc.AWS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket4", apc.AWS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})

	clone := bmd.clone()
	s1 := string(cos.MustMarshal(bmd))
	s2 := string(cos.MustMarshal(clone))
	if s1 == "" || s2 == "" || s1 != s2 {
		t.Log(s1)
		t.Log(s2)
		t.Fatal("marshal(bucketmd) != marshal(clone(bucketmd))")
	}
}

// TestMetasyncTransport is the driver for metasync transport tests.
// For each test case it creates a primary proxy, starts the metasync instance, runs the
// test case, verifies the result, and stops the syncer.
func TestMetasyncTransport(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
	tcs := []struct {
		name  string
		testf func(*testing.T, *proxy, *metasyncer) ([]transportData, []transportData)
	}{
		{"SyncOnce", syncOnce},
		{"SyncOnceWait", syncOnceWait},
		{"SyncOnceNoWait", syncOnceNoWait},
		{"Retry", retry},
		{"MultipleSync", multipleSync},
		{"Refused", refused},
	}

	for _, tc := range tcs {
		primary := newPrimary()
		syncer := testSyncer(primary)

		var wg sync.WaitGroup
		wg.Add(1)
		go func(wg *sync.WaitGroup) {
			defer wg.Done()
			syncer.Run()
		}(&wg)

		t.Run(tc.name, func(t *testing.T) {
			exp, act := tc.testf(t, primary, syncer)
			if !reflect.DeepEqual(exp, act) {
				t.Fatalf("exp = %+v, act = %+v", exp, act)
			}
		})

		syncer.Stop(nil)
		wg.Wait()
	}
}

// collectResult reads n sync call results from the channel, sorts them, and returns them.
// Sorting makes result checking easier: sync calls to different servers run in parallel,
// so the results arrive in random order.
func collectResult(n int, ch <-chan transportData) []transportData {
	msgs := make([]transportData, n)
	for i := range n {
		msgs[i] = <-ch
	}

	sort.Sort(msgSortHelper(msgs))
	return msgs
}

// alwaysOk accepts the sync call
func alwaysOk(http.ResponseWriter, *http.Request, int) (int, error) { return 0, nil }

// delayedOk accepts the sync call after a short wait
func delayedOk(http.ResponseWriter, *http.Request, int) (int, error) {
	time.Sleep(time.Second)
	return 0, nil
}

// failFirst rejects the first sync call and accepts all subsequent calls
func failFirst(_ http.ResponseWriter, _ *http.Request, cnt int) (int, error) {
	if cnt == 1 {
		return http.StatusForbidden, errors.New("fail first call")
	}
	return 0, nil
}

// syncOnce verifies that a mix of proxies and targets accepts a single sync call
func syncOnce(_ *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, alwaysOk, nil},
			{"p2", true, alwaysOk, nil},
			{"t1", false, alwaysOk, nil},
			{"t2", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers))
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	wg := syncer.sync(revsPair{smap, msg})
	wg.Wait()
	return []transportData{
		{true, "p1", 1},
		{true, "p2", 1},
		{false, "t1", 1},
		{false, "t2", 1},
	}, collectResult(len(servers), ch)
}

// syncOnceWait verifies that sync(wait = true) doesn't return before all servers receive the call
func syncOnceWait(t *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, delayedOk, nil},
			{"t1", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers))
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	wg := syncer.sync(revsPair{smap, msg})
	wg.Wait()
	if len(ch) != len(servers) {
		t.Fatalf("sync call wait returned before sync is completed")
	}

	return []transportData{
		{true, "p1", 1},
		{false, "t1", 1},
	}, collectResult(len(servers), ch)
}
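
// Note: as exercised above, syncer.sync returns a wait-group-like handle;
// Wait blocks until all cluster members have responded to the sync.
// syncOnceWait and syncOnceNoWait probe the two sides of this contract.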

// syncOnceNoWait verifies that sync(wait = false) returns before all servers receive the call
func syncOnceNoWait(t *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, delayedOk, nil},
			{"t1", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers))
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	syncer.sync(revsPair{smap, msg})
	if len(ch) == len(servers) {
		t.Fatalf("sync call no wait returned after sync is completed")
	}

	return []transportData{
		{true, "p1", 1},
		{false, "t1", 1},
	}, collectResult(len(servers), ch)
}

// retry verifies that a failed sync call is retried
func retry(_ *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, failFirst, nil},
			{"p2", true, alwaysOk, nil},
			{"t1", false, failFirst, nil},
		}
		ch = make(chan transportData, len(servers)+2)
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	wg := syncer.sync(revsPair{smap, msg})
	wg.Wait()
	return []transportData{
		{true, "p1", 1},
		{true, "p1", 2},
		{true, "p2", 1},
		{false, "t1", 1},
		{false, "t1", 2},
	}, collectResult(len(servers)+2, ch)
}

// multipleSync verifies that a mix of proxies and targets accepts multiple sync calls
func multipleSync(_ *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, alwaysOk, nil},
			{"p2", true, alwaysOk, nil},
			{"t1", false, alwaysOk, nil},
			{"t2", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers)*3)
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	syncer.sync(revsPair{smap, msg}).Wait()

	ctx := &smapModifier{
		pre: func(_ *smapModifier, clone *smapX) error {
			clone.Version++
			return nil
		},
		final: func(_ *smapModifier, clone *smapX) {
			msg := primary.newAmsgStr("", nil)
			syncer.sync(revsPair{clone, msg})
		},
	}
	primary.owner.smap.modify(ctx)

	ctx = &smapModifier{
		pre: func(_ *smapModifier, clone *smapX) error {
			clone.Version++
			return nil
		},
		final: func(_ *smapModifier, clone *smapX) {
			msg := primary.newAmsgStr("", nil)
			syncer.sync(revsPair{clone, msg}).Wait()
		},
	}
	primary.owner.smap.modify(ctx)

	return []transportData{
		{true, "p1", 1},
		{true, "p1", 2},
		{true, "p1", 3},
		{true, "p2", 1},
		{true, "p2", 2},
		{true, "p2", 3},
		{false, "t1", 1},
		{false, "t1", 2},
		{false, "t1", 3},
		{false, "t2", 1},
		{false, "t2", 2},
		{false, "t2", 3},
	}, collectResult(len(servers)*3, ch)
}
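
// Note: transportData.cnt is per-server and cumulative across syncs (see
// newTransportServer), which is why multipleSync above expects counts 1
// through 3 for every member: the initial sync plus two smap version bumps.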

// refused tests the connection-refused scenario.
// It has two test cases: one with a short delay to let the metasyncer handle it
// immediately, the other with a longer delay so that the metasyncer times out
// retrying connection-refused errors and falls back to the retry-pending "route".
func refused(t *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		addrInfo meta.NetInfo
		ch       = make(chan transportData, 2) // NOTE: buffer size 2 so the http handler can return without blocking
		id       = "p"
	)
	addrInfo.Init("http", "127.0.0.1", "53538")

	// handler for /v1/metasync
	http.HandleFunc(apc.URLPathMetasync.S, func(_ http.ResponseWriter, _ *http.Request) {
		ch <- transportData{true, id, 1}
	})

	clone := primary.owner.smap.get().clone()
	clone.Pmap[id] = newSnode(id, apc.Proxy, addrInfo, addrInfo, addrInfo)
	clone.Version++
	primary.owner.smap.put(clone)

	// function shared between the two cases: start the proxy and wait for a sync call
	f := func() {
		timer := time.NewTimer(time.Minute)
		defer timer.Stop()

		wg := &sync.WaitGroup{}
		s := &http.Server{
			Addr:              addrInfo.String(),
			ReadHeaderTimeout: 10 * time.Second,
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			s.ListenAndServe()
		}()

		select {
		case <-timer.C:
			t.Log("timed out")
		case <-ch:
		}

		s.Close()
		wg.Wait()
	}

	// testcase #1: short delay
	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	syncer.sync(revsPair{smap, msg})
	time.Sleep(time.Millisecond)
	// sync returns even though it actually failed; there is no error return
	f()

	// testcase #2: long delay
	// (the 2-second sleep below comfortably exceeds the 100ms Periodic.RetrySyncTime
	// configured in newPrimary, giving the metasyncer time to fall back to the
	// retry-pending path)
	ctx := &smapModifier{
		pre: func(_ *smapModifier, clone *smapX) error {
			clone.Version++
			return nil
		},
		final: func(_ *smapModifier, clone *smapX) {
			msg := primary.newAmsgStr("", nil)
			syncer.sync(revsPair{clone, msg})
		},
	}
	primary.owner.smap.modify(ctx)

	time.Sleep(2 * time.Second)
	f()

	// we only care that the sync call arrives; no need to verify id and cnt since
	// we are the ones filling those in above
	exp := []transportData{{true, id, 1}}
	return exp, exp
}

// TestMetasyncData is the driver for metasync data tests.
func TestMetasyncData(t *testing.T) {
	// data stores the payload that comes from the http sync call, and an error
	type data struct {
		payload msPayload
		err     error
	}

	// newServer simulates a proxy or a target for metasync's data tests
	newServer := func(primary *proxy, s *metaSyncServer, ch chan<- data) *httptest.Server {
		cnt := 0
		id := s.id
		failCnt := s.failCnt

		// entry point for metasyncer's sync call
		f := func(w http.ResponseWriter, r *http.Request) {
			cnt++

			for _, v := range failCnt {
				if v == cnt {
					http.Error(w, "retry", http.StatusUnavailableForLegalReasons)
					return
				}
			}

			d := make(msPayload)
			err := d.unmarshal(r.Body, "")
			ch <- data{d, err}
		}

		// create the test proxy/target server and add it to the primary proxy's smap
		ts := httptest.NewServer(http.HandlerFunc(f))
		addrInfo := serverTCPAddr(ts.URL)
		clone := primary.owner.smap.get().clone()
		if s.isProxy {
			clone.Pmap[id] = newSnode(id, apc.Proxy, addrInfo, addrInfo, addrInfo)
		} else {
			clone.Tmap[id] = newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
		}
		clone.Version++
		primary.owner.smap.put(clone)

		return ts
	}

	// match reads cnt payloads from the channel and checks that every key/value
	// they carry is present in exp
	match := func(t *testing.T, exp msPayload, ch <-chan data, cnt int) {
		fail := func(t *testing.T, exp, act msPayload) {
			t.Fatalf("Mismatch: exp = %+v, act = %+v", exp, act)
		}

		for range cnt {
			act := (<-ch).payload
			for k, e := range act {
				a, ok := exp[k]
				if !ok {
					fail(t, exp, act)
				}

				if !bytes.Equal(e, a) {
					fail(t, exp, act)
				}
			}
		}
	}

	var (
		exp      = make(msPayload)
		expRetry = make(msPayload)
		primary  = newPrimary()
		syncer   = testSyncer(primary)
		ch       = make(chan data, 5)
		bmd      = newBucketMD()
	)

	emptyAisMsg, err := jsoniter.Marshal(aisMsg{})
	if err != nil {
		t.Fatal("Failed to marshal empty aisMsg, err =", err)
	}

	var wg sync.WaitGroup
	wg.Add(1)
	go func(wg *sync.WaitGroup) {
		defer wg.Done()
		syncer.Run()
	}(&wg)

	proxy := newServer(primary, &metaSyncServer{"proxy", true, nil, []int{3, 4, 5}}, ch)
	defer proxy.Close()

	target := newServer(primary, &metaSyncServer{"target", false, nil, []int{2}}, ch)
	defer target.Close()

	// sync smap
	smap := primary.owner.smap.get()
	smapBody := smap.marshal()

	exp[revsSmapTag] = smapBody
	expRetry[revsSmapTag] = smapBody
	exp[revsSmapTag+revsActionTag] = emptyAisMsg
	expRetry[revsSmapTag+revsActionTag] = emptyAisMsg

	syncer.sync(revsPair{smap, &aisMsg{}})
	match(t, expRetry, ch, 1)

	// sync bucketmd, fail target and retry
	bmd.add(meta.NewBck("bucket1", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket2", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	primary.owner.bmd.putPersist(bmd, nil)
	bmdBody := bmd.marshal()

	exp[revsBMDTag] = bmdBody
	expRetry[revsBMDTag] = bmdBody
	exp[revsBMDTag+revsActionTag] = emptyAisMsg
	expRetry[revsBMDTag+revsActionTag] = emptyAisMsg

	syncer.sync(revsPair{bmd, &aisMsg{}})
	match(t, exp, ch, 1)
	match(t, expRetry, ch, 1)

	// sync bucketmd, fail proxy, sync new bucketmd, expect the proxy to receive the
	// new bucketmd after rejecting a few sync requests
	bmd = bmd.clone()
	bprops := &cmn.Bprops{
		Cksum: cmn.CksumConf{Type: cos.ChecksumXXHash},
		LRU:   cmn.GCO.Get().LRU,
	}
	bmd.add(meta.NewBck("bucket3", apc.AIS, cmn.NsGlobal), bprops)
	primary.owner.bmd.putPersist(bmd, nil)
	bmdBody = bmd.marshal()

	exp[revsBMDTag] = bmdBody
	msg := primary.newAmsgStr("", bmd)
	syncer.sync(revsPair{bmd, msg})
}
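
// As exercised above, a metasync payload (msPayload) carries two entries per
// synced object: <tag> maps to the marshaled object itself, and
// <tag>+revsActionTag maps to the marshaled action message that accompanies it.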

// TestMetasyncMembership tests metasync's logic when accessing the proxy's smap directly
func TestMetasyncMembership(t *testing.T) {
	{
		// pending server dropped without sync
		primary := newPrimary()
		syncer := testSyncer(primary)

		var wg sync.WaitGroup
		wg.Add(1)
		go func(wg *sync.WaitGroup) {
			defer wg.Done()
			syncer.Run()
		}(&wg)

		var cnt atomic.Int32
		s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			cnt.Add(1)
			http.Error(w, "i don't know how to deal with you", http.StatusNotAcceptable)
		}))

		defer s.Close()

		id := "t"
		addrInfo := serverTCPAddr(s.URL)
		clone := primary.owner.smap.get().clone()
		clone.addTarget(newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo))
		primary.owner.smap.put(clone)
		msg := primary.newAmsgStr("", nil)
		wg1 := syncer.sync(revsPair{clone, msg})
		wg1.Wait()
		time.Sleep(time.Millisecond * 300)

		clone = primary.owner.smap.get().clone()
		clone.delTarget(id)
		primary.owner.smap.put(clone)

		time.Sleep(time.Millisecond * 300)
		savedCnt := cnt.Load()
		time.Sleep(time.Millisecond * 300)
		if cnt.Load() != savedCnt {
			t.Fatal("Sync call didn't stop after target is deleted")
		}

		syncer.Stop(nil)
		wg.Wait()
	}

	primary := newPrimary()
	syncer := testSyncer(primary)

	var wg sync.WaitGroup
	wg.Add(1)
	go func(wg *sync.WaitGroup) {
		defer wg.Done()
		syncer.Run()
	}(&wg)

	ch := make(chan struct{}, 10)
	f := func(_ http.ResponseWriter, _ *http.Request) {
		ch <- struct{}{}
	}

	{
		// sync before smap sync (no previous sync saved in metasyncer)
		s1 := httptest.NewServer(http.HandlerFunc(f))
		defer s1.Close()

		id := "t1111"
		addrInfo := serverTCPAddr(s1.URL)
		di := newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
		clone := primary.owner.smap.get().clone()
		clone.addTarget(di)
		primary.owner.smap.put(clone)
		bmd := primary.owner.bmd.get()
		msg := primary.newAmsgStr("", bmd)
		wg1 := syncer.sync(revsPair{bmd, msg})
		wg1.Wait()
		<-ch

		// sync smap so the metasyncer has a smap
		wg1 = syncer.sync(revsPair{clone, msg})
		wg1.Wait()
		<-ch
	}

	{
		// add a new target, but the new smap is not synced;
		// the metasyncer picks up the new target directly from the primary's smap,
		// and also adds it to the pending list in order to sync all previously synced
		// data; that's why the extra channel read below
		s2 := httptest.NewServer(http.HandlerFunc(f))
		defer s2.Close()

		id := "t22222"
		addrInfo := serverTCPAddr(s2.URL)
		di := newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
		clone := primary.owner.smap.get().clone()
		clone.addTarget(di)
		primary.owner.smap.put(clone)

		bmd := primary.owner.bmd.get()
		msg := primary.newAmsgStr("", bmd)
		wg1 := syncer.sync(revsPair{bmd, msg})
		wg1.Wait()
		<-ch // target 1
		<-ch // target 2
		if len(ch) != 0 {
			t.Fatal("Too many sync calls received")
		}
	}

	// note: Stop/Wait moved out of the inner block so that wg here is the runner's
	// WaitGroup (the original inner `wg := syncer.sync(...)` shadowed it)
	syncer.Stop(nil)
	wg.Wait()
}

// TestMetasyncReceive tests extracting received sync data.
func TestMetasyncReceive(t *testing.T) {
	{
		emptyAisMsg := func(a *aisMsg) {
			if a.Action != "" || a.Name != "" || a.Value != nil {
				t.Fatal("Expecting empty action message", a)
			}
		}

		nilSMap := func(m *smapX) {
			if m != nil {
				t.Fatal("Expecting nil Smap", m)
			}
		}

		matchSMap := func(a, b *smapX) {
			_, sameUUID, sameVersion, eq := a.Compare(&b.Smap)
			if !sameUUID || !sameVersion || !eq {
				t.Fatal("Smap mismatch", a.StringEx(), b.StringEx())
			}
		}

		primary := newPrimary()
		syncer := testSyncer(primary)

		var wg sync.WaitGroup
		wg.Add(1)
		go func(wg *sync.WaitGroup) {
			defer wg.Done()
			syncer.Run()
		}(&wg)

		chProxy := make(chan msPayload, 10)
		fProxy := func(_ http.ResponseWriter, r *http.Request) {
			d := make(msPayload)
			err := d.unmarshal(r.Body, "")
			cos.AssertNoErr(err)
			chProxy <- d
		}

		s := httptest.NewServer(http.HandlerFunc(fProxy))
		defer s.Close()
		addrInfo := serverTCPAddr(s.URL)
		clone := primary.owner.smap.get().clone()
		clone.addProxy(newSnode("p1", apc.Proxy, addrInfo, addrInfo, addrInfo))
		primary.owner.smap.put(clone)

		proxy1 := newSecondary("p1")

		// empty payload
		newSMap, msg, err := proxy1.extractSmap(make(msPayload), "", false /*skip validation*/)
		if newSMap != nil || msg != nil || err != nil {
			t.Fatal("Extract smap from empty payload returned data")
		}

		wg1 := syncer.sync(revsPair{primary.owner.smap.get(), &aisMsg{}})
		wg1.Wait()
		payload := <-chProxy

		newSMap, msg, err = proxy1.extractSmap(payload, "", false /*skip validation*/)
		tassert.CheckFatal(t, err)
		emptyAisMsg(msg)
		matchSMap(primary.owner.smap.get(), newSMap)
		proxy1.owner.smap.put(newSMap)

		// same version of smap received
		newSMap, msg, err = proxy1.extractSmap(payload, "", false /*skip validation*/)
		tassert.CheckFatal(t, err)
		emptyAisMsg(msg)
		nilSMap(newSMap)
	}
}

func testSyncer(p *proxy) (syncer *metasyncer) {
	syncer = newMetasyncer(p)
	return
}

///////////////////
// msgSortHelper //
///////////////////

// msgSortHelper implements sort.Interface: proxies sort before targets,
// then by id, then by call count
func (m msgSortHelper) Len() int {
	return len(m)
}

func (m msgSortHelper) Swap(i, j int) {
	m[i], m[j] = m[j], m[i]
}

func (m msgSortHelper) Less(i, j int) bool {
	if m[i].isProxy != m[j].isProxy {
		return m[i].isProxy
	}

	if m[i].id != m[j].id {
		return m[i].id < m[j].id
	}

	return m[i].cnt < m[j].cnt
}
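
// Example of the resulting order (hypothetical values, for illustration):
//
//	msgs := []transportData{{false, "t1", 2}, {true, "p1", 1}, {false, "t1", 1}}
//	sort.Sort(msgSortHelper(msgs))
//	// msgs is now: [{true p1 1} {false t1 1} {false t1 2}]
//
// This deterministic order is what makes collectResult's output directly
// comparable against the expected slices in the transport tests above.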