github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/metasync_internal_test.go

// Package ais provides core functionality for the AIStore object storage.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package ais

import (
	"bytes"
	"errors"
	"net"
	"net/http"
	"net/http/httptest"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/core/mock"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/tools"
	"github.com/NVIDIA/aistore/tools/tassert"
	jsoniter "github.com/json-iterator/go"
)

type (
	// syncf is the sync handler this test uses to control how a server responds
	// to a metasync call, e.g., accepting or rejecting the request.
	syncf func(w http.ResponseWriter, r *http.Request, cnt int) (int, error)

	// metaSyncServer represents one test server (proxy or target)
	metaSyncServer struct {
		id      string
		isProxy bool
		sf      syncf
		failCnt []int
	}

	// transportData records information about metasync calls: which server was
	// called and how many times it was called.
	transportData struct {
		isProxy bool
		id      string
		cnt     int
	}

	// helper for sorting []transportData
	msgSortHelper []transportData
)

// serverTCPAddr takes a string in the format "http://ip:port" and returns its IP and port
func serverTCPAddr(u string) (ni meta.NetInfo) {
	s := strings.TrimPrefix(u, "http://")
	addr, _ := net.ResolveTCPAddr("tcp", s)
	ni.Init("http", addr.IP.String(), strconv.Itoa(addr.Port))
	return
}

// newPrimary returns a proxy runner after initializing the fields that are needed by this test
func newPrimary() *proxy {
	var (
		p       = &proxy{}
		tracker = mock.NewStatsTracker()
		smap    = newSmap()
	)

	p.owner.smap = newSmapOwner(cmn.GCO.Get())
	p.si = newSnode("primary", apc.Proxy, meta.NetInfo{}, meta.NetInfo{}, meta.NetInfo{})

	smap.addProxy(p.si)
	smap.Primary = p.si
	p.owner.smap.put(smap)

	config := cmn.GCO.BeginUpdate()
	config.ConfigDir = "/tmp/ais-tests"
	config.Periodic.RetrySyncTime = cos.Duration(time.Millisecond * 100)
	config.Keepalive.Proxy.Name = "heartbeat"
	config.Keepalive.Proxy.Interval = cos.Duration(3 * time.Second)
	config.Timeout.CplaneOperation = cos.Duration(2 * time.Second)
	config.Timeout.MaxKeepalive = cos.Duration(4 * time.Second)
	config.Client.Timeout = cos.Duration(10 * time.Second)
	config.Client.TimeoutLong = cos.Duration(10 * time.Second)
	config.Cksum.Type = cos.ChecksumXXHash
	cmn.GCO.CommitUpdate(config)
	cmn.GCO.SetInitialGconfPath("/tmp/ais-tests/ais.config")

	g.client.data = &http.Client{}
	g.client.control = &http.Client{}

	p.keepalive = newPalive(p, tracker, atomic.NewBool(true))

	o := newBMDOwnerPrx(config)
	o.put(newBucketMD())
	p.owner.bmd = o

	e := newEtlMDOwnerPrx(config)
	e.put(newEtlMD())
	p.owner.etl = e

	p.gmm = memsys.PageMM()
	return p
}

func newSecondary(name string) *proxy {
	p := &proxy{}
	p.si = newSnode(name, apc.Proxy, meta.NetInfo{}, meta.NetInfo{}, meta.NetInfo{})
	p.owner.smap = newSmapOwner(cmn.GCO.Get())
	p.owner.smap.put(newSmap())

	g.client.data = &http.Client{}
	g.client.control = &http.Client{}

	config := cmn.GCO.BeginUpdate()
	config.Periodic.RetrySyncTime = cos.Duration(100 * time.Millisecond)
	config.Keepalive.Proxy.Name = "heartbeat"
	config.Keepalive.Proxy.Interval = cos.Duration(3 * time.Second)
	config.Timeout.CplaneOperation = cos.Duration(2 * time.Second)
	config.Timeout.MaxKeepalive = cos.Duration(4 * time.Second)
	config.Cksum.Type = cos.ChecksumXXHash
	cmn.GCO.CommitUpdate(config)

	o := newBMDOwnerPrx(cmn.GCO.Get())
	o.put(newBucketMD())
	p.owner.bmd = o
	return p
}

// newTransportServer creates an HTTP test server to simulate a proxy or a target, and is used
// to test the transport layer of metasync: making sync calls, retrying failed calls, etc.
// Its HTTP handler calls the sync function, which decides how to respond to the sync call,
// counts the number of sync calls received, sends the result to the result channel on each
// call (error or no error), and completes the request with the status the sync function returns.
func newTransportServer(primary *proxy, s *metaSyncServer, ch chan<- transportData) *httptest.Server {
	cnt := 0
	// note: copy these from 's'; otherwise 'f' would capture 's' itself, whose contents change from call to call
	isProxy := s.isProxy
	id := s.id
	sf := s.sf

	// entry point for metasyncer's sync call
	f := func(w http.ResponseWriter, r *http.Request) {
		cnt++
		status, err := sf(w, r, cnt)
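		// report every call to the test driver, whether accepted or rejected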
		ch <- transportData{isProxy, id, cnt}
		if err == nil {
			return
		}
		http.Error(w, err.Error(), status)
	}

	// create the test proxy/target server and add it to the primary proxy's Smap
	ts := httptest.NewServer(http.HandlerFunc(f))
	addrInfo := serverTCPAddr(ts.URL)
	clone := primary.owner.smap.get().clone()
	if s.isProxy {
		clone.Pmap[id] = newSnode(id, apc.Proxy, addrInfo, addrInfo, addrInfo)
	} else {
		clone.Tmap[id] = newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
	}
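	// bump the version so the metasyncer treats the updated Smap as newer than the last one synced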
	clone.Version++
	primary.owner.smap.put(clone)

	return ts
}

func TestMetasyncDeepCopy(t *testing.T) {
	bmd := newBucketMD()
	bmd.add(meta.NewBck("bucket1", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket2", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket3", apc.AWS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket4", apc.AWS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})

	clone := bmd.clone()
	s1 := string(cos.MustMarshal(bmd))
	s2 := string(cos.MustMarshal(clone))
	if s1 == "" || s2 == "" || s1 != s2 {
		t.Log(s1)
		t.Log(s2)
		t.Fatal("marshal(bucketmd) != marshal(clone(bucketmd))")
	}
}

// TestMetasyncTransport is the driver for metasync transport tests.
// For each test case it creates a primary proxy, starts the metasyncer, runs the test case,
// verifies the result, and stops the syncer.
func TestMetasyncTransport(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
	tcs := []struct {
		name  string
		testf func(*testing.T, *proxy, *metasyncer) ([]transportData, []transportData)
	}{
		{"SyncOnce", syncOnce},
		{"SyncOnceWait", syncOnceWait},
		{"SyncOnceNoWait", syncOnceNoWait},
		{"Retry", retry},
		{"MultipleSync", multipleSync},
		{"Refused", refused},
	}

	for _, tc := range tcs {
		primary := newPrimary()
		syncer := testSyncer(primary)

		var wg sync.WaitGroup
		wg.Add(1)
		go func(wg *sync.WaitGroup) {
			defer wg.Done()
			syncer.Run()
		}(&wg)

		t.Run(tc.name, func(t *testing.T) {
			exp, act := tc.testf(t, primary, syncer)
			if !reflect.DeepEqual(exp, act) {
				t.Fatalf("exp = %+v, act = %+v", exp, act)
			}
		})

		syncer.Stop(nil)
		wg.Wait()
	}
}

// collectResult reads n sync-call results from the channel, sorts them, and returns the sorted slice.
// Sorting makes result checking easier: sync calls to different servers run in parallel,
// so the results arrive in arbitrary order.
func collectResult(n int, ch <-chan transportData) []transportData {
	msgs := make([]transportData, n)
	for i := range n {
		msgs[i] = <-ch
	}

	sort.Sort(msgSortHelper(msgs))
	return msgs
}

// alwaysOk accepts the sync call
func alwaysOk(http.ResponseWriter, *http.Request, int) (int, error) { return 0, nil }

// delayedOk accepts the sync call after a short wait
func delayedOk(http.ResponseWriter, *http.Request, int) (int, error) {
	time.Sleep(time.Second)
	return 0, nil
}

// failFirst rejects the first sync call and accepts all subsequent calls
func failFirst(_ http.ResponseWriter, _ *http.Request, cnt int) (int, error) {
	if cnt == 1 {
		return http.StatusForbidden, errors.New("fail first call")
	}
	return 0, nil
}

// syncOnce checks that a mix of proxies and targets accepts a single sync call
func syncOnce(_ *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, alwaysOk, nil},
			{"p2", true, alwaysOk, nil},
			{"t1", false, alwaysOk, nil},
			{"t2", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers))
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
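	// the returned WaitGroup is done once the sync round completes (see also syncOnceWait/syncOnceNoWait)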
	wg := syncer.sync(revsPair{smap, msg})
	wg.Wait()
	return []transportData{
		{true, "p1", 1},
		{true, "p2", 1},
		{false, "t1", 1},
		{false, "t2", 1},
	}, collectResult(len(servers), ch)
}

// syncOnceWait checks that sync(wait = true) doesn't return before all servers have received the call
func syncOnceWait(t *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, delayedOk, nil},
			{"t1", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers))
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	wg := syncer.sync(revsPair{smap, msg})
	wg.Wait()
	if len(ch) != len(servers) {
		t.Fatalf("sync call wait returned before sync is completed")
	}

	return []transportData{
		{true, "p1", 1},
		{false, "t1", 1},
	}, collectResult(len(servers), ch)
}

// syncOnceNoWait checks that sync(wait = false) returns before all servers have received the call
func syncOnceNoWait(t *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, delayedOk, nil},
			{"t1", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers))
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	syncer.sync(revsPair{smap, msg})
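	// at this point the delayed server ("p1") should not have responded yet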
	if len(ch) == len(servers) {
		t.Fatalf("sync call no wait returned after sync is completed")
	}

	return []transportData{
		{true, "p1", 1},
		{false, "t1", 1},
	}, collectResult(len(servers), ch)
}

// retry checks that a failed sync call is retried
func retry(_ *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, failFirst, nil},
			{"p2", true, alwaysOk, nil},
			{"t1", false, failFirst, nil},
		}
		ch = make(chan transportData, len(servers)+2)
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	wg := syncer.sync(revsPair{smap, msg})
	wg.Wait()
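	// "p1" and "t1" each fail their first call and get retried, hence two results apiece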
	return []transportData{
		{true, "p1", 1},
		{true, "p1", 2},
		{true, "p2", 1},
		{false, "t1", 1},
		{false, "t1", 2},
	}, collectResult(len(servers)+2, ch)
}

// multipleSync checks that a mix of proxies and targets accepts multiple sync calls
func multipleSync(_ *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		servers = []metaSyncServer{
			{"p1", true, alwaysOk, nil},
			{"p2", true, alwaysOk, nil},
			{"t1", false, alwaysOk, nil},
			{"t2", false, alwaysOk, nil},
		}
		ch = make(chan transportData, len(servers)*3)
	)

	for i := range servers {
		v := servers[i]
		s := newTransportServer(primary, &v, ch)
		defer s.Close()
	}

	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	syncer.sync(revsPair{smap, msg}).Wait()
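
	// mutate the Smap under the owner's lock: 'pre' bumps the version, 'final' triggers the sync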
	ctx := &smapModifier{
		pre: func(_ *smapModifier, clone *smapX) error {
			clone.Version++
			return nil
		},
		final: func(_ *smapModifier, clone *smapX) {
			msg := primary.newAmsgStr("", nil)
			syncer.sync(revsPair{clone, msg})
		},
	}
	primary.owner.smap.modify(ctx)

	ctx = &smapModifier{
		pre: func(_ *smapModifier, clone *smapX) error {
			clone.Version++
			return nil
		},
		final: func(_ *smapModifier, clone *smapX) {
			msg := primary.newAmsgStr("", nil)
			syncer.sync(revsPair{clone, msg}).Wait()
		},
	}
	primary.owner.smap.modify(ctx)

	return []transportData{
		{true, "p1", 1},
		{true, "p1", 2},
		{true, "p1", 3},
		{true, "p2", 1},
		{true, "p2", 2},
		{true, "p2", 3},
		{false, "t1", 1},
		{false, "t1", 2},
		{false, "t1", 3},
		{false, "t2", 1},
		{false, "t2", 2},
		{false, "t2", 3},
	}, collectResult(len(servers)*3, ch)
}

// refused tests the connection-refused scenario.
// It has two test cases: one with a short delay to let the metasyncer handle it immediately,
// the other with a longer delay so that the metasyncer times out
// retrying connection-refused errors and falls back to the retry-pending "route"
func refused(t *testing.T, primary *proxy, syncer *metasyncer) ([]transportData, []transportData) {
	var (
		addrInfo meta.NetInfo
		ch       = make(chan transportData, 2) // NOTE: buffered (cap 2) so the HTTP handler can return without blocking
		id       = "p"
	)
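	// hardcoded address with no listener behind it (yet): sync calls are refused until f() starts the server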
	addrInfo.Init("http", "127.0.0.1", "53538")

	// handler for /v1/metasync
	http.HandleFunc(apc.URLPathMetasync.S, func(_ http.ResponseWriter, _ *http.Request) {
		ch <- transportData{true, id, 1}
	})

	clone := primary.owner.smap.get().clone()
	clone.Pmap[id] = newSnode(id, apc.Proxy, addrInfo, addrInfo, addrInfo)
	clone.Version++
	primary.owner.smap.put(clone)

	// function shared between the two cases: start the proxy, wait for a sync call
	f := func() {
		timer := time.NewTimer(time.Minute)
		defer timer.Stop()

		wg := &sync.WaitGroup{}
		s := &http.Server{
			Addr:              addrInfo.String(),
			ReadHeaderTimeout: 10 * time.Second,
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			s.ListenAndServe()
		}()

		select {
		case <-timer.C:
			t.Log("timed out")
		case <-ch:
		}

		s.Close()
		wg.Wait()
	}

	// testcase #1: short delay
	smap := primary.owner.smap.get()
	msg := primary.newAmsgStr("", nil)
	syncer.sync(revsPair{smap, msg})
	time.Sleep(time.Millisecond)
	// sync returns with no error even though the call actually failed
	f()

	// testcase #2: long delay
	ctx := &smapModifier{
		pre: func(_ *smapModifier, clone *smapX) error {
			clone.Version++
			return nil
		},
		final: func(_ *smapModifier, clone *smapX) {
			msg := primary.newAmsgStr("", nil)
			syncer.sync(revsPair{clone, msg})
		},
	}
	primary.owner.smap.modify(ctx)

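	// wait long enough for the metasyncer to give up on immediate retries and fall back to the retry-pending path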
	time.Sleep(2 * time.Second)
	f()

	// we only care that the sync call arrives; no need to verify id and cnt since we are
	// the ones filling those in above
	exp := []transportData{{true, id, 1}}
	return exp, exp
}

// TestMetasyncData is the driver for metasync data tests.
func TestMetasyncData(t *testing.T) {
	// data stores the payload that arrives with an HTTP sync call, plus the unmarshaling error, if any
	type data struct {
		payload msPayload
		err     error
	}

	// newServer simulates a proxy or a target for metasync's data tests
	newServer := func(primary *proxy, s *metaSyncServer, ch chan<- data) *httptest.Server {
		cnt := 0
		id := s.id
		failCnt := s.failCnt

		// entry point for metasyncer's sync call
		f := func(w http.ResponseWriter, r *http.Request) {
			cnt++

			for _, v := range failCnt {
				if v == cnt {
					http.Error(w, "retry", http.StatusUnavailableForLegalReasons)
					return
				}
			}

			d := make(msPayload)
			err := d.unmarshal(r.Body, "")
			ch <- data{d, err}
		}

		// create the test proxy/target server and add it to the primary proxy's Smap
		ts := httptest.NewServer(http.HandlerFunc(f))
		addrInfo := serverTCPAddr(ts.URL)
		clone := primary.owner.smap.get().clone()
		if s.isProxy {
			clone.Pmap[id] = newSnode(id, apc.Proxy, addrInfo, addrInfo, addrInfo)
		} else {
			clone.Tmap[id] = newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
		}
		clone.Version++
		primary.owner.smap.put(clone)

		return ts
	}

	match := func(t *testing.T, exp msPayload, ch <-chan data, cnt int) {
		fail := func(t *testing.T, exp, act msPayload) {
			t.Fatalf("Mismatch: exp = %+v, act = %+v", exp, act)
		}

		for range cnt {
			act := (<-ch).payload
			for k, e := range act {
				a, ok := exp[k]
				if !ok {
					fail(t, exp, act)
				}

				if !bytes.Equal(e, a) {
					fail(t, exp, act)
				}
			}
		}
	}

	var (
		exp      = make(msPayload)
		expRetry = make(msPayload)
		primary  = newPrimary()
		syncer   = testSyncer(primary)
		ch       = make(chan data, 5)
		bmd      = newBucketMD()
	)

	emptyAisMsg, err := jsoniter.Marshal(aisMsg{})
	if err != nil {
		t.Fatal("Failed to marshal empty aisMsg, err =", err)
	}

	var wg sync.WaitGroup
	wg.Add(1)
	go func(wg *sync.WaitGroup) {
		defer wg.Done()
		syncer.Run()
	}(&wg)

	proxy := newServer(primary, &metaSyncServer{"proxy", true, nil, []int{3, 4, 5}}, ch)
	defer proxy.Close()

	target := newServer(primary, &metaSyncServer{"target", false, nil, []int{2}}, ch)
	defer target.Close()

	// sync smap
	smap := primary.owner.smap.get()
	smapBody := smap.marshal()

	exp[revsSmapTag] = smapBody
	expRetry[revsSmapTag] = smapBody
	exp[revsSmapTag+revsActionTag] = emptyAisMsg
	expRetry[revsSmapTag+revsActionTag] = emptyAisMsg

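	// the payload on the wire carries both the marshaled revs (keyed by tag) and its action message (keyed by tag+revsActionTag)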
	syncer.sync(revsPair{smap, &aisMsg{}})
	match(t, expRetry, ch, 1)

	// sync bucketmd, fail target and retry
	bmd.add(meta.NewBck("bucket1", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	bmd.add(meta.NewBck("bucket2", apc.AIS, cmn.NsGlobal), &cmn.Bprops{
		Cksum: cmn.CksumConf{
			Type: cos.ChecksumXXHash,
		},
	})
	primary.owner.bmd.putPersist(bmd, nil)
	bmdBody := bmd.marshal()

	exp[revsBMDTag] = bmdBody
	expRetry[revsBMDTag] = bmdBody
	exp[revsBMDTag+revsActionTag] = emptyAisMsg
	expRetry[revsBMDTag+revsActionTag] = emptyAisMsg

	syncer.sync(revsPair{bmd, &aisMsg{}})
	match(t, exp, ch, 1)
	match(t, expRetry, ch, 1)

	// sync bucketmd, fail proxy, sync new bucketmd, expect proxy to receive the new bucketmd
	// after rejecting a few sync requests
	bmd = bmd.clone()
	bprops := &cmn.Bprops{
		Cksum: cmn.CksumConf{Type: cos.ChecksumXXHash},
		LRU:   cmn.GCO.Get().LRU,
	}
	bmd.add(meta.NewBck("bucket3", apc.AIS, cmn.NsGlobal), bprops)
	primary.owner.bmd.putPersist(bmd, nil)
	bmdBody = bmd.marshal()

	exp[revsBMDTag] = bmdBody
	msg := primary.newAmsgStr("", bmd)
	syncer.sync(revsPair{bmd, msg})
}

// TestMetasyncMembership tests metasync's logic when accessing the proxy's Smap directly
func TestMetasyncMembership(t *testing.T) {
	{
		// pending server dropped without sync
		primary := newPrimary()
		syncer := testSyncer(primary)

		var wg sync.WaitGroup
		wg.Add(1)
		go func(wg *sync.WaitGroup) {
			defer wg.Done()
			syncer.Run()
		}(&wg)

		var cnt atomic.Int32
		s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
			cnt.Add(1)
			http.Error(w, "i don't know how to deal with you", http.StatusNotAcceptable)
		}))

		defer s.Close()

		id := "t"
		addrInfo := serverTCPAddr(s.URL)
		clone := primary.owner.smap.get().clone()
		clone.addTarget(newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo))
		primary.owner.smap.put(clone)
		msg := primary.newAmsgStr("", nil)
		wg1 := syncer.sync(revsPair{clone, msg})
		wg1.Wait()
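		// give the metasyncer time to keep retrying the rejecting target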
		time.Sleep(time.Millisecond * 300)

		clone = primary.owner.smap.get().clone()
		clone.delTarget(id)
		primary.owner.smap.put(clone)

		time.Sleep(time.Millisecond * 300)
		savedCnt := cnt.Load()
		time.Sleep(time.Millisecond * 300)
		if cnt.Load() != savedCnt {
			t.Fatal("Sync call didn't stop after target is deleted")
		}

		syncer.Stop(nil)
		wg.Wait()
	}

	primary := newPrimary()
	syncer := testSyncer(primary)

	var wg sync.WaitGroup
	wg.Add(1)
	go func(wg *sync.WaitGroup) {
		defer wg.Done()
		syncer.Run()
	}(&wg)

	ch := make(chan struct{}, 10)
	f := func(_ http.ResponseWriter, _ *http.Request) {
		ch <- struct{}{}
	}

	{
		// sync before smap sync (no previous sync saved in metasyncer)
		s1 := httptest.NewServer(http.HandlerFunc(f))
		defer s1.Close()

		id := "t1111"
		addrInfo := serverTCPAddr(s1.URL)
		di := newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
		clone := primary.owner.smap.get().clone()
		clone.addTarget(di)
		primary.owner.smap.put(clone)
		bmd := primary.owner.bmd.get()
		msg := primary.newAmsgStr("", bmd)
		wg := syncer.sync(revsPair{bmd, msg})
		wg.Wait()
		<-ch

		// sync smap so that the metasyncer has a Smap
		wg = syncer.sync(revsPair{clone, msg})
		wg.Wait()
		<-ch
	}

	{
		// add a new target, but do not sync the new Smap;
		// the metasyncer picks the new target up directly from the primary's Smap and also
		// adds it to the pending set, syncing all previously synced data to it as well;
		// hence the extra channel read below
		s2 := httptest.NewServer(http.HandlerFunc(f))
		defer s2.Close()

		id := "t22222"
		addrInfo := serverTCPAddr(s2.URL)
		di := newSnode(id, apc.Target, addrInfo, addrInfo, addrInfo)
		clone := primary.owner.smap.get().clone()
		clone.addTarget(di)
		primary.owner.smap.put(clone)

		bmd := primary.owner.bmd.get()
		msg := primary.newAmsgStr("", bmd)
		wg := syncer.sync(revsPair{bmd, msg})
		wg.Wait()
		<-ch // target 1
		<-ch // target 2
		if len(ch) != 0 {
			t.Fatal("Too many sync calls received")
		}

		syncer.Stop(nil)
		wg.Wait()
	}
}

// TestMetasyncReceive tests extracting received sync data.
func TestMetasyncReceive(t *testing.T) {
	{
		emptyAisMsg := func(a *aisMsg) {
			if a.Action != "" || a.Name != "" || a.Value != nil {
				t.Fatal("Expecting empty action message", a)
			}
		}

		nilSMap := func(m *smapX) {
			if m != nil {
				t.Fatal("Expecting nil Smap", m)
			}
		}

		matchSMap := func(a, b *smapX) {
			_, sameUUID, sameVersion, eq := a.Compare(&b.Smap)
			if !sameUUID || !sameVersion || !eq {
				t.Fatal("Smap mismatch", a.StringEx(), b.StringEx())
			}
		}

		primary := newPrimary()
		syncer := testSyncer(primary)

		var wg sync.WaitGroup
		wg.Add(1)
		go func(wg *sync.WaitGroup) {
			defer wg.Done()
			syncer.Run()
		}(&wg)

		chProxy := make(chan msPayload, 10)
		fProxy := func(_ http.ResponseWriter, r *http.Request) {
			d := make(msPayload)
			err := d.unmarshal(r.Body, "")
			cos.AssertNoErr(err)
			chProxy <- d
		}

		// similar to the transport-test servers; the only difference is the channel carrying the payload
		s := httptest.NewServer(http.HandlerFunc(fProxy))
		defer s.Close()
		addrInfo := serverTCPAddr(s.URL)
		clone := primary.owner.smap.get().clone()
		clone.addProxy(newSnode("p1", apc.Proxy, addrInfo, addrInfo, addrInfo))
		primary.owner.smap.put(clone)

		proxy1 := newSecondary("p1")

		// empty payload
		newSMap, msg, err := proxy1.extractSmap(make(msPayload), "", false /*skip validation*/)
		if newSMap != nil || msg != nil || err != nil {
			t.Fatal("Extract smap from empty payload returned data")
		}

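		// sync the real Smap and capture the payload as the secondary proxy would receive it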
		wg1 := syncer.sync(revsPair{primary.owner.smap.get(), &aisMsg{}})
		wg1.Wait()
		payload := <-chProxy

		newSMap, msg, err = proxy1.extractSmap(payload, "", false /*skip validation*/)
		tassert.CheckFatal(t, err)
		emptyAisMsg(msg)
		matchSMap(primary.owner.smap.get(), newSMap)
		proxy1.owner.smap.put(newSMap)

		// same version of smap received
		newSMap, msg, err = proxy1.extractSmap(payload, "", false /*skip validation*/)
		tassert.CheckFatal(t, err)
		emptyAisMsg(msg)
		nilSMap(newSMap)
	}
}

func testSyncer(p *proxy) (syncer *metasyncer) {
	syncer = newMetasyncer(p)
	return
}

///////////////////
// msgSortHelper //
///////////////////

func (m msgSortHelper) Len() int {
	return len(m)
}

func (m msgSortHelper) Swap(i, j int) {
	m[i], m[j] = m[j], m[i]
}

func (m msgSortHelper) Less(i, j int) bool {
	if m[i].isProxy != m[j].isProxy {
		return m[i].isProxy
	}

	if m[i].id != m[j].id {
		return m[i].id < m[j].id
	}

	return m[i].cnt < m[j].cnt
}