github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/integration_test.go

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"errors"
     9  	"math/rand"
    10  	"os"
    11  	"path/filepath"
    12  	"strconv"
    13  	"sync"
    14  	"testing"
    15  	"time"
    16  
    17  	"github.com/NVIDIA/aistore/api"
    18  	"github.com/NVIDIA/aistore/api/apc"
    19  	"github.com/NVIDIA/aistore/cmn"
    20  	"github.com/NVIDIA/aistore/cmn/cos"
    21  	"github.com/NVIDIA/aistore/core/meta"
    22  	"github.com/NVIDIA/aistore/tools"
    23  	"github.com/NVIDIA/aistore/tools/docker"
    24  	"github.com/NVIDIA/aistore/tools/readers"
    25  	"github.com/NVIDIA/aistore/tools/tassert"
    26  	"github.com/NVIDIA/aistore/tools/tlog"
    27  	"github.com/NVIDIA/aistore/xact"
    28  )
    29  
    30  // Intended for a deployment with multiple targets
    31  // 1. Create ais bucket
    32  // 2. Unregister target T
     33  // 3. PUT a large number of objects into the ais bucket
    34  // 4. GET the objects while simultaneously registering the target T
    35  func TestGetAndReRegisterInParallel(t *testing.T) {
    36  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
    37  	var (
    38  		m = ioContext{
    39  			t:               t,
    40  			num:             50000,
    41  			numGetsEachFile: 3,
    42  			fileSize:        10 * cos.KiB,
    43  		}
    44  		rebID string
    45  	)
    46  
    47  	m.initAndSaveState(true /*cleanup*/)
    48  	m.expectTargets(2)
    49  
    50  	// Step 1.
    51  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
    52  
    53  	// Step 2.
    54  	target := m.startMaintenanceNoRebalance()
    55  
    56  	// Step 3.
    57  	m.puts()
    58  
    59  	// Step 4.
    60  	wg := &sync.WaitGroup{}
    61  	wg.Add(2)
    62  	go func() {
     63  		// without defer, if gets crashes, Done is not called, resulting in a test hang
    64  		defer wg.Done()
    65  		m.gets(nil, false)
    66  	}()
    67  
    68  	time.Sleep(time.Second * 3) // give gets some room to breathe
    69  	go func() {
     70  		// without defer, if reregister crashes, Done is not called, resulting in a test hang
    71  		defer wg.Done()
    72  		rebID = m.stopMaintenance(target)
    73  	}()
    74  	wg.Wait()
    75  
    76  	m.ensureNoGetErrors()
    77  	m.waitAndCheckCluState()
    78  	tools.WaitForRebalanceByID(t, baseParams, rebID)
    79  }
    80  
    81  // All of the above PLUS proxy failover/failback sequence in parallel:
    82  // 1. Create an ais bucket
    83  // 2. Unregister a target
    84  // 3. Crash the primary proxy and PUT in parallel
    85  // 4. Failback to the original primary proxy, register target, and GET in parallel
    86  func TestProxyFailbackAndReRegisterInParallel(t *testing.T) {
    87  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, MinTargets: 2, MinProxies: 3})
    88  	m := ioContext{
    89  		t:                   t,
    90  		otherTasksToTrigger: 1,
    91  		num:                 150000,
    92  	}
    93  
    94  	m.initAndSaveState(true /*cleanup*/)
    95  
    96  	// Step 1.
    97  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
    98  
    99  	// Step 2.
   100  	target := m.startMaintenanceNoRebalance()
   101  
   102  	// Step 3.
   103  	_, newPrimaryURL, err := chooseNextProxy(m.smap)
   104  	tassert.CheckFatal(t, err)
    105  	// use a new proxyURL because restarting the primary (killRestorePrimary below)
    106  	// has a side effect: it changes the primary proxy. Without this change,
    107  	// tools.PutRandObjs would fail while the current primary is restarting and rejoining
   108  	m.proxyURL = newPrimaryURL
   109  
   110  	wg := &sync.WaitGroup{}
   111  	wg.Add(1)
   112  	go func() {
   113  		defer wg.Done()
   114  		killRestorePrimary(t, m.proxyURL, false, nil)
   115  	}()
   116  
   117  	// delay PUTs to ensure they run during primary elections
   118  	time.Sleep(5 * time.Second)
   119  	m.puts()
   120  	wg.Wait()
   121  
   122  	// Step 4: (three tasks)
   123  	wg.Add(3)
   124  	go func() {
   125  		defer wg.Done()
   126  		m.stopMaintenance(target)
   127  	}()
   128  	go func() {
   129  		defer wg.Done()
   130  		m.gets(nil, false)
   131  	}()
   132  	go func() {
   133  		defer wg.Done()
    134  		<-m.controlCh // wait for half of the GETs to complete
   135  		primarySetToRand(t)
   136  	}()
   137  	wg.Wait()
   138  
   139  	xargs := xact.ArgsMsg{Kind: apc.ActRebalance, OnlyRunning: true, Timeout: tools.RebalanceTimeout}
   140  	_, _ = api.WaitForXactionIC(baseParams, &xargs)
   141  
   142  	// Step 5.
   143  	m.ensureNoGetErrors()
   144  	m.waitAndCheckCluState()
   145  }
   146  
    147  // Similar to TestGetAndReRegisterInParallel, but instead of unregistering, we kill the target
    148  // 1. Kill a registered target and wait for the Smap to be updated
    149  // 2. Create ais bucket
    150  // 3. PUT a large number of objects into the ais bucket
   151  // 4. Get the objects while simultaneously registering the target
   152  func TestGetAndRestoreInParallel(t *testing.T) {
   153  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, RequiredDeployment: tools.ClusterTypeLocal})
   154  
   155  	var (
   156  		m = ioContext{
   157  			t:               t,
   158  			num:             20000,
   159  			numGetsEachFile: 5,
   160  			fileSize:        cos.KiB * 2,
   161  		}
   162  		targetNode *meta.Snode
   163  	)
   164  
   165  	m.initAndSaveState(true /*cleanup*/)
   166  	m.expectTargets(3)
   167  
   168  	// Step 1
   169  	// Select a random target
   170  	targetNode, _ = m.smap.GetRandTarget()
   171  	tlog.Logf("Killing %s\n", targetNode.StringEx())
   172  	tcmd, err := tools.KillNode(targetNode)
   173  	tassert.CheckFatal(t, err)
   174  
   175  	proxyURL := tools.RandomProxyURL(t)
   176  	m.smap, err = tools.WaitForClusterState(proxyURL, "target removed", m.smap.Version, m.originalProxyCount,
   177  		m.originalTargetCount-1)
   178  	tassert.CheckError(t, err)
   179  
   180  	// Step 2
   181  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   182  
   183  	// Step 3
   184  	m.puts()
   185  
   186  	// Step 4
   187  	wg := &sync.WaitGroup{}
   188  	wg.Add(2)
   189  	go func() {
   190  		defer wg.Done()
   191  		time.Sleep(4 * time.Second)
   192  		tools.RestoreNode(tcmd, false, "target")
   193  	}()
   194  	go func() {
   195  		defer wg.Done()
   196  		m.gets(nil, false)
   197  	}()
   198  	wg.Wait()
   199  
   200  	m.ensureNoGetErrors()
   201  	m.waitAndCheckCluState()
   202  	tools.WaitForRebalAndResil(m.t, tools.BaseAPIParams(m.proxyURL))
   203  }
   204  
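         // Puts a target in maintenance, then attempts to start maintenance on the
         // same target again and expects an error; finally reactivates the target
         // and waits for the rebalance to finish.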
   205  func TestUnregisterPreviouslyUnregisteredTarget(t *testing.T) {
   206  	m := ioContext{t: t}
   207  	m.initAndSaveState(true /*cleanup*/)
   208  	m.expectTargets(1)
   209  	target := m.startMaintenanceNoRebalance()
   210  
    211  	// Attempt to put the same target in maintenance again (an error is expected).
   212  	args := &apc.ActValRmNode{DaemonID: target.ID(), SkipRebalance: true}
   213  	_, err := api.StartMaintenance(tools.BaseAPIParams(m.proxyURL), args)
   214  	tassert.Errorf(t, err != nil, "error expected")
   215  
   216  	n := tools.GetClusterMap(t, m.proxyURL).CountActiveTs()
   217  	if n != m.originalTargetCount-1 {
   218  		t.Fatalf("expected %d targets after putting target in maintenance, got %d targets",
   219  			m.originalTargetCount-1, n)
   220  	}
   221  
   222  	// Register target (bring cluster to normal state)
   223  	rebID := m.stopMaintenance(target)
   224  	m.waitAndCheckCluState()
   225  	tools.WaitForRebalanceByID(m.t, tools.BaseAPIParams(m.proxyURL), rebID)
   226  }
   227  
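         // With target[0] in maintenance, runs PUTs while concurrently reactivating
         // target[0] and putting target[1] in maintenance; finally reactivates
         // target[1] and waits for the rebalance to finish.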
   228  func TestRegisterAndUnregisterTargetAndPutInParallel(t *testing.T) {
   229  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   230  
   231  	m := ioContext{
   232  		t:   t,
   233  		num: 10000,
   234  	}
   235  
   236  	m.initAndSaveState(true /*cleanup*/)
   237  	m.expectTargets(3)
   238  
   239  	targets := m.smap.Tmap.ActiveNodes()
   240  
   241  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   242  
   243  	// Unregister target[0]
   244  	args := &apc.ActValRmNode{DaemonID: targets[0].ID(), SkipRebalance: true}
   245  	baseParams := tools.BaseAPIParams(m.proxyURL)
   246  	_, err := api.StartMaintenance(baseParams, args)
   247  	tassert.CheckFatal(t, err)
   248  	tools.WaitForClusterState(
   249  		m.proxyURL,
   250  		"put target in maintenance",
   251  		m.smap.Version,
   252  		m.originalProxyCount,
   253  		m.originalTargetCount-1,
   254  	)
   255  
   256  	n := tools.GetClusterMap(t, m.proxyURL).CountActiveTs()
   257  	if n != m.originalTargetCount-1 {
   258  		t.Fatalf("expected %d targets after putting target in maintenance, got %d targets",
   259  			m.originalTargetCount-1, n)
   260  	}
   261  
   262  	// Do puts in parallel
   263  	wg := &sync.WaitGroup{}
   264  	wg.Add(3)
   265  	go func() {
   266  		defer wg.Done()
   267  		m.puts()
   268  	}()
   269  
   270  	// Register target 0 in parallel
   271  	go func() {
   272  		defer wg.Done()
   273  		args := &apc.ActValRmNode{DaemonID: targets[0].ID()}
   274  		tlog.Logf("Take %s out of maintenance mode ...\n", targets[0].StringEx())
   275  		_, err = api.StopMaintenance(baseParams, args)
   276  		tassert.CheckFatal(t, err)
   277  	}()
   278  
    279  	// Put target[1] in maintenance in parallel
   280  	go func() {
   281  		defer wg.Done()
   282  		args := &apc.ActValRmNode{DaemonID: targets[1].ID(), SkipRebalance: true}
   283  		_, err = api.StartMaintenance(baseParams, args)
   284  		tassert.CheckFatal(t, err)
   285  	}()
   286  
   287  	// Wait for everything to end
   288  	wg.Wait()
   289  
   290  	// Register target 1 to bring cluster to original state
   291  	rebID := m.stopMaintenance(targets[1])
   292  
   293  	// wait for rebalance to complete
   294  	tools.WaitForRebalanceByID(t, baseParams, rebID)
   295  
   296  	m.waitAndCheckCluState()
   297  }
   298  
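         // PUTs objects while one target is in maintenance, reactivates the target,
         // waits for the resulting rebalance to finish, and then GETs all objects
         // expecting no errors.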
   299  func TestAckRebalance(t *testing.T) {
   300  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   301  
   302  	m := ioContext{
   303  		t:             t,
   304  		num:           30000,
   305  		getErrIsFatal: true,
   306  	}
   307  
   308  	m.initAndSaveState(true /*cleanup*/)
   309  	m.expectTargets(3)
   310  
   311  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   312  
   313  	target := m.startMaintenanceNoRebalance()
   314  
   315  	// Start putting files into bucket.
   316  	m.puts()
   317  
   318  	rebID := m.stopMaintenance(target)
   319  
   320  	// Wait for everything to finish.
   321  	baseParams := tools.BaseAPIParams(m.proxyURL)
   322  	tools.WaitForRebalanceByID(t, baseParams, rebID)
   323  
   324  	m.gets(nil, false)
   325  
   326  	m.ensureNoGetErrors()
   327  	m.waitAndCheckCluState()
   328  }
   329  
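         // Runs three back-to-back iterations of testStressRebalance (below) against
         // the same bucket.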
   330  func TestStressRebalance(t *testing.T) {
   331  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   332  
   333  	m := &ioContext{
   334  		t: t,
   335  	}
   336  
   337  	m.initAndSaveState(true /*cleanup*/)
   338  	m.expectTargets(4)
   339  
   340  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   341  
   342  	for i := 1; i <= 3; i++ {
   343  		tlog.Logf("Iteration #%d ======\n", i)
   344  		testStressRebalance(t, m.bck)
   345  	}
   346  }
   347  
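         // Kills two random targets, PUTs objects, then restores both targets while
         // GETs are running; waits for the targets to rejoin and for the rebalance
         // to finish, and verifies that no GET has failed.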
   348  func testStressRebalance(t *testing.T, bck cmn.Bck) {
   349  	m := &ioContext{
   350  		t:             t,
   351  		bck:           bck,
   352  		num:           50000,
   353  		getErrIsFatal: true,
   354  	}
   355  
   356  	m.initAndSaveState(true /*cleanup*/)
   357  
   358  	tgts := m.smap.Tmap.ActiveNodes()
   359  	i1 := rand.Intn(len(tgts))
   360  	i2 := (i1 + 1) % len(tgts)
   361  	target1, target2 := tgts[i1], tgts[i2]
   362  
   363  	// Unregister targets.
   364  	tlog.Logf("Killing %s and %s\n", target1.StringEx(), target2.StringEx())
   365  	cmd1, err := tools.KillNode(target1)
   366  	tassert.CheckFatal(t, err)
   367  	time.Sleep(time.Second)
   368  	cmd2, err := tools.KillNode(target2)
   369  	tassert.CheckFatal(t, err)
   370  
   371  	// Start putting objects into bucket
   372  	m.puts()
   373  
   374  	// Get objects and register targets in parallel
   375  	wg := &sync.WaitGroup{}
   376  	wg.Add(1)
   377  	go func() {
   378  		defer wg.Done()
   379  		m.gets(nil, false)
   380  	}()
   381  
   382  	// and join 2 targets in parallel
   383  	time.Sleep(time.Second)
   384  	err = tools.RestoreNode(cmd1, false, "the 1st target")
   385  	tassert.CheckFatal(t, err)
   386  
   387  	// random sleep between the first and the second join
   388  	time.Sleep(time.Duration(rand.Intn(3)+1) * time.Second)
   389  
   390  	err = tools.RestoreNode(cmd2, false, "the 2nd target")
   391  	tassert.CheckFatal(t, err)
   392  
   393  	_, err = tools.WaitForClusterState(
   394  		m.proxyURL,
   395  		"targets to join",
   396  		m.smap.Version,
   397  		m.originalProxyCount,
   398  		m.originalTargetCount,
   399  	)
   400  	tassert.CheckFatal(m.t, err)
   401  
   402  	// wait for the rebalance to finish
   403  	baseParams := tools.BaseAPIParams(m.proxyURL)
   404  	tools.WaitForRebalAndResil(t, baseParams)
   405  
    406  	// wait for the reads to finish
   407  	wg.Wait()
   408  
   409  	m.ensureNoGetErrors()
   410  	m.waitAndCheckCluState()
   411  }
   412  
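         // Puts target 0 in maintenance and PUTs objects; then, in parallel,
         // reactivates target 0 and unregisters target 1; finally re-joins target 1,
         // waits for the rebalance to finish, and GETs all objects.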
   413  func TestRebalanceAfterUnregisterAndReregister(t *testing.T) {
   414  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   415  	m := ioContext{
   416  		t:   t,
   417  		num: 10000,
   418  	}
   419  	m.initAndSaveState(true /*cleanup*/)
   420  	m.expectTargets(3)
   421  
   422  	targets := m.smap.Tmap.ActiveNodes()
   423  
   424  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   425  
   426  	target0, target1 := targets[0], targets[1]
   427  	args := &apc.ActValRmNode{DaemonID: target0.ID(), SkipRebalance: true}
   428  	baseParams := tools.BaseAPIParams(m.proxyURL)
   429  	_, err := api.StartMaintenance(baseParams, args)
   430  	tassert.CheckFatal(t, err)
   431  
   432  	_, err = tools.WaitForClusterState(
   433  		m.proxyURL,
   434  		"put target in maintenance",
   435  		m.smap.Version,
   436  		m.originalProxyCount,
   437  		m.originalTargetCount-1,
   438  	)
   439  	tassert.CheckFatal(m.t, err)
   440  
   441  	// Put some files
   442  	m.puts()
   443  
   444  	// Register target 0 in parallel
   445  	wg := &sync.WaitGroup{}
   446  	wg.Add(2)
   447  	go func() {
   448  		defer wg.Done()
   449  		tlog.Logf("Take %s out of maintenance mode ...\n", target0.StringEx())
   450  		args := &apc.ActValRmNode{DaemonID: target0.ID()}
   451  		_, err = api.StopMaintenance(baseParams, args)
   452  		tassert.CheckFatal(t, err)
   453  	}()
   454  
   455  	// Unregister target 1 in parallel
   456  	go func() {
   457  		defer wg.Done()
   458  		err = tools.RemoveNodeUnsafe(m.proxyURL, target1.ID())
   459  		tassert.CheckFatal(t, err)
   460  	}()
   461  
   462  	// Wait for everything to end
   463  	wg.Wait()
   464  
   465  	// Register target 1 to bring cluster to original state
   466  	sleep := time.Duration(rand.Intn(5))*time.Second + time.Millisecond
   467  	time.Sleep(sleep)
   468  	tlog.Logf("Join %s back\n", target1.StringEx())
   469  	rebID, err := tools.JoinCluster(m.proxyURL, target1)
   470  	tassert.CheckFatal(t, err)
   471  	_, err = tools.WaitForClusterState(
   472  		m.proxyURL,
   473  		"targets to join",
   474  		m.smap.Version,
   475  		m.originalProxyCount,
   476  		m.originalTargetCount,
   477  	)
   478  	tassert.CheckFatal(m.t, err)
   479  
   480  	time.Sleep(sleep)
   481  	tools.WaitForRebalanceByID(t, baseParams, rebID)
   482  
   483  	m.gets(nil, false)
   484  
   485  	m.ensureNoGetErrors()
   486  	m.waitAndCheckCluState()
   487  }
   488  
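         // Starts PUTs with one target in maintenance and reactivates the target
         // while the PUTs are still running; waits for the rebalance, then reads all
         // objects and checks their distribution across targets.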
   489  func TestPutDuringRebalance(t *testing.T) {
   490  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   491  
   492  	m := ioContext{
   493  		t:   t,
   494  		num: 10000,
   495  	}
   496  
   497  	m.initAndSaveState(true /*cleanup*/)
   498  	m.expectTargets(3)
   499  
   500  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   501  
   502  	target := m.startMaintenanceNoRebalance()
   503  
   504  	// Start putting files and register target in parallel.
   505  	wg := &sync.WaitGroup{}
   506  	wg.Add(1)
   507  	go func() {
   508  		defer wg.Done()
   509  		m.puts()
   510  	}()
   511  
   512  	// Sleep some time to wait for PUT operations to begin.
   513  	time.Sleep(3 * time.Second)
   514  
   515  	rebID := m.stopMaintenance(target)
   516  
   517  	// Wait for everything to finish.
   518  	wg.Wait()
   519  	baseParams := tools.BaseAPIParams(m.proxyURL)
   520  	tools.WaitForRebalanceByID(t, baseParams, rebID)
   521  
   522  	// Main check - try to read all objects.
   523  	m.gets(nil, false)
   524  
   525  	m.checkObjectDistribution(t)
   526  	m.waitAndCheckCluState()
   527  }
   528  
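         // Disables a mountpath on one target and puts another target in maintenance,
         // PUTs objects, and then runs GETs while re-enabling the mountpath and
         // reactivating the target, i.e. with both local and global rebalance running.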
   529  func TestGetDuringLocalAndGlobalRebalance(t *testing.T) {
   530  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   531  
   532  	var (
   533  		m = ioContext{
   534  			t:               t,
   535  			num:             10000,
   536  			numGetsEachFile: 3,
   537  		}
   538  		baseParams     = tools.BaseAPIParams()
   539  		selectedTarget *meta.Snode
   540  		killTarget     *meta.Snode
   541  	)
   542  
   543  	m.initAndSaveState(true /*cleanup*/)
   544  	m.expectTargets(2)
   545  
   546  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   547  
   548  	// Select a random target to disable one of its mountpaths,
   549  	// and another random target to unregister.
   550  	for _, target := range m.smap.Tmap {
   551  		if selectedTarget != nil {
   552  			killTarget = target
   553  			break
   554  		}
   555  		selectedTarget = target
   556  	}
   557  	mpList, err := api.GetMountpaths(baseParams, selectedTarget)
   558  	tassert.CheckFatal(t, err)
   559  	ensureNoDisabledMountpaths(t, selectedTarget, mpList)
   560  
   561  	if len(mpList.Available) < 2 {
   562  		t.Fatalf("Must have at least 2 mountpaths")
   563  	}
   564  
    565  	// Disable one mountpath temporarily
   566  	mpath := mpList.Available[0]
   567  	tlog.Logf("Disable mountpath at target %s\n", selectedTarget.ID())
   568  	err = api.DisableMountpath(baseParams, selectedTarget, mpath, false /*dont-resil*/)
   569  	tassert.CheckFatal(t, err)
   570  
   571  	args := &apc.ActValRmNode{DaemonID: killTarget.ID(), SkipRebalance: true}
   572  	_, err = api.StartMaintenance(baseParams, args)
   573  	tassert.CheckFatal(t, err)
   574  	smap, err := tools.WaitForClusterState(
   575  		m.proxyURL,
   576  		"target removal",
   577  		m.smap.Version,
   578  		m.originalProxyCount,
   579  		m.originalTargetCount-1,
   580  	)
   581  	tassert.CheckFatal(m.t, err)
   582  
   583  	m.puts()
   584  
   585  	// Start getting objects
   586  	wg := &sync.WaitGroup{}
   587  	wg.Add(1)
   588  	go func() {
   589  		defer wg.Done()
   590  		m.gets(nil, false)
   591  	}()
   592  
   593  	// Let's give gets some momentum
   594  	time.Sleep(time.Second * 4)
   595  
    596  	// reactivate the target (stop maintenance)
   597  	args = &apc.ActValRmNode{DaemonID: killTarget.ID()}
   598  	_, err = api.StopMaintenance(baseParams, args)
   599  	tassert.CheckFatal(t, err)
   600  
   601  	// enable mountpath
   602  	err = api.EnableMountpath(baseParams, selectedTarget, mpath)
   603  	tassert.CheckFatal(t, err)
   604  
    605  	// wait until the GETs are done while the two rebalances (local and global) are running
   606  	wg.Wait()
   607  
   608  	// make sure that the cluster has all targets enabled
   609  	_, err = tools.WaitForClusterState(
   610  		m.proxyURL,
   611  		"target joined back",
   612  		smap.Version,
   613  		m.originalProxyCount,
   614  		m.originalTargetCount,
   615  	)
   616  	tassert.CheckFatal(m.t, err)
   617  
   618  	// wait for rebalance to complete
   619  	baseParams = tools.BaseAPIParams(m.proxyURL)
   620  	tools.WaitForRebalAndResil(t, baseParams)
   621  
   622  	m.ensureNoGetErrors()
   623  	m.waitAndCheckCluState()
   624  	m.ensureNumMountpaths(selectedTarget, mpList)
   625  }
   626  
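         // Disables one or two mountpaths on a random target, PUTs objects, then runs
         // GETs while re-enabling the mountpaths, and waits for the resulting
         // resilver to finish.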
   627  func TestGetDuringResilver(t *testing.T) {
   628  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   629  
   630  	var (
   631  		m = ioContext{
   632  			t:   t,
   633  			num: 20000,
   634  		}
   635  		baseParams = tools.BaseAPIParams()
   636  	)
   637  
   638  	m.initAndSaveState(true /*cleanup*/)
   639  	m.expectTargets(1)
   640  
   641  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   642  
   643  	target, _ := m.smap.GetRandTarget()
   644  	mpList, err := api.GetMountpaths(baseParams, target)
   645  	tassert.CheckFatal(t, err)
   646  	ensureNoDisabledMountpaths(t, target, mpList)
   647  
   648  	if len(mpList.Available) < 2 {
   649  		t.Fatalf("Must have at least 2 mountpaths")
   650  	}
   651  
    652  	// select up to 2 mountpaths
   653  	mpaths := []string{mpList.Available[0]}
   654  	if len(mpList.Available) > 2 {
   655  		mpaths = append(mpaths, mpList.Available[1])
   656  	}
   657  
   658  	// Disable mountpaths temporarily
   659  	for _, mp := range mpaths {
   660  		err = api.DisableMountpath(baseParams, target, mp, false /*dont-resil*/)
   661  		tassert.CheckFatal(t, err)
   662  	}
   663  
   664  	m.puts()
   665  
   666  	// Start getting objects and enable mountpaths in parallel
   667  	wg := &sync.WaitGroup{}
   668  	wg.Add(1)
   669  	go func() {
   670  		defer wg.Done()
   671  		m.getsUntilStop()
   672  	}()
   673  
   674  	for _, mp := range mpaths {
   675  		time.Sleep(time.Second)
   676  		err = api.EnableMountpath(baseParams, target, mp)
   677  		tassert.CheckFatal(t, err)
   678  	}
   679  	m.stopGets()
   680  
   681  	wg.Wait()
   682  	time.Sleep(2 * time.Second)
   683  
   684  	tlog.Logf("Wait for rebalance (when target %s that has previously lost all mountpaths joins back)\n", target.StringEx())
   685  	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
   686  	_, _ = api.WaitForXactionIC(baseParams, &args)
   687  
   688  	tools.WaitForResilvering(t, baseParams, nil)
   689  
   690  	m.ensureNoGetErrors()
   691  	m.ensureNumMountpaths(target, mpList)
   692  }
   693  
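         // PUTs objects while one target is in maintenance, then reactivates the
         // target and runs GETs during the rebalance; reads everything once more
         // after the rebalance completes.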
   694  func TestGetDuringRebalance(t *testing.T) {
   695  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   696  
   697  	m := ioContext{
   698  		t:   t,
   699  		num: 30000,
   700  	}
   701  
   702  	m.initAndSaveState(true /*cleanup*/)
   703  	m.expectTargets(3)
   704  
   705  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   706  
   707  	target := m.startMaintenanceNoRebalance()
   708  
   709  	m.puts()
   710  
   711  	// Start getting objects and register target in parallel.
   712  	wg := &sync.WaitGroup{}
   713  	wg.Add(1)
   714  	go func() {
   715  		defer wg.Done()
   716  		m.gets(nil, false)
   717  	}()
   718  
   719  	rebID := m.stopMaintenance(target)
   720  
   721  	// Wait for everything to finish.
   722  	baseParams := tools.BaseAPIParams(m.proxyURL)
   723  	tools.WaitForRebalanceByID(t, baseParams, rebID)
   724  	wg.Wait()
   725  
   726  	// Get objects once again to check if they are still accessible after rebalance.
   727  	m.gets(nil, false)
   728  
   729  	m.ensureNoGetErrors()
   730  	m.waitAndCheckCluState()
   731  }
   732  
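         // Puts two targets in maintenance and then, in parallel, reactivates them
         // while creating several new buckets; finally waits for rebalance/resilver
         // to settle.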
   733  func TestRegisterTargetsAndCreateBucketsInParallel(t *testing.T) {
   734  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   735  
   736  	const (
   737  		unregisterTargetCount = 2
   738  		newBucketCount        = 3
   739  	)
   740  
   741  	m := ioContext{
   742  		t: t,
   743  	}
   744  
   745  	m.initAndSaveState(true /*cleanup*/)
   746  	m.expectTargets(3)
   747  
   748  	targets := m.smap.Tmap.ActiveNodes()
   749  	baseParams := tools.BaseAPIParams(m.proxyURL)
   750  
    751  	// Put targets in maintenance
   752  	for i := range unregisterTargetCount {
   753  		args := &apc.ActValRmNode{DaemonID: targets[i].ID(), SkipRebalance: true}
   754  		_, err := api.StartMaintenance(baseParams, args)
   755  		tassert.CheckError(t, err)
   756  	}
   757  	tools.WaitForClusterState(
   758  		m.proxyURL,
   759  		"remove targets",
   760  		m.smap.Version,
   761  		m.originalProxyCount,
   762  		m.originalTargetCount-unregisterTargetCount,
   763  	)
   764  
   765  	wg := &sync.WaitGroup{}
   766  	wg.Add(unregisterTargetCount)
   767  	for i := range unregisterTargetCount {
   768  		go func(number int) {
   769  			defer wg.Done()
   770  			args := &apc.ActValRmNode{DaemonID: targets[number].ID()}
   771  			_, err := api.StopMaintenance(baseParams, args)
   772  			tassert.CheckError(t, err)
   773  		}(i)
   774  	}
   775  
   776  	wg.Add(newBucketCount)
   777  	for i := range newBucketCount {
   778  		bck := m.bck
   779  		bck.Name += strconv.Itoa(i)
   780  
   781  		go func() {
   782  			defer wg.Done()
   783  			tools.CreateBucket(t, m.proxyURL, bck, nil, true /*cleanup*/)
   784  		}()
   785  	}
   786  	wg.Wait()
   787  	m.waitAndCheckCluState()
   788  	tools.WaitForRebalAndResil(t, baseParams)
   789  }
   790  
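         // Detaches all mountpaths of a random target (causing it to leave the
         // cluster), then re-attaches them and verifies that the target rejoins,
         // resilvers, and serves PUTs/GETs again.
         // NOTE: currently skipped unconditionally (see the TODO right below).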
   791  func TestMountpathDetachAll(t *testing.T) {
   792  	if true {
   793  		t.Skipf("skipping %s", t.Name()) // TODO -- FIXME: add back, here and elsewhere
   794  	}
   795  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, MinTargets: 2})
   796  
   797  	var (
   798  		m = ioContext{
   799  			t:               t,
   800  			num:             5000,
   801  			numGetsEachFile: 2,
   802  		}
   803  		baseParams = tools.BaseAPIParams()
   804  	)
   805  
   806  	m.initAndSaveState(true /*cleanup*/)
   807  	m.expectTargets(2)
   808  
   809  	target, _ := m.smap.GetRandTarget()
   810  	tname := target.StringEx()
   811  	origMountpaths, err := api.GetMountpaths(baseParams, target)
   812  	tassert.CheckFatal(t, err)
   813  	ensureNoDisabledMountpaths(t, target, origMountpaths)
   814  
   815  	// Remove all mountpaths on the target
   816  	for _, mpath := range origMountpaths.Available {
   817  		err = api.DetachMountpath(baseParams, target, mpath, false /*dont-resil*/)
   818  		tassert.CheckFatal(t, err)
   819  	}
   820  
   821  	time.Sleep(time.Second)
   822  	tlog.Logf("Wait for rebalance (triggered by %s leaving the cluster after having lost all mountpaths)\n", tname)
   823  	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
   824  	_, _ = api.WaitForXactionIC(baseParams, &args)
   825  
   826  	// Check if mountpaths were actually removed
   827  	mountpaths, err := api.GetMountpaths(baseParams, target)
   828  	tassert.CheckFatal(t, err)
   829  
   830  	if len(mountpaths.Available) != 0 {
   831  		t.Fatalf("%s should not have any paths available: %d", tname, len(mountpaths.Available))
   832  	}
   833  
   834  	// Create ais bucket
   835  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   836  
    837  	// Re-attach the target's mountpaths
   838  	for _, mpath := range origMountpaths.Available {
   839  		err = api.AttachMountpath(baseParams, target, mpath)
   840  		tassert.CheckFatal(t, err)
   841  	}
   842  
   843  	time.Sleep(2 * time.Second)
   844  	tlog.Logf("Wait for rebalance (when target %s that has previously lost all mountpaths joins back)\n", target.StringEx())
   845  	args = xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
   846  	_, _ = api.WaitForXactionIC(baseParams, &args)
   847  
   848  	tools.WaitForResilvering(t, baseParams, target)
   849  
   850  	// random read/write
   851  	m.puts()
   852  	m.gets(nil, false)
   853  
   854  	m.ensureNoGetErrors()
   855  	m.ensureNumMountpaths(target, origMountpaths)
   856  }
   857  
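         // Attaches a new mountpath to a random target, waits for the resilver, reads
         // all objects, then detaches the mountpath and waits for the resilver again.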
   858  func TestResilverAfterAddingMountpath(t *testing.T) {
   859  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   860  	var (
   861  		m = ioContext{
   862  			t:               t,
   863  			num:             5000,
   864  			numGetsEachFile: 2,
   865  		}
   866  		baseParams = tools.BaseAPIParams()
   867  	)
   868  
   869  	m.initAndSaveState(true /*cleanup*/)
   870  	m.expectTargets(1)
   871  	target, _ := m.smap.GetRandTarget()
   872  	mpList, err := api.GetMountpaths(baseParams, target)
   873  	tassert.CheckFatal(t, err)
   874  	ensureNoDisabledMountpaths(t, target, mpList)
   875  
   876  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   877  
   878  	if docker.IsRunning() {
   879  		err := docker.CreateMpathDir(0, testMpath)
   880  		tassert.CheckFatal(t, err)
   881  	} else {
   882  		err := cos.CreateDir(testMpath)
   883  		tassert.CheckFatal(t, err)
   884  	}
   885  
   886  	defer func() {
   887  		if !docker.IsRunning() {
   888  			os.RemoveAll(testMpath)
   889  		}
   890  	}()
   891  
   892  	m.puts()
   893  
   894  	// Add new mountpath to target
   895  	tlog.Logf("attach new %q at target %s\n", testMpath, target.StringEx())
   896  	err = api.AttachMountpath(baseParams, target, testMpath)
   897  	tassert.CheckFatal(t, err)
   898  
   899  	tools.WaitForResilvering(t, baseParams, target)
   900  
   901  	m.gets(nil, false)
   902  
   903  	// Remove new mountpath from target
   904  	tlog.Logf("detach %q from target %s\n", testMpath, target.StringEx())
   905  	if docker.IsRunning() {
   906  		if err := api.DetachMountpath(baseParams, target, testMpath, false /*dont-resil*/); err != nil {
   907  			t.Error(err.Error())
   908  		}
   909  	} else {
   910  		err = api.DetachMountpath(baseParams, target, testMpath, false /*dont-resil*/)
   911  		tassert.CheckFatal(t, err)
   912  	}
   913  
   914  	m.ensureNoGetErrors()
   915  
   916  	tools.WaitForResilvering(t, baseParams, target)
   917  	m.ensureNumMountpaths(target, mpList)
   918  }
   919  
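         // Attaches a new mountpath to every target, waits for resilvering, reads all
         // objects, then detaches the added mountpaths and resilvers once more.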
   920  func TestAttachDetachMountpathAllTargets(t *testing.T) {
   921  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   922  	var (
   923  		m = ioContext{
   924  			t:               t,
   925  			num:             10000,
   926  			numGetsEachFile: 5,
   927  		}
   928  		baseParams = tools.BaseAPIParams()
   929  
   930  		allMps = make(map[string]*apc.MountpathList)
   931  	)
   932  
   933  	m.initAndSaveState(true /*cleanup*/)
   934  	m.expectTargets(1)
   935  
   936  	targets := m.smap.Tmap.ActiveNodes()
   937  
   938  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
   939  
   940  	defer func() {
   941  		if !docker.IsRunning() {
   942  			os.RemoveAll(testMpath)
   943  		}
   944  	}()
   945  
   946  	// PUT random objects
   947  	m.puts()
   948  
   949  	if docker.IsRunning() {
   950  		err := docker.CreateMpathDir(0, testMpath)
   951  		tassert.CheckFatal(t, err)
   952  		for _, target := range targets {
   953  			mpList, err := api.GetMountpaths(baseParams, target)
   954  			tassert.CheckFatal(t, err)
   955  			allMps[target.ID()] = mpList
   956  
   957  			err = api.AttachMountpath(baseParams, target, testMpath)
   958  			tassert.CheckFatal(t, err)
   959  		}
   960  	} else {
   961  		// Add new mountpath to all targets
   962  		for idx, target := range targets {
   963  			mpList, err := api.GetMountpaths(baseParams, target)
   964  			tassert.CheckFatal(t, err)
   965  			allMps[target.ID()] = mpList
   966  
   967  			mountpath := filepath.Join(testMpath, strconv.Itoa(idx))
   968  			cos.CreateDir(mountpath)
   969  			err = api.AttachMountpath(baseParams, target, mountpath)
   970  			tassert.CheckFatal(t, err)
   971  		}
   972  	}
   973  
   974  	tools.WaitForResilvering(t, baseParams, nil)
   975  
    976  	// Read after resilvering
   977  	m.gets(nil, false)
   978  
   979  	// Remove new mountpath from all targets
   980  	if docker.IsRunning() {
   981  		err := docker.RemoveMpathDir(0, testMpath)
   982  		tassert.CheckFatal(t, err)
   983  		for _, target := range targets {
   984  			if err := api.DetachMountpath(baseParams, target, testMpath, false /*dont-resil*/); err != nil {
   985  				t.Error(err.Error())
   986  			}
   987  		}
   988  	} else {
   989  		for idx, target := range targets {
   990  			mountpath := filepath.Join(testMpath, strconv.Itoa(idx))
   991  			os.RemoveAll(mountpath)
   992  			if err := api.DetachMountpath(baseParams, target, mountpath, false /*dont-resil*/); err != nil {
   993  				t.Error(err.Error())
   994  			}
   995  		}
   996  	}
   997  
   998  	tools.WaitForResilvering(t, baseParams, nil)
   999  
  1000  	m.ensureNoGetErrors()
  1001  	for _, target := range targets {
  1002  		m.ensureNumMountpaths(target, allMps[target.ID()])
  1003  	}
  1004  }
  1005  
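         // Disables all mountpaths of a random target (causing it to leave the
         // cluster), then re-enables them and verifies that the target rejoins,
         // resilvers, and serves PUTs/GETs again.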
  1006  func TestMountpathDisableAll(t *testing.T) {
  1007  	var (
  1008  		m = ioContext{
  1009  			t:               t,
  1010  			num:             5000,
  1011  			numGetsEachFile: 2,
  1012  		}
  1013  		baseParams = tools.BaseAPIParams()
  1014  	)
  1015  
  1016  	m.initAndSaveState(true /*cleanup*/)
  1017  	m.expectTargets(1)
  1018  
   1019  	// Select a random target; all of its mountpaths will be disabled
  1020  	target, _ := m.smap.GetRandTarget()
  1021  	tname := target.StringEx()
  1022  	origMountpaths, err := api.GetMountpaths(baseParams, target)
  1023  	tassert.CheckFatal(t, err)
  1024  	ensureNoDisabledMountpaths(t, target, origMountpaths)
  1025  
  1026  	if len(origMountpaths.WaitingDD) != 0 || len(origMountpaths.Disabled) != 0 {
  1027  		tlog.Logf("Warning %s: orig mountpaths (avail=%d, dd=%d, disabled=%d)\n", tname,
  1028  			len(origMountpaths.Available), len(origMountpaths.WaitingDD), len(origMountpaths.Disabled))
  1029  		for _, mpath := range origMountpaths.Disabled {
  1030  			err = api.EnableMountpath(baseParams, target, mpath)
  1031  			tlog.Logf("Warning %s: late enable %q, err=%v\n", tname, mpath, err)
  1032  			time.Sleep(2 * time.Second)
  1033  		}
  1034  		origMountpaths, err = api.GetMountpaths(baseParams, target)
  1035  		tassert.CheckFatal(t, err)
  1036  	} else {
  1037  		tlog.Logf("%s: orig avail mountpaths=%d\n", tname, len(origMountpaths.Available))
  1038  	}
  1039  	disabled := make(cos.StrSet)
  1040  	defer func() {
  1041  		for mpath := range disabled {
  1042  			err := api.EnableMountpath(baseParams, target, mpath)
  1043  			tassert.CheckError(t, err)
  1044  		}
  1045  		if len(disabled) != 0 {
  1046  			tlog.Logf("Wait for rebalance (when target %s that has previously lost all mountpaths joins back)\n",
  1047  				tname)
  1048  			args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
  1049  			_, _ = api.WaitForXactionIC(baseParams, &args)
  1050  
  1051  			tools.WaitForResilvering(t, baseParams, nil)
  1052  		}
  1053  	}()
  1054  	for _, mpath := range origMountpaths.Available {
  1055  		err := api.DisableMountpath(baseParams, target, mpath, true /*dont-resil*/)
  1056  		tassert.CheckFatal(t, err)
  1057  		disabled.Add(mpath)
  1058  	}
  1059  
  1060  	time.Sleep(2 * time.Second)
  1061  	tlog.Logf("Wait for rebalance (triggered by %s leaving the cluster after having lost all mountpaths)\n", tname)
  1062  	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
  1063  	_, _ = api.WaitForXactionIC(baseParams, &args)
  1064  
  1065  	// Check if mountpaths were actually disabled
  1066  	time.Sleep(time.Second)
  1067  	mountpaths, err := api.GetMountpaths(baseParams, target)
  1068  	tassert.CheckFatal(t, err)
  1069  
  1070  	if len(mountpaths.Available) != 0 {
  1071  		t.Fatalf("%s should not have any mountpaths left (%d)", tname, len(mountpaths.Available))
  1072  	}
  1073  	if len(mountpaths.Disabled)+len(mountpaths.WaitingDD) != len(origMountpaths.Available) {
  1074  		t.Fatalf("%s: not all mountpaths were disabled (%d, %d, %d)", tname,
  1075  			len(mountpaths.Disabled), len(mountpaths.WaitingDD), len(origMountpaths.Available))
  1076  	}
  1077  
  1078  	// Create ais bucket
  1079  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1080  
  1081  	// Re-enable target mountpaths
  1082  	for _, mpath := range origMountpaths.Available {
  1083  		err := api.EnableMountpath(baseParams, target, mpath)
  1084  		tassert.CheckFatal(t, err)
  1085  		disabled.Delete(mpath)
  1086  	}
  1087  
  1088  	time.Sleep(2 * time.Second)
  1089  	tlog.Logf("Wait for rebalance (when target %s that has previously lost all mountpaths joins back)\n", target.StringEx())
  1090  	args = xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
  1091  	_, _ = api.WaitForXactionIC(baseParams, &args)
  1092  
  1093  	tools.WaitForResilvering(t, baseParams, target)
  1094  
  1095  	tlog.Logf("waiting for bucket %s to show up on all targets\n", m.bck)
  1096  	err = checkTargetBMDsFor(m.proxyURL, m.bck)
  1097  	tassert.CheckFatal(t, err)
  1098  
  1099  	// Put and read random files
  1100  	m.puts()
  1101  	m.gets(nil, false)
  1102  
  1103  	m.ensureNoGetErrors()
  1104  	m.ensureNumMountpaths(target, origMountpaths)
  1105  }
  1106  
  1107  // get BMD from each target; check the BMD for the specified bucket
  1108  func checkTargetBMDsFor(proxyURL string, bck cmn.Bck) error {
  1109  	bp := tools.BaseAPIParams(proxyURL)
  1110  	smap, err := api.GetClusterMap(bp)
  1111  	if err != nil {
  1112  		return err
  1113  	}
  1114  	to := time.Now().Add(10 * time.Second)
  1115  	b := meta.CloneBck(&bck)
  1116  	for tid := range smap.Tmap {
  1117  		// poll
  1118  		for {
  1119  			// alternatively, something like: api.GetBMD(tools.BaseAPIParams(tsi.URL(...)))
  1120  			val, err := api.GetNodeMeta(bp, tid, apc.WhatBMD)
  1121  			if err != nil {
  1122  				return err
  1123  			}
  1124  			bmd := val.(*meta.BMD)
  1125  			if _, bucketExists := bmd.Get(b); bucketExists {
  1126  				break
  1127  			}
  1128  			if time.Now().After(to) {
  1129  				return errors.New("checkBMDsFor: timeout")
  1130  			}
  1131  			time.Sleep(time.Second)
  1132  		}
  1133  	}
  1134  	return nil
  1135  }
  1136  
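         // Creates a bucket through a non-primary proxy (exercising control-plane
         // forwarding to the primary), PUTs objects, runs GETs while designating a
         // new primary, and finally destroys the bucket through the original, now
         // non-primary, proxy.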
  1137  func TestForwardCP(t *testing.T) {
  1138  	m := ioContext{
  1139  		t:               t,
  1140  		num:             10000,
  1141  		numGetsEachFile: 2,
  1142  		fileSize:        128,
  1143  	}
  1144  
  1145  	// Step 1.
  1146  	m.initAndSaveState(true /*cleanup*/)
  1147  	m.expectProxies(2)
  1148  
  1149  	// Step 2.
  1150  	origID, origURL := m.smap.Primary.ID(), m.smap.Primary.PubNet.URL
  1151  	nextProxyID, nextProxyURL, err := chooseNextProxy(m.smap)
  1152  	tassert.CheckFatal(t, err)
  1153  
  1154  	t.Cleanup(func() {
  1155  		// Restore original primary.
  1156  		m.smap = tools.GetClusterMap(m.t, m.proxyURL)
  1157  		setPrimaryTo(t, m.proxyURL, m.smap, origURL, origID)
  1158  
  1159  		time.Sleep(time.Second)
  1160  	})
  1161  
  1162  	tools.CreateBucket(t, nextProxyURL, m.bck, nil, true /*cleanup*/)
  1163  	tlog.Logf("Created bucket %s via non-primary %s\n", m.bck, nextProxyID)
  1164  
  1165  	// Step 3.
  1166  	m.puts()
  1167  
  1168  	// Step 4. in parallel: run GETs and designate a new primary=nextProxyID
  1169  	wg := &sync.WaitGroup{}
  1170  	wg.Add(2)
  1171  	go func() {
  1172  		defer wg.Done()
  1173  		m.gets(nil, false)
  1174  	}()
  1175  	go func() {
  1176  		defer wg.Done()
  1177  
  1178  		setPrimaryTo(t, m.proxyURL, m.smap, nextProxyURL, nextProxyID)
  1179  		m.proxyURL = nextProxyURL
  1180  	}()
  1181  	wg.Wait()
  1182  
  1183  	m.ensureNoGetErrors()
  1184  
   1185  	// Step 5. destroy the ais bucket via the original primary, which is no longer the primary at this point
  1186  	tools.DestroyBucket(t, origURL, m.bck)
  1187  	tlog.Logf("Destroyed bucket %s via non-primary %s/%s\n", m.bck, origID, origURL)
  1188  }
  1189  
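         // Verifies that object access times survive a rebalance: records atimes
         // before reactivating a maintenance target and compares them against the
         // listing taken after the rebalance completes.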
  1190  func TestAtimeRebalance(t *testing.T) {
  1191  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1192  
  1193  	m := ioContext{
  1194  		t:               t,
  1195  		num:             2000,
  1196  		numGetsEachFile: 2,
  1197  	}
  1198  
  1199  	m.initAndSaveState(true /*cleanup*/)
  1200  	m.expectTargets(2)
  1201  
  1202  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1203  
  1204  	target := m.startMaintenanceNoRebalance()
  1205  
  1206  	m.puts()
  1207  
  1208  	// Get atime in a format that includes nanoseconds to properly check if it
  1209  	// was updated in atime cache (if it wasn't, then the returned atime would
  1210  	// be different from the original one, but the difference could be very small).
  1211  	msg := &apc.LsoMsg{TimeFormat: time.StampNano}
  1212  	msg.AddProps(apc.GetPropsAtime, apc.GetPropsStatus)
  1213  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1214  	lst, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
  1215  	tassert.CheckFatal(t, err)
  1216  
  1217  	objNames := make(cos.StrKVs, 10)
  1218  	for _, en := range lst.Entries {
  1219  		objNames[en.Name] = en.Atime
  1220  	}
  1221  
  1222  	rebID := m.stopMaintenance(target)
  1223  
  1224  	// make sure that the cluster has all targets enabled
  1225  	_, err = tools.WaitForClusterState(
  1226  		m.proxyURL,
  1227  		"target joined back",
  1228  		m.smap.Version,
  1229  		m.originalProxyCount,
  1230  		m.originalTargetCount,
  1231  	)
  1232  	tassert.CheckFatal(t, err)
  1233  
  1234  	tools.WaitForRebalanceByID(t, baseParams, rebID)
  1235  
  1236  	msg = &apc.LsoMsg{TimeFormat: time.StampNano}
  1237  	msg.AddProps(apc.GetPropsAtime, apc.GetPropsStatus)
  1238  	lstReb, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
  1239  	tassert.CheckFatal(t, err)
  1240  
  1241  	itemCount, itemCountOk := len(lstReb.Entries), 0
  1242  	l := len(lst.Entries)
  1243  	if itemCount != l {
  1244  		t.Errorf("The number of objects mismatch: before %d, after %d", len(lst.Entries), itemCount)
  1245  	}
  1246  	for _, en := range lstReb.Entries {
  1247  		atime, ok := objNames[en.Name]
  1248  		if !ok {
  1249  			t.Errorf("Object %q not found", en.Name)
  1250  			continue
  1251  		}
  1252  		if atime != en.Atime {
  1253  			t.Errorf("Atime mismatched for %s: before %q, after %q", en.Name, atime, en.Atime)
  1254  		}
  1255  		if en.IsStatusOK() {
  1256  			itemCountOk++
  1257  		}
  1258  	}
  1259  	if itemCountOk != l {
  1260  		t.Errorf("Wrong number of objects with status OK: %d (expecting %d)", itemCountOk, l)
  1261  	}
  1262  }
  1263  
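         // Verifies that GET-ing an object from an ais bucket updates its atime.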
  1264  func TestAtimeLocalGet(t *testing.T) {
  1265  	var (
  1266  		bck = cmn.Bck{
  1267  			Name:     t.Name(),
  1268  			Provider: apc.AIS,
  1269  		}
  1270  		proxyURL      = tools.RandomProxyURL(t)
  1271  		baseParams    = tools.BaseAPIParams(proxyURL)
  1272  		objectName    = t.Name()
  1273  		objectContent = readers.NewBytes([]byte("file content"))
  1274  	)
  1275  
  1276  	tools.CreateBucket(t, proxyURL, bck, nil, true /*cleanup*/)
  1277  
  1278  	_, err := api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objectName, Reader: objectContent})
  1279  	tassert.CheckFatal(t, err)
  1280  
  1281  	putAtime, putAtimeFormatted := tools.GetObjectAtime(t, baseParams, bck, objectName, time.RFC3339Nano)
  1282  
  1283  	// Get object so that atime is updated
  1284  	_, err = api.GetObject(baseParams, bck, objectName, nil)
  1285  	tassert.CheckFatal(t, err)
  1286  
  1287  	getAtime, getAtimeFormatted := tools.GetObjectAtime(t, baseParams, bck, objectName, time.RFC3339Nano)
  1288  
  1289  	if !(getAtime.After(putAtime)) {
  1290  		t.Errorf("Expected PUT atime (%s) to be before GET atime (%s)", putAtimeFormatted, getAtimeFormatted)
  1291  	}
  1292  }
  1293  
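         // Verifies that a cold GET from a remote bucket sets the object's atime.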
  1294  func TestAtimeColdGet(t *testing.T) {
  1295  	var (
  1296  		bck           = cliBck
  1297  		proxyURL      = tools.RandomProxyURL(t)
  1298  		baseParams    = tools.BaseAPIParams(proxyURL)
  1299  		objectName    = t.Name()
  1300  		objectContent = readers.NewBytes([]byte("dummy content"))
  1301  	)
  1302  
  1303  	tools.CheckSkip(t, &tools.SkipTestArgs{RemoteBck: true, Bck: bck})
  1304  	api.DeleteObject(baseParams, bck, objectName)
  1305  	defer api.DeleteObject(baseParams, bck, objectName)
  1306  
  1307  	tools.PutObjectInRemoteBucketWithoutCachingLocally(t, bck, objectName, objectContent)
  1308  
  1309  	timeAfterPut := time.Now()
  1310  
  1311  	// Perform the COLD get
  1312  	oah, err := api.GetObject(baseParams, bck, objectName, nil)
  1313  	tassert.CheckFatal(t, err)
  1314  
  1315  	tlog.Logf("%+v\n", oah) // DEBUG
  1316  
  1317  	getAtime, getAtimeFormatted := tools.GetObjectAtime(t, baseParams, bck, objectName, time.RFC3339Nano)
  1318  	tassert.Fatalf(t, !getAtime.IsZero(), "GET atime is zero")
  1319  
  1320  	if !(getAtime.After(timeAfterPut)) {
  1321  		t.Errorf("Expected PUT atime (%s) to be before GET atime (%s)", timeAfterPut.Format(time.RFC3339Nano), getAtimeFormatted)
  1322  	}
  1323  }
  1324  
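         // Verifies that prefetching previously evicted objects does not update
         // their atime.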
  1325  func TestAtimePrefetch(t *testing.T) {
  1326  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1327  
  1328  	var (
  1329  		bck        = cliBck
  1330  		proxyURL   = tools.RandomProxyURL(t)
  1331  		baseParams = tools.BaseAPIParams(proxyURL)
  1332  		objectName = t.Name()
  1333  		numObjs    = 10
  1334  		objPath    = "atime/obj-"
  1335  		errCh      = make(chan error, numObjs)
  1336  		nameCh     = make(chan string, numObjs)
  1337  		objs       = make([]string, 0, numObjs)
  1338  	)
  1339  
  1340  	tools.CheckSkip(t, &tools.SkipTestArgs{RemoteBck: true, Bck: bck})
  1341  	api.DeleteObject(baseParams, bck, objectName)
  1342  	defer func() {
  1343  		for _, obj := range objs {
  1344  			api.DeleteObject(baseParams, bck, obj)
  1345  		}
  1346  	}()
  1347  
  1348  	wg := &sync.WaitGroup{}
  1349  	for i := range numObjs {
  1350  		wg.Add(1)
  1351  		go func(idx int) {
  1352  			defer wg.Done()
  1353  			object := objPath + strconv.FormatUint(uint64(idx), 10)
  1354  			_, err := api.PutObject(&api.PutArgs{
  1355  				BaseParams: baseParams,
  1356  				Bck:        bck,
  1357  				ObjName:    object,
  1358  				Reader:     readers.NewBytes([]byte("dummy content")),
  1359  			})
  1360  			if err == nil {
  1361  				nameCh <- object
  1362  			} else {
  1363  				errCh <- err
  1364  			}
  1365  		}(i)
  1366  	}
  1367  	wg.Wait()
  1368  	close(errCh)
  1369  	close(nameCh)
  1370  	tassert.SelectErr(t, errCh, "put", true)
  1371  	for obj := range nameCh {
  1372  		objs = append(objs, obj)
  1373  	}
  1374  	xid, err := api.EvictMultiObj(baseParams, bck, objs, "" /*template*/)
  1375  	tassert.CheckFatal(t, err)
  1376  	args := xact.ArgsMsg{ID: xid, Timeout: tools.RebalanceTimeout}
  1377  	_, err = api.WaitForXactionIC(baseParams, &args)
  1378  	tassert.CheckFatal(t, err)
  1379  
  1380  	timeAfterPut := time.Now()
  1381  
  1382  	{
  1383  		var msg apc.PrefetchMsg
  1384  		msg.ObjNames = objs
  1385  		xid, err = api.Prefetch(baseParams, bck, msg)
  1386  		tassert.CheckFatal(t, err)
  1387  		args = xact.ArgsMsg{ID: xid, Kind: apc.ActPrefetchObjects, Timeout: tools.RebalanceTimeout}
  1388  		_, err = api.WaitForXactionIC(baseParams, &args)
  1389  		tassert.CheckFatal(t, err)
  1390  	}
  1391  
  1392  	timeFormat := time.RFC3339Nano
  1393  	msg := &apc.LsoMsg{Props: apc.GetPropsAtime, TimeFormat: timeFormat, Prefix: objPath}
  1394  	lst, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  1395  	tassert.CheckFatal(t, err)
  1396  	if len(lst.Entries) != numObjs {
  1397  		t.Errorf("Number of objects mismatch: expected %d, found %d", numObjs, len(lst.Entries))
  1398  	}
  1399  	for _, en := range lst.Entries {
  1400  		atime, err := time.Parse(timeFormat, en.Atime)
  1401  		tassert.CheckFatal(t, err)
  1402  		if atime.After(timeAfterPut) {
  1403  			t.Errorf("Atime should not be updated after prefetch (got: atime after PUT: %s, atime after GET: %s).",
  1404  				timeAfterPut.Format(timeFormat), atime.Format(timeFormat))
  1405  		}
  1406  	}
  1407  }
  1408  
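         // Verifies that PUT-ting an object into an ais bucket sets its atime.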
  1409  func TestAtimeLocalPut(t *testing.T) {
  1410  	var (
  1411  		bck = cmn.Bck{
  1412  			Name:     t.Name(),
  1413  			Provider: apc.AIS,
  1414  		}
  1415  		proxyURL      = tools.RandomProxyURL(t)
  1416  		baseParams    = tools.BaseAPIParams(proxyURL)
  1417  		objectName    = t.Name()
  1418  		objectContent = readers.NewBytes([]byte("dummy content"))
  1419  	)
  1420  
  1421  	tools.CreateBucket(t, proxyURL, bck, nil, true /*cleanup*/)
  1422  
  1423  	timeBeforePut := time.Now()
  1424  	_, err := api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objectName, Reader: objectContent})
  1425  	tassert.CheckFatal(t, err)
  1426  
  1427  	putAtime, putAtimeFormatted := tools.GetObjectAtime(t, baseParams, bck, objectName, time.RFC3339Nano)
  1428  
  1429  	if !(putAtime.After(timeBeforePut)) {
  1430  		t.Errorf("Expected atime after PUT (%s) to be after atime before PUT (%s)",
  1431  			putAtimeFormatted, timeBeforePut.Format(time.RFC3339Nano))
  1432  	}
  1433  }
  1434  
  1435  // 1. Unregister target
  1436  // 2. Add bucket - unregistered target should miss the update
  1437  // 3. Reregister target
  1438  // 4. Put objects
  1439  // 5. Get objects - everything should succeed
  1440  func TestGetAndPutAfterReregisterWithMissedBucketUpdate(t *testing.T) {
  1441  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1442  
  1443  	m := ioContext{
  1444  		t:               t,
  1445  		num:             10000,
  1446  		numGetsEachFile: 5,
  1447  	}
  1448  
  1449  	m.initAndSaveState(true /*cleanup*/)
  1450  	m.expectTargets(2)
  1451  
  1452  	target := m.startMaintenanceNoRebalance()
  1453  
  1454  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1455  
  1456  	rebID := m.stopMaintenance(target)
  1457  
  1458  	m.puts()
  1459  	m.gets(nil, false)
  1460  
  1461  	m.ensureNoGetErrors()
  1462  	m.waitAndCheckCluState()
  1463  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1464  	tools.WaitForRebalanceByID(t, baseParams, rebID)
  1465  }
  1466  
  1467  // 1. Unregister target
  1468  // 2. Add bucket - unregistered target should miss the update
  1469  // 3. Put objects
  1470  // 4. Reregister target - rebalance kicks in
  1471  // 5. Get objects - everything should succeed
  1472  func TestGetAfterReregisterWithMissedBucketUpdate(t *testing.T) {
  1473  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1474  
  1475  	m := ioContext{
  1476  		t:               t,
  1477  		num:             10000,
  1478  		fileSize:        1024,
  1479  		numGetsEachFile: 5,
  1480  	}
  1481  
  1482  	// Initialize ioContext
  1483  	m.initAndSaveState(true /*cleanup*/)
  1484  	m.expectTargets(2)
  1485  
  1486  	targets := m.smap.Tmap.ActiveNodes()
  1487  
  1488  	// Unregister target[0]
  1489  	args := &apc.ActValRmNode{DaemonID: targets[0].ID(), SkipRebalance: true}
  1490  	_, err := api.StartMaintenance(tools.BaseAPIParams(m.proxyURL), args)
  1491  	tassert.CheckFatal(t, err)
  1492  	tools.WaitForClusterState(
  1493  		m.proxyURL,
  1494  		"remove target",
  1495  		m.smap.Version,
  1496  		m.originalProxyCount,
  1497  		m.originalTargetCount-1,
  1498  	)
  1499  
  1500  	// Create ais bucket
  1501  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1502  
  1503  	m.puts()
  1504  
  1505  	// Reregister target 0
  1506  	rebID := m.stopMaintenance(targets[0])
  1507  
  1508  	// Wait for rebalance and execute GETs
  1509  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1510  	tools.WaitForRebalanceByID(t, baseParams, rebID)
  1511  
  1512  	m.gets(nil, false)
  1513  
  1514  	m.ensureNoGetErrors()
  1515  	m.waitAndCheckCluState()
  1516  }
  1517  
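         // Triggers a rebalance by reactivating a maintenance target and then, while
         // GETs are in flight, manually starts another rebalance and waits for it
         // to complete.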
  1518  func TestRenewRebalance(t *testing.T) {
  1519  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1520  
  1521  	var (
  1522  		m = ioContext{
  1523  			t:                   t,
  1524  			num:                 10000,
  1525  			numGetsEachFile:     5,
  1526  			otherTasksToTrigger: 1,
  1527  		}
  1528  		rebID string
  1529  	)
  1530  
  1531  	m.initAndSaveState(true /*cleanup*/)
  1532  	m.expectTargets(2)
  1533  
  1534  	// Step 1: Unregister a target
  1535  	target := m.startMaintenanceNoRebalance()
  1536  
  1537  	// Step 2: Create an ais bucket
  1538  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1539  
  1540  	// Step 3: PUT objects in the bucket
  1541  	m.puts()
  1542  
  1543  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1544  
  1545  	// Step 4: Re-register target (triggers rebalance)
  1546  	m.stopMaintenance(target)
  1547  	xargs := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceStartTimeout}
  1548  	err := api.WaitForXactionNode(baseParams, &xargs, xactSnapRunning)
  1549  	tassert.CheckError(t, err)
  1550  	tlog.Logf("rebalance started\n")
  1551  
  1552  	wg := &sync.WaitGroup{}
  1553  	wg.Add(2)
   1554  	// Step 5: GET objects from the bucket
  1555  	go func() {
  1556  		defer wg.Done()
  1557  		m.gets(nil, false)
  1558  	}()
  1559  
  1560  	// Step 6:
  1561  	//   - Start new rebalance manually after some time
  1562  	//   - TODO: Verify that new rebalance xaction has started
  1563  	go func() {
  1564  		defer wg.Done()
  1565  
  1566  		<-m.controlCh // wait for half the GETs to complete
  1567  
  1568  		rebID, err = api.StartXaction(baseParams, &xact.ArgsMsg{Kind: apc.ActRebalance}, "")
  1569  		tassert.CheckFatal(t, err)
  1570  		tlog.Logf("manually initiated rebalance\n")
  1571  	}()
  1572  
  1573  	wg.Wait()
  1574  	args := xact.ArgsMsg{ID: rebID, Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
  1575  	_, err = api.WaitForXactionIC(baseParams, &args)
  1576  	tassert.CheckError(t, err)
  1577  
  1578  	m.ensureNoGetErrors()
  1579  	m.waitAndCheckCluState()
  1580  }
  1581  
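         // Makes the bucket 2-way mirrored, detaches one mountpath on a random
         // target, verifies that all objects remain readable, and then re-attaches
         // the mountpath.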
  1582  func TestGetFromMirroredWithLostOneMountpath(t *testing.T) {
  1583  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1584  	var (
  1585  		copies = 2
  1586  		m      = ioContext{
  1587  			t:               t,
  1588  			num:             5000,
  1589  			numGetsEachFile: 4,
  1590  		}
  1591  		baseParams = tools.BaseAPIParams()
  1592  	)
  1593  
  1594  	m.initAndSaveState(true /*cleanup*/)
  1595  	m.expectTargets(1)
  1596  
  1597  	// Select one target at random
  1598  	target, _ := m.smap.GetRandTarget()
  1599  	mpList, err := api.GetMountpaths(baseParams, target)
  1600  	tassert.CheckFatal(t, err)
  1601  	ensureNoDisabledMountpaths(t, target, mpList)
  1602  	if len(mpList.Available) < copies {
  1603  		t.Fatalf("%s requires at least %d mountpaths per target", t.Name(), copies)
  1604  	}
  1605  
  1606  	// Step 1: Create a local bucket
  1607  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1608  
  1609  	// Step 2: Make the bucket redundant
  1610  	_, err = api.SetBucketProps(baseParams, m.bck, &cmn.BpropsToSet{
  1611  		Mirror: &cmn.MirrorConfToSet{
  1612  			Enabled: apc.Ptr(true),
  1613  			Copies:  apc.Ptr(int64(copies)),
  1614  		},
  1615  	})
  1616  	if err != nil {
  1617  		t.Fatalf("Failed to make the bucket redundant: %v", err)
  1618  	}
  1619  
  1620  	// Step 3: PUT objects in the bucket
  1621  	m.puts()
  1622  	m.ensureNumCopies(baseParams, copies, false)
  1623  
  1624  	// Step 4: Remove a mountpath
  1625  	mpath := mpList.Available[0]
  1626  	tlog.Logf("Remove mountpath %s on target %s\n", mpath, target.ID())
  1627  	err = api.DetachMountpath(baseParams, target, mpath, false /*dont-resil*/)
  1628  	tassert.CheckFatal(t, err)
  1629  
  1630  	tools.WaitForResilvering(t, baseParams, target)
  1631  
  1632  	// Step 5: GET objects from the bucket
  1633  	m.gets(nil, false)
  1634  
  1635  	m.ensureNumCopies(baseParams, copies, true /*greaterOk*/)
  1636  
  1637  	// Step 6: Add previously removed mountpath
  1638  	tlog.Logf("Add mountpath %s on target %s\n", mpath, target.ID())
  1639  	err = api.AttachMountpath(baseParams, target, mpath)
  1640  	tassert.CheckFatal(t, err)
  1641  
  1642  	tools.WaitForResilvering(t, baseParams, target)
  1643  
  1644  	m.ensureNumCopies(baseParams, copies, true)
  1645  	m.ensureNoGetErrors()
  1646  	m.ensureNumMountpaths(target, mpList)
  1647  }
  1648  
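         // Makes the bucket n-way mirrored (n = number of the target's mountpaths),
         // detaches all but one mountpath on that target, verifies that all objects
         // remain readable, and then re-attaches the mountpaths.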
  1649  func TestGetFromMirroredWithLostMountpathAllExceptOne(t *testing.T) {
  1650  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1651  	m := ioContext{
  1652  		t:               t,
  1653  		num:             10000,
  1654  		numGetsEachFile: 4,
  1655  	}
  1656  	m.initAndSaveState(true /*cleanup*/)
  1657  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1658  
  1659  	// Select a random target
  1660  	target, _ := m.smap.GetRandTarget()
  1661  	mpList, err := api.GetMountpaths(baseParams, target)
  1662  	tassert.CheckFatal(t, err)
  1663  	ensureNoDisabledMountpaths(t, target, mpList)
  1664  	mpathCount := len(mpList.Available)
  1665  	if mpathCount < 3 {
  1666  		t.Skipf("%s requires at least 3 mountpaths per target (%s has %d)", t.Name(), target.StringEx(), mpathCount)
  1667  	}
  1668  
  1669  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1670  
  1671  	// Make the bucket n-copy mirrored
  1672  	_, err = api.SetBucketProps(baseParams, m.bck, &cmn.BpropsToSet{
  1673  		Mirror: &cmn.MirrorConfToSet{
  1674  			Enabled: apc.Ptr(true),
  1675  			Copies:  apc.Ptr(int64(mpathCount)),
  1676  		},
  1677  	})
  1678  	if err != nil {
  1679  		t.Fatalf("Failed to make the bucket redundant: %v", err)
  1680  	}
  1681  
  1682  	// PUT
  1683  	m.puts()
  1684  	m.ensureNumCopies(baseParams, mpathCount, false /*greaterOk*/)
  1685  
  1686  	// Remove all mountpaths except one
  1687  	tlog.Logf("Remove all except one (%q) mountpath on target %s\n", mpList.Available[0], target.StringEx())
  1688  	for i, mpath := range mpList.Available[1:] {
  1689  		err = api.DetachMountpath(baseParams, target, mpath, false /*dont-resil*/)
  1690  		if err != nil {
  1691  			for j := range i {
  1692  				api.AttachMountpath(baseParams, target, mpList.Available[j+1])
  1693  			}
  1694  			tassert.CheckFatal(t, err)
  1695  		}
  1696  		time.Sleep(time.Second)
  1697  	}
  1698  
  1699  	tools.WaitForResilvering(t, baseParams, target)
  1700  
  1701  	// Wait for async mirroring to finish
  1702  	flt := xact.ArgsMsg{Kind: apc.ActPutCopies, Bck: m.bck}
  1703  	api.WaitForXactionIdle(baseParams, &flt)
  1704  	time.Sleep(time.Second) // pending writes
  1705  
  1706  	// GET
  1707  	m.gets(nil, false)
  1708  
  1709  	// Reattach previously removed mountpaths
  1710  	tlog.Logf("Reattach mountpaths at %s\n", target.StringEx())
  1711  	for _, mpath := range mpList.Available[1:] {
  1712  		err = api.AttachMountpath(baseParams, target, mpath)
  1713  		tassert.CheckFatal(t, err)
  1714  		time.Sleep(time.Second)
  1715  	}
  1716  
  1717  	tools.WaitForResilvering(t, baseParams, nil)
  1718  
  1719  	m.ensureNumCopies(baseParams, mpathCount, true /*greaterOk*/)
  1720  	m.ensureNoGetErrors()
  1721  	m.ensureNumMountpaths(target, mpList)
  1722  }
  1723  
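// Mirrored copies are created asynchronously, which is why the test above waits for
// the put-copies xaction to go idle before issuing GETs. A minimal sketch of that
// wait (hypothetical helper; same calls as above):
func exampleWaitForCopies(baseParams api.BaseParams, bck cmn.Bck) {
	flt := xact.ArgsMsg{Kind: apc.ActPutCopies, Bck: bck}
	api.WaitForXactionIdle(baseParams, &flt)
	time.Sleep(time.Second) // allow for in-flight writes, as the test does
}
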
  1724  // TODO: remove all except one mountpath, run short, reduce sleep, increase stress...
  1725  func TestGetNonRedundantWithDisabledMountpath(t *testing.T) {
  1726  	testNonRedundantMpathDD(t, apc.ActMountpathDisable)
  1727  }
  1728  
  1729  func TestGetNonRedundantWithDetachedMountpath(t *testing.T) {
  1730  	testNonRedundantMpathDD(t, apc.ActMountpathDetach)
  1731  }
  1732  
  1733  func testNonRedundantMpathDD(t *testing.T, action string) {
  1734  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1735  	m := ioContext{
  1736  		t:               t,
  1737  		num:             1000,
  1738  		numGetsEachFile: 2,
  1739  	}
  1740  	m.initAndSaveState(true /*cleanup*/)
  1741  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1742  
  1743  	// Select a random target
  1744  	target, _ := m.smap.GetRandTarget()
  1745  	mpList, err := api.GetMountpaths(baseParams, target)
  1746  	tassert.CheckFatal(t, err)
  1747  	ensureNoDisabledMountpaths(t, target, mpList)
  1748  
  1749  	mpathCount := len(mpList.Available)
  1750  	if mpathCount < 2 {
  1751  		t.Skipf("%s requires at least 2 mountpaths per target (%s has %d)", t.Name(), target.StringEx(), mpathCount)
  1752  	}
  1753  
  1754  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1755  
  1756  	// PUT
  1757  	m.puts()
  1758  
  1759  	tlog.Logf("%s %q at target %s\n", action, mpList.Available[0], target.StringEx())
  1760  	if action == apc.ActMountpathDisable {
  1761  		err = api.DisableMountpath(baseParams, target, mpList.Available[0], false /*dont-resil*/)
  1762  	} else {
  1763  		err = api.DetachMountpath(baseParams, target, mpList.Available[0], false /*dont-resil*/)
  1764  	}
  1765  	tassert.CheckFatal(t, err)
  1766  
  1767  	tools.WaitForResilvering(t, baseParams, target)
  1768  
  1769  	// GET
  1770  	m.gets(nil, false)
  1771  
  1772  	// Add previously disabled or detached mountpath
  1773  	if action == apc.ActMountpathDisable {
  1774  		tlog.Logf("Re-enable %q at target %s\n", mpList.Available[0], target.StringEx())
  1775  		err = api.EnableMountpath(baseParams, target, mpList.Available[0])
  1776  	} else {
  1777  		tlog.Logf("Re-attach %q at target %s\n", mpList.Available[0], target.StringEx())
  1778  		err = api.AttachMountpath(baseParams, target, mpList.Available[0])
  1779  	}
  1780  	tassert.CheckFatal(t, err)
  1781  
  1782  	tools.WaitForResilvering(t, baseParams, target)
  1783  
  1784  	m.ensureNoGetErrors()
  1785  	m.ensureNumMountpaths(target, mpList)
  1786  }
  1787  
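// Both mountpath tests above follow the same round trip: take a mountpath away
// (detach or disable), wait for resilvering, bring it back, and wait again. An
// illustrative, hypothetical helper for the detach/attach variant (parameter types
// follow the calls made in the tests above):
func exampleDetachReattachMountpath(t *testing.T, baseParams api.BaseParams, target *meta.Snode, mpath string) {
	err := api.DetachMountpath(baseParams, target, mpath, false /*dont-resil*/)
	tassert.CheckFatal(t, err)
	tools.WaitForResilvering(t, baseParams, target)

	err = api.AttachMountpath(baseParams, target, mpath)
	tassert.CheckFatal(t, err)
	tools.WaitForResilvering(t, baseParams, target)
}
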
  1788  // 1. Start rebalance
  1789  // 2. Start changing the primary proxy
  1790  // 3. IC must survive and rebalance must finish
  1791  func TestICRebalance(t *testing.T) {
  1792  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, RequiredDeployment: tools.ClusterTypeLocal})
  1793  
  1794  	var (
  1795  		m = ioContext{
  1796  			t:   t,
  1797  			num: 25000,
  1798  		}
  1799  		rebID string
  1800  	)
  1801  
  1802  	m.initAndSaveState(true /*cleanup*/)
  1803  	m.expectTargets(3)
  1804  	m.expectProxies(3)
  1805  	psi, err := m.smap.GetRandProxy(true /*exclude primary*/)
  1806  	tassert.CheckFatal(t, err)
  1807  	m.proxyURL = psi.URL(cmn.NetPublic)
  1808  	icNode := tools.GetICProxy(t, m.smap, psi.ID())
  1809  
  1810  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1811  
  1812  	m.puts()
  1813  
  1814  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1815  
  1816  	tlog.Logf("Manually initiating rebalance\n")
  1817  	rebID, err = api.StartXaction(baseParams, &xact.ArgsMsg{Kind: apc.ActRebalance}, "")
  1818  	tassert.CheckFatal(t, err)
  1819  
  1820  	xargs := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceStartTimeout}
  1821  	api.WaitForXactionNode(baseParams, &xargs, xactSnapRunning)
  1822  
  1823  	tlog.Logf("Killing %s\n", icNode.StringEx())
  1824  	// cmd holds the original command line with which this proxy was started
  1825  	cmd, err := tools.KillNode(icNode)
  1826  	tassert.CheckFatal(t, err)
  1827  
  1828  	proxyCnt := m.smap.CountActivePs()
  1829  	smap, err := tools.WaitForClusterState(m.proxyURL, "designate new primary", m.smap.Version, proxyCnt-1, 0)
  1830  	tassert.CheckError(t, err)
  1831  
  1832  	// restart the original proxy, adding the current primary proxy to its command-line args
  1833  	err = tools.RestoreNode(cmd, false, "proxy (prev primary)")
  1834  	tassert.CheckFatal(t, err)
  1835  
  1836  	smap, err = tools.WaitForClusterState(m.proxyURL, "restore", smap.Version, proxyCnt, 0)
  1837  	tassert.CheckFatal(t, err)
  1838  	if _, ok := smap.Pmap[psi.ID()]; !ok {
  1839  		t.Fatalf("Previous primary proxy did not rejoin the cluster")
  1840  	}
  1841  	checkSmaps(t, m.proxyURL)
  1842  
  1843  	tlog.Logf("Wait for rebalance: %s\n", rebID)
  1844  	args := xact.ArgsMsg{ID: rebID, Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
  1845  	_, _ = api.WaitForXactionIC(baseParams, &args)
  1846  
  1847  	m.waitAndCheckCluState()
  1848  }
  1849  
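// The kill/restore sequence in TestICRebalance (and in TestICDecommission below)
// reduces to: kill the node, wait for the cluster map to shrink, restart the node
// from its saved command line, and wait for it to rejoin. A hypothetical sketch for
// a proxy; the "reason" strings are just log labels:
func exampleKillAndRestoreProxy(t *testing.T, proxyURL string, smap *meta.Smap, node *meta.Snode) {
	cmd, err := tools.KillNode(node)
	tassert.CheckFatal(t, err)

	proxyCnt := smap.CountActivePs()
	smap, err = tools.WaitForClusterState(proxyURL, "proxy removed", smap.Version, proxyCnt-1, 0)
	tassert.CheckFatal(t, err)

	err = tools.RestoreNode(cmd, false, "proxy")
	tassert.CheckFatal(t, err)

	_, err = tools.WaitForClusterState(proxyURL, "proxy restored", smap.Version, proxyCnt, 0)
	tassert.CheckFatal(t, err)
}
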
  1850  // 1. Start decommissioning a target with rebalance
  1851  // 2. Start changing the primary proxy
  1852  // 3. IC must survive, rebalance must finish, and the target must be gone
  1853  func TestICDecommission(t *testing.T) {
  1854  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, RequiredDeployment: tools.ClusterTypeLocal})
  1855  
  1856  	var (
  1857  		err error
  1858  		m   = ioContext{
  1859  			t:   t,
  1860  			num: 25000,
  1861  		}
  1862  	)
  1863  
  1864  	m.initAndSaveState(true /*cleanup*/)
  1865  	m.expectTargets(3)
  1866  	m.expectProxies(3)
  1867  	psi, err := m.smap.GetRandProxy(true /*exclude primary*/)
  1868  	tassert.CheckFatal(t, err)
  1869  	m.proxyURL = psi.URL(cmn.NetPublic)
  1870  	tlog.Logf("Monitoring node: %s\n", psi.StringEx())
  1871  	icNode := tools.GetICProxy(t, m.smap, psi.ID())
  1872  
  1873  	tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/)
  1874  
  1875  	m.puts()
  1876  
  1877  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1878  	tsi, err := m.smap.GetRandTarget()
  1879  	tassert.CheckFatal(t, err)
  1880  
  1881  	args := &apc.ActValRmNode{DaemonID: tsi.ID(), SkipRebalance: true}
  1882  	_, err = api.StartMaintenance(baseParams, args)
  1883  	tassert.CheckFatal(t, err)
  1884  
  1885  	defer func() {
  1886  		args := &apc.ActValRmNode{DaemonID: tsi.ID()}
  1887  		rebID, err := api.StopMaintenance(baseParams, args)
  1888  		tassert.CheckFatal(t, err)
  1889  		tools.WaitForRebalanceByID(t, baseParams, rebID)
  1891  	}()
  1892  
  1894  	tlog.Logf("Killing %s\n", icNode.StringEx())
  1895  
  1896  	// cmd holds the original command line with which this proxy was started
  1897  	cmd, err := tools.KillNode(icNode)
  1898  	tassert.CheckFatal(t, err)
  1899  
  1900  	proxyCnt := m.smap.CountActivePs()
  1901  	smap, err := tools.WaitForClusterState(m.proxyURL, "designate new primary", m.smap.Version, proxyCnt-1, 0)
  1902  	tassert.CheckError(t, err)
  1903  
  1904  	// restart the original proxy, adding the current primary proxy to its command-line args
  1905  	err = tools.RestoreNode(cmd, false, "proxy (prev primary)")
  1906  	tassert.CheckFatal(t, err)
  1907  
  1908  	smap, err = tools.WaitForClusterState(m.proxyURL, "restore", smap.Version, proxyCnt, 0)
  1909  	tassert.CheckFatal(t, err)
  1910  	if _, ok := smap.Pmap[psi.ID()]; !ok {
  1911  		t.Fatalf("Previous primary proxy did not rejoin the cluster")
  1912  	}
  1913  	checkSmaps(t, m.proxyURL)
  1914  
  1915  	_, err = tools.WaitForClusterState(m.proxyURL, "decommission target",
  1916  		m.smap.Version, m.smap.CountProxies(), m.smap.CountTargets()-1)
  1917  	tassert.CheckFatal(t, err)
  1918  }
  1919  
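// The maintenance round trip used above -- put a target into maintenance without
// rebalancing, then take it back out and wait for the resulting rebalance -- in a
// hypothetical helper (same api calls as the test):
func exampleMaintenanceRoundTrip(t *testing.T, baseParams api.BaseParams, tsi *meta.Snode) {
	args := &apc.ActValRmNode{DaemonID: tsi.ID(), SkipRebalance: true}
	_, err := api.StartMaintenance(baseParams, args)
	tassert.CheckFatal(t, err)

	// ... exercise the cluster while the target is in maintenance ...

	args = &apc.ActValRmNode{DaemonID: tsi.ID()}
	rebID, err := api.StopMaintenance(baseParams, args)
	tassert.CheckFatal(t, err)
	tools.WaitForRebalanceByID(t, baseParams, rebID)
}
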
  1920  func TestSingleResilver(t *testing.T) {
  1921  	m := ioContext{t: t}
  1922  	m.initAndSaveState(true /*cleanup*/)
  1923  	baseParams := tools.BaseAPIParams(m.proxyURL)
  1924  
  1925  	// Select a random target
  1926  	target, _ := m.smap.GetRandTarget()
  1927  
  1928  	// Start resilvering just on the target
  1929  	args := xact.ArgsMsg{Kind: apc.ActResilver, DaemonID: target.ID()}
  1930  	id, err := api.StartXaction(baseParams, &args, "")
  1931  	tassert.CheckFatal(t, err)
  1932  
  1933  	// Wait for specific resilvering x[id]
  1934  	args = xact.ArgsMsg{ID: id, Kind: apc.ActResilver, Timeout: tools.RebalanceTimeout}
  1935  	_, err = api.WaitForXactionIC(baseParams, &args)
  1936  	tassert.CheckFatal(t, err)
  1937  
  1938  	// Make sure other nodes were not resilvered
  1939  	args = xact.ArgsMsg{ID: id}
  1940  	snaps, err := api.QueryXactionSnaps(baseParams, &args)
  1941  	tassert.CheckFatal(t, err)
  1942  	tassert.Errorf(t, len(snaps) == 1, "expected resilver snaps from exactly one target, got %d", len(snaps))
  1943  }
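
// Querying snapshots by xaction ID, as above, is also a general way to confirm the
// scope of an xaction: a node-scoped resilver should yield exactly one per-target
// snapshot. Hypothetical sketch:
func exampleCountXactionTargets(t *testing.T, baseParams api.BaseParams, xid string) int {
	args := xact.ArgsMsg{ID: xid}
	snaps, err := api.QueryXactionSnaps(baseParams, &args)
	tassert.CheckFatal(t, err)
	return len(snaps)
}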