github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/fshc_test.go (about)

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"fmt"
     9  	"net/http"
    10  	"os"
    11  	"path"
    12  	"sync"
    13  	"testing"
    14  	"time"
    15  
    16  	"github.com/NVIDIA/aistore/api"
    17  	"github.com/NVIDIA/aistore/api/apc"
    18  	"github.com/NVIDIA/aistore/cmn"
    19  	"github.com/NVIDIA/aistore/cmn/cos"
    20  	"github.com/NVIDIA/aistore/core/meta"
    21  	"github.com/NVIDIA/aistore/tools"
    22  	"github.com/NVIDIA/aistore/tools/readers"
    23  	"github.com/NVIDIA/aistore/tools/tassert"
    24  	"github.com/NVIDIA/aistore/tools/tlog"
    25  	"github.com/NVIDIA/aistore/tools/trand"
    26  	"github.com/NVIDIA/aistore/xact"
    27  )
    28  
const (
	fshcDetectTimeMax = time.Second * 10 // upper bound for polling a target's mountpath list
	fshcRunTimeMax    = time.Second * 15 // upper bound for how long runAsyncJob keeps issuing requests
	fshcDir           = "fschecker"      // object-name prefix joined in front of generated object names
)
    34  
// checkerMD bundles the state shared by the FSHC detection tests: the cluster
// view, the mountpaths discovered on every target, and the channels and wait
// group used to drive the asynchronous PUT/GET job.
type checkerMD struct {
	t          *testing.T                    // test that owns this state
	seed       int64                         // set to 300 by newCheckerMD
	numObjs    int                           // 20 objects per discovered mountpath
	proxyURL   string                        // proxy URL picked by tools.RandomProxyURL
	bck        cmn.Bck                       // ais bucket the tests operate on
	smap       *meta.Smap                    // cluster map read in init
	mpList     meta.NodeMap                  // available mountpath => target that reported it
	allMps     map[string]*apc.MountpathList // target ID => mountpath list of that target
	origAvail  int                           // total number of available mountpaths over all targets
	fileSize   int64                         // object size used by the runTestSync PUT branch
	baseParams api.BaseParams                // API params built from proxyURL
	chstop     chan struct{}                 // signals runAsyncJob to return
	chfail     chan struct{}                 // signals runAsyncJob to break the mountpath
	wg         *sync.WaitGroup               // tracks the runAsyncJob goroutine
}
    51  
    52  func newCheckerMD(t *testing.T) *checkerMD {
    53  	md := &checkerMD{
    54  		t:        t,
    55  		seed:     300,
    56  		proxyURL: tools.RandomProxyURL(),
    57  		bck: cmn.Bck{
    58  			Name:     testBucketName,
    59  			Provider: apc.AIS,
    60  		},
    61  		fileSize: 64 * cos.KiB,
    62  		mpList:   make(meta.NodeMap, 10),
    63  		allMps:   make(map[string]*apc.MountpathList, 10),
    64  		chstop:   make(chan struct{}),
    65  		chfail:   make(chan struct{}),
    66  		wg:       &sync.WaitGroup{},
    67  	}
    68  
    69  	md.init()
    70  	md.numObjs = 20 * len(md.mpList)
    71  	tlog.Logf("Create %d objects[%d mountpaths] for test\n", md.numObjs, len(md.mpList))
    72  
    73  	return md
    74  }
    75  
    76  func (md *checkerMD) init() {
    77  	md.baseParams = tools.BaseAPIParams(md.proxyURL)
    78  	md.smap = tools.GetClusterMap(md.t, md.proxyURL)
    79  
    80  	for targetID, tsi := range md.smap.Tmap {
    81  		tlog.Logf("Target: %s\n", targetID)
    82  		lst, err := api.GetMountpaths(md.baseParams, tsi)
    83  		tassert.CheckFatal(md.t, err)
    84  		tlog.Logf("    Mountpaths: %v\n", lst)
    85  
    86  		for _, mpath := range lst.Available {
    87  			si, ok := md.mpList[mpath]
    88  			tassert.Errorf(md.t, !ok, "duplication (%s, %s, %s)", si, mpath, tsi)
    89  			md.mpList[mpath] = tsi
    90  		}
    91  		md.allMps[targetID] = lst
    92  
    93  		md.origAvail += len(lst.Available)
    94  	}
    95  }
    96  
// ensureNumMountpaths forwards to the package-level ensureNumMountpaths helper
// using the test handle stored in md.
func (md *checkerMD) ensureNumMountpaths(target *meta.Snode, mpList *apc.MountpathList) {
	ensureNumMountpaths(md.t, target, mpList)
}
   100  
   101  func (md *checkerMD) randomTargetMpath() (target *meta.Snode, mpath string, mpathMap *apc.MountpathList) {
   102  	// select random target and mountpath
   103  	for m, t := range md.mpList {
   104  		target, mpath = t, m
   105  		mpathMap = md.allMps[target.ID()]
   106  		break
   107  	}
   108  	return
   109  }
   110  
   111  func (md *checkerMD) runTestAsync(method string, target *meta.Snode, mpath string, mpathList *apc.MountpathList, suffix string) {
   112  	md.wg.Add(1)
   113  	go runAsyncJob(md.t, md.bck, md.wg, method, mpath, fileNames, md.chfail, md.chstop, suffix)
   114  	// let the job run for a while and then make a mountpath broken
   115  	time.Sleep(2 * time.Second)
   116  	md.chfail <- struct{}{}
   117  	if detected := waitForMountpathChanges(md.t, target, len(mpathList.Available)-1, len(mpathList.Disabled)+1, true); detected {
   118  		// let the job run for a while with broken mountpath, so FSHC detects the trouble
   119  		time.Sleep(2 * time.Second)
   120  		md.chstop <- struct{}{}
   121  	}
   122  	md.wg.Wait()
   123  
   124  	repairMountpath(md.t, target, mpath, len(mpathList.Available), len(mpathList.Disabled), suffix)
   125  }
   126  
   127  func (md *checkerMD) runTestSync(method string, target *meta.Snode, mpath string, mpathList *apc.MountpathList,
   128  	objList []string, suffix string) {
   129  	breakMountpath(md.t, mpath, suffix)
   130  	defer repairMountpath(md.t, target, mpath, len(mpathList.Available), len(mpathList.Disabled), suffix)
   131  
   132  	switch method {
   133  	case http.MethodPut:
   134  		p, err := api.HeadBucket(md.baseParams, md.bck, true /* don't add */)
   135  		tassert.CheckFatal(md.t, err)
   136  		for _, objName := range objList {
   137  			r, _ := readers.NewRand(md.fileSize, p.Cksum.Type)
   138  			_, err := api.PutObject(&api.PutArgs{
   139  				BaseParams: md.baseParams,
   140  				Bck:        md.bck,
   141  				ObjName:    path.Join(fshcDir, objName),
   142  				Reader:     r,
   143  				Size:       uint64(md.fileSize),
   144  			})
   145  			if err != nil {
   146  				tlog.Logf("%s: %v\n", objName, err)
   147  			}
   148  		}
   149  	case http.MethodGet:
   150  		for _, objName := range objList {
   151  			// GetObject must fail - so no error checking
   152  			_, err := api.GetObject(md.baseParams, md.bck, objName, nil)
   153  			if err == nil {
   154  				md.t.Errorf("Get %q must fail", objName)
   155  			}
   156  		}
   157  	}
   158  
   159  	if detected := waitForMountpathChanges(md.t, target, len(mpathList.Available)-1, len(mpathList.Disabled)+1, false); detected {
   160  		md.t.Error("PUT objects to a broken mountpath should not disable the mountpath when FSHC is disabled")
   161  	}
   162  }
   163  
   164  func waitForMountpathChanges(t *testing.T, target *meta.Snode, availLen, disabledLen int, failIfDiffer bool) bool {
   165  	var (
   166  		err        error
   167  		newMpaths  *apc.MountpathList
   168  		baseParams = tools.BaseAPIParams()
   169  	)
   170  
   171  	detectStart := time.Now()
   172  	detectLimit := time.Now().Add(fshcDetectTimeMax)
   173  
   174  	for detectLimit.After(time.Now()) {
   175  		newMpaths, err = api.GetMountpaths(baseParams, target)
   176  		if err != nil {
   177  			t.Errorf("Failed to read target mountpaths: %v\n", err)
   178  			break
   179  		}
   180  		if len(newMpaths.Disabled) == disabledLen {
   181  			break
   182  		}
   183  		time.Sleep(time.Millisecond * 100)
   184  	}
   185  	detectTime := time.Since(detectStart)
   186  	tlog.Logf("passed %v\n", detectTime)
   187  
   188  	if len(newMpaths.Disabled) == disabledLen && len(newMpaths.Available) == availLen {
   189  		tlog.Logf("Check is successful in %v\n", detectTime)
   190  		return true
   191  	}
   192  
   193  	if !failIfDiffer {
   194  		return false
   195  	}
   196  
   197  	tlog.Logf("Current mpath list: %v\n", newMpaths)
   198  	if len(newMpaths.Disabled) != disabledLen {
   199  		t.Errorf("Disabled mpath count mismatch, old count: %v, new list: %v",
   200  			disabledLen, newMpaths.Disabled)
   201  	} else if len(newMpaths.Available) != availLen {
   202  		t.Errorf("Available mpath count mismatch, old count: %v, new list: %v",
   203  			availLen, newMpaths.Available)
   204  	}
   205  	return false
   206  }
   207  
   208  // Simulating mountpath death requested.
   209  // It is the easiest way to simulate: stop putting data and
   210  // replace the mountpath with regular file. If we do not stop
   211  // putting objects it recreates the mountpath and does not fail
   212  func breakMountpath(t *testing.T, mpath, suffix string) {
   213  	os.Rename(mpath, mpath+suffix)
   214  	f, err := os.OpenFile(mpath, os.O_CREATE|os.O_WRONLY, cos.PermRWR)
   215  	if err != nil {
   216  		t.Errorf("Failed to create file: %v", err)
   217  	}
   218  	f.Close()
   219  }
   220  
   221  func repairMountpath(t *testing.T, target *meta.Snode, mpath string, availLen, disabledLen int, suffix string) {
   222  	var (
   223  		err        error
   224  		baseParams = tools.BaseAPIParams()
   225  	)
   226  
   227  	// "broken" mpath does no exist, nothing to restore
   228  	if err := cos.Stat(mpath + suffix); err != nil {
   229  		return
   230  	}
   231  	// cleanup
   232  	// restore original mountpath
   233  	os.Remove(mpath)
   234  	cos.Rename(mpath+suffix, mpath)
   235  
   236  	// ask fschecker to check all mountpath - it should make disabled
   237  	// mountpath back to available list
   238  	api.EnableMountpath(baseParams, target, mpath)
   239  	tlog.Logln("Recheck mountpaths")
   240  	detectStart := time.Now()
   241  	detectLimit := time.Now().Add(fshcDetectTimeMax)
   242  	var mpaths *apc.MountpathList
   243  	// Wait for fsckeeper detects that the mountpath is accessible now
   244  	for detectLimit.After(time.Now()) {
   245  		mpaths, err = api.GetMountpaths(baseParams, target)
   246  		if err != nil {
   247  			t.Errorf("Failed to read target mountpaths: %v\n", err)
   248  			break
   249  		}
   250  		if len(mpaths.Disabled) == disabledLen && len(mpaths.Available) == availLen {
   251  			break
   252  		}
   253  		time.Sleep(time.Second)
   254  	}
   255  
   256  	// final test checks - available and disabled lists must equal list
   257  	// before starting the test
   258  	if len(mpaths.Disabled) != disabledLen {
   259  		t.Errorf("Failed mountpath is still disabled in %v\nExpected disabled count: %d\nNew list:%v\n",
   260  			time.Since(detectStart), disabledLen, mpaths.Disabled)
   261  	} else if len(mpaths.Available) != availLen {
   262  		t.Errorf("Failed mountpath is not back in %v.\nExpected available count: %d\nNew list:%v\n",
   263  			time.Since(detectStart), availLen, mpaths.Available)
   264  	}
   265  }
   266  
   267  func runAsyncJob(t *testing.T, bck cmn.Bck, wg *sync.WaitGroup, op, mpath string, filelist []string, chfail,
   268  	chstop chan struct{}, suffix string) {
   269  	defer wg.Done()
   270  
   271  	const fileSize = 64 * cos.KiB
   272  	var (
   273  		proxyURL   = tools.RandomProxyURL()
   274  		baseParams = tools.BaseAPIParams(proxyURL)
   275  	)
   276  
   277  	tlog.Logf("Testing mpath fail detection on %s\n", op)
   278  	stopTime := time.Now().Add(fshcRunTimeMax)
   279  
   280  	p, err := api.HeadBucket(baseParams, bck, true /* don't add */)
   281  	tassert.CheckFatal(t, err)
   282  
   283  	for stopTime.After(time.Now()) {
   284  		errCh := make(chan error, len(filelist))
   285  		objsPutCh := make(chan string, len(filelist))
   286  
   287  		for _, fname := range filelist {
   288  			select {
   289  			case <-chfail:
   290  				breakMountpath(t, mpath, suffix)
   291  			case <-chstop:
   292  				return
   293  			default:
   294  				// do nothing and just start the next loop
   295  			}
   296  
   297  			switch op {
   298  			case "PUT":
   299  				r, _ := readers.NewRand(fileSize, p.Cksum.Type)
   300  				api.PutObject(&api.PutArgs{
   301  					BaseParams: baseParams,
   302  					Bck:        bck,
   303  					ObjName:    path.Join(fshcDir, fname),
   304  					Reader:     r,
   305  					Size:       fileSize,
   306  				})
   307  			case "GET":
   308  				api.GetObject(baseParams, bck, path.Join(fshcDir, fname), nil)
   309  				time.Sleep(time.Millisecond * 10)
   310  			default:
   311  				t.Errorf("Invalid operation: %s", op)
   312  			}
   313  		}
   314  
   315  		close(errCh)
   316  		close(objsPutCh)
   317  	}
   318  }
   319  
   320  func TestFSCheckerDetectionEnabled(t *testing.T) {
   321  	// TODO -- FIXME:
   322  	// revise all fs-checker tests that manipulate mountpaths, make sure
   323  	// those (mountpaths) are always getting restored correctly when (and if) a test fails -
   324  	// then remove the "skipping" - here and elsewhere
   325  	if true {
   326  		t.Skipf("skipping %s", t.Name())
   327  	}
   328  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   329  
   330  	var (
   331  		md     = newCheckerMD(t)
   332  		suffix = "-" + trand.String(5)
   333  	)
   334  
   335  	if md.origAvail == 0 {
   336  		t.Fatal("No available mountpaths found")
   337  	}
   338  
   339  	tools.CreateBucket(t, md.proxyURL, md.bck, nil, true /*cleanup*/)
   340  	selectedTarget, selectedMpath, selectedMpathList := md.randomTargetMpath()
   341  	tlog.Logf("mountpath %s of %s is selected for the test\n", selectedMpath, selectedTarget.StringEx())
   342  	defer func() {
   343  		if err := api.DetachMountpath(md.baseParams, selectedTarget, selectedMpath, true /*dont-resil*/); err != nil {
   344  			t.Logf("Failed to remove mpath %s of %s: %v", selectedMpath, selectedTarget.StringEx(), err)
   345  		}
   346  		if err := api.AttachMountpath(md.baseParams, selectedTarget, selectedMpath); err != nil {
   347  			t.Logf("Failed to add mpath %s of %s: %v", selectedMpath, selectedTarget.StringEx(), err)
   348  		}
   349  
   350  		tools.WaitForResilvering(t, md.baseParams, nil)
   351  
   352  		md.ensureNumMountpaths(selectedTarget, md.allMps[selectedTarget.ID()])
   353  	}()
   354  
   355  	// generate some filenames to PUT to them in a loop
   356  	generateRandomNames(md.numObjs)
   357  
   358  	// Checking detection on object PUT
   359  	md.runTestAsync(http.MethodPut, selectedTarget, selectedMpath, selectedMpathList, suffix)
   360  	// Checking detection on object GET
   361  	md.runTestAsync(http.MethodGet, selectedTarget, selectedMpath, selectedMpathList, suffix)
   362  
   363  	// Checking that reading "bad" objects does not disable mpath if the mpath is OK
   364  	tlog.Logf("Reading non-existing objects: read is expected to fail but mountpath must be available\n")
   365  	for n := 1; n < 10; n++ {
   366  		objName := fmt.Sprintf("%s/o%d", fshcDir, n)
   367  		if _, err := api.GetObject(md.baseParams, md.bck, objName, nil); err == nil {
   368  			t.Error("Should not be able to GET non-existing objects")
   369  		}
   370  	}
   371  	if detected := waitForMountpathChanges(t, selectedTarget, len(selectedMpathList.Available), len(selectedMpathList.Disabled), false); !detected {
   372  		t.Error("GETting non-existing objects should not disable mountpath")
   373  		repairMountpath(t, selectedTarget, selectedMpath, len(selectedMpathList.Available), len(selectedMpathList.Disabled), suffix)
   374  	}
   375  }
   376  
   377  func TestFSCheckerDetectionDisabled(t *testing.T) {
   378  	if true {
   379  		t.Skipf("skipping %s", t.Name())
   380  	}
   381  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   382  
   383  	var (
   384  		md     = newCheckerMD(t)
   385  		suffix = "-" + trand.String(5)
   386  	)
   387  
   388  	if md.origAvail == 0 {
   389  		t.Fatal("No available mountpaths found")
   390  	}
   391  
   392  	tlog.Logf("*** Testing with disabled FSHC***\n")
   393  	tools.SetClusterConfig(t, cos.StrKVs{"fshc.enabled": "false"})
   394  	defer tools.SetClusterConfig(t, cos.StrKVs{"fshc.enabled": "true"})
   395  
   396  	selectedTarget, selectedMpath, selectedMap := md.randomTargetMpath()
   397  	tlog.Logf("mountpath %s of %s is selected for the test\n", selectedMpath, selectedTarget.StringEx())
   398  	tools.CreateBucket(t, md.proxyURL, md.bck, nil, true /*cleanup*/)
   399  	defer func() {
   400  		if err := api.DetachMountpath(md.baseParams, selectedTarget, selectedMpath, true /*dont-resil*/); err != nil {
   401  			t.Logf("Failed to remove mpath %s of %s: %v", selectedMpath, selectedTarget.StringEx(), err)
   402  		}
   403  		if err := api.AttachMountpath(md.baseParams, selectedTarget, selectedMpath); err != nil {
   404  			t.Logf("Failed to add mpath %s of %s: %v", selectedMpath, selectedTarget.StringEx(), err)
   405  		}
   406  
   407  		tools.WaitForResilvering(t, md.baseParams, nil)
   408  
   409  		md.ensureNumMountpaths(selectedTarget, md.allMps[selectedTarget.ID()])
   410  	}()
   411  
   412  	// generate a short list of file to run the test (to avoid flooding the log with false errors)
   413  	objList := make([]string, 0, 5)
   414  	for n := range 5 {
   415  		objName := fmt.Sprintf("obj-fshc-%d", n)
   416  		objList = append(objList, objName)
   417  	}
   418  
   419  	// Checking detection on object PUT
   420  	md.runTestSync(http.MethodPut, selectedTarget, selectedMpath, selectedMap, objList, suffix)
   421  	// Checking detection on object GET
   422  	md.runTestSync(http.MethodGet, selectedTarget, selectedMpath, selectedMap, objList, suffix)
   423  }
   424  
   425  func TestFSCheckerEnablingMountpath(t *testing.T) {
   426  	if true {
   427  		t.Skipf("skipping %s", t.Name())
   428  	}
   429  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   430  	var (
   431  		proxyURL   = tools.RandomProxyURL()
   432  		baseParams = tools.BaseAPIParams(proxyURL)
   433  		smap       = tools.GetClusterMap(t, proxyURL)
   434  		mpList     = make(meta.NodeMap, 10)
   435  		origAvail  = 0
   436  	)
   437  
   438  	for targetID, tsi := range smap.Tmap {
   439  		tlog.Logf("Target: %s\n", targetID)
   440  		lst, err := api.GetMountpaths(baseParams, tsi)
   441  		tassert.CheckFatal(t, err)
   442  		tlog.Logf("    Mountpaths: %v\n", lst)
   443  
   444  		for _, mpath := range lst.Available {
   445  			mpList[mpath] = tsi
   446  		}
   447  
   448  		origAvail += len(lst.Available)
   449  	}
   450  
   451  	if origAvail == 0 {
   452  		t.Fatal("No available mountpaths found")
   453  	}
   454  
   455  	// select random target and mountpath
   456  	var (
   457  		selectedTarget *meta.Snode
   458  		selectedMpath  string
   459  	)
   460  	for m, t := range mpList {
   461  		selectedTarget, selectedMpath = t, m
   462  		break
   463  	}
   464  
   465  	origMpl, err := api.GetMountpaths(baseParams, selectedTarget)
   466  	tassert.CheckFatal(t, err)
   467  
   468  	err = api.EnableMountpath(baseParams, selectedTarget, selectedMpath)
   469  	if err != nil {
   470  		t.Errorf("Enabling available mountpath should return success, got: %v", err)
   471  	}
   472  
   473  	err = api.EnableMountpath(baseParams, selectedTarget, selectedMpath+"some_text")
   474  	if err == nil {
   475  		t.Errorf("Enabling non-existing mountpath should return error")
   476  	} else {
   477  		status := api.HTTPStatus(err)
   478  		if status != http.StatusNotFound {
   479  			t.Errorf("Expected status %d, got %d, %v", http.StatusNotFound, status, err)
   480  		}
   481  	}
   482  	tools.WaitForResilvering(t, baseParams, selectedTarget)
   483  
   484  	ensureNumMountpaths(t, selectedTarget, origMpl)
   485  }
   486  
   487  func TestFSCheckerTargetDisableAllMountpaths(t *testing.T) {
   488  	if true {
   489  		t.Skipf("skipping %s", t.Name())
   490  	}
   491  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   492  	var (
   493  		target *meta.Snode
   494  
   495  		proxyURL   = tools.RandomProxyURL()
   496  		baseParams = tools.BaseAPIParams()
   497  		smap       = tools.GetClusterMap(t, proxyURL)
   498  		proxyCnt   = smap.CountActivePs()
   499  		targetCnt  = smap.CountActiveTs()
   500  	)
   501  
   502  	if targetCnt < 2 {
   503  		t.Skip("The number of targets must be at least 2")
   504  	}
   505  
   506  	target, _ = smap.GetRandTarget()
   507  	oldMpaths, err := api.GetMountpaths(baseParams, target)
   508  	tassert.CheckFatal(t, err)
   509  	if len(oldMpaths.Available) == 0 {
   510  		t.Fatalf("Target %s does not have mountpaths", target)
   511  	}
   512  
   513  	tlog.Logf("Removing all mountpaths from target: %s\n", target.StringEx())
   514  	for _, mpath := range oldMpaths.Available {
   515  		err = api.DisableMountpath(baseParams, target, mpath, true /*dont-resil*/)
   516  		tassert.CheckFatal(t, err)
   517  	}
   518  
   519  	smap, err = tools.WaitForClusterState(proxyURL, "all mountpaths disabled", smap.Version, proxyCnt, targetCnt-1)
   520  	tassert.CheckFatal(t, err)
   521  	tlog.Logf("Wait for rebalance (triggered by %s leaving the cluster after having lost all mountpaths)\n",
   522  		target.StringEx())
   523  	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
   524  	_, _ = api.WaitForXactionIC(baseParams, &args)
   525  
   526  	tlog.Logf("Restoring target %s mountpaths\n", target.ID())
   527  	for _, mpath := range oldMpaths.Available {
   528  		err = api.EnableMountpath(baseParams, target, mpath)
   529  		tassert.CheckFatal(t, err)
   530  	}
   531  
   532  	_, err = tools.WaitForClusterState(proxyURL, "all mountpaths enabled", smap.Version, proxyCnt, targetCnt)
   533  	tassert.CheckFatal(t, err)
   534  
   535  	tlog.Logf("Wait for rebalance (when target %s that has previously lost all mountpaths joins back)\n", target.StringEx())
   536  	args = xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
   537  	_, _ = api.WaitForXactionIC(baseParams, &args)
   538  
   539  	tools.WaitForResilvering(t, baseParams, nil)
   540  
   541  	ensureNumMountpaths(t, target, oldMpaths)
   542  }
   543  
   544  func TestFSAddMountpathRestartNode(t *testing.T) {
   545  	if true {
   546  		t.Skipf("skipping %s", t.Name())
   547  	}
   548  	var (
   549  		target *meta.Snode
   550  
   551  		proxyURL   = tools.RandomProxyURL()
   552  		baseParams = tools.BaseAPIParams()
   553  		smap       = tools.GetClusterMap(t, proxyURL)
   554  		proxyCnt   = smap.CountProxies()
   555  		targetCnt  = smap.CountActiveTs()
   556  		tmpMpath   = "/tmp/testmp"
   557  	)
   558  	if targetCnt < 2 {
   559  		t.Skip("The number of targets must be at least 2")
   560  	}
   561  	target, _ = smap.GetRandTarget()
   562  	oldMpaths, err := api.GetMountpaths(baseParams, target)
   563  	tassert.CheckFatal(t, err)
   564  	numMpaths := len(oldMpaths.Available)
   565  	tassert.Fatalf(t, numMpaths != 0, "target %s doesn't have mountpaths", target.StringEx())
   566  
   567  	cos.CreateDir(tmpMpath)
   568  	tlog.Logf("Adding mountpath to %s\n", target.StringEx())
   569  	err = api.AttachMountpath(baseParams, target, tmpMpath)
   570  	tassert.CheckFatal(t, err)
   571  
   572  	tools.WaitForResilvering(t, baseParams, target)
   573  
   574  	t.Cleanup(func() {
   575  		api.DetachMountpath(baseParams, target, tmpMpath, true /*dont-resil*/)
   576  		time.Sleep(2 * time.Second)
   577  		os.Remove(tmpMpath)
   578  
   579  		ensureNumMountpaths(t, target, oldMpaths)
   580  	})
   581  
   582  	newMpaths, err := api.GetMountpaths(baseParams, target)
   583  	tassert.CheckFatal(t, err)
   584  
   585  	tassert.Fatalf(t, numMpaths+1 == len(newMpaths.Available),
   586  		"should add new mountpath - available %d!=%d", numMpaths+1, len(newMpaths.Available))
   587  
   588  	// Kill and restore target
   589  	tlog.Logf("Killing %s\n", target.StringEx())
   590  	tcmd, err := tools.KillNode(target)
   591  	tassert.CheckFatal(t, err)
   592  	smap, err = tools.WaitForClusterState(proxyURL, "target removed", smap.Version, proxyCnt, targetCnt-1)
   593  
   594  	tassert.CheckError(t, err)
   595  	tools.RestoreNode(tcmd, false, "target")
   596  	smap, err = tools.WaitForClusterState(smap.Primary.URL(cmn.NetPublic), "target restored", smap.Version,
   597  		proxyCnt, targetCnt)
   598  	tassert.CheckFatal(t, err)
   599  	if _, ok := smap.Tmap[target.ID()]; !ok {
   600  		t.Fatalf("Removed target didn't rejoin")
   601  	}
   602  	tlog.Logf("Wait for rebalance\n")
   603  	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
   604  	_, _ = api.WaitForXactionIC(baseParams, &args)
   605  
   606  	// Check if the node has newly added mountpath
   607  	newMpaths, err = api.GetMountpaths(baseParams, target)
   608  	tassert.CheckFatal(t, err)
   609  	tassert.Fatalf(t, numMpaths+1 == len(newMpaths.Available),
   610  		"should include newly added mountpath after restore - available %d!=%d", numMpaths+1, len(newMpaths.Available))
   611  }
   612  
// TestFSDisableAllExceptOneMountpathRestartNode disables all but one mountpath
// of a target, kills and restores the target, and checks that the disabled
// mountpaths are still disabled after the restart. It then re-enables them and
// checks that the target is back to its original mountpath counts.
func TestFSDisableAllExceptOneMountpathRestartNode(t *testing.T) {
	if true {
		t.Skipf("skipping %s", t.Name())
	}
	tools.CheckSkip(t, &tools.SkipTestArgs{
		Long:               true,
		MinMountpaths:      3,
		MinTargets:         2,
		RequiredDeployment: tools.ClusterTypeLocal,
	})
	var (
		target *meta.Snode

		smap       = tools.GetClusterMap(t, tools.RandomProxyURL())
		baseParams = tools.BaseAPIParams()
		proxyURL   = smap.Primary.URL(cmn.NetPublic)
		proxyCnt   = smap.CountProxies()
		targetCnt  = smap.CountActiveTs()
		enabled    bool // set once the test body has re-enabled the mountpaths itself
	)
	// pick the first target the map iteration yields
	for _, tsi := range smap.Tmap {
		target = tsi
		break
	}

	oldMpaths, err := api.GetMountpaths(baseParams, target)
	tassert.CheckFatal(t, err)
	mpathCnt := len(oldMpaths.Available)
	tlog.Logf("Target %s has %d mountpaths\n", target.ID(), mpathCnt)

	// Disable, temporarily, all mountpaths except 1.
	// (the last entry of the available list is the one left enabled)
	mpaths := oldMpaths.Available[:mpathCnt-1]
	for _, mpath := range mpaths {
		tlog.Logf("Disable mountpath %q at %s\n", mpath, target.StringEx())
		err = api.DisableMountpath(baseParams, target, mpath, false /*dont-resil*/)
		tassert.CheckFatal(t, err)
	}
	tools.WaitForResilvering(t, baseParams, target)

	// Safety net: re-enable the mountpaths when the test aborts before it
	// reaches its own re-enable step below.
	t.Cleanup(func() {
		if enabled {
			return
		}
		for _, mpath := range mpaths {
			api.EnableMountpath(baseParams, target, mpath)
		}
		time.Sleep(time.Second)

		tools.WaitForResilvering(t, baseParams, target)

		ensureNumMountpaths(t, target, oldMpaths)
	})

	// Kill and restore target
	tlog.Logf("Killing target %s\n", target.StringEx())
	tcmd, err := tools.KillNode(target)
	tassert.CheckFatal(t, err)
	smap, err = tools.WaitForClusterState(proxyURL, "remove target", smap.Version, proxyCnt, targetCnt-1)
	tassert.CheckFatal(t, err)

	time.Sleep(time.Second)
	err = tools.RestoreNode(tcmd, false, "target")
	tassert.CheckFatal(t, err)
	smap, err = tools.WaitForClusterState(proxyURL, "restore", smap.Version, proxyCnt, targetCnt)
	tassert.CheckFatal(t, err)
	tassert.Fatalf(t, smap.GetTarget(target.ID()) != nil, "removed target didn't rejoin")

	// wait for rebalance; the result of the wait is ignored
	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
	_, _ = api.WaitForXactionIC(baseParams, &args)

	// Check if the mountpaths are disabled after restart.
	newMpaths, err := api.GetMountpaths(baseParams, target)
	tassert.CheckError(t, err)
	tassert.Errorf(
		t, len(newMpaths.Available) == 1,
		"unexpected count of available mountpaths, got: %d, expected: %d",
		len(newMpaths.Available), 1,
	)
	tassert.Errorf(
		t, len(newMpaths.Disabled) == mpathCnt-1,
		"unexpected count of disabled mountpaths, got: %d, expected: %d",
		len(newMpaths.Disabled), mpathCnt-1,
	)

	// Re-enable the mountpaths
	for _, mpath := range mpaths {
		err = api.EnableMountpath(baseParams, target, mpath)
		tassert.CheckFatal(t, err)
	}
	tools.WaitForResilvering(t, baseParams, target)

	// from here on the cleanup above has nothing left to restore
	enabled = true

	newMpaths, err = api.GetMountpaths(baseParams, target)
	tassert.CheckFatal(t, err)
	tassert.Errorf(
		t, len(newMpaths.Available) == mpathCnt,
		"unexpected count of available mountpaths, got: %d, expected: %d",
		len(newMpaths.Available), mpathCnt,
	)
	tassert.Errorf(
		t, len(newMpaths.Disabled) == 0,
		"unexpected count of disabled mountpaths, got: %d, expected: %d",
		len(newMpaths.Disabled), 0,
	)
}