github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/common_test.go (about)

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"math/rand"
    12  	"net/http"
    13  	"path/filepath"
    14  	"strings"
    15  	"sync"
    16  	"testing"
    17  	"time"
    18  
    19  	"github.com/NVIDIA/aistore/api"
    20  	"github.com/NVIDIA/aistore/api/apc"
    21  	"github.com/NVIDIA/aistore/cmn"
    22  	"github.com/NVIDIA/aistore/cmn/atomic"
    23  	"github.com/NVIDIA/aistore/cmn/cos"
    24  	"github.com/NVIDIA/aistore/cmn/debug"
    25  	"github.com/NVIDIA/aistore/cmn/feat"
    26  	"github.com/NVIDIA/aistore/core"
    27  	"github.com/NVIDIA/aistore/core/meta"
    28  	"github.com/NVIDIA/aistore/fs"
    29  	"github.com/NVIDIA/aistore/tools"
    30  	"github.com/NVIDIA/aistore/tools/readers"
    31  	"github.com/NVIDIA/aistore/tools/tassert"
    32  	"github.com/NVIDIA/aistore/tools/tlog"
    33  	"github.com/NVIDIA/aistore/tools/trand"
    34  	"github.com/NVIDIA/aistore/xact"
    35  	jsoniter "github.com/json-iterator/go"
    36  )
    37  
    38  // more tools
    39  
// rebalanceObjectDistributionTestCoef is the minimum fraction of the ideal
// per-target share (num objects / num targets) that each target must hold
// for a rebalance to count as evenly distributed (see checkObjectDistribution).
const rebalanceObjectDistributionTestCoef = 0.3

const (
	prefixDir     = "filter" // common object-name prefix used by the prefix tests
	largeFileSize = 4 * cos.MiB

	workerCnt = 10
)

// testMpath is a scratch mountpath used by mountpath add/remove tests.
const testMpath = "/tmp/ais/mountpath"

var (
	cliBck         cmn.Bck              // bucket under test; presumably set from CLI/env during test setup — defined elsewhere
	errObjectFound = errors.New("found") // to interrupt fs.Walk when object found
	fsOnce         sync.Once
)
    56  
// ioContext bundles the state shared by a single integration-test scenario:
// the bucket under test, generated object names, PUT/GET bookkeeping, and a
// snapshot of the cluster topology taken at init time.
type ioContext struct {
	t                   *testing.T
	smap                *meta.Smap    // cluster map captured by saveCluState
	controlCh           chan struct{} // signals "other tasks" to start (see get)
	stopCh              chan struct{} // stops getsUntilStop
	objNames            []string      // names of the objects PUT into bck
	bck                 cmn.Bck
	fileSize            uint64 // object size (exact when fixedSize, approximate otherwise)
	proxyURL            string
	prefix              string // object-name prefix
	otherTasksToTrigger int    // number of controlCh signals sent halfway through gets()
	originalTargetCount int    // active targets at init time
	originalProxyCount  int    // active proxies at init time
	num                 int    // number of objects to PUT
	numGetsEachFile     int    // GETs per object; gets() issues num*numGetsEachFile requests
	getErrIsFatal       bool   // treat GET errors as test failures
	silent              bool   // suppress progress logging
	fixedSize           bool   // PUT objects of exactly fileSize bytes
	deleteRemoteBckObjs bool   // on init: delete remote-bucket objects instead of evicting
	ordered             bool // true - object names make sequence, false - names are random

	numGetErrs atomic.Uint64 // GET error counter (updated concurrently)
	numPutErrs int
}
    81  
// initAndSaveState initializes the test context (see init) and records the
// current cluster topology for later consistency checks.
func (m *ioContext) initAndSaveState(cleanup bool) {
	m.init(cleanup)
	m.saveCluState(m.proxyURL)
}
    86  
    87  func (m *ioContext) saveCluState(proxyURL string) {
    88  	m.smap = tools.GetClusterMap(m.t, proxyURL)
    89  	m.originalTargetCount = m.smap.CountActiveTs()
    90  	m.originalProxyCount = m.smap.CountActivePs()
    91  	tlog.Logf("targets: %d, proxies: %d\n", m.originalTargetCount, m.originalProxyCount)
    92  }
    93  
    94  func (m *ioContext) waitAndCheckCluState() {
    95  	smap, err := tools.WaitForClusterState(
    96  		m.proxyURL,
    97  		"cluster state",
    98  		m.smap.Version,
    99  		m.originalProxyCount,
   100  		m.originalTargetCount,
   101  	)
   102  	tassert.CheckFatal(m.t, err)
   103  	m.checkCluState(smap)
   104  }
   105  
   106  func (m *ioContext) checkCluState(smap *meta.Smap) {
   107  	proxyCount := smap.CountActivePs()
   108  	targetCount := smap.CountActiveTs()
   109  	if targetCount != m.originalTargetCount ||
   110  		proxyCount != m.originalProxyCount {
   111  		m.t.Errorf(
   112  			"cluster state is not preserved. targets (before: %d, now: %d); proxies: (before: %d, now: %d)",
   113  			targetCount, m.originalTargetCount,
   114  			proxyCount, m.originalProxyCount,
   115  		)
   116  	}
   117  }
   118  
// init fills in defaults for all unset ioContext fields, (re)creates the
// control channels, pre-cleans remote buckets, and optionally registers
// bucket cleanup to run when the test finishes.
func (m *ioContext) init(cleanup bool) {
	m.proxyURL = tools.RandomProxyURL()
	if m.proxyURL == "" {
		// if random selection failed, use RO url
		m.proxyURL = tools.GetPrimaryURL()
	}
	if m.fileSize == 0 {
		m.fileSize = cos.KiB // default object size
	}
	if m.num > 0 {
		m.objNames = make([]string, 0, m.num)
	}
	if m.otherTasksToTrigger > 0 {
		m.controlCh = make(chan struct{}, m.otherTasksToTrigger)
	}
	if m.bck.Name == "" {
		m.bck.Name = trand.String(15) // random bucket name by default
	}
	if m.bck.Provider == "" {
		m.bck.Provider = apc.AIS
	}
	if m.numGetsEachFile == 0 {
		m.numGetsEachFile = 1
	}
	m.stopCh = make(chan struct{})

	// start remote buckets from a clean slate: either delete the objects
	// outright or evict whatever is cached in the cluster
	if m.bck.IsRemote() {
		if m.deleteRemoteBckObjs {
			m.del(-1 /*delete all*/, 0 /* lsmsg.Flags */)
		} else {
			tools.EvictRemoteBucket(m.t, m.proxyURL, m.bck) // evict from AIStore
		}
	}

	if cleanup {
		// cleanup m.bck upon exit from the test
		m.t.Cleanup(m._cleanup)
	}
}
   158  
   159  func (m *ioContext) _cleanup() {
   160  	m.del()
   161  	if m.bck.IsRemote() {
   162  		// Ensure all local objects are removed.
   163  		tools.EvictRemoteBucket(m.t, m.proxyURL, m.bck)
   164  	}
   165  }
   166  
   167  func (m *ioContext) expectTargets(n int) {
   168  	if m.originalTargetCount < n {
   169  		m.t.Skipf("Must have %d or more targets in the cluster, have only %d", n, m.originalTargetCount)
   170  	}
   171  }
   172  
   173  func (m *ioContext) expectProxies(n int) {
   174  	if m.originalProxyCount < n {
   175  		m.t.Skipf("Must have %d or more proxies in the cluster, have only %d", n, m.originalProxyCount)
   176  	}
   177  }
   178  
// checkObjectDistribution verifies that the bucket's objects ended up spread
// reasonably evenly across targets: every target must hold at least one
// object, and at least rebalanceObjectDistributionTestCoef of the ideal
// per-target share (m.num / originalTargetCount).
func (m *ioContext) checkObjectDistribution(t *testing.T) {
	var (
		requiredCount     = int64(rebalanceObjectDistributionTestCoef * (float64(m.num) / float64(m.originalTargetCount)))
		targetObjectCount = make(map[string]int64)
	)
	tlog.Logf("Checking if each target has a required number of object in bucket %s...\n", m.bck)
	baseParams := tools.BaseAPIParams(m.proxyURL)
	// list objects with their locations and count how many landed on each target
	lst, err := api.ListObjects(baseParams, m.bck, &apc.LsoMsg{Props: apc.GetPropsLocation}, api.ListArgs{})
	tassert.CheckFatal(t, err)
	for _, obj := range lst.Entries {
		tname, _ := core.ParseObjLoc(obj.Location)
		tid := meta.N2ID(tname)
		targetObjectCount[tid]++
	}
	// every target must appear in the per-target histogram
	if len(targetObjectCount) != m.originalTargetCount {
		t.Fatalf("Rebalance error, %d/%d targets received no objects from bucket %s\n",
			m.originalTargetCount-len(targetObjectCount), m.originalTargetCount, m.bck)
	}
	for targetURL, objCount := range targetObjectCount {
		if objCount < requiredCount {
			t.Fatalf("Rebalance error, target %s didn't receive required number of objects\n", targetURL)
		}
	}
}
   203  
// puts PUTs m.num random objects into m.bck, recording the generated names in
// m.objNames and the number of failed PUTs in m.numPutErrs. Non-ais buckets
// are delegated to remotePuts. The optional variadic argument (ignoreErrs[0])
// makes individual PUT errors non-fatal.
func (m *ioContext) puts(ignoreErrs ...bool) {
	if !m.bck.IsAIS() {
		m.remotePuts(false /*evict*/)
		return
	}
	baseParams := tools.BaseAPIParams(m.proxyURL)
	// HEAD the bucket to obtain its configured checksum type
	p, err := api.HeadBucket(baseParams, m.bck, false /* don't add */)
	tassert.CheckFatal(m.t, err)

	var ignoreErr bool
	if len(ignoreErrs) > 0 {
		ignoreErr = ignoreErrs[0]
	}
	if !m.silent {
		// compose a one-line summary of the upcoming PUTs
		var s, k string
		if m.fixedSize {
			s = fmt.Sprintf(" (size %d)", m.fileSize)
		} else if m.fileSize > 0 {
			s = fmt.Sprintf(" (approx. size %d)", m.fileSize)
		}
		if k = m.prefix; k != "" {
			k = "/" + k + "*"
		}
		tlog.Logf("PUT %d objects%s => %s%s\n", m.num, s, m.bck, k)
	}
	m.objNames, m.numPutErrs, err = tools.PutRandObjs(tools.PutObjectsArgs{
		ProxyURL:  m.proxyURL,
		Bck:       m.bck,
		ObjPath:   m.prefix,
		ObjCnt:    m.num,
		ObjSize:   m.fileSize,
		FixedSize: m.fixedSize,
		CksumType: p.Cksum.Type,
		WorkerCnt: 0, // TODO: Should we set something custom?
		IgnoreErr: ignoreErr,
		Ordered:   m.ordered,
	})
	tassert.CheckFatal(m.t, err)
}
   243  
   244  // remotePuts by default empties remote bucket and puts new `m.num` objects
   245  // into the bucket. If `override` parameter is set then the existing objects
   246  // are updated with new ones (new version and checksum).
   247  func (m *ioContext) remotePuts(evict bool, overrides ...bool) {
   248  	var override bool
   249  	if len(overrides) > 0 {
   250  		override = overrides[0]
   251  	}
   252  
   253  	if !override {
   254  		// Cleanup the remote bucket.
   255  		m.del()
   256  		m.objNames = m.objNames[:0]
   257  	}
   258  
   259  	m._remoteFill(m.num, evict, override)
   260  }
   261  
   262  // remoteRefill calculates number of missing objects and refills the bucket.
   263  // It is expected that the number of missing objects is positive meaning that
   264  // some of the objects were removed before calling remoteRefill.
   265  func (m *ioContext) remoteRefill() {
   266  	var (
   267  		baseParams = tools.BaseAPIParams()
   268  		msg        = &apc.LsoMsg{Prefix: m.prefix, Props: apc.GetPropsName}
   269  	)
   270  
   271  	objList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
   272  	tassert.CheckFatal(m.t, err)
   273  
   274  	m.objNames = m.objNames[:0]
   275  	for _, obj := range objList.Entries {
   276  		m.objNames = append(m.objNames, obj.Name)
   277  	}
   278  
   279  	leftToFill := m.num - len(objList.Entries)
   280  	tassert.Errorf(m.t, leftToFill > 0, "leftToFill %d", leftToFill)
   281  
   282  	m._remoteFill(leftToFill, false /*evict*/, false /*override*/)
   283  }
   284  
// _remoteFill PUTs objCnt random objects into the (remote) bucket with
// bounded (20) concurrency. Naming: reuse existing names (override),
// sequential (m.ordered), or random. Newly created names are appended to
// m.objNames; with evict=true the bucket is evicted afterwards.
func (m *ioContext) _remoteFill(objCnt int, evict, override bool) {
	var (
		baseParams = tools.BaseAPIParams()
		errCh      = make(chan error, objCnt)
		wg         = cos.NewLimitedWaitGroup(20, 0)
	)
	if !m.silent {
		tlog.Logf("remote PUT %d objects (size %s) => %s\n", objCnt, cos.ToSizeIEC(int64(m.fileSize), 0), m.bck)
	}
	// HEAD the bucket to obtain its configured checksum type
	p, err := api.HeadBucket(baseParams, m.bck, false /* don't add */)
	tassert.CheckFatal(m.t, err)

	for i := range objCnt {
		r, err := readers.NewRand(int64(m.fileSize), p.Cksum.Type)
		tassert.CheckFatal(m.t, err)

		// pick the object name for this iteration
		var objName string
		if override {
			objName = m.objNames[i]
		} else if m.ordered {
			objName = fmt.Sprintf("%s%d", m.prefix, i)
		} else {
			objName = fmt.Sprintf("%s%s-%d", m.prefix, trand.String(8), i)
		}
		// objName and r are per-iteration variables, safe to capture below
		wg.Add(1)
		go func() {
			defer wg.Done()
			tools.Put(m.proxyURL, m.bck, objName, r, errCh)
		}()
		if !override {
			m.objNames = append(m.objNames, objName)
		}
	}
	wg.Wait()
	tassert.SelectErr(m.t, errCh, "put", true)
	tlog.Logf("remote bucket %s: %d cached objects\n", m.bck, m.num)

	if evict {
		m.evict()
	}
}
   326  
   327  func (m *ioContext) evict() {
   328  	var (
   329  		baseParams = tools.BaseAPIParams()
   330  		msg        = &apc.LsoMsg{Prefix: m.prefix, Props: apc.GetPropsName}
   331  	)
   332  
   333  	objList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
   334  	tassert.CheckFatal(m.t, err)
   335  	if len(objList.Entries) != m.num {
   336  		m.t.Fatalf("list_objects err: %d != %d", len(objList.Entries), m.num)
   337  	}
   338  
   339  	tlog.Logf("evicting remote bucket %s...\n", m.bck)
   340  	err = api.EvictRemoteBucket(baseParams, m.bck, false)
   341  	tassert.CheckFatal(m.t, err)
   342  }
   343  
   344  func (m *ioContext) remotePrefetch(prefetchCnt int) {
   345  	var (
   346  		baseParams = tools.BaseAPIParams()
   347  		msg        = &apc.LsoMsg{Prefix: m.prefix, Props: apc.GetPropsName}
   348  	)
   349  
   350  	objList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
   351  	tassert.CheckFatal(m.t, err)
   352  
   353  	tlog.Logf("remote PREFETCH %d objects...\n", prefetchCnt)
   354  
   355  	wg := &sync.WaitGroup{}
   356  	for idx, obj := range objList.Entries {
   357  		if idx >= prefetchCnt {
   358  			break
   359  		}
   360  
   361  		wg.Add(1)
   362  		go func(obj *cmn.LsoEnt) {
   363  			_, err := api.GetObject(baseParams, m.bck, obj.Name, nil)
   364  			tassert.CheckError(m.t, err)
   365  			wg.Done()
   366  		}(obj)
   367  	}
   368  	wg.Wait()
   369  }
   370  
   371  func isContextDeadline(err error) bool {
   372  	if err == nil {
   373  		return false
   374  	}
   375  	return err == context.DeadlineExceeded || strings.Contains(err.Error(), context.DeadlineExceeded.Error())
   376  }
   377  
// bucket cleanup
// is called in a variety of ways including (post-test) t.Cleanup => _cleanup()
// and (pre-test) via deleteRemoteBckObjs

// maxDelObjErrCount caps the number of tolerated per-object delete errors
// before del() gives up.
const maxDelObjErrCount = 100
   383  
   384  func (m *ioContext) del(opts ...int) {
   385  	var (
   386  		herr        *cmn.ErrHTTP
   387  		toRemoveCnt = -1 // remove all or opts[0]
   388  		baseParams  = tools.BaseAPIParams()
   389  	)
   390  	// checks, params
   391  	exists, err := api.QueryBuckets(baseParams, cmn.QueryBcks(m.bck), apc.FltExists)
   392  	if isContextDeadline(err) {
   393  		if m.bck.IsRemote() {
   394  			time.Sleep(time.Second)
   395  			tlog.Logf("Warning: 2nd attempt to query buckets %q\n", cmn.QueryBcks(m.bck))
   396  			exists, err = api.QueryBuckets(baseParams, cmn.QueryBcks(m.bck), apc.FltExists)
   397  			if isContextDeadline(err) {
   398  				tlog.Logf("Error: failing to query buckets %q: %v - proceeding anyway...\n", cmn.QueryBcks(m.bck), err)
   399  				exists, err = false, nil
   400  			}
   401  		}
   402  	}
   403  	tassert.CheckFatal(m.t, err)
   404  	if !exists {
   405  		return
   406  	}
   407  
   408  	// list
   409  	lsmsg := &apc.LsoMsg{
   410  		Prefix: m.prefix,
   411  		Props:  apc.GetPropsName,
   412  		Flags:  apc.LsBckPresent, // don't lookup unless overridden by the variadic (below)
   413  	}
   414  	if len(opts) > 0 {
   415  		toRemoveCnt = opts[0]
   416  		if len(opts) > 1 {
   417  			lsmsg.Flags = uint64(opts[1]) // do HEAD(remote-bucket)
   418  		}
   419  	}
   420  	if toRemoveCnt < 0 && m.prefix != "" {
   421  		lsmsg.Prefix = "" // all means all
   422  	}
   423  	objList, err := api.ListObjects(baseParams, m.bck, lsmsg, api.ListArgs{})
   424  	if err != nil {
   425  		if errors.As(err, &herr) && herr.Status == http.StatusNotFound {
   426  			return
   427  		}
   428  		emsg := err.Error()
   429  		// ignore client timeout awaiting headers
   430  		if strings.Contains(emsg, "awaiting") && strings.Contains(emsg, "headers") {
   431  			return
   432  		}
   433  	}
   434  	tassert.CheckFatal(m.t, err)
   435  
   436  	// delete
   437  	toRemove := objList.Entries
   438  	if toRemoveCnt >= 0 {
   439  		toRemove = toRemove[:toRemoveCnt]
   440  	}
   441  	l := len(toRemove)
   442  	if l == 0 {
   443  		return
   444  	}
   445  	tlog.Logf("deleting %d object%s from %s\n", l, cos.Plural(l), m.bck.Cname(""))
   446  	var (
   447  		errCnt atomic.Int64
   448  		wg     = cos.NewLimitedWaitGroup(16, l)
   449  	)
   450  	for _, obj := range toRemove {
   451  		if errCnt.Load() > maxDelObjErrCount {
   452  			tassert.CheckFatal(m.t, errors.New("too many errors"))
   453  			break
   454  		}
   455  		wg.Add(1)
   456  		go func(obj *cmn.LsoEnt) {
   457  			m._delOne(baseParams, obj, &errCnt)
   458  			wg.Done()
   459  		}(obj)
   460  	}
   461  	wg.Wait()
   462  }
   463  
// _delOne deletes a single object, tolerating a set of known-benign errors
// and retrying (once, after a short sleep) errors that are typically
// transient for cloud backends. Persistent failures bump errCnt, which
// del() uses to abort after maxDelObjErrCount errors.
func (m *ioContext) _delOne(baseParams api.BaseParams, obj *cmn.LsoEnt, errCnt *atomic.Int64) {
	err := api.DeleteObject(baseParams, m.bck, obj.Name)
	if err == nil {
		return
	}
	//
	// excepting benign (TODO: rid of strings.Contains)
	//
	const sleepRetry = 2 * time.Second
	e := strings.ToLower(err.Error())
	switch {
	case cmn.IsErrObjNought(err):
		// object already gone - not an error for cleanup purposes
		return
	case strings.Contains(e, "server closed idle connection"):
		return // see (unexported) http.exportErrServerClosedIdle in the Go source
	case cos.IsErrConnectionNotAvail(err):
		// connectivity loss: weigh heavily so del() gives up after ~10 of these
		errCnt.Add(maxDelObjErrCount/10 - 1)
	// retry
	case m.bck.IsCloud() && (cos.IsErrConnectionReset(err) || strings.Contains(e, "reset by peer")):
		time.Sleep(sleepRetry)
		err = api.DeleteObject(baseParams, m.bck, obj.Name)
	case m.bck.IsCloud() && strings.Contains(e, "try again"):
		// aws-error[InternalError: We encountered an internal error. Please try again.]
		time.Sleep(sleepRetry)
		err = api.DeleteObject(baseParams, m.bck, obj.Name)
	case m.bck.IsCloud() && apc.ToScheme(m.bck.Provider) == apc.GSScheme &&
		strings.Contains(e, "gateway") && strings.Contains(e, "timeout"):
		// e.g:. "googleapi: Error 504: , gatewayTimeout" (where the gateway is in fact LB)
		time.Sleep(sleepRetry)
		err = api.DeleteObject(baseParams, m.bck, obj.Name)
	}

	// re-check after the (possible) retry above
	if err == nil || cmn.IsErrObjNought(err) {
		return
	}
	errCnt.Inc()
	if m.bck.IsCloud() && errCnt.Load() < 5 {
		// log only the first few cloud failures to avoid flooding the output
		tlog.Logf("Warning: failed to cleanup %s: %v\n", m.bck.Cname(""), err)
	}
	tassert.CheckError(m.t, err)
}
   505  
// get GETs a single object - m.objNames[idx % len(m.objNames)] - optionally
// with checksum validation, counting errors in m.numGetErrs. Logs progress
// every 5000 requests and, halfway through totalGets, releases
// m.otherTasksToTrigger waiters via m.controlCh.
// Invoked concurrently from gets() and getsUntilStop().
func (m *ioContext) get(baseParams api.BaseParams, idx, totalGets int, getArgs *api.GetArgs, validate bool) {
	var (
		err     error
		objName = m.objNames[idx%len(m.objNames)]
	)
	if validate {
		_, err = api.GetObjectWithValidation(baseParams, m.bck, objName, getArgs)
	} else {
		_, err = api.GetObject(baseParams, m.bck, objName, getArgs)
	}
	if err != nil {
		if m.getErrIsFatal {
			// Error (not Fatal): this typically executes on a non-test goroutine
			m.t.Error(err)
		}
		m.numGetErrs.Inc()
	}
	// bail out early once a fatal GET error has been recorded
	if m.getErrIsFatal && m.numGetErrs.Load() > 0 {
		return
	}
	if idx > 0 && idx%5000 == 0 && !m.silent {
		if totalGets > 0 {
			tlog.Logf(" %d/%d GET requests completed...\n", idx, totalGets)
		} else {
			tlog.Logf(" %d GET requests completed...\n", idx)
		}
	}

	// Tell other tasks they can begin to do work in parallel
	if totalGets > 0 && idx == totalGets/2 { // only for `m.gets(nil, false)`
		for range m.otherTasksToTrigger {
			m.controlCh <- struct{}{}
		}
	}
}
   540  
   541  func (m *ioContext) gets(getArgs *api.GetArgs, withValidation bool) {
   542  	var (
   543  		baseParams = tools.BaseAPIParams()
   544  		totalGets  = m.num * m.numGetsEachFile
   545  	)
   546  	if !m.silent {
   547  		if m.numGetsEachFile == 1 {
   548  			tlog.Logf("GET %d objects from %s\n", m.num, m.bck)
   549  		} else {
   550  			tlog.Logf("GET %d objects %d times from %s\n", m.num, m.numGetsEachFile, m.bck)
   551  		}
   552  	}
   553  	wg := cos.NewLimitedWaitGroup(20, 0)
   554  	for i := range totalGets {
   555  		wg.Add(1)
   556  		go func(idx int) {
   557  			m.get(baseParams, idx, totalGets, getArgs, withValidation)
   558  			wg.Done()
   559  		}(i)
   560  	}
   561  	wg.Wait()
   562  }
   563  
// getsUntilStop keeps spawning concurrent GET requests (round-robin over
// m.objNames) until stopGets() signals m.stopCh; throttles itself with a
// short sleep every 5000 requests.
func (m *ioContext) getsUntilStop() {
	var (
		idx        = 0
		baseParams = tools.BaseAPIParams()
		wg         = cos.NewLimitedWaitGroup(20, 0)
	)
	for {
		select {
		case <-m.stopCh:
			// drain in-flight GETs before returning
			wg.Wait()
			return
		default:
			wg.Add(1)
			go func(idx int) {
				defer wg.Done()
				m.get(baseParams, idx, 0, nil /*api.GetArgs*/, false /*validate*/)
			}(idx)
			idx++
			if idx%5000 == 0 {
				time.Sleep(500 * time.Millisecond) // prevents generating too many GET requests
			}
		}
	}
}
   588  
// stopGets signals getsUntilStop to stop issuing new GET requests.
func (m *ioContext) stopGets() {
	m.stopCh <- struct{}{}
}
   592  
// ensureNumCopies waits for the make-n-copies xaction to finish and then
// verifies, via list-objects, that all m.num objects in m.bck have exactly
// expectedCopies replicas (or more, when greaterOk) and a non-empty atime.
func (m *ioContext) ensureNumCopies(baseParams api.BaseParams, expectedCopies int, greaterOk bool) {
	m.t.Helper()
	time.Sleep(time.Second)
	xargs := xact.ArgsMsg{Kind: apc.ActMakeNCopies, Bck: m.bck, Timeout: tools.RebalanceTimeout}
	_, err := api.WaitForXactionIC(baseParams, &xargs)
	tassert.CheckFatal(m.t, err)

	// List Bucket - primarily for the copies
	msg := &apc.LsoMsg{Flags: apc.LsObjCached, Prefix: m.prefix}
	msg.AddProps(apc.GetPropsCopies, apc.GetPropsAtime, apc.GetPropsStatus)
	objectList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
	tassert.CheckFatal(m.t, err)

	// histogram: copies count => number of objects with that many copies
	total := 0
	copiesToNumObjects := make(map[int]int)
	for _, entry := range objectList.Entries {
		if entry.Atime == "" {
			m.t.Errorf("%s: access time is empty", m.bck.Cname(entry.Name))
		}
		total++
		if greaterOk && int(entry.Copies) > expectedCopies {
			// with greaterOk, over-replicated objects count as "expected"
			copiesToNumObjects[expectedCopies]++
		} else {
			copiesToNumObjects[int(entry.Copies)]++
		}
	}
	tlog.Logf("objects (total, copies) = (%d, %v)\n", total, copiesToNumObjects)
	if total != m.num {
		m.t.Errorf("list_objects: expecting %d objects, got %d", m.num, total)
	}

	// all objects must fall into a single histogram bucket...
	if len(copiesToNumObjects) != 1 {
		s, _ := jsoniter.MarshalIndent(copiesToNumObjects, "", " ")
		m.t.Errorf("some objects do not have expected number of copies: %s", s)
	}

	// ...and that bucket must be expectedCopies
	for copies := range copiesToNumObjects {
		if copies != expectedCopies {
			m.t.Errorf("Expecting %d objects all to have %d replicas, got: %d", total, expectedCopies, copies)
		}
	}
}
   635  
   636  func (m *ioContext) ensureNoGetErrors() {
   637  	m.t.Helper()
   638  	if m.numGetErrs.Load() > 0 {
   639  		m.t.Fatalf("Number of get errors is non-zero: %d\n", m.numGetErrs.Load())
   640  	}
   641  }
   642  
// ensureNumMountpaths is a convenience wrapper over the package-level helper
// of the same name, passing the context's testing.T.
func (m *ioContext) ensureNumMountpaths(target *meta.Snode, mpList *apc.MountpathList) {
	ensureNumMountpaths(m.t, target, mpList)
}
   646  
   647  func ensureNumMountpaths(t *testing.T, target *meta.Snode, mpList *apc.MountpathList) {
   648  	t.Helper()
   649  	tname := target.StringEx()
   650  	baseParams := tools.BaseAPIParams()
   651  	mpl, err := api.GetMountpaths(baseParams, target)
   652  	tassert.CheckFatal(t, err)
   653  	for range 6 {
   654  		if len(mpl.Available) == len(mpList.Available) &&
   655  			len(mpl.Disabled) == len(mpList.Disabled) &&
   656  			len(mpl.WaitingDD) == len(mpList.WaitingDD) {
   657  			break
   658  		}
   659  		time.Sleep(time.Second)
   660  	}
   661  	if len(mpl.Available) != len(mpList.Available) {
   662  		t.Errorf("%s ended up with %d mountpaths (dd=%v, disabled=%v), expecting: %d",
   663  			tname, len(mpl.Available), mpl.WaitingDD, mpl.Disabled, len(mpList.Available))
   664  	} else if len(mpl.Disabled) != len(mpList.Disabled) || len(mpl.WaitingDD) != len(mpList.WaitingDD) {
   665  		t.Errorf("%s ended up with (dd=%v, disabled=%v) mountpaths, expecting (%v and %v), respectively",
   666  			tname, mpl.WaitingDD, mpl.Disabled, mpList.WaitingDD, mpList.Disabled)
   667  	}
   668  }
   669  
// ensureNoDisabledMountpaths fails the test if the given target reports any
// disabled or waiting-for-decommission mountpaths at test start.
// NOTE(review): the loop re-checks the same mpList snapshot on every
// iteration - it is never re-fetched - so the retries merely delay the
// failure; confirm whether re-querying the target was the intent here.
func ensureNoDisabledMountpaths(t *testing.T, target *meta.Snode, mpList *apc.MountpathList) {
	t.Helper()
	for range 6 {
		if len(mpList.WaitingDD) == 0 && len(mpList.Disabled) == 0 {
			break
		}
		time.Sleep(time.Second)
	}
	if len(mpList.WaitingDD) != 0 || len(mpList.Disabled) != 0 {
		t.Fatalf("%s: disabled mountpaths at the start of the %q (avail=%d, dd=%v, disabled=%v)\n",
			target.StringEx(), t.Name(), len(mpList.Available), mpList.WaitingDD, mpList.Disabled)
	}
}
   683  
   684  // background: shuffle=on increases the chance to have still-running rebalance
   685  // at the beginning of a new rename, rebalance, copy-bucket and similar
   686  func ensurePrevRebalanceIsFinished(baseParams api.BaseParams, err error) bool {
   687  	herr, ok := err.(*cmn.ErrHTTP)
   688  	if !ok {
   689  		return false
   690  	}
   691  	// TODO: improve checking for cmn.ErrLimitedCoexistence
   692  	if !strings.Contains(herr.Message, "is currently running,") {
   693  		return false
   694  	}
   695  	tlog.Logln("Warning: wait for unfinished rebalance(?)")
   696  	time.Sleep(5 * time.Second)
   697  	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
   698  	_, _ = api.WaitForXactionIC(baseParams, &args)
   699  	time.Sleep(5 * time.Second)
   700  	return true
   701  }
   702  
   703  func (m *ioContext) startMaintenanceNoRebalance() *meta.Snode {
   704  	target, _ := m.smap.GetRandTarget()
   705  	tlog.Logf("Put %s in maintenance\n", target.StringEx())
   706  	args := &apc.ActValRmNode{DaemonID: target.ID(), SkipRebalance: true}
   707  	_, err := api.StartMaintenance(tools.BaseAPIParams(m.proxyURL), args)
   708  	tassert.CheckFatal(m.t, err)
   709  	m.smap, err = tools.WaitForClusterState(
   710  		m.proxyURL,
   711  		"put target in maintenance",
   712  		m.smap.Version,
   713  		m.smap.CountActivePs(),
   714  		m.smap.CountActiveTs()-1,
   715  	)
   716  	tassert.CheckFatal(m.t, err)
   717  	return target
   718  }
   719  
   720  func (m *ioContext) stopMaintenance(target *meta.Snode) string {
   721  	tlog.Logf("Take %s out of maintenance mode...\n", target.StringEx())
   722  	bp := tools.BaseAPIParams(m.proxyURL)
   723  	rebID, err := api.StopMaintenance(bp, &apc.ActValRmNode{DaemonID: target.ID()})
   724  	tassert.CheckFatal(m.t, err)
   725  	if rebID == "" {
   726  		return ""
   727  	}
   728  	tassert.Fatalf(m.t, xact.IsValidRebID(rebID), "invalid reb ID %q", rebID)
   729  
   730  	xargs := xact.ArgsMsg{ID: rebID, Kind: apc.ActRebalance, Timeout: tools.RebalanceStartTimeout}
   731  	api.WaitForXactionNode(bp, &xargs, xactSnapRunning)
   732  
   733  	return rebID
   734  }
   735  
// setNonDefaultBucketProps applies a set of non-default properties to m.bck:
// randomly enabled (0 or 1 copy) mirroring, SHA-512 checksums with read-range
// and warm-GET validation, and a custom AWS cloud region.
func (m *ioContext) setNonDefaultBucketProps() {
	baseParams := tools.BaseAPIParams()
	// 0 => mirroring disabled, 1 => mirroring enabled with a single copy
	copies := int64(rand.Intn(2))
	props := &cmn.BpropsToSet{
		Mirror: &cmn.MirrorConfToSet{
			Enabled: apc.Ptr(copies > 0),
			Copies:  apc.Ptr[int64](copies),
		},
		Cksum: &cmn.CksumConfToSet{
			Type:            apc.Ptr(cos.ChecksumSHA512),
			EnableReadRange: apc.Ptr(true),
			ValidateWarmGet: apc.Ptr(true),
			ValidateColdGet: apc.Ptr(false),
		},
		Extra: &cmn.ExtraToSet{
			// NOTE(review): "us-notheast" is not a real AWS region - possibly a
			// deliberately bogus test value, possibly a typo of "us-northeast";
			// confirm before "fixing"
			AWS: &cmn.ExtraPropsAWSToSet{CloudRegion: apc.Ptr("us-notheast")},
		},
	}
	_, err := api.SetBucketProps(baseParams, m.bck, props)
	tassert.CheckFatal(m.t, err)
}
   757  
// runProviderTests runs the given test callback against a matrix of bucket
// flavors: local ais, remote (cloud), remote ais, ais-with-cloud-backend,
// 3-way mirrored, and EC(2,2). It creates/destroys ais buckets as needed,
// applies per-case skip conditions, and hands the callback a cloned bucket
// with its props resolved via HEAD.
func runProviderTests(t *testing.T, f func(*testing.T, *meta.Bck)) {
	tests := []struct {
		name       string
		bck        cmn.Bck
		backendBck cmn.Bck // when set, becomes the backend of `bck`
		skipArgs   tools.SkipTestArgs
		props      *cmn.BpropsToSet // non-default props to create `bck` with
	}{
		{
			name: "local",
			bck:  cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
		},
		{
			name: "remote",
			bck:  cliBck,
			skipArgs: tools.SkipTestArgs{
				Long:      true,
				RemoteBck: true,
			},
		},
		{
			name: "remote_ais",
			bck: cmn.Bck{
				Name:     trand.String(10),
				Provider: apc.AIS, Ns: cmn.Ns{UUID: tools.RemoteCluster.UUID},
			},
			skipArgs: tools.SkipTestArgs{
				RequiresRemoteCluster: true,
				Long:                  true,
			},
		},
		{
			name:       "backend",
			bck:        cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
			backendBck: cliBck,
			skipArgs: tools.SkipTestArgs{
				Long:      true,
				RemoteBck: true,
			},
		},
		{
			name: "local_3_copies",
			bck:  cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
			props: &cmn.BpropsToSet{
				Mirror: &cmn.MirrorConfToSet{
					Enabled: apc.Ptr(true),
					Copies:  apc.Ptr[int64](3),
				},
			},
			skipArgs: tools.SkipTestArgs{Long: true},
		},
		{
			name: "local_ec_2_2",
			bck:  cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
			props: &cmn.BpropsToSet{
				EC: &cmn.ECConfToSet{
					DataSlices:   apc.Ptr(2),
					ParitySlices: apc.Ptr(2),
					ObjSizeLimit: apc.Ptr[int64](0),
				},
			},
			skipArgs: tools.SkipTestArgs{Long: true},
		},
	}
	for i := range tests {
		test := tests[i]
		t.Run(test.name, func(t *testing.T) {
			// skip conditions apply to the backend bucket when one is configured
			if test.backendBck.IsEmpty() {
				test.skipArgs.Bck = test.bck
			} else {
				test.skipArgs.Bck = test.backendBck
				if !test.backendBck.IsCloud() {
					t.Skipf("backend bucket must be a Cloud bucket (have %q)", test.backendBck)
				}
			}
			tools.CheckSkip(t, &test.skipArgs)

			baseParams := tools.BaseAPIParams()

			// mirroring needs at least Copies mountpaths per target
			if test.props != nil && test.props.Mirror != nil {
				skip := tools.SkipTestArgs{
					MinMountpaths: int(*test.props.Mirror.Copies),
				}
				tools.CheckSkip(t, &skip)
			}
			// EC(d, p) needs at least d+p+1 targets
			if test.props != nil && test.props.EC != nil {
				skip := tools.SkipTestArgs{
					MinTargets: *test.props.EC.DataSlices + *test.props.EC.ParitySlices + 1,
				}
				tools.CheckSkip(t, &skip)
			}

			// ais buckets are created (and destroyed) per test case
			if test.bck.IsAIS() || test.bck.IsRemoteAIS() {
				err := api.CreateBucket(baseParams, test.bck, test.props)
				tassert.CheckFatal(t, err)

				if !test.backendBck.IsEmpty() {
					tools.SetBackendBck(t, baseParams, test.bck, test.backendBck)
				}
				t.Cleanup(func() {
					api.DestroyBucket(baseParams, test.bck)
				})
			}

			// resolve the final bucket props and pass a cloned bucket to the callback
			p, err := api.HeadBucket(baseParams, test.bck, false /* don't add */)
			tassert.CheckFatal(t, err)

			bck := meta.CloneBck(&test.bck)
			bck.Props = p

			f(t, bck)
		})
	}
}
   872  
   873  func numberOfFilesWithPrefix(fileNames []string, namePrefix string) int {
   874  	numFiles := 0
   875  	for _, fileName := range fileNames {
   876  		if strings.HasPrefix(fileName, namePrefix) {
   877  			numFiles++
   878  		}
   879  	}
   880  	return numFiles
   881  }
   882  
// prefixCreateFiles concurrently PUTs objCnt randomly-named objects under
// "prefixDir/", plus a handful of fixed nested names to exercise corner
// cases, and returns the created names (relative to prefixDir).
func prefixCreateFiles(t *testing.T, proxyURL string, bck cmn.Bck, cksumType string) []string {
	const (
		objCnt   = 100
		fileSize = cos.KiB
	)

	// Create specific files to test corner cases.
	var (
		extraNames = []string{"dir/obj01", "dir/obj02", "dir/obj03", "dir1/dir2/obj04", "dir1/dir2/obj05"}
		fileNames  = make([]string, 0, objCnt)
		wg         = &sync.WaitGroup{}
		errCh      = make(chan error, objCnt+len(extraNames))
	)

	// randomly-named objects
	for range objCnt {
		fileName := trand.String(20)
		keyName := fmt.Sprintf("%s/%s", prefixDir, fileName)

		// NOTE: Since this test is to test prefix fetch, the reader type is ignored, always use rand reader.
		r, err := readers.NewRand(fileSize, cksumType)
		if err != nil {
			t.Fatal(err)
		}

		// keyName and r are per-iteration variables, safe to capture below
		wg.Add(1)
		go func() {
			defer wg.Done()
			tools.Put(proxyURL, bck, keyName, r, errCh)
		}()
		fileNames = append(fileNames, fileName)
	}

	// fixed nested names (corner cases)
	for _, fName := range extraNames {
		keyName := fmt.Sprintf("%s/%s", prefixDir, fName)
		// NOTE: Since this test is to test prefix fetch, the reader type is ignored, always use rand reader.
		r, err := readers.NewRand(fileSize, cksumType)
		if err != nil {
			t.Fatal(err)
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			tools.Put(proxyURL, bck, keyName, r, errCh)
		}()
		fileNames = append(fileNames, fName)
	}

	wg.Wait()
	tassert.SelectErr(t, errCh, "put", false)
	return fileNames
}
   935  
   936  func prefixLookupDefault(t *testing.T, proxyURL string, bck cmn.Bck, fileNames []string) {
   937  	tlog.Logf("Looking up for files in alphabetic order\n")
   938  
   939  	var (
   940  		letters    = "abcdefghijklmnopqrstuvwxyz"
   941  		baseParams = tools.BaseAPIParams(proxyURL)
   942  	)
   943  	for i := range len(letters) {
   944  		key := letters[i : i+1]
   945  		lookFor := fmt.Sprintf("%s/%s", prefixDir, key)
   946  		msg := &apc.LsoMsg{Prefix: lookFor}
   947  		objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
   948  		if err != nil {
   949  			t.Errorf("List files with prefix failed, err = %v", err)
   950  			return
   951  		}
   952  
   953  		numFiles := len(objList.Entries)
   954  		realNumFiles := numberOfFilesWithPrefix(fileNames, key)
   955  
   956  		if numFiles == realNumFiles {
   957  			if numFiles != 0 {
   958  				tlog.Logf("Found %v files starting with %q\n", numFiles, key)
   959  			}
   960  		} else {
   961  			t.Errorf("Expected number of files with prefix %q is %v but found %v files", key, realNumFiles, numFiles)
   962  			tlog.Logf("Objects returned:\n")
   963  			for id, oo := range objList.Entries {
   964  				tlog.Logf("    %d[%d]. %s\n", i, id, oo.Name)
   965  			}
   966  		}
   967  	}
   968  }
   969  
   970  func prefixLookupCornerCases(t *testing.T, proxyURL string, bck cmn.Bck, objNames []string) {
   971  	tlog.Logf("Testing corner cases\n")
   972  
   973  	tools.SetClusterConfig(t, cos.StrKVs{"features": feat.DontOptimizeVirtualDir.String()})
   974  	t.Cleanup(func() {
   975  		tools.SetClusterConfig(t, cos.StrKVs{"features": "0"})
   976  	})
   977  
   978  	tests := []struct {
   979  		title  string
   980  		prefix string
   981  	}{
   982  		{"Entire list (dir)", "dir"},
   983  		{"dir/", "dir/"},
   984  		{"dir1", "dir1"},
   985  		{"dir1/", "dir1/"},
   986  	}
   987  	baseParams := tools.BaseAPIParams(proxyURL)
   988  	for idx, test := range tests {
   989  		p := fmt.Sprintf("%s/%s", prefixDir, test.prefix)
   990  
   991  		objCount := 0
   992  		for _, objName := range objNames {
   993  			fullObjName := fmt.Sprintf("%s/%s", prefixDir, objName)
   994  			if strings.HasPrefix(fullObjName, p) {
   995  				objCount++
   996  			}
   997  		}
   998  
   999  		tlog.Logf("%d. Prefix: %s [%s]\n", idx, test.title, p)
  1000  		msg := &apc.LsoMsg{Prefix: p}
  1001  		objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  1002  		if err != nil {
  1003  			t.Errorf("List files with prefix failed, err = %v", err)
  1004  			return
  1005  		}
  1006  
  1007  		if len(objList.Entries) != objCount {
  1008  			t.Errorf("Expected number of objects with prefix %q is %d but found %d",
  1009  				test.prefix, objCount, len(objList.Entries))
  1010  			tlog.Logf("Objects returned:\n")
  1011  			for id, oo := range objList.Entries {
  1012  				tlog.Logf("    %d[%d]. %s\n", idx, id, oo.Name)
  1013  			}
  1014  		}
  1015  	}
  1016  }
  1017  
  1018  func prefixLookup(t *testing.T, proxyURL string, bck cmn.Bck, fileNames []string) {
  1019  	prefixLookupDefault(t, proxyURL, bck, fileNames)
  1020  	prefixLookupCornerCases(t, proxyURL, bck, fileNames)
  1021  }
  1022  
  1023  func prefixCleanup(t *testing.T, proxyURL string, bck cmn.Bck, fileNames []string) {
  1024  	var (
  1025  		wg    = cos.NewLimitedWaitGroup(40, 0)
  1026  		errCh = make(chan error, len(fileNames))
  1027  	)
  1028  
  1029  	for _, fileName := range fileNames {
  1030  		keyName := fmt.Sprintf("%s/%s", prefixDir, fileName)
  1031  		wg.Add(1)
  1032  		go func() {
  1033  			defer wg.Done()
  1034  			tools.Del(proxyURL, bck, keyName, nil, errCh, true)
  1035  		}()
  1036  	}
  1037  	wg.Wait()
  1038  
  1039  	select {
  1040  	case e := <-errCh:
  1041  		tlog.Logf("Failed to DEL: %s\n", e)
  1042  		t.Fail()
  1043  	default:
  1044  	}
  1045  }
  1046  
  1047  func initFS() {
  1048  	proxyURL := tools.GetPrimaryURL()
  1049  	primary, err := tools.GetPrimaryProxy(proxyURL)
  1050  	if err != nil {
  1051  		tlog.Logf("Error: %v", err)
  1052  	}
  1053  	baseParams := tools.BaseAPIParams(proxyURL)
  1054  	cfg, err := api.GetDaemonConfig(baseParams, primary)
  1055  	if err != nil {
  1056  		tlog.Logf("Error: %v", err)
  1057  	}
  1058  
  1059  	config := cmn.GCO.BeginUpdate()
  1060  	config.TestFSP.Count = 1
  1061  	config.Backend = cfg.Backend
  1062  	cmn.GCO.CommitUpdate(config)
  1063  
  1064  	fs.CSM.Reg(fs.ObjectType, &fs.ObjectContentResolver{})
  1065  	fs.CSM.Reg(fs.WorkfileType, &fs.WorkfileContentResolver{})
  1066  	fs.CSM.Reg(fs.ECSliceType, &fs.ECSliceContentResolver{})
  1067  	fs.CSM.Reg(fs.ECMetaType, &fs.ECMetaContentResolver{})
  1068  }
  1069  
  1070  func initMountpaths(t *testing.T, proxyURL string) {
  1071  	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal})
  1072  	fsOnce.Do(initFS)
  1073  	baseParams := tools.BaseAPIParams(proxyURL)
  1074  	fs.TestNew(nil)
  1075  	smap := tools.GetClusterMap(t, proxyURL)
  1076  	for _, target := range smap.Tmap {
  1077  		mpathList, err := api.GetMountpaths(baseParams, target)
  1078  		tassert.CheckFatal(t, err)
  1079  		ensureNoDisabledMountpaths(t, target, mpathList)
  1080  
  1081  		for _, mpath := range mpathList.Available {
  1082  			fs.Add(mpath, target.ID())
  1083  		}
  1084  	}
  1085  }
  1086  
  1087  func findObjOnDisk(bck cmn.Bck, objName string) (fqn string) {
  1088  	fsWalkFunc := func(path string, de fs.DirEntry) error {
  1089  		if fqn != "" {
  1090  			return filepath.SkipDir
  1091  		}
  1092  		if de.IsDir() {
  1093  			return nil
  1094  		}
  1095  
  1096  		ct, err := core.NewCTFromFQN(path, nil)
  1097  		if err != nil {
  1098  			return nil
  1099  		}
  1100  		if ct.ObjectName() == objName {
  1101  			fqn = path
  1102  			return errObjectFound
  1103  		}
  1104  		return nil
  1105  	}
  1106  
  1107  	fs.WalkBck(&fs.WalkBckOpts{
  1108  		WalkOpts: fs.WalkOpts{
  1109  			Bck:      bck,
  1110  			CTs:      []string{fs.ObjectType},
  1111  			Callback: fsWalkFunc,
  1112  			Sorted:   true, // false is unsupported and asserts
  1113  		},
  1114  	})
  1115  	return fqn
  1116  }
  1117  
  1118  func detectNewBucket(oldList, newList cmn.Bcks) (cmn.Bck, error) {
  1119  	for _, nbck := range newList {
  1120  		found := false
  1121  		for _, obck := range oldList {
  1122  			if obck.Name == nbck.Name {
  1123  				found = true
  1124  				break
  1125  			}
  1126  		}
  1127  		if !found {
  1128  			return nbck, nil
  1129  		}
  1130  	}
  1131  	return cmn.Bck{}, fmt.Errorf("new bucket is not found (old: %v, new: %v)", oldList, newList)
  1132  }
  1133  
  1134  // xaction is running
  1135  func xactSnapRunning(snaps xact.MultiSnap) (running, resetProbeFreq bool) {
  1136  	tid, _, err := snaps.RunningTarget("")
  1137  	debug.AssertNoErr(err)
  1138  	running = tid != ""
  1139  	resetProbeFreq = !running // e.g. idle
  1140  	return
  1141  }
  1142  
  1143  // finished = did start in the past (use check above to confirm) and currently not running
  1144  func xactSnapNotRunning(snaps xact.MultiSnap) (bool, bool) {
  1145  	running, resetProbeFreq := xactSnapRunning(snaps)
  1146  	return !running, resetProbeFreq
  1147  }