
     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     7  import (
     8  	"fmt"
     9  	"math/rand"
    10  	"os"
    11  	"path"
    12  	"path/filepath"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"testing"
    17  	"time"
    19  	""
    20  	""
    21  	""
    22  	""
    23  	""
    24  	""
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  	""
    31  	""
    32  	""
    33  	""
    34  	""
    35  	""
    36  	""
    37  )
const (
	// ecTestDir is the object-name prefix prepended to every object created by these tests.
	ecTestDir = "ec-test/"

	ECPutTimeOut = time.Minute * 4 // maximum wait time after PUT to be sure that the object is EC'ed/replicated

	// ecObjLimit is the EC object size limit written into the bucket props by the tests.
	ecObjLimit = 256 * cos.KiB
	// "Small" objects get a random size in [ecMinSmallSize, ecMinSmallSize+ecSmallDelta).
	ecMinSmallSize = 32 * cos.KiB
	ecSmallDelta   = 200 * cos.KiB
	// "Big" objects get a random size in [ecMinBigSize, ecMinBigSize+ecBigDelta).
	ecMinBigSize = ecObjLimit * 2
	ecBigDelta   = 10 * cos.MiB
)
// ecSliceMD is the per-file record kept for every file collected by
// ecGetAllSlices (object/replica, slice, or metafile).
type ecSliceMD struct {
	size int64 // file size as reported by os.Stat
}
// ecOptions bundles the parameters of a single EC test run.
// Use init to fill in the cluster-dependent fields (smap, sema, seed, rnd).
type ecOptions struct {
	// seed used to create rnd; set by init from the current time
	seed int64
	// when non-zero, randObjectSize returns this size instead of a random one
	objSize int64
	// objects of this size or larger are expected to be erasure coded;
	// cmn.ObjSizeToAlwaysReplicate means "never encode, always replicate"
	objSizeLimit int64
	// capacity of sema; the semaphore is created only when this is positive
	concurrency int
	// number of objects a test creates
	objCount int
	// number of data slices; a negative value makes init derive data/parity from the cluster size
	dataCnt int
	// number of parity slices
	parityCnt int
	// minimum number of active targets; init skips the test if the cluster has fewer
	minTargets int
	// fmt pattern that turns an object index into an object name
	pattern string
	// limits the number of concurrently running PUT/check goroutines
	sema *cos.DynSemaphore
	// suppresses per-object log lines
	silent bool
	// random source for object sizes and damage decisions
	rnd *rand.Rand
	// cluster map fetched by init
	smap *meta.Smap
}
    71  // Initializes the EC options, validates the number of targets.
    72  // If initial dataCnt value is negative, it sets the number of data and
    73  // parity slices to maximum possible for the cluster.
    74  //
    75  //nolint:revive,gocritic // modifies-value-receiver on purpose
    76  func (o ecOptions) init(t *testing.T, proxyURL string) *ecOptions {
    77  	o.smap = tools.GetClusterMap(t, proxyURL)
    78  	if cnt := o.smap.CountActiveTs(); cnt < o.minTargets {
    79  		t.Skipf("not enough targets in the cluster: expected at least %d, got %d", o.minTargets, cnt)
    80  	}
    81  	if o.concurrency > 0 {
    82  		o.sema = cos.NewDynSemaphore(o.concurrency)
    83  	}
    84  	o.seed = time.Now().UnixNano()
    85  	o.rnd = rand.New(rand.NewSource(o.seed))
    86  	if o.dataCnt < 0 {
    87  		total := o.smap.CountActiveTs() - 2
    88  		o.parityCnt = total / 2
    89  		o.dataCnt = total - o.parityCnt
    90  	}
    91  	return &o
    92  }
    94  func (o *ecOptions) sliceTotal() int {
    95  	if o.objSizeLimit == cmn.ObjSizeToAlwaysReplicate {
    96  		return 0
    97  	}
    98  	return o.dataCnt + o.parityCnt
    99  }
// ecTest describes one EC configuration exercised by the table-driven tests.
// NOTE: ecTests initializes it positionally, so the field order is significant.
type ecTest struct {
	name         string // human-readable configuration name
	objSizeLimit int64  // EC object size limit (or cmn.ObjSizeToAlwaysReplicate)
	data         int    // number of data slices
	parity       int    // number of parity slices
}
// ecTests lists the EC configurations (name, size limit, data, parity) that the
// table-driven tests iterate over.
// NOTE(review): the first two entries share the name "EC 1:1" and differ only in
// the size limit (always-replicate vs. ecObjLimit).
var ecTests = []ecTest{
	{"EC 1:1", cmn.ObjSizeToAlwaysReplicate, 1, 1},
	{"EC 1:1", ecObjLimit, 1, 1},
	{"EC 1:2", ecObjLimit, 1, 2},
	{"EC 2:2", ecObjLimit, 2, 2},
}
   115  func defaultECBckProps(o *ecOptions) *cmn.BpropsToSet {
   116  	return &cmn.BpropsToSet{
   117  		EC: &cmn.ECConfToSet{
   118  			Enabled:      apc.Ptr(true),
   119  			ObjSizeLimit: apc.Ptr[int64](ecObjLimit),
   120  			DataSlices:   apc.Ptr(o.dataCnt),
   121  			ParitySlices: apc.Ptr(o.parityCnt),
   122  		},
   123  	}
   124  }
   126  // Since all replicas are identical, it is difficult to differentiate main one from others.
   127  // The main replica is the replica that is on the target chosen by proxy using HrwTarget
   128  // algorithm on GET request from a client.
   129  // The function uses heuristics to detect the main one: it should be the oldest
   130  func ecGetAllSlices(t *testing.T, bck cmn.Bck, objName string) (map[string]ecSliceMD, string) {
   131  	var (
   132  		main string
   134  		foundParts = make(map[string]ecSliceMD)
   135  		oldest     = time.Now().Add(time.Hour)
   136  	)
   138  	cb := func(fqn string, de fs.DirEntry) error {
   139  		if de.IsDir() {
   140  			return nil
   141  		}
   142  		ct, err := core.NewCTFromFQN(fqn, nil)
   143  		tassert.CheckFatal(t, err)
   144  		if !strings.Contains(ct.ObjectName(), objName) {
   145  			return nil
   146  		}
   147  		stat, err := os.Stat(fqn)
   148  		if err != nil {
   149  			if os.IsNotExist(err) {
   150  				return nil
   151  			}
   152  			return err
   153  		}
   154  		foundParts[fqn] = ecSliceMD{stat.Size()}
   155  		if ct.ContentType() == fs.ObjectType && oldest.After(stat.ModTime()) {
   156  			main = fqn
   157  			oldest = stat.ModTime()
   158  		}
   159  		return nil
   160  	}
   162  	fs.WalkBck(&fs.WalkBckOpts{
   163  		WalkOpts: fs.WalkOpts{
   164  			Bck:      bck,
   165  			CTs:      []string{fs.ECSliceType, fs.ECMetaType, fs.ObjectType},
   166  			Callback: cb,
   167  			Sorted:   true, // false is unsupported and asserts
   168  		},
   169  	})
   171  	return foundParts, main
   172  }
   174  func ecCheckSlices(t *testing.T, sliceList map[string]ecSliceMD,
   175  	bck cmn.Bck, objPath string, objSize, sliceSize int64, totalCnt int) {
   176  	tassert.Errorf(t, len(sliceList) == totalCnt, "Expected number of objects for %s/%s: %d, found: %d\n%+v",
   177  		bck, objPath, totalCnt, len(sliceList), sliceList)
   179  	if !bck.IsAIS() && !bck.IsRemoteAIS() {
   180  		var ok bool
   181  		config := tools.GetClusterConfig(t)
   182  		_, ok = config.Backend.Providers[bck.Provider]
   183  		tassert.Errorf(t, ok, "invalid provider %s, expected to be in: %v",
   184  			bck.Provider, config.Backend.Providers)
   185  	}
   187  	metaCnt := 0
   188  	for k, md := range sliceList {
   189  		ct, err := core.NewCTFromFQN(k, nil)
   190  		tassert.CheckFatal(t, err)
   192  		if ct.ContentType() == fs.ECMetaType {
   193  			metaCnt++
   194  			tassert.Errorf(t, md.size <= 4*cos.KiB, "Metafile %q size is too big: %d", k, md.size)
   195  		} else if ct.ContentType() == fs.ECSliceType {
   196  			tassert.Errorf(t, md.size == sliceSize, "Slice %q size mismatch: %d, expected %d", k, md.size, sliceSize)
   197  		} else {
   198  			tassert.Errorf(t, ct.ContentType() == fs.ObjectType, "invalid content type %s, expected: %s", ct.ContentType(), fs.ObjectType)
   199  			tassert.Errorf(t, ct.Bck().Name == bck.Name, "invalid bucket name %s, expected: %s", ct.Bck().Name, bck.Name)
   200  			tassert.Errorf(t, ct.ObjectName() == objPath, "invalid object name %s, expected: %s", ct.ObjectName(), objPath)
   201  			tassert.Errorf(t, md.size == objSize, "%q size mismatch: got %d, expected %d", k, md.size, objSize)
   202  		}
   203  	}
   205  	metaCntMust := totalCnt / 2
   206  	tassert.Errorf(t, metaCnt == metaCntMust, "Number of metafiles for %s mismatch: %d, expected %d", objPath, metaCnt, metaCntMust)
   207  }
   209  func waitForECFinishes(t *testing.T, totalCnt int, objSize, sliceSize int64, doEC bool, bck cmn.Bck, objName string) (
   210  	foundParts map[string]ecSliceMD, mainObjPath string) {
   211  	deadLine := time.Now().Add(ECPutTimeOut)
   212  	for time.Now().Before(deadLine) {
   213  		foundParts, mainObjPath = ecGetAllSlices(t, bck, objName)
   214  		if len(foundParts) == totalCnt {
   215  			same := true
   216  			for nm, md := range foundParts {
   217  				ct, err := core.NewCTFromFQN(nm, nil)
   218  				tassert.CheckFatal(t, err)
   219  				if doEC {
   220  					if ct.ContentType() == fs.ECSliceType {
   221  						if md.size != sliceSize {
   222  							same = false
   223  							break
   224  						}
   225  					}
   226  				} else {
   227  					if ct.ContentType() == fs.ObjectType {
   228  						if md.size != objSize {
   229  							same = false
   230  							break
   231  						}
   232  					}
   233  				}
   234  			}
   236  			if same {
   237  				break
   238  			}
   239  		}
   240  		time.Sleep(time.Millisecond * 20)
   241  	}
   243  	return
   244  }
   246  // Generates a random size for object
   247  // Returns:
   248  // - how many files should be generated (including metafiles)
   249  // - generated object size
   250  // - an object slice size (it equals object size for replicated objects)
   251  // - whether to encode(true) or to replicate(false) the object
   252  func randObjectSize(n, every int, o *ecOptions) (
   253  	totalCnt int, objSize, sliceSize int64, doEC bool) {
   254  	if o.objSize != 0 {
   255  		doEC = o.objSizeLimit != cmn.ObjSizeToAlwaysReplicate && o.objSize >= o.objSizeLimit
   256  		objSize = o.objSize
   257  		if doEC {
   258  			totalCnt = 2 + (o.sliceTotal())*2
   259  			sliceSize = ec.SliceSize(objSize, o.dataCnt)
   260  		} else {
   261  			totalCnt = 2 + o.parityCnt*2
   262  			sliceSize = objSize
   263  		}
   264  		return
   265  	}
   267  	// Big object case
   268  	// full object copy+meta: 1+1
   269  	// number of metafiles: parity+slices
   270  	// number of slices: slices+parity
   271  	totalCnt = 2 + (o.sliceTotal())*2
   272  	objSize = int64(ecMinBigSize + o.rnd.Intn(ecBigDelta))
   273  	sliceSize = ec.SliceSize(objSize, o.dataCnt)
   274  	if (n+1)%every == 0 || o.objSizeLimit == cmn.ObjSizeToAlwaysReplicate {
   275  		// Small object case
   276  		// full object copy+meta: 1+1
   277  		// number of metafiles: parity
   278  		// number of slices: parity
   279  		totalCnt = 2 + o.parityCnt*2
   280  		objSize = int64(ecMinSmallSize + o.rnd.Intn(ecSmallDelta))
   281  		sliceSize = objSize
   282  	}
   283  	doEC = objSize >= o.objSizeLimit
   284  	if o.objSizeLimit == cmn.ObjSizeToAlwaysReplicate {
   285  		doEC = false
   286  		totalCnt = 2 + o.parityCnt*2
   287  	}
   288  	return
   289  }
   291  func calculateSlicesCount(slices map[string]ecSliceMD) map[string]int {
   292  	calc := make(map[string]int, len(slices))
   293  	for k := range slices {
   294  		o := filepath.Base(k)
   295  		if n, ok := calc[o]; ok {
   296  			calc[o] = n + 1
   297  		} else {
   298  			calc[o] = 1
   299  		}
   300  	}
   301  	return calc
   302  }
   304  func compareSlicesCount(t *testing.T, orig, found map[string]int) {
   305  	for k, v := range orig {
   306  		if fnd, ok := found[k]; !ok {
   307  			t.Errorf("%s - no files found", k)
   308  		} else if fnd != v {
   309  			t.Errorf("Object %s must have %d files, found %d", k, v, fnd)
   310  		}
   311  	}
   312  	for k, v := range found {
   313  		if _, ok := orig[k]; !ok {
   314  			t.Errorf("%s - should not exist (%d files found)", k, v)
   315  			continue
   316  		}
   317  	}
   318  }
   320  func doECPutsAndCheck(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, o *ecOptions) {
   321  	const (
   322  		smallEvery = 10 // Every N-th object is small
   323  		objPatt    = "obj-%s-%04d"
   324  	)
   326  	wg := &sync.WaitGroup{}
   327  	sizes := make(chan int64, o.objCount)
   329  	for idx := range o.objCount {
   330  		wg.Add(1)
   331  		o.sema.Acquire()
   333  		go func(i int) {
   334  			totalCnt, objSize, sliceSize, doEC := randObjectSize(i, smallEvery, o)
   335  			sizes <- objSize
   336  			objName := fmt.Sprintf(objPatt, bck.Name, i)
   337  			objPath := ecTestDir + objName
   339  			if i%10 == 0 {
   340  				if doEC {
   341  					tlog.Logf("Object %s, size %9d[%9d]\n", objName, objSize, sliceSize)
   342  				} else {
   343  					tlog.Logf("Object %s, size %9d[%9s]\n", objName, objSize, "-")
   344  				}
   345  			}
   347  			r, err := readers.NewRand(objSize, cos.ChecksumNone)
   348  			defer func() {
   349  				r.Close()
   350  				o.sema.Release()
   351  				wg.Done()
   352  			}()
   353  			tassert.CheckFatal(t, err)
   354  			putArgs := api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r}
   355  			_, err = api.PutObject(&putArgs)
   356  			tassert.CheckFatal(t, err)
   358  			foundParts, _ := waitForECFinishes(t, totalCnt, objSize, sliceSize, doEC, bck, objPath)
   359  			mainObjPath := ""
   360  			if len(foundParts) != totalCnt {
   361  				t.Errorf("Expected number of files %s: %d, found: %d\n%+v",
   362  					objName, totalCnt, len(foundParts), foundParts)
   363  				return
   364  			}
   365  			metaCnt, sliceCnt, replCnt := 0, 0, 0
   366  			for k, md := range foundParts {
   367  				ct, err := core.NewCTFromFQN(k, nil)
   368  				tassert.CheckFatal(t, err)
   369  				if ct.ContentType() == fs.ECMetaType {
   370  					metaCnt++
   371  					tassert.Errorf(t, md.size <= 512, "Metafile %q size is too big: %d", k, md.size)
   372  				} else if ct.ContentType() == fs.ECSliceType {
   373  					sliceCnt++
   374  					if md.size != sliceSize && doEC {
   375  						t.Errorf("Slice %q size mismatch: %d, expected %d", k, md.size, sliceSize)
   376  					}
   377  					if md.size != objSize && !doEC {
   378  						t.Errorf("Copy %q size mismatch: %d, expected %d", k, md.size, objSize)
   379  					}
   380  				} else {
   381  					tassert.Errorf(t, ct.ContentType() == fs.ObjectType, "invalid content type %s, expected: %s", ct.ContentType(), fs.ObjectType)
   382  					tassert.Errorf(t, ct.Bck().Provider == bck.Provider, "invalid provider %s, expected: %s", ct.Bck().Provider, apc.AIS)
   383  					tassert.Errorf(t, ct.Bck().Name == bck.Name, "invalid bucket name %s, expected: %s", ct.Bck().Name, bck.Name)
   384  					tassert.Errorf(t, ct.ObjectName() == objPath, "invalid object name %s, expected: %s", ct.ObjectName(), objPath)
   385  					tassert.Errorf(t, md.size == objSize, "%q size mismatch: got %d, expected %d", k, md.size, objSize)
   386  					mainObjPath = k
   387  					replCnt++
   388  				}
   389  			}
   391  			metaCntMust := o.parityCnt + 1
   392  			if doEC {
   393  				metaCntMust = o.sliceTotal() + 1
   394  			}
   395  			tassert.Errorf(t, metaCnt == metaCntMust, "Number of metafiles mismatch: %d, expected %d", metaCnt, o.parityCnt+1)
   396  			if doEC {
   397  				tassert.Errorf(t, sliceCnt == o.sliceTotal(), "Number of chunks mismatch: %d, expected %d", sliceCnt, o.sliceTotal())
   398  			} else {
   399  				tassert.Errorf(t, replCnt == o.parityCnt+1, "Number replicas mismatch: %d, expected %d", replCnt, o.parityCnt)
   400  			}
   402  			if mainObjPath == "" {
   403  				t.Errorf("Full copy is not found")
   404  				return
   405  			}
   407  			tassert.CheckFatal(t, os.Remove(mainObjPath))
   408  			partsAfterRemove, _ := ecGetAllSlices(t, bck, objPath)
   409  			_, ok := partsAfterRemove[mainObjPath]
   410  			if ok || len(partsAfterRemove) >= len(foundParts) {
   411  				t.Errorf("Object is not deleted: %#v", partsAfterRemove)
   412  				return
   413  			}
   415  			_, err = api.GetObject(baseParams, bck, objPath, nil)
   416  			tassert.CheckFatal(t, err)
   418  			if doEC {
   419  				partsAfterRestore, _ := ecGetAllSlices(t, bck, objPath)
   420  				md, ok := partsAfterRestore[mainObjPath]
   421  				if !ok || len(partsAfterRestore) != len(foundParts) {
   422  					t.Errorf("Object is not restored: %#v", partsAfterRestore)
   423  					return
   424  				}
   426  				if md.size != objSize {
   427  					t.Errorf("Object is restored incorrectly, size mismatches: %d, expected %d", md.size, objSize)
   428  					return
   429  				}
   430  			}
   431  		}(idx)
   432  	}
   434  	wg.Wait()
   435  	close(sizes)
   437  	szTotal := int64(0)
   438  	szLen := len(sizes)
   439  	for sz := range sizes {
   440  		szTotal += sz
   441  	}
   442  	if szLen != 0 {
   443  		t.Logf("Average size of the bucket %s: %s\n", bck, cos.ToSizeIEC(szTotal/int64(szLen), 1))
   444  	}
   445  }
   447  func assertBucketSize(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, objCount int) {
   448  	bckObjectsCnt := bucketSize(t, baseParams, bck)
   449  	tassert.Fatalf(t, bckObjectsCnt == objCount, "Invalid number of objects: %d, expected %d", bckObjectsCnt, objCount)
   450  }
   452  func bucketSize(t *testing.T, baseParams api.BaseParams, bck cmn.Bck) int {
   453  	msg := &apc.LsoMsg{Props: "size,status"}
   454  	objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
   455  	tassert.CheckFatal(t, err)
   456  	return len(objList.Entries)
   457  }
   459  func putRandomFile(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, objPath string, size int) {
   460  	r, err := readers.NewRand(int64(size), cos.ChecksumNone)
   461  	tassert.CheckFatal(t, err)
   462  	_, err = api.PutObject(&api.PutArgs{
   463  		BaseParams: baseParams,
   464  		Bck:        bck,
   465  		ObjName:    objPath,
   466  		Reader:     r,
   467  	})
   468  	tassert.CheckFatal(t, err)
   469  }
   471  func newLocalBckWithProps(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, bckProps *cmn.BpropsToSet, o *ecOptions) {
   472  	proxyURL := tools.RandomProxyURL()
   473  	tools.CreateBucket(t, proxyURL, bck, nil, true /*cleanup*/)
   475  	tlog.Logf("Changing EC %d:%d, objLimit [%d] [ seed = %d ], concurrent: %d\n",
   476  		o.dataCnt, o.parityCnt, o.objSizeLimit, o.seed, o.concurrency)
   477  	_, err := api.SetBucketProps(baseParams, bck, bckProps)
   478  	tassert.CheckFatal(t, err)
   479  }
   481  func setBucketECProps(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, bckProps *cmn.BpropsToSet) {
   482  	tlog.Logf("Changing EC %d:%d\n", *bckProps.EC.DataSlices, *bckProps.EC.ParitySlices)
   483  	_, err := api.SetBucketProps(baseParams, bck, bckProps)
   484  	tassert.CheckFatal(t, err)
   485  }
   487  func clearAllECObjects(t *testing.T, bck cmn.Bck, failOnDelErr bool, o *ecOptions) {
   488  	var (
   489  		wg       = sync.WaitGroup{}
   490  		proxyURL = tools.RandomProxyURL()
   491  	)
   493  	tlog.Logln("Deleting objects...")
   494  	wg.Add(o.objCount)
   495  	for idx := range o.objCount {
   496  		go func(i int) {
   497  			defer wg.Done()
   498  			objName := fmt.Sprintf(o.pattern, i)
   499  			objPath := ecTestDir + objName
   500  			err := tools.Del(proxyURL, bck, objPath, nil, nil, true)
   501  			if failOnDelErr {
   502  				tassert.CheckFatal(t, err)
   503  			} else if err != nil {
   504  				t.Log(err.Error())
   505  			}
   507  			deadline := time.Now().Add(time.Second * 10)
   508  			var partsAfterDelete map[string]int64
   509  			for time.Now().Before(deadline) {
   510  				time.Sleep(time.Millisecond * 250)
   511  				partsAfterDelete, _ := ecGetAllSlices(t, bck, objPath)
   512  				if len(partsAfterDelete) == 0 {
   513  					break
   514  				}
   515  			}
   516  			if len(partsAfterDelete) != 0 {
   517  				t.Errorf("Some slices were not cleaned up after DEL: %#v", partsAfterDelete)
   518  			}
   519  		}(idx)
   520  	}
   521  	wg.Wait()
   522  	reqArgs := xact.ArgsMsg{Kind: apc.ActECPut, Bck: bck}
   523  	api.WaitForXactionIdle(tools.BaseAPIParams(proxyURL), &reqArgs)
   524  }
   526  func objectsExist(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, objPatt string, objCount int) {
   527  	wg := &sync.WaitGroup{}
   528  	getOneObj := func(objName string) {
   529  		defer wg.Done()
   530  		objPath := ecTestDir + objName
   531  		_, err := api.GetObject(baseParams, bck, objPath, nil)
   532  		tassert.CheckFatal(t, err)
   533  	}
   535  	tlog.Logln("Reading all objects...")
   536  	wg.Add(objCount)
   537  	for i := range objCount {
   538  		objName := fmt.Sprintf(objPatt, i)
   539  		go getOneObj(objName)
   540  	}
   541  	wg.Wait()
   542  }
   544  // Simulates damaged slice by changing slice's checksum in metadata file
   545  func damageMetadataCksum(t *testing.T, slicePath string) {
   546  	ct, err := core.NewCTFromFQN(slicePath, nil)
   547  	tassert.CheckFatal(t, err)
   548  	metaFQN := ct.Make(fs.ECMetaType)
   549  	md, err := ec.LoadMetadata(metaFQN)
   550  	tassert.CheckFatal(t, err)
   551  	md.CksumValue = "01234"
   552  	err = jsp.Save(metaFQN, md, jsp.Plain(), nil)
   553  	tassert.CheckFatal(t, err)
   554  }
   556  // Short test to make sure that EC options cannot be changed after
   557  // EC is enabled
   558  func TestECChange(t *testing.T) {
   559  	tools.CheckSkip(t, &tools.SkipTestArgs{MinTargets: 3})
   561  	var (
   562  		proxyURL = tools.RandomProxyURL()
   563  		bck      = cmn.Bck{
   564  			Name:     testBucketName + "-ec-change",
   565  			Provider: apc.AIS,
   566  		}
   567  	)
   569  	tools.CreateBucket(t, proxyURL, bck, nil, true /*cleanup*/)
   571  	bucketProps := &cmn.BpropsToSet{
   572  		EC: &cmn.ECConfToSet{
   573  			Enabled:      apc.Ptr(true),
   574  			ObjSizeLimit: apc.Ptr[int64](ecObjLimit),
   575  			DataSlices:   apc.Ptr(1),
   576  			ParitySlices: apc.Ptr(1),
   577  		},
   578  	}
   579  	baseParams := tools.BaseAPIParams(proxyURL)
   581  	tlog.Logln("Resetting bucket properties")
   582  	_, err := api.ResetBucketProps(baseParams, bck)
   583  	tassert.CheckFatal(t, err)
   585  	tlog.Logln("Trying to set too many slices")
   586  	bucketProps.EC.DataSlices = apc.Ptr(25)
   587  	bucketProps.EC.ParitySlices = apc.Ptr(25)
   588  	_, err = api.SetBucketProps(baseParams, bck, bucketProps)
   589  	tassert.Errorf(t, err != nil, "Enabling EC must fail in case of the number of targets fewer than the number of slices")
   591  	tlog.Logln("Enabling EC")
   592  	bucketProps.EC.DataSlices = apc.Ptr(1)
   593  	bucketProps.EC.ParitySlices = apc.Ptr(1)
   594  	_, err = api.SetBucketProps(baseParams, bck, bucketProps)
   595  	tassert.CheckFatal(t, err)
   597  	tlog.Logln("Trying to set EC options to the same values")
   598  	_, err = api.SetBucketProps(baseParams, bck, bucketProps)
   599  	tassert.CheckFatal(t, err)
   601  	tlog.Logln("Trying to disable EC")
   602  	bucketProps.EC.Enabled = apc.Ptr(false)
   603  	_, err = api.SetBucketProps(baseParams, bck, bucketProps)
   604  	tassert.Errorf(t, err == nil, "Disabling EC failed: %v", err)
   606  	tlog.Logln("Trying to re-enable EC")
   607  	bucketProps.EC.Enabled = apc.Ptr(true)
   608  	_, err = api.SetBucketProps(baseParams, bck, bucketProps)
   609  	tassert.Errorf(t, err == nil, "Enabling EC failed: %v", err)
   611  	tlog.Logln("Trying to modify EC options when EC is enabled")
   612  	bucketProps.EC.Enabled = apc.Ptr(true)
   613  	bucketProps.EC.ObjSizeLimit = apc.Ptr[int64](300000)
   614  	_, err = api.SetBucketProps(baseParams, bck, bucketProps)
   615  	tassert.Errorf(t, err != nil, "Modifiying EC properties must fail")
   617  	tlog.Logln("Resetting bucket properties")
   618  	_, err = api.ResetBucketProps(baseParams, bck)
   619  	tassert.Errorf(t, err == nil, "Resetting properties should work")
   620  }
   622  func createECReplicas(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, objName string, o *ecOptions) {
   623  	o.sema.Acquire()
   624  	defer o.sema.Release()
   626  	totalCnt := 2 + o.parityCnt*2
   627  	objSize := int64(ecMinSmallSize + o.rnd.Intn(ecSmallDelta))
   628  	sliceSize := objSize
   630  	objPath := ecTestDir + objName
   632  	tlog.Logf("Creating %s, size %8d\n", objPath, objSize)
   633  	r, err := readers.NewRand(objSize, cos.ChecksumNone)
   634  	tassert.CheckFatal(t, err)
   635  	_, err = api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r})
   636  	tassert.CheckFatal(t, err)
   638  	tlog.Logf("waiting for %s\n", objPath)
   639  	foundParts, mainObjPath := waitForECFinishes(t, totalCnt, objSize, sliceSize, false, bck, objPath)
   641  	ecCheckSlices(t, foundParts, bck, objPath, objSize, sliceSize, totalCnt)
   642  	tassert.Errorf(t, mainObjPath != "", "Full copy is not found")
   643  }
   645  func createECObject(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, objName string, idx int, o *ecOptions) {
   646  	const (
   647  		smallEvery = 7 // Every N-th object is small
   648  	)
   650  	o.sema.Acquire()
   651  	defer o.sema.Release()
   653  	totalCnt, objSize, sliceSize, doEC := randObjectSize(idx, smallEvery, o)
   654  	objPath := ecTestDir + objName
   655  	ecStr := "-"
   656  	if doEC {
   657  		ecStr = "EC"
   658  	}
   660  	tlog.LogfCond(!o.silent, "Creating %s, size %8d [%2s]\n", objPath, objSize, ecStr)
   661  	r, err := readers.NewRand(objSize, cos.ChecksumNone)
   662  	tassert.CheckFatal(t, err)
   663  	_, err = api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r})
   664  	tassert.CheckFatal(t, err)
   666  	tlog.LogfCond(!o.silent, "waiting for %s\n", objPath)
   667  	foundParts, mainObjPath := waitForECFinishes(t, totalCnt, objSize, sliceSize, doEC, bck, objPath)
   669  	ecCheckSlices(t, foundParts, bck, objPath, objSize, sliceSize, totalCnt)
   670  	if mainObjPath == "" {
   671  		t.Errorf("Full copy is not found")
   672  	}
   673  }
// createDamageRestoreECFile runs the full create/damage/restore cycle for one object:
//  1. PUTs an object of a size chosen by randObjectSize and waits until it is
//     encoded or replicated; validates the files created for it.
//  2. Removes the main replica and its metafile from disk. With a probability of
//     sliceDelPct percent (and only when data+parity > 2) it also removes one more
//     slice (encoded objects) or replica (replicated objects) with its metafile.
//  3. GETs the object (retrying once after a second) to trigger the restore.
//  4. For encoded objects in AIS buckets, polls until all files are back and
//     validates them again.
//
// If the on-disk state after step 2 is not as expected, the function logs the
// leftovers and returns without reporting an error.
func createDamageRestoreECFile(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, objName string, idx int, o *ecOptions) {
	const (
		sleepRestoreTime = 5 * time.Second // wait time after GET restores slices
		smallEvery       = 7               // Every N-th object is small
		sliceDelPct      = 50              // %% of objects that have damaged body and a slice
	)

	delSlice := false // delete only main object
	deletedFiles := 1
	if o.dataCnt+o.parityCnt > 2 && o.rnd.Intn(100) < sliceDelPct {
		// delete a random slice, too
		delSlice = true
		deletedFiles = 2
	}

	totalCnt, objSize, sliceSize, doEC := randObjectSize(idx, smallEvery, o)
	objPath := ecTestDir + objName
	ecStr, delStr := "-", "obj"
	if doEC {
		ecStr = "EC"
	}
	if delSlice {
		delStr = "obj+slice"
	}
	tlog.LogfCond(!o.silent, "Creating %s, size %8d [%2s] [%s]\n", objPath, objSize, ecStr, delStr)
	r, err := readers.NewRand(objSize, cos.ChecksumNone)
	tassert.CheckFatal(t, err)
	_, err = api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r})
	tassert.CheckFatal(t, err)

	tlog.LogfCond(!o.silent, "waiting for %s\n", objPath)
	foundParts, mainObjPath := waitForECFinishes(t, totalCnt, objSize, sliceSize, doEC, bck, objPath)

	ecCheckSlices(t, foundParts, bck, objPath, objSize, sliceSize, totalCnt)
	if mainObjPath == "" {
		t.Errorf("Full copy is not found")
		return
	}

	// damage #1: remove the main replica together with its metafile
	tlog.LogfCond(!o.silent, "Damaging %s [removing %s]\n", objPath, mainObjPath)
	tassert.CheckFatal(t, os.Remove(mainObjPath))

	ct, err := core.NewCTFromFQN(mainObjPath, nil)
	tassert.CheckFatal(t, err)
	metafile := ct.Make(fs.ECMetaType)
	tlog.LogfCond(!o.silent, "Damaging %s [removing %s]\n", objPath, metafile)
	tassert.CheckFatal(t, cos.RemoveFile(metafile))
	if delSlice {
		// damage #2: pick the first suitable file from the (randomly ordered) map:
		// a slice for an encoded object, another replica for a replicated one
		sliceToDel := ""
		for k := range foundParts {
			ct, err := core.NewCTFromFQN(k, nil)
			tassert.CheckFatal(t, err)
			if k != mainObjPath && ct.ContentType() == fs.ECSliceType && doEC {
				sliceToDel = k
				break
			} else if k != mainObjPath && ct.ContentType() == fs.ObjectType && !doEC {
				sliceToDel = k
				break
			}
		}
		if sliceToDel == "" {
			t.Errorf("Failed to select random slice for %s", objName)
			return
		}
		tlog.LogfCond(!o.silent, "Removing slice/replica: %s\n", sliceToDel)
		tassert.CheckFatal(t, os.Remove(sliceToDel))

		// the removed slice/replica loses its metafile as well
		ct, err := core.NewCTFromFQN(sliceToDel, nil)
		tassert.CheckFatal(t, err)
		metafile := ct.Make(fs.ECMetaType)
		if doEC {
			tlog.LogfCond(!o.silent, "Removing slice meta %s\n", metafile)
		} else {
			tlog.LogfCond(!o.silent, "Removing replica meta %s\n", metafile)
		}
		tassert.CheckFatal(t, cos.RemoveFile(metafile))
	}

	// each deleted file took its metafile along, hence deletedFiles*2
	partsAfterRemove, _ := ecGetAllSlices(t, bck, objPath)
	_, ok := partsAfterRemove[mainObjPath]
	if ok || len(partsAfterRemove) != len(foundParts)-deletedFiles*2 {
		tlog.Logf("Files are not deleted [%d - %d], leftovers:\n", len(foundParts), len(partsAfterRemove))
		for k := range partsAfterRemove {
			tlog.Logf("     %s\n", k)
		}
		// Not an error as a directory can contain leftovers
		tlog.Logln("Some slices were not deleted")
		return
	}

	tlog.LogfCond(!o.silent, "Restoring %s\n", objPath)
	_, err = api.GetObject(baseParams, bck, objPath, nil)
	if err != nil {
		// a single retry after a short pause
		tlog.Logf("... retrying %s\n", objPath)
		time.Sleep(time.Second)
		_, err = api.GetObject(baseParams, bck, objPath, nil)
	}
	tassert.CheckFatal(t, err)

	// For remote buckets, due to performance reason, GFN is not used and
	// EC is not called - object is reread from the remote bucket instead
	if doEC && bck.IsAIS() {
		// poll until all the files are back, or until the deadline expires
		deadline := time.Now().Add(sleepRestoreTime)
		var partsAfterRestore map[string]ecSliceMD
		for time.Now().Before(deadline) {
			time.Sleep(time.Millisecond * 250)
			partsAfterRestore, _ = ecGetAllSlices(t, bck, objPath)
			if len(partsAfterRestore) == totalCnt {
				break
			}
		}
		ecCheckSlices(t, partsAfterRestore, bck, objPath, objSize, sliceSize, totalCnt)
	}
}
   790  // Simple stress testing EC for remote buckets
   791  func TestECRestoreObjAndSliceRemote(t *testing.T) {
   792  	var (
   793  		bck        = cliBck
   794  		proxyURL   = tools.RandomProxyURL()
   795  		baseParams = tools.BaseAPIParams(proxyURL)
   796  		useDisks   = []bool{false, true}
   797  	)
   799  	o := ecOptions{
   800  		minTargets:   4,
   801  		objCount:     25,
   802  		concurrency:  8,
   803  		pattern:      "obj-rest-remote-%04d",
   804  		objSizeLimit: ecObjLimit,
   805  	}.init(t, proxyURL)
   807  	tools.CheckSkip(t, &tools.SkipTestArgs{RemoteBck: true, Bck: bck})
   809  	initMountpaths(t, proxyURL)
   810  	if testing.Short() {
   811  		useDisks = []bool{false}
   812  	}
   814  	for _, useDisk := range useDisks {
   815  		for _, test := range ecTests {
   816  			testName := fmt.Sprintf("%s/disk_only/%t",, useDisk)
   817  			t.Run(testName, func(t *testing.T) {
   818  				if useDisk {
   819  					tools.SetClusterConfig(t, cos.StrKVs{
   820  						"ec.disk_only": strconv.FormatBool(useDisk),
   821  					})
   822  					defer tools.SetClusterConfig(t, cos.StrKVs{
   823  						"ec.disk_only": "false",
   824  					})
   825  				}
   826  				if o.smap.CountActiveTs() <= {
   827  					t.Skip(cmn.ErrNotEnoughTargets)
   828  				}
   829  				o.parityCnt = test.parity
   830  				o.dataCnt =
   831  				o.objSizeLimit = test.objSizeLimit
   832  				setBucketECProps(t, baseParams, bck, defaultECBckProps(o))
   833  				defer api.SetBucketProps(baseParams, bck, &cmn.BpropsToSet{
   834  					EC: &cmn.ECConfToSet{Enabled: apc.Ptr(false)},
   835  				})
   837  				defer func() {
   838  					tlog.Logln("Wait for PUTs to finish...")
   839  					args := xact.ArgsMsg{Kind: apc.ActECPut}
   840  					err := api.WaitForXactionIdle(baseParams, &args)
   841  					tassert.CheckError(t, err)
   843  					clearAllECObjects(t, bck, true, o)
   844  					reqArgs := xact.ArgsMsg{Kind: apc.ActECPut, Bck: bck}
   845  					err = api.WaitForXactionIdle(baseParams, &reqArgs)
   846  					tassert.CheckError(t, err)
   847  				}()
   849  				wg := sync.WaitGroup{}
   850  				wg.Add(o.objCount)
   851  				for i := range o.objCount {
   852  					o.sema.Acquire()
   853  					go func(i int) {
   854  						defer func() {
   855  							o.sema.Release()
   856  							wg.Done()
   857  						}()
   858  						objName := fmt.Sprintf(o.pattern, i)
   859  						createDamageRestoreECFile(t, baseParams, bck, objName, i, o)
   860  					}(i)
   861  				}
   862  				wg.Wait()
   863  			})
   864  		}
   865  	}
   866  }
// TestECRestoreObjAndSlice is a quick check that EC can restore a damaged
// object and a missing slice:
//   - PUTs an object to the bucket
//   - filepath.Walk checks that the number of metafiles and slices is correct
//   - Either the original object, or the original object and a random slice, are deleted
//   - GET should detect that the original object is gone
//   - The target restores the original object and the missing slices
//
// The per-object work is delegated to createDamageRestoreECFile (defined
// elsewhere in this file). Each entry of ecTests is run as a subtest with
// ec.disk_only set to false and, unless -short is given, also to true.
func TestECRestoreObjAndSlice(t *testing.T) {
	var (
		bck = cmn.Bck{
			Name:     testBucketName + "-obj-n-slice",
			Provider: apc.AIS,
		}
		proxyURL   = tools.RandomProxyURL()
		baseParams = tools.BaseAPIParams(proxyURL)
		useDisks   = []bool{false, true}
	)

	o := ecOptions{
		minTargets:  4,
		objCount:    50,
		concurrency: 8,
		pattern:     "obj-rest-%04d",
		silent:      testing.Short(),
	}.init(t, proxyURL)
	initMountpaths(t, proxyURL)
	// Short mode runs only the ec.disk_only=false variant.
	if testing.Short() {
		useDisks = []bool{false}
	}

	for _, useDisk := range useDisks {
		for _, test := range ecTests {
			// NOTE(review): several expressions in this subtest look truncated
			// (the first Sprintf argument, the right-hand side of the target
			// count comparison, the value assigned to o.dataCnt) - confirm
			// against the upstream source before building.
			testName := fmt.Sprintf("%s/disk_only/%t",, useDisk)
			t.Run(testName, func(t *testing.T) {
				if useDisk {
					tools.SetClusterConfig(t, cos.StrKVs{
						"ec.disk_only": strconv.FormatBool(useDisk),
					})
					// Reset ec.disk_only to "false" when the subtest returns.
					defer tools.SetClusterConfig(t, cos.StrKVs{
						"ec.disk_only": "false",
					})
				}
				if o.smap.CountActiveTs() <= {
					t.Skip(cmn.ErrNotEnoughTargets)
				}
				// The shared options are re-pointed at the current test case.
				o.parityCnt = test.parity
				o.dataCnt =
				o.objSizeLimit = test.objSizeLimit
				newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)

				wg := sync.WaitGroup{}
				wg.Add(o.objCount)
				for i := range o.objCount {
					// A semaphore slot is taken before each worker is spawned
					// and given back when that worker finishes.
					o.sema.Acquire()
					go func(i int) {
						defer func() {
							o.sema.Release()
							wg.Done()
						}()
						objName := fmt.Sprintf(o.pattern, i)
						createDamageRestoreECFile(t, baseParams, bck, objName, i, o)
					}(i)
				}
				wg.Wait()
				// After all workers are done the bucket must hold o.objCount objects.
				assertBucketSize(t, baseParams, bck, o.objCount)
			})
		}
	}
}
   937  func putECFile(baseParams api.BaseParams, bck cmn.Bck, objName string) error {
   938  	objSize := int64(ecMinBigSize * 2)
   939  	objPath := ecTestDir + objName
   941  	r, err := readers.NewRand(objSize, cos.ChecksumNone)
   942  	if err != nil {
   943  		return err
   944  	}
   945  	_, err = api.PutObject(&api.PutArgs{
   946  		BaseParams: baseParams,
   947  		Bck:        bck,
   948  		ObjName:    objPath,
   949  		Reader:     r,
   950  	})
   951  	return err
   952  }
   954  // Returns path to main object and map of all object's slices and ioContext
   955  func createECFile(t *testing.T, baseParams api.BaseParams, bck cmn.Bck, objName string, o *ecOptions) (map[string]ecSliceMD, string) {
   956  	totalCnt := 2 + (o.sliceTotal())*2
   957  	objSize := int64(ecMinBigSize * 2)
   958  	sliceSize := ec.SliceSize(objSize, o.dataCnt)
   960  	err := putECFile(baseParams, bck, objName)
   961  	tassert.CheckFatal(t, err)
   963  	foundParts, mainObjPath := waitForECFinishes(t, totalCnt, objSize, sliceSize, true, bck, ecTestDir+objName)
   964  	tassert.Fatalf(t, mainObjPath != "", "Full copy %s was not found", mainObjPath)
   966  	objPath := ecTestDir + objName
   967  	ecCheckSlices(t, foundParts, bck, objPath, objSize, sliceSize, totalCnt)
   969  	return foundParts, mainObjPath
   970  }
   972  // Creates 2 EC files and then corrupts their slices
   973  // Checks that after corrupting one slice it is still possible to recover an object
   974  // Checks that after corrupting all slices it is not possible to recover an object
   975  func TestECChecksum(t *testing.T) {
   976  	if docker.IsRunning() {
   977  		t.Skipf("test %q requires xattrs to be set, doesn't work with docker", t.Name())
   978  	}
   980  	var (
   981  		proxyURL = tools.RandomProxyURL()
   982  		bck      = cmn.Bck{
   983  			Name:     testBucketName + "-ec-cksum",
   984  			Provider: apc.AIS,
   985  		}
   986  	)
   988  	o := ecOptions{
   989  		minTargets:   4,
   990  		dataCnt:      1,
   991  		parityCnt:    1,
   992  		pattern:      "obj-cksum-%04d",
   993  		objSizeLimit: ecObjLimit,
   994  	}.init(t, proxyURL)
   995  	baseParams := tools.BaseAPIParams(proxyURL)
   996  	initMountpaths(t, proxyURL)
   998  	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  1000  	objName1 := fmt.Sprintf(o.pattern, 1)
  1001  	objPath1 := ecTestDir + objName1
  1002  	foundParts1, mainObjPath1 := createECFile(t, baseParams, bck, objName1, o)
  1004  	objName2 := fmt.Sprintf(o.pattern, 2)
  1005  	objPath2 := ecTestDir + objName2
  1006  	foundParts2, mainObjPath2 := createECFile(t, baseParams, bck, objName2, o)
  1008  	tlog.Logf("Removing main object %s\n", mainObjPath1)
  1009  	tassert.CheckFatal(t, os.Remove(mainObjPath1))
  1011  	// Corrupt just one slice, EC should be able to restore the original object
  1012  	for k := range foundParts1 {
  1013  		ct, err := core.NewCTFromFQN(k, nil)
  1014  		tassert.CheckFatal(t, err)
  1016  		if k != mainObjPath1 && ct.ContentType() == fs.ECSliceType {
  1017  			damageMetadataCksum(t, k)
  1018  			break
  1019  		}
  1020  	}
  1022  	_, err := api.GetObject(baseParams, bck, objPath1, nil)
  1023  	tassert.CheckFatal(t, err)
  1025  	tlog.Logf("Removing main object %s\n", mainObjPath2)
  1026  	tassert.CheckFatal(t, os.Remove(mainObjPath2))
  1028  	// Corrupt all slices, EC should not be able to restore
  1029  	for k := range foundParts2 {
  1030  		ct, err := core.NewCTFromFQN(k, nil)
  1031  		tassert.CheckFatal(t, err)
  1033  		if k != mainObjPath2 && ct.ContentType() == fs.ECSliceType {
  1034  			damageMetadataCksum(t, k)
  1035  		}
  1036  	}
  1038  	_, err = api.GetObject(baseParams, bck, objPath2, nil)
  1039  	tassert.Fatalf(t, err != nil, "Object should not be restored when checksums are wrong")
  1040  }
  1042  func TestECEnabledDisabledEnabled(t *testing.T) {
  1043  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1045  	var (
  1046  		bck = cmn.Bck{
  1047  			Name:     testBucketName + "-ec-props",
  1048  			Provider: apc.AIS,
  1049  		}
  1050  		proxyURL   = tools.RandomProxyURL()
  1051  		baseParams = tools.BaseAPIParams(proxyURL)
  1052  	)
  1054  	o := ecOptions{
  1055  		minTargets:   4,
  1056  		dataCnt:      1,
  1057  		parityCnt:    1,
  1058  		objCount:     25,
  1059  		concurrency:  8,
  1060  		pattern:      "obj-rest-%04d",
  1061  		objSizeLimit: ecObjLimit,
  1062  	}.init(t, proxyURL)
  1064  	initMountpaths(t, proxyURL)
  1065  	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  1067  	// End of preparation, create files with EC enabled, check if are restored properly
  1069  	wg := sync.WaitGroup{}
  1070  	wg.Add(o.objCount)
  1071  	for i := range o.objCount {
  1072  		o.sema.Acquire()
  1073  		go func(i int) {
  1074  			defer func() {
  1075  				o.sema.Release()
  1076  				wg.Done()
  1077  			}()
  1078  			objName := fmt.Sprintf(o.pattern, i)
  1079  			createDamageRestoreECFile(t, baseParams, bck, objName, i, o)
  1080  		}(i)
  1081  	}
  1082  	wg.Wait()
  1084  	if t.Failed() {
  1085  		t.FailNow()
  1086  	}
  1088  	assertBucketSize(t, baseParams, bck, o.objCount)
  1090  	// Disable EC, put normal files, check if were created properly
  1091  	_, err := api.SetBucketProps(baseParams, bck, &cmn.BpropsToSet{
  1092  		EC: &cmn.ECConfToSet{Enabled: apc.Ptr(false)},
  1093  	})
  1094  	tassert.CheckError(t, err)
  1096  	wg.Add(o.objCount)
  1097  	for i := o.objCount; i < 2*o.objCount; i++ {
  1098  		go func(i int) {
  1099  			defer wg.Done()
  1100  			objName := fmt.Sprintf(o.pattern, i)
  1101  			putRandomFile(t, baseParams, bck, objName, cos.MiB)
  1102  		}(i)
  1103  	}
  1105  	wg.Wait()
  1107  	if t.Failed() {
  1108  		t.FailNow()
  1109  	}
  1111  	assertBucketSize(t, baseParams, bck, o.objCount*2)
  1113  	// Enable EC again, check if EC was started properly and creates files with EC correctly
  1114  	_, err = api.SetBucketProps(baseParams, bck, &cmn.BpropsToSet{
  1115  		EC: &cmn.ECConfToSet{Enabled: apc.Ptr(true)},
  1116  	})
  1117  	tassert.CheckError(t, err)
  1119  	wg.Add(o.objCount)
  1120  	for i := 2 * o.objCount; i < 3*o.objCount; i++ {
  1121  		objName := fmt.Sprintf(o.pattern, i)
  1122  		o.sema.Acquire()
  1123  		go func(i int) {
  1124  			defer func() {
  1125  				o.sema.Release()
  1126  				wg.Done()
  1127  			}()
  1128  			createDamageRestoreECFile(t, baseParams, bck, objName, i, o)
  1129  		}(i)
  1130  	}
  1131  	wg.Wait()
  1133  	if t.Failed() {
  1134  		t.FailNow()
  1135  	}
  1137  	assertBucketSize(t, baseParams, bck, o.objCount*3)
  1138  }
  1140  func TestECDisableEnableDuringLoad(t *testing.T) {
  1141  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1143  	var (
  1144  		bck = cmn.Bck{
  1145  			Name:     testBucketName + "-ec-load",
  1146  			Provider: apc.AIS,
  1147  		}
  1148  		proxyURL   = tools.RandomProxyURL()
  1149  		baseParams = tools.BaseAPIParams(proxyURL)
  1150  	)
  1152  	o := ecOptions{
  1153  		minTargets:   4,
  1154  		dataCnt:      1,
  1155  		parityCnt:    1,
  1156  		objCount:     5,
  1157  		concurrency:  8,
  1158  		pattern:      "obj-disable-enable-load-%04d",
  1159  		objSizeLimit: ecObjLimit,
  1160  	}.init(t, proxyURL)
  1162  	initMountpaths(t, proxyURL)
  1163  	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  1164  	// End of preparation, create files with EC enabled, check if are restored properly
  1166  	wg := &sync.WaitGroup{}
  1167  	wg.Add(o.objCount)
  1168  	for i := range o.objCount {
  1169  		o.sema.Acquire()
  1170  		go func(i int) {
  1171  			defer func() {
  1172  				o.sema.Release()
  1173  				wg.Done()
  1174  			}()
  1175  			objName := fmt.Sprintf(o.pattern, i)
  1176  			createDamageRestoreECFile(t, baseParams, bck, objName, i, o)
  1177  		}(i)
  1178  	}
  1179  	wg.Wait()
  1181  	assertBucketSize(t, baseParams, bck, o.objCount)
  1183  	var (
  1184  		numCreated = 0
  1185  		abortCh    = &cos.StopCh{}
  1186  		wgPut      = &sync.WaitGroup{}
  1187  	)
  1188  	abortCh.Init()
  1189  	wgPut.Add(1)
  1191  	go func() {
  1192  		ticker := time.NewTicker(3 * time.Millisecond)
  1193  		defer wgPut.Done()
  1194  		for {
  1195  			select {
  1196  			case <-abortCh.Listen():
  1197  				ticker.Stop()
  1198  				return
  1199  			case <-ticker.C:
  1200  				objName := fmt.Sprintf(o.pattern, o.objCount+numCreated)
  1201  				wgPut.Add(1)
  1202  				go func() {
  1203  					defer wgPut.Done()
  1204  					putRandomFile(t, baseParams, bck, objName, cos.KiB)
  1205  				}()
  1206  				numCreated++
  1207  			}
  1208  		}
  1209  	}()
  1211  	time.Sleep(time.Second)
  1213  	tlog.Logf("Disabling EC for the bucket %s\n", bck)
  1214  	_, err := api.SetBucketProps(baseParams, bck, &cmn.BpropsToSet{
  1215  		EC: &cmn.ECConfToSet{Enabled: apc.Ptr(false)},
  1216  	})
  1217  	tassert.CheckError(t, err)
  1219  	time.Sleep(15 * time.Millisecond)
  1220  	tlog.Logf("Enabling EC for the bucket %s\n", bck)
  1221  	_, err = api.SetBucketProps(baseParams, bck, &cmn.BpropsToSet{
  1222  		EC: &cmn.ECConfToSet{Enabled: apc.Ptr(true)},
  1223  	})
  1224  	tassert.CheckError(t, err)
  1225  	reqArgs := xact.ArgsMsg{Kind: apc.ActECEncode, Bck: bck}
  1226  	_, err = api.WaitForXactionIC(baseParams, &reqArgs)
  1227  	tassert.CheckError(t, err)
  1229  	abortCh.Close()
  1230  	wgPut.Wait()
  1232  	if t.Failed() {
  1233  		t.FailNow()
  1234  	}
  1236  	// Disabling and enabling EC should not result in put's failing.
  1237  	assertBucketSize(t, baseParams, bck, o.objCount+numCreated)
  1238  }
// TestECStress is a stress test to check that EC works as expected.
//   - Changes bucket props to use EC
//   - Generates `objCount` objects, size between `ecObjMinSize` and `ecObjMinSize`+`ecObjMaxSize`
//     (NOTE(review): these two names are not among the constants declared at
//     the top of this file - confirm the intended size bounds)
//   - Objects smaller than `ecObjLimit` must be copies, while others must be EC'ed
//   - PUTs objects to the bucket
//   - filepath.Walk checks that the number of metafiles and slices are correct
//   - The original object is deleted
//   - GET should detect that original object is gone and that there are EC slices
//   - The target restores the original object from slices/copies and returns it
//   - No errors must occur
//
// Runs one subtest per entry of ecTests; the PUT-and-verify work itself is
// delegated to doECPutsAndCheck. Skipped unless long tests are enabled.
func TestECStress(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})

	var (
		bck = cmn.Bck{
			Name:     testBucketName + "-ec-stress",
			Provider: apc.AIS,
		}
		proxyURL   = tools.RandomProxyURL()
		baseParams = tools.BaseAPIParams(proxyURL)
	)

	o := ecOptions{
		minTargets:  4,
		objCount:    400,
		concurrency: 12,
		pattern:     "obj-stress-%04d",
	}.init(t, proxyURL)
	initMountpaths(t, proxyURL)

	for _, test := range ecTests {
		// NOTE(review): the subtest name, the right-hand side of the target
		// count comparison and the value assigned to o.dataCnt look truncated -
		// confirm against the upstream source before building.
		t.Run(, func(t *testing.T) {
			if o.smap.CountActiveTs() <= {
				t.Skip(cmn.ErrNotEnoughTargets)
			}
			// The shared options are re-pointed at the current test case.
			o.parityCnt = test.parity
			o.dataCnt =
			o.objSizeLimit = test.objSizeLimit
			newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
			doECPutsAndCheck(t, baseParams, bck, o)

			// The listing must contain exactly o.objCount entries.
			msg := &apc.LsoMsg{Props: "size,status"}
			objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
			tassert.CheckFatal(t, err)
			tassert.Fatalf(t, len(objList.Entries) == o.objCount,
				"Invalid number of objects: %d, expected %d", len(objList.Entries), o.objCount)
		})
	}
}
  1290  // Stress 2 buckets at the same time
  1291  func TestECStressManyBuckets(t *testing.T) {
  1292  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1294  	var (
  1295  		bck1 = cmn.Bck{
  1296  			Name:     testBucketName + "1",
  1297  			Provider: apc.AIS,
  1298  		}
  1299  		bck2 = cmn.Bck{
  1300  			Name:     testBucketName + "2",
  1301  			Provider: apc.AIS,
  1302  		}
  1303  		proxyURL = tools.RandomProxyURL()
  1304  	)
  1306  	o1 := ecOptions{
  1307  		minTargets:   4,
  1308  		parityCnt:    1,
  1309  		dataCnt:      1,
  1310  		objCount:     200,
  1311  		concurrency:  12,
  1312  		pattern:      "obj-stress-manybck-%04d",
  1313  		objSizeLimit: ecObjLimit,
  1314  	}.init(t, proxyURL)
  1315  	o2 := ecOptions{
  1316  		minTargets:   4,
  1317  		parityCnt:    1,
  1318  		dataCnt:      1,
  1319  		objCount:     200,
  1320  		concurrency:  12,
  1321  		pattern:      "obj-stress-manybck-%04d",
  1322  		objSizeLimit: ecObjLimit,
  1323  	}.init(t, proxyURL)
  1325  	initMountpaths(t, proxyURL)
  1326  	baseParams := tools.BaseAPIParams(proxyURL)
  1327  	newLocalBckWithProps(t, baseParams, bck1, defaultECBckProps(o1), o1)
  1328  	newLocalBckWithProps(t, baseParams, bck2, defaultECBckProps(o2), o2)
  1330  	// Run EC on different buckets concurrently
  1331  	wg := &sync.WaitGroup{}
  1332  	wg.Add(2)
  1333  	go func() {
  1334  		defer wg.Done()
  1335  		doECPutsAndCheck(t, baseParams, bck1, o1)
  1336  	}()
  1337  	go func() {
  1338  		defer wg.Done()
  1339  		doECPutsAndCheck(t, baseParams, bck2, o2)
  1340  	}()
  1341  	wg.Wait()
  1343  	msg := &apc.LsoMsg{Props: "size,status"}
  1344  	objList, err := api.ListObjects(baseParams, bck1, msg, api.ListArgs{})
  1345  	tassert.CheckFatal(t, err)
  1346  	tassert.Fatalf(t, len(objList.Entries) == o1.objCount, "Bucket %s: Invalid number of objects: %d, expected %d", bck1.String(), len(objList.Entries), o1.objCount)
  1348  	msg = &apc.LsoMsg{Props: "size,status"}
  1349  	objList, err = api.ListObjects(baseParams, bck2, msg, api.ListArgs{})
  1350  	tassert.CheckFatal(t, err)
  1351  	tassert.Fatalf(t, len(objList.Entries) == o2.objCount, "Bucket %s: Invalid number of objects: %d, expected %d", bck2.String(), len(objList.Entries), o2.objCount)
  1352  }
// TestECExtraStress is an extra stress test to check that EC works as expected.
//   - Changes bucket props to use EC
//   - Generates `objCount` objects, size between `ecObjMinSize` and `ecObjMinSize`+`ecObjMaxSize`
//     (NOTE(review): these two names are not among the constants declared at
//     the top of this file - confirm the intended size bounds)
//   - Objects smaller than `ecObjLimit` must be copies, while others must be EC'ed
//   - PUTs ALL objects to the bucket stressing both EC and transport
//   - filepath.Walk checks that the number of metafiles at the end is correct
//   - No errors must occur
//
// Runs one subtest per entry of ecTests; the actual work is done by
// ecStressCore. Skipped unless long tests are enabled.
func TestECExtraStress(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})

	const (
		// Object name prefix; ecStressCore declares a constant with the same value.
		objStart = "obj-extra-"
	)

	var (
		bck = cmn.Bck{
			Name:     testBucketName + "-extrastress",
			Provider: apc.AIS,
		}
		proxyURL = tools.RandomProxyURL()
	)

	o := ecOptions{
		minTargets:  4,
		objCount:    400,
		concurrency: 12,
		pattern:     objStart + "%04d",
	}.init(t, proxyURL)
	initMountpaths(t, proxyURL)

	for _, test := range ecTests {
		// NOTE(review): the subtest name, the right-hand side of the target
		// count comparison and the value assigned to o.dataCnt look truncated -
		// confirm against the upstream source before building.
		t.Run(, func(t *testing.T) {
			if o.smap.CountActiveTs() <= {
				t.Skip(cmn.ErrNotEnoughTargets)
			}
			// The shared options are re-pointed at the current test case.
			o.parityCnt = test.parity
			o.dataCnt =
			o.objSizeLimit = test.objSizeLimit
			ecStressCore(t, o, proxyURL, bck)
		})
	}
}
  1397  func ecStressCore(t *testing.T, o *ecOptions, proxyURL string, bck cmn.Bck) {
  1398  	const (
  1399  		objStart   = "obj-extra-"
  1400  		smallEvery = 7
  1401  	)
  1402  	var (
  1403  		waitAllTime = time.Minute * 4 // should be enough for all object to complete EC
  1404  		totalSlices atomic.Int64
  1405  		baseParams  = tools.BaseAPIParams(proxyURL)
  1406  	)
  1408  	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  1410  	started := time.Now()
  1412  	type sCnt struct {
  1413  		obj string
  1414  		cnt int
  1415  	}
  1416  	cntCh := make(chan sCnt, o.objCount)
  1417  	wg := &sync.WaitGroup{}
  1418  	wg.Add(o.objCount)
  1419  	for idx := range o.objCount {
  1420  		o.sema.Acquire()
  1422  		go func(i int) {
  1423  			defer func() {
  1424  				o.sema.Release()
  1425  				wg.Done()
  1426  			}()
  1428  			objName := fmt.Sprintf(o.pattern, i)
  1429  			totalCnt, objSize, sliceSize, doEC := randObjectSize(i, smallEvery, o)
  1430  			objPath := ecTestDir + objName
  1431  			if doEC {
  1432  				tlog.Logf("Object %s, size %9d[%9d]\n", objName, objSize, sliceSize)
  1433  			} else {
  1434  				tlog.Logf("Object %s, size %9d[%9s]\n", objName, objSize, "-")
  1435  			}
  1436  			r, err := readers.NewRand(objSize, cos.ChecksumNone)
  1437  			tassert.Errorf(t, err == nil, "Failed to create reader: %v", err)
  1438  			putArgs := api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r}
  1439  			_, err = api.PutObject(&putArgs)
  1440  			tassert.Errorf(t, err == nil, "PUT failed: %v", err)
  1442  			totalSlices.Add(int64(totalCnt))
  1443  			cntCh <- sCnt{obj: objName, cnt: totalCnt}
  1444  		}(idx)
  1445  	}
  1447  	wg.Wait()
  1448  	close(cntCh)
  1450  	var foundParts map[string]ecSliceMD
  1451  	startedWaiting := time.Now()
  1452  	deadLine := startedWaiting.Add(waitAllTime)
  1453  	for time.Now().Before(deadLine) {
  1454  		foundParts, _ = ecGetAllSlices(t, bck, objStart)
  1455  		if len(foundParts) == int(totalSlices.Load()) {
  1456  			delta := time.Since(startedWaiting)
  1457  			t.Logf("waiting %v for EC to complete\n", delta)
  1458  			break
  1459  		}
  1460  		time.Sleep(time.Millisecond * 30)
  1461  	}
  1462  	if len(foundParts) != int(totalSlices.Load()) {
  1463  		slices := make(map[string]int, o.objCount)
  1464  		for sl := range cntCh {
  1465  			slices[sl.obj] = sl.cnt
  1466  		}
  1467  		fndSlices := calculateSlicesCount(foundParts)
  1468  		compareSlicesCount(t, slices, fndSlices)
  1470  		t.Fatalf("Expected total number of files: %d, found: %d\n",
  1471  			totalSlices.Load(), len(foundParts))
  1472  	}
  1473  	delta := time.Since(started)
  1474  	t.Logf("Total test time %v\n", delta)
  1476  	msg := &apc.LsoMsg{Props: "size,status"}
  1477  	objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  1478  	tassert.CheckFatal(t, err)
  1479  	tassert.Fatalf(t, len(objList.Entries) == o.objCount, "Invalid number of objects: %d, expected %d", len(objList.Entries), o.objCount)
  1480  }
  1482  // Quick check that EC keeps xattrs:
  1483  // - enable EC and versioning for the bucket
  1484  // - put/damage/restore one by one a few objects two times to increase their versions
  1485  // - get the list of the objects at the end and check that they all have the correct versions
  1486  func TestECXattrs(t *testing.T) {
  1487  	const (
  1488  		sleepRestoreTime = time.Second * 5 // wait time after GET restores slices
  1489  		finalVersion     = "2"
  1490  		smallEvery       = 4
  1491  	)
  1493  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1495  	var (
  1496  		bck = cmn.Bck{
  1497  			Name:     testBucketName + "-attrs",
  1498  			Provider: apc.AIS,
  1499  		}
  1500  		proxyURL = tools.RandomProxyURL()
  1501  	)
  1503  	o := ecOptions{
  1504  		minTargets:   4,
  1505  		dataCnt:      1,
  1506  		parityCnt:    1,
  1507  		objCount:     30,
  1508  		concurrency:  8,
  1509  		pattern:      "obj-xattr-%04d",
  1510  		objSizeLimit: ecObjLimit,
  1511  	}.init(t, proxyURL)
  1512  	initMountpaths(t, proxyURL)
  1514  	baseParams := tools.BaseAPIParams(proxyURL)
  1515  	bckProps := defaultECBckProps(o)
  1516  	bckProps.Versioning = &cmn.VersionConfToSet{
  1517  		Enabled: apc.Ptr(true),
  1518  	}
  1520  	newLocalBckWithProps(t, baseParams, bck, bckProps, o)
  1522  	oneObj := func(idx int, objName string) {
  1523  		totalCnt, objSize, sliceSize, doEC := randObjectSize(idx, smallEvery, o)
  1524  		objPath := ecTestDir + objName
  1525  		ecStr, delStr := "-", "obj"
  1526  		if doEC {
  1527  			ecStr = "EC"
  1528  		}
  1529  		tlog.Logf("Creating %s, size %8d [%2s] [%s]\n", objPath, objSize, ecStr, delStr)
  1530  		r, err := readers.NewRand(objSize, cos.ChecksumNone)
  1531  		tassert.CheckFatal(t, err)
  1532  		_, err = api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r})
  1533  		tassert.CheckFatal(t, err)
  1535  		tlog.Logf("waiting for %s\n", objPath)
  1536  		foundParts, mainObjPath := waitForECFinishes(t, totalCnt, objSize, sliceSize, doEC, bck, objPath)
  1538  		ecCheckSlices(t, foundParts, bck, objPath, objSize, sliceSize, totalCnt)
  1539  		if mainObjPath == "" {
  1540  			t.Fatalf("Full copy is not found")
  1541  		}
  1543  		tlog.Logf("Damaging %s [removing %s]\n", objPath, mainObjPath)
  1544  		tassert.CheckFatal(t, os.Remove(mainObjPath))
  1546  		ct, err := core.NewCTFromFQN(mainObjPath, nil)
  1547  		tassert.CheckFatal(t, err)
  1548  		metafile := ct.Make(fs.ECMetaType)
  1549  		tlog.Logf("Damaging %s [removing %s]\n", objPath, metafile)
  1550  		tassert.CheckFatal(t, cos.RemoveFile(metafile))
  1552  		partsAfterRemove, _ := ecGetAllSlices(t, bck, objPath)
  1553  		_, ok := partsAfterRemove[mainObjPath]
  1554  		if ok || len(partsAfterRemove) != len(foundParts)-2 {
  1555  			t.Fatalf("Files are not deleted [%d - %d]: %#v", len(foundParts), len(partsAfterRemove), partsAfterRemove)
  1556  		}
  1558  		tlog.Logf("Restoring %s\n", objPath)
  1559  		_, err = api.GetObject(baseParams, bck, objPath, nil)
  1560  		if err != nil {
  1561  			tlog.Logf("... retrying %s\n", objPath)
  1562  			time.Sleep(time.Second)
  1563  			_, err = api.GetObject(baseParams, bck, objPath, nil)
  1564  		}
  1565  		tassert.CheckFatal(t, err)
  1567  		if doEC {
  1568  			deadline := time.Now().Add(sleepRestoreTime)
  1569  			var partsAfterRestore map[string]ecSliceMD
  1570  			for time.Now().Before(deadline) {
  1571  				time.Sleep(time.Millisecond * 250)
  1572  				partsAfterRestore, _ = ecGetAllSlices(t, bck, objPath)
  1573  				if len(partsAfterRestore) == totalCnt {
  1574  					break
  1575  				}
  1576  			}
  1577  			ecCheckSlices(t, partsAfterRestore, bck, objPath, objSize, sliceSize, totalCnt)
  1578  		}
  1579  	}
  1581  	// PUT objects twice to make their version 2
  1582  	for range 2 {
  1583  		for i := range o.objCount {
  1584  			objName := fmt.Sprintf(o.pattern, i)
  1585  			oneObj(i, objName)
  1586  		}
  1587  	}
  1589  	msg := &apc.LsoMsg{Props: "size,status,version"}
  1590  	objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  1591  	tassert.CheckFatal(t, err)
  1593  	// check that all returned objects and their repicas have the same version
  1594  	for _, e := range objList.Entries {
  1595  		if e.Version != finalVersion {
  1596  			t.Errorf("%s[status=%d] must have version %s but it is %s\n", e.Name, e.Flags, finalVersion, e.Version)
  1597  		}
  1598  	}
  1600  	if len(objList.Entries) != o.objCount {
  1601  		t.Fatalf("Invalid number of objects: %d, expected %d", len(objList.Entries), 1)
  1602  	}
  1603  }
  1605  // 1. start putting EC files into the cluster
  1606  // 2. in the middle of puts destroy bucket
  1607  // 3. wait for puts to finish
  1608  // 4. create bucket with the same name
  1609  // 5. check that EC is working properly for this bucket
  1610  func TestECDestroyBucket(t *testing.T) {
  1611  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1613  	var (
  1614  		bck = cmn.Bck{
  1615  			Name:     testBucketName + "-DESTROY",
  1616  			Provider: apc.AIS,
  1617  		}
  1618  		proxyURL   = tools.RandomProxyURL()
  1619  		baseParams = tools.BaseAPIParams(proxyURL)
  1620  	)
  1622  	o := ecOptions{
  1623  		minTargets:   4,
  1624  		dataCnt:      1,
  1625  		parityCnt:    1,
  1626  		objCount:     100,
  1627  		concurrency:  10,
  1628  		pattern:      "obj-destroy-bck-%04d",
  1629  		objSizeLimit: ecObjLimit,
  1630  	}.init(t, proxyURL)
  1632  	initMountpaths(t, proxyURL)
  1633  	bckProps := defaultECBckProps(o)
  1634  	newLocalBckWithProps(t, baseParams, bck, bckProps, o)
  1636  	wg := &sync.WaitGroup{}
  1637  	errCnt := atomic.NewInt64(0)
  1638  	sucCnt := atomic.NewInt64(0)
  1640  	for i := range o.objCount {
  1641  		o.sema.Acquire()
  1642  		wg.Add(1)
  1643  		go func(i int) {
  1644  			defer func() {
  1645  				o.sema.Release()
  1646  				wg.Done()
  1647  			}()
  1649  			objName := fmt.Sprintf(o.pattern, i)
  1650  			if i%10 == 0 {
  1651  				tlog.Logf("ec object %s into bucket %s\n", objName, bck)
  1652  			}
  1653  			if putECFile(baseParams, bck, objName) != nil {
  1654  				errCnt.Inc()
  1655  			} else {
  1656  				sucCnt.Inc()
  1657  			}
  1658  		}(i)
  1660  		if i == 4*o.objCount/5 {
  1661  			// DestroyBucket when put requests are still executing
  1662  			o.sema.Acquire()
  1663  			wg.Add(1)
  1664  			go func() {
  1665  				defer func() {
  1666  					o.sema.Release()
  1667  					wg.Done()
  1668  				}()
  1670  				tlog.Logf("Destroying bucket %s\n", bck)
  1671  				tools.DestroyBucket(t, proxyURL, bck)
  1672  			}()
  1673  		}
  1674  	}
  1676  	wg.Wait()
  1677  	tlog.Logf("EC put files resulted in error in %d out of %d files\n", errCnt.Load(), o.objCount)
  1678  	args := xact.ArgsMsg{Kind: apc.ActECPut}
  1679  	api.WaitForXactionIC(baseParams, &args)
  1681  	// create bucket with the same name and check if puts are successful
  1682  	newLocalBckWithProps(t, baseParams, bck, bckProps, o)
  1683  	doECPutsAndCheck(t, baseParams, bck, o)
  1685  	// check if get requests are successful
  1686  	msg := &apc.LsoMsg{Props: "size,status,version"}
  1687  	objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  1688  	tassert.CheckFatal(t, err)
  1689  	tassert.Errorf(t, len(objList.Entries) == o.objCount, "Invalid number of objects: %d, expected %d", len(objList.Entries), o.objCount)
  1690  }
  1692  // Lost target test:
  1693  // - puts some objects
  1694  // - kills a random target
  1695  // - gets all objects
  1696  // - nothing must fail
  1697  // - register the target back
  1698  func TestECEmergencyTargetForSlices(t *testing.T) {
  1699  	const (
  1700  		smallEvery = 4
  1701  	)
  1703  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1705  	var (
  1706  		bck = cmn.Bck{
  1707  			Name:     testBucketName + "-slice-emergency",
  1708  			Provider: apc.AIS,
  1709  		}
  1710  		proxyURL   = tools.RandomProxyURL()
  1711  		baseParams = tools.BaseAPIParams(proxyURL)
  1712  	)
  1714  	o := ecOptions{
  1715  		minTargets:   5,
  1716  		dataCnt:      -1,
  1717  		objCount:     100,
  1718  		concurrency:  12,
  1719  		pattern:      "obj-emt-%04d",
  1720  		objSizeLimit: ecObjLimit,
  1721  	}.init(t, proxyURL)
  1722  	initMountpaths(t, proxyURL)
  1724  	// Increase number of EC data slices, now there's just enough targets to handle EC requests
  1725  	// Encoding will fail if even one is missing, restoring should still work
  1726  	o.dataCnt++
  1728  	sgl := memsys.PageMM().NewSGL(0)
  1729  	defer sgl.Free()
  1731  	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  1733  	wg := &sync.WaitGroup{}
  1735  	// 1. PUT objects
  1736  	putOneObj := func(idx int) {
  1737  		defer func() {
  1738  			wg.Done()
  1739  			o.sema.Release()
  1740  		}()
  1742  		var (
  1743  			start   = time.Now()
  1744  			objName = fmt.Sprintf(o.pattern, idx)
  1745  			objPath = ecTestDir + objName
  1747  			totalCnt, objSize, sliceSize, doEC = randObjectSize(idx, smallEvery, o)
  1748  		)
  1749  		ecStr := "-"
  1750  		if doEC {
  1751  			ecStr = "EC"
  1752  		}
  1753  		tlog.Logf("Creating %s, size %8d [%2s]\n", objPath, objSize, ecStr)
  1754  		r, err := readers.NewRand(objSize, cos.ChecksumNone)
  1755  		tassert.CheckFatal(t, err)
  1756  		_, err = api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r})
  1757  		tassert.CheckFatal(t, err)
  1758  		t.Logf("Object %s put in %v", objName, time.Since(start))
  1759  		start = time.Now()
  1761  		foundParts, mainObjPath := waitForECFinishes(t, totalCnt, objSize, sliceSize, doEC, bck, objPath)
  1763  		ecCheckSlices(t, foundParts, bck, objPath, objSize, sliceSize, totalCnt)
  1764  		if mainObjPath == "" {
  1765  			t.Errorf("Full copy is not found")
  1766  			return
  1767  		}
  1768  		t.Logf("Object %s EC in %v", objName, time.Since(start))
  1769  	}
  1771  	wg.Add(o.objCount)
  1772  	for i := range o.objCount {
  1773  		o.sema.Acquire()
  1774  		go putOneObj(i)
  1775  	}
  1776  	wg.Wait()
  1777  	if t.Failed() {
  1778  		t.FailNow()
  1779  	}
  1781  	_, removedTarget := tools.RmTargetSkipRebWait(t, proxyURL, o.smap)
  1782  	defer func() {
  1783  		val := &apc.ActValRmNode{DaemonID: removedTarget.ID()}
  1784  		rebID, err := api.StopMaintenance(baseParams, val)
  1785  		tassert.CheckError(t, err)
  1786  		tools.WaitForRebalanceByID(t, baseParams, rebID)
  1787  	}()
  1789  	// 3. Read objects
  1790  	objectsExist(t, baseParams, bck, o.pattern, o.objCount)
  1792  	// 4. Check that ListObjects returns correct number of items
  1793  	tlog.Logln("Reading bucket list...")
  1794  	msg := &apc.LsoMsg{Props: "size,status,version"}
  1795  	objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  1796  	tassert.CheckFatal(t, err)
  1797  	tassert.Errorf(t, len(objList.Entries) == o.objCount, "Invalid number of objects: %d, expected %d", len(objList.Entries), o.objCount)
  1798  }
  1800  func TestECEmergencyTargetForReplica(t *testing.T) {
  1801  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1803  	var (
  1804  		bck = cmn.Bck{
  1805  			Name:     testBucketName + "-replica-emergency",
  1806  			Provider: apc.AIS,
  1807  		}
  1808  		proxyURL = tools.RandomProxyURL()
  1809  	)
  1811  	o := ecOptions{
  1812  		minTargets:   5,
  1813  		dataCnt:      -1,
  1814  		objCount:     50,
  1815  		concurrency:  8,
  1816  		pattern:      "obj-rest-%04d",
  1817  		objSizeLimit: ecObjLimit,
  1818  	}.init(t, proxyURL)
  1820  	if o.smap.CountActiveTs() > 10 {
  1821  		// Reason: calculating main obj directory based on DeamonID
  1822  		// see getOneObj, 'HACK' annotation
  1823  		t.Skip("Test requires at most 10 targets")
  1824  	}
  1825  	initMountpaths(t, proxyURL)
  1827  	for _, target := range o.smap.Tmap {
  1828  		target.Digest()
  1829  	}
  1831  	// Increase number of EC data slices, now there's just enough targets to handle EC requests
  1832  	// Encoding will fail if even one is missing, restoring should still work
  1833  	o.dataCnt++
  1835  	baseParams := tools.BaseAPIParams(proxyURL)
  1836  	bckProps := defaultECBckProps(o)
  1837  	newLocalBckWithProps(t, baseParams, bck, bckProps, o)
  1839  	wg := sync.WaitGroup{}
  1841  	// PUT object
  1842  	wg.Add(o.objCount)
  1843  	for i := range o.objCount {
  1844  		go func(i int) {
  1845  			defer wg.Done()
  1846  			objName := fmt.Sprintf(o.pattern, i)
  1847  			createECReplicas(t, baseParams, bck, objName, o)
  1848  		}(i)
  1849  	}
  1851  	wg.Wait()
  1853  	if t.Failed() {
  1854  		t.FailNow()
  1855  	}
  1857  	// kill #dataslices of targets, normal EC restore won't be possible
  1858  	// 2. Kill a random target
  1859  	removedTargets := make(meta.Nodes, 0, o.dataCnt)
  1860  	smap := tools.GetClusterMap(t, proxyURL)
  1862  	for i := o.dataCnt - 1; i >= 0; i-- {
  1863  		var removedTarget *meta.Snode
  1864  		smap, removedTarget = tools.RmTargetSkipRebWait(t, proxyURL, smap)
  1865  		removedTargets = append(removedTargets, removedTarget)
  1866  	}
  1868  	defer func() {
  1869  		var rebID string
  1870  		for _, target := range removedTargets {
  1871  			rebID, _ = tools.RestoreTarget(t, proxyURL, target)
  1872  		}
  1873  		if rebID == "" {
  1874  			return
  1875  		}
  1876  		tools.WaitForRebalanceByID(t, baseParams, rebID)
  1877  	}()
  1879  	hasTarget := func(targets meta.Nodes, target *meta.Snode) bool {
  1880  		for _, tr := range targets {
  1881  			if tr.ID() == target.ID() {
  1882  				return true
  1883  			}
  1884  		}
  1885  		return false
  1886  	}
  1888  	getOneObj := func(i int) {
  1889  		defer wg.Done()
  1891  		objName := fmt.Sprintf(o.pattern, i)
  1892  		// 1) hack: calculate which targets stored a replica
  1893  		cbck := meta.NewBck(bck.Name, bck.Provider, cmn.NsGlobal)
  1894  		targets, err := o.smap.HrwTargetList(cbck.MakeUname(ecTestDir+objName), o.parityCnt+1)
  1895  		tassert.CheckFatal(t, err)
  1897  		mainTarget := targets[0]
  1898  		targets = targets[1:]
  1900  		replicas, _ := ecGetAllSlices(t, bck, objName)
  1901  		// HACK: this tells directory of target based on last number of it's port
  1902  		// This is usually true, but undefined if target has > 9 nodes
  1903  		// as the last digit becomes ambiguous
  1904  		targetDir := mainTarget.ID()[len(mainTarget.ID())-1]
  1906  		for p := range replicas {
  1907  			if strings.Contains(p, path.Join(rootDir, string(targetDir))) {
  1908  				// Delete the actual main object
  1909  				// NOTE: this might fail if the targetDir is not calculated correctly
  1910  				tassert.CheckFatal(t, os.Remove(p))
  1911  				break
  1912  			}
  1913  		}
  1915  		for _, target := range targets {
  1916  			if !hasTarget(removedTargets, target) {
  1917  				// there exists a target which was not killed and stores replica
  1918  				objPath := ecTestDir + objName
  1919  				_, err := api.GetObject(baseParams, bck, objPath, nil)
  1920  				tassert.CheckFatal(t, err)
  1921  				return
  1922  			}
  1923  		}
  1924  	}
  1926  	tlog.Logln("Reading all objects...")
  1927  	wg.Add(o.objCount)
  1928  	for i := range o.objCount {
  1929  		go getOneObj(i)
  1930  	}
  1931  	wg.Wait()
  1933  	// it is OK to have some Del failed with "object not found" because
  1934  	// some targets are still dead at this point
  1935  	clearAllECObjects(t, bck, false, o)
  1936  }
  1938  // Lost mountpah test:
  1939  // - puts some objects
  1940  // - disable a random mountpath
  1941  // - gets all objects
  1942  // - nothing must fail
  1943  // - enable the mountpath back
  1944  func TestECEmergencyMountpath(t *testing.T) {
  1945  	const (
  1946  		smallEvery = 4
  1947  	)
  1949  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
  1951  	var (
  1952  		bck = cmn.Bck{
  1953  			Name:     testBucketName + "-mpath-emergency",
  1954  			Provider: apc.AIS,
  1955  		}
  1956  		proxyURL   = tools.RandomProxyURL()
  1957  		baseParams = tools.BaseAPIParams(proxyURL)
  1958  	)
  1960  	o := ecOptions{
  1961  		minTargets:   5,
  1962  		dataCnt:      1,
  1963  		parityCnt:    1,
  1964  		objCount:     400,
  1965  		concurrency:  24,
  1966  		pattern:      "obj-em-mpath-%04d",
  1967  		objSizeLimit: ecObjLimit,
  1968  	}.init(t, proxyURL)
  1970  	removeTarget, _ := o.smap.GetRandTarget()
  1971  	mpathList, err := api.GetMountpaths(baseParams, removeTarget)
  1972  	tassert.CheckFatal(t, err)
  1973  	ensureNoDisabledMountpaths(t, removeTarget, mpathList)
  1974  	if len(mpathList.Available) < 2 {
  1975  		t.Fatalf("%s requires 2 or more mountpaths", t.Name())
  1976  	}
  1977  	initMountpaths(t, proxyURL)
  1979  	sgl := memsys.PageMM().NewSGL(0)
  1980  	defer sgl.Free()
  1982  	bckProps := defaultECBckProps(o)
  1983  	newLocalBckWithProps(t, baseParams, bck, bckProps, o)
  1985  	wg := &sync.WaitGroup{}
  1987  	// 1. PUT objects
  1988  	putOneObj := func(idx int) {
  1989  		defer func() {
  1990  			wg.Done()
  1991  			o.sema.Release()
  1992  		}()
  1993  		var (
  1994  			objName = fmt.Sprintf(o.pattern, idx)
  1995  			objPath = ecTestDir + objName
  1997  			totalCnt, objSize, sliceSize, doEC = randObjectSize(idx, smallEvery, o)
  1998  		)
  1999  		ecStr := "-"
  2000  		if doEC {
  2001  			ecStr = "EC"
  2002  		}
  2003  		tlog.Logf("Creating %s, size %8d [%2s]\n", objPath, objSize, ecStr)
  2004  		r, err := readers.NewRand(objSize, cos.ChecksumNone)
  2005  		tassert.CheckFatal(t, err)
  2006  		_, err = api.PutObject(&api.PutArgs{BaseParams: baseParams, Bck: bck, ObjName: objPath, Reader: r})
  2007  		tassert.CheckFatal(t, err)
  2009  		foundParts, mainObjPath := waitForECFinishes(t, totalCnt, objSize, sliceSize, doEC, bck, objPath)
  2010  		ecCheckSlices(t, foundParts, bck, objPath, objSize, sliceSize, totalCnt)
  2011  		if mainObjPath == "" {
  2012  			t.Errorf("Full copy is not found")
  2013  			return
  2014  		}
  2015  	}
  2017  	wg.Add(o.objCount)
  2018  	for i := range o.objCount {
  2019  		o.sema.Acquire()
  2020  		go putOneObj(i)
  2021  	}
  2022  	wg.Wait()
  2023  	if t.Failed() {
  2024  		t.FailNow()
  2025  	}
  2027  	// 2. Disable a random mountpath
  2028  	mpathID := o.rnd.Intn(len(mpathList.Available))
  2029  	removeMpath := mpathList.Available[mpathID]
  2030  	tlog.Logf("Disabling a mountpath %s at target: %s\n", removeMpath, removeTarget.ID())
  2031  	err = api.DisableMountpath(baseParams, removeTarget, removeMpath, false /*dont-resil*/)
  2032  	tassert.CheckFatal(t, err)
  2034  	tools.WaitForResilvering(t, baseParams, removeTarget)
  2036  	defer func() {
  2037  		tlog.Logf("Enabling mountpath %s at target %s...\n", removeMpath, removeTarget.ID())
  2038  		err = api.EnableMountpath(baseParams, removeTarget, removeMpath)
  2039  		tassert.CheckFatal(t, err)
  2041  		tools.WaitForResilvering(t, baseParams, removeTarget)
  2042  		ensureNumMountpaths(t, removeTarget, mpathList)
  2043  	}()
  2045  	// 3. Read objects
  2046  	objectsExist(t, baseParams, bck, o.pattern, o.objCount)
  2048  	// 4. Check that ListObjects returns correct number of items
  2049  	tlog.Logf("DONE\nReading bucket list...\n")
  2050  	msg := &apc.LsoMsg{Props: "size,status,version"}
  2051  	objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  2052  	tassert.CheckFatal(t, err)
  2053  	if len(objList.Entries) != o.objCount {
  2054  		t.Fatalf("Invalid number of objects: %d, expected %d", len(objList.Entries), o.objCount)
  2055  	}
  2057  	// Wait for ec to finish
  2058  	flt := xact.ArgsMsg{Kind: apc.ActECPut, Bck: bck}
  2059  	_ = api.WaitForXactionIdle(baseParams, &flt)
  2060  }
// TestECRebalance runs ecOnlyRebalance as a subtest for every entry of ecTests.
// All subtests use the same bucket and share one ecOptions value whose parity
// count, data count, and object size limit are overwritten before each run.
// The test is flagged Long and requires the tools.ClusterTypeLocal deployment.
func TestECRebalance(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, RequiredDeployment: tools.ClusterTypeLocal})
	var (
		bck = cmn.Bck{
			Name:     testBucketName + "-ec-rebalance",
			Provider: apc.AIS,
		}
		proxyURL = tools.RandomProxyURL()
	)
	o := ecOptions{
		objCount:     30,
		concurrency:  8,
		pattern:      "obj-reb-chk-%04d",
		silent:       true,
		objSizeLimit: ecObjLimit,
	}.init(t, proxyURL)
	initMountpaths(t, proxyURL)
	// One subtest per EC configuration; a subtest is skipped with
	// cmn.ErrNotEnoughTargets when the cluster has too few active targets.
	for _, test := range ecTests {
		t.Run(, func(t *testing.T) {
			if o.smap.CountActiveTs() <= {
				t.Skip(cmn.ErrNotEnoughTargets)
			}
			o.parityCnt = test.parity
			o.dataCnt =
			o.objSizeLimit = test.objSizeLimit
			ecOnlyRebalance(t, o, proxyURL, bck)
		})
	}
}
// TestECMountpaths runs ecMountpaths as a subtest for every entry of ecTests.
// All subtests use the same bucket and share one ecOptions value whose parity
// count, data count, and object size limit are overwritten before each run.
// Requires the tools.ClusterTypeLocal deployment.
func TestECMountpaths(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal})
	var (
		bck = cmn.Bck{
			Name:     testBucketName + "-ec-mpaths",
			Provider: apc.AIS,
		}
		proxyURL = tools.RandomProxyURL()
	)
	o := ecOptions{
		objCount:     30,
		concurrency:  8,
		pattern:      "obj-reb-mp-%04d",
		silent:       true,
		objSizeLimit: ecObjLimit,
	}.init(t, proxyURL)
	initMountpaths(t, proxyURL)
	// One subtest per EC configuration; a subtest is skipped with
	// cmn.ErrNotEnoughTargets when the cluster has too few active targets.
	for _, test := range ecTests {
		t.Run(, func(t *testing.T) {
			if o.smap.CountActiveTs() <= {
				t.Skipf("%s: %v", t.Name(), cmn.ErrNotEnoughTargets)
			}
			o.parityCnt = test.parity
			o.dataCnt =
			o.objSizeLimit = test.objSizeLimit
			ecMountpaths(t, o, proxyURL, bck)
		})
	}
	// Wait for the bucket's EC-put xaction to become idle before returning;
	// the result of the wait is not checked.
	reqArgs := xact.ArgsMsg{Kind: apc.ActECPut, Bck: bck}
	api.WaitForXactionIdle(tools.BaseAPIParams(proxyURL), &reqArgs)
}
  2129  // The test only checks that the number of object after rebalance equals
  2130  // the number of objects before it
  2131  func ecOnlyRebalance(t *testing.T, o *ecOptions, proxyURL string, bck cmn.Bck) {
  2132  	baseParams := tools.BaseAPIParams(proxyURL)
  2134  	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  2136  	wg := sync.WaitGroup{}
  2137  	wg.Add(o.objCount)
  2138  	for i := range o.objCount {
  2139  		go func(i int) {
  2140  			defer wg.Done()
  2141  			objName := fmt.Sprintf(o.pattern, i)
  2142  			createECObject(t, baseParams, bck, objName, i, o)
  2143  		}(i)
  2144  	}
  2145  	wg.Wait()
  2147  	if t.Failed() {
  2148  		t.FailNow()
  2149  	}
  2151  	msg := &apc.LsoMsg{Props: apc.GetPropsSize}
  2152  	oldObjList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  2153  	tassert.CheckFatal(t, err)
  2154  	tlog.Logf("%d objects created, starting rebalance\n", len(oldObjList.Entries))
  2156  	removedTarget, err := o.smap.GetRandTarget()
  2157  	tassert.CheckFatal(t, err)
  2158  	args := &apc.ActValRmNode{DaemonID: removedTarget.ID()}
  2159  	rebID, err := api.StartMaintenance(baseParams, args)
  2160  	tassert.CheckFatal(t, err)
  2161  	defer func() {
  2162  		rebID, _ := tools.RestoreTarget(t, proxyURL, removedTarget)
  2163  		tools.WaitForRebalanceByID(t, baseParams, rebID)
  2164  	}()
  2165  	tools.WaitForRebalanceByID(t, baseParams, rebID)
  2167  	newObjList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  2168  	tassert.CheckFatal(t, err)
  2169  	if len(oldObjList.Entries) != len(newObjList.Entries) {
  2170  		for _, o := range oldObjList.Entries {
  2171  			found := false
  2172  			for _, n := range newObjList.Entries {
  2173  				if n.Name == o.Name {
  2174  					found = true
  2175  					break
  2176  				}
  2177  			}
  2178  			if !found {
  2179  				t.Errorf("Old %s[%d] not found", o.Name, o.Size)
  2180  			}
  2181  		}
  2182  		t.Fatalf("%d objects before rebalance, %d objects after",
  2183  			len(oldObjList.Entries), len(newObjList.Entries))
  2184  	}
  2186  	for _, en := range newObjList.Entries {
  2187  		oah, err := api.GetObject(baseParams, bck, en.Name, nil)
  2188  		if err != nil {
  2189  			t.Errorf("Failed to read %s: %v", en.Name, err)
  2190  			continue // to avoid printing other error in this case
  2191  		}
  2192  		if oah.Size() != en.Size {
  2193  			t.Errorf("%s size mismatch read %d, props %d", en.Name, oah.Size(), en.Size)
  2194  		}
  2195  	}
  2196  }
  2198  // Simple test to check if EC correctly finds all the objects and its slices
  2199  // that will be used by rebalance
  2200  func TestECBucketEncode(t *testing.T) {
  2201  	const (
  2202  		parityCnt = 2
  2203  		dataCnt   = 1
  2204  	)
  2205  	var (
  2206  		proxyURL = tools.RandomProxyURL()
  2207  		m        = ioContext{
  2208  			t:        t,
  2209  			num:      150,
  2210  			proxyURL: proxyURL,
  2211  		}
  2212  	)
  2214  	m.initAndSaveState(true /*cleanup*/)
  2215  	baseParams := tools.BaseAPIParams(proxyURL)
  2217  	if nt := m.smap.CountActiveTs(); nt < parityCnt+dataCnt+1 {
  2218  		t.Skipf("%s: not enough targets (%d): (d=%d, p=%d) requires at least %d",
  2219  			t.Name(), nt, dataCnt, parityCnt, parityCnt+dataCnt+1)
  2220  	}
  2222  	initMountpaths(t, proxyURL)
  2223  	tools.CreateBucket(t, proxyURL, m.bck, nil, true /*cleanup*/)
  2225  	m.puts()
  2227  	objList, err := api.ListObjects(baseParams, m.bck, nil, api.ListArgs{})
  2228  	tassert.CheckFatal(t, err)
  2229  	tlog.Logf("Object count: %d\n", len(objList.Entries))
  2230  	if len(objList.Entries) != m.num {
  2231  		t.Fatalf("list_objects %s invalid number of files %d, expected %d", m.bck, len(objList.Entries), m.num)
  2232  	}
  2234  	tlog.Logf("Enabling EC\n")
  2235  	bckPropsToUpate := &cmn.BpropsToSet{
  2236  		EC: &cmn.ECConfToSet{
  2237  			Enabled:      apc.Ptr(true),
  2238  			ObjSizeLimit: apc.Ptr[int64](1),
  2239  			DataSlices:   apc.Ptr(1),
  2240  			ParitySlices: apc.Ptr(parityCnt),
  2241  		},
  2242  	}
  2243  	_, err = api.SetBucketProps(baseParams, m.bck, bckPropsToUpate)
  2244  	tassert.CheckFatal(t, err)
  2246  	tlog.Logf("Wait for EC %s\n", m.bck)
  2247  	xargs := xact.ArgsMsg{Kind: apc.ActECEncode, Bck: m.bck, Timeout: tools.RebalanceTimeout}
  2248  	_, err = api.WaitForXactionIC(baseParams, &xargs)
  2249  	tassert.CheckFatal(t, err)
  2251  	objList, err = api.ListObjects(baseParams, m.bck, nil, api.ListArgs{})
  2252  	tassert.CheckFatal(t, err)
  2254  	if len(objList.Entries) != m.num {
  2255  		t.Fatalf("bucket %s: expected %d objects, got %d", m.bck, m.num, len(objList.Entries))
  2256  	}
  2257  	tlog.Logf("Object counts after EC finishes: %d (%d)\n", len(objList.Entries), (parityCnt+1)*m.num)
  2258  	//
  2259  	// TODO: support querying bucket for total number of entries with respect to mirroring and EC
  2260  	//
  2261  }
// TestECAndRegularRebalance creates two buckets (one EC-enabled, one regular),
// fills both with data, and then triggers a rebalance; the scenario itself is
// implemented by ecAndRegularRebalance and is run as a subtest for every entry
// of ecTests (presumably EC and regular objects are then rebalanced in
// parallel - confirm).
// The test is flagged Long and requires the tools.ClusterTypeLocal deployment.
func TestECAndRegularRebalance(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, RequiredDeployment: tools.ClusterTypeLocal})
	var (
		bckReg = cmn.Bck{
			Name:     testBucketName + "-REG",
			Provider: apc.AIS,
		}
		bckEC = cmn.Bck{
			Name:     testBucketName + "-EC",
			Provider: apc.AIS,
		}
		proxyURL = tools.RandomProxyURL()
	)
	o := ecOptions{
		minTargets:   5,
		objCount:     90,
		concurrency:  8,
		pattern:      "obj-reb-chk-%04d",
		silent:       true,
		objSizeLimit: ecObjLimit,
	}.init(t, proxyURL)
	initMountpaths(t, proxyURL)
	// One subtest per EC configuration; a subtest is skipped with
	// cmn.ErrNotEnoughTargets when the cluster has too few active targets.
	// The shared ecOptions value is overwritten before each run.
	for _, test := range ecTests {
		t.Run(, func(t *testing.T) {
			if o.smap.CountActiveTs() <= {
				t.Skip(cmn.ErrNotEnoughTargets)
			}
			o.parityCnt = test.parity
			o.dataCnt =
			o.objSizeLimit = test.objSizeLimit
			ecAndRegularRebalance(t, o, proxyURL, bckReg, bckEC)
		})
	}
}
  2302  func ecAndRegularRebalance(t *testing.T, o *ecOptions, proxyURL string, bckReg, bckEC cmn.Bck) {
  2303  	baseParams := tools.BaseAPIParams(proxyURL)
  2305  	tools.CreateBucket(t, proxyURL, bckReg, nil, true /*cleanup*/)
  2306  	newLocalBckWithProps(t, baseParams, bckEC, defaultECBckProps(o), o)
  2308  	// select a target that loses its mpath(simulate drive death),
  2309  	// and that has mpaths changed (simulate mpath added)
  2310  	tgtList := o.smap.Tmap.ActiveNodes()
  2311  	tgtLost := tgtList[0]
  2313  	tlog.Logf("Put %s in maintenance (no rebalance)\n", tgtLost.StringEx())
  2314  	args := &apc.ActValRmNode{DaemonID: tgtLost.ID(), SkipRebalance: true}
  2315  	_, err := api.StartMaintenance(baseParams, args)
  2316  	tassert.CheckFatal(t, err)
  2317  	registered := false
  2318  	defer func() {
  2319  		if !registered {
  2320  			args := &apc.ActValRmNode{DaemonID: tgtLost.ID()}
  2321  			rebID, err := api.StopMaintenance(baseParams, args)
  2322  			tassert.CheckError(t, err)
  2323  			tools.WaitForRebalanceByID(t, baseParams, rebID)
  2324  		}
  2325  	}()
  2327  	// fill EC bucket
  2328  	wg := sync.WaitGroup{}
  2329  	wg.Add(o.objCount)
  2330  	for i := range o.objCount {
  2331  		go func(i int) {
  2332  			defer wg.Done()
  2333  			objName := fmt.Sprintf(o.pattern, i)
  2334  			createECObject(t, baseParams, bckEC, objName, i, o)
  2335  		}(i)
  2336  	}
  2337  	wg.Wait()
  2339  	if t.Failed() {
  2340  		t.FailNow()
  2341  	}
  2343  	_, _, err = tools.PutRandObjs(tools.PutObjectsArgs{
  2344  		ProxyURL:  proxyURL,
  2345  		Bck:       bckReg,
  2346  		ObjPath:   ecTestDir,
  2347  		ObjCnt:    o.objCount,
  2348  		ObjSize:   fileSize,
  2349  		CksumType: bckReg.DefaultProps(initialClusterConfig).Cksum.Type,
  2350  	})
  2351  	tassert.CheckFatal(t, err)
  2353  	msg := &apc.LsoMsg{}
  2354  	resECOld, err := api.ListObjects(baseParams, bckEC, msg, api.ListArgs{})
  2355  	tassert.CheckFatal(t, err)
  2356  	resRegOld, err := api.ListObjects(baseParams, bckReg, msg, api.ListArgs{})
  2357  	tassert.CheckFatal(t, err)
  2358  	tlog.Logf("Created %d objects in %s, %d objects in %s. Starting rebalance\n",
  2359  		len(resECOld.Entries), bckEC, len(resRegOld.Entries), bckReg)
  2361  	tlog.Logf("Take %s out of maintenance mode ...\n", tgtLost.StringEx())
  2362  	args = &apc.ActValRmNode{DaemonID: tgtLost.ID()}
  2363  	rebID, err := api.StopMaintenance(baseParams, args)
  2364  	tassert.CheckFatal(t, err)
  2365  	registered = true
  2366  	tools.WaitForRebalanceByID(t, baseParams, rebID)
  2368  	tlog.Logln("list objects after rebalance")
  2369  	resECNew, err := api.ListObjects(baseParams, bckEC, msg, api.ListArgs{})
  2370  	tassert.CheckFatal(t, err)
  2371  	tlog.Logf("%d objects in %s after rebalance\n",
  2372  		len(resECNew.Entries), bckEC)
  2373  	resRegNew, err := api.ListObjects(baseParams, bckReg, msg, api.ListArgs{})
  2374  	tassert.CheckFatal(t, err)
  2375  	tlog.Logf("%d objects in %s after rebalance\n",
  2376  		len(resRegNew.Entries), bckReg)
  2378  	tlog.Logln("Test object readability after rebalance")
  2379  	for _, obj := range resECOld.Entries {
  2380  		_, err := api.GetObject(baseParams, bckEC, obj.Name, nil)
  2381  		tassert.CheckError(t, err)
  2382  	}
  2383  	for _, obj := range resRegOld.Entries {
  2384  		_, err := api.GetObject(baseParams, bckReg, obj.Name, nil)
  2385  		tassert.CheckError(t, err)
  2386  	}
  2387  }
// Simple resilver test for an EC bucket (the scenario is implemented by
// ecResilver and is run as a subtest for every entry of ecTests):
//  1. Create a bucket
//  2. Detach a mountpath from one target
//  3. Create enough objects to have at least one per mountpath.
//     So, the minimum is <target count>*<mpath count>*2.
//     For tests, 100 looks good
//  4. Attach the detached mountpath back
//  5. Wait for resilvering to finish
//  6. Check that every object reports a non-zero number of Data and Parity
//     slices in the HEAD response
//  7. Extra check: the number of objects after resilvering equals the initial number
func TestECResilver(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
	var (
		bck = cmn.Bck{
			Name:     testBucketName + "-ec-resilver",
			Provider: apc.AIS,
		}
		proxyURL = tools.RandomProxyURL()
	)
	o := ecOptions{
		objCount:     100,
		concurrency:  8,
		pattern:      "obj-reb-loc-%04d",
		silent:       true,
		objSizeLimit: ecObjLimit,
	}.init(t, proxyURL)
	initMountpaths(t, proxyURL)
	// One subtest per EC configuration; a subtest is skipped with
	// cmn.ErrNotEnoughTargets when the cluster has too few active targets.
	// The shared ecOptions value is overwritten before each run.
	for _, test := range ecTests {
		t.Run(, func(t *testing.T) {
			if o.smap.CountActiveTs() <= {
				t.Skip(cmn.ErrNotEnoughTargets)
			}
			o.parityCnt = test.parity
			o.dataCnt =
			o.objSizeLimit = test.objSizeLimit
			ecResilver(t, o, proxyURL, bck)
		})
	}
}
  2432  func ecResilver(t *testing.T, o *ecOptions, proxyURL string, bck cmn.Bck) {
  2433  	baseParams := tools.BaseAPIParams(proxyURL)
  2435  	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  2437  	tgtList := o.smap.Tmap.ActiveNodes()
  2438  	tgtLost := tgtList[0]
  2439  	lostFSList, err := api.GetMountpaths(baseParams, tgtLost)
  2440  	tassert.CheckFatal(t, err)
  2441  	if len(lostFSList.Available) < 2 {
  2442  		t.Fatalf("%s has only %d mountpaths, required 2 or more", tgtLost.ID(), len(lostFSList.Available))
  2443  	}
  2444  	lostPath := lostFSList.Available[0]
  2445  	err = api.DetachMountpath(baseParams, tgtLost, lostPath, false /*dont-resil*/)
  2446  	tassert.CheckFatal(t, err)
  2447  	time.Sleep(time.Second)
  2449  	wg := sync.WaitGroup{}
  2451  	wg.Add(o.objCount)
  2452  	for i := range o.objCount {
  2453  		go func(i int) {
  2454  			defer wg.Done()
  2455  			objName := fmt.Sprintf(o.pattern, i)
  2456  			createECObject(t, baseParams, bck, objName, i, o)
  2457  		}(i)
  2458  	}
  2459  	wg.Wait()
  2460  	tlog.Logf("Created %d objects\n", o.objCount)
  2462  	err = api.AttachMountpath(baseParams, tgtLost, lostPath)
  2463  	tassert.CheckFatal(t, err)
  2464  	// loop above may fail (even if AddMountpath works) and mark a test failed
  2465  	if t.Failed() {
  2466  		t.FailNow()
  2467  	}
  2469  	tools.WaitForResilvering(t, baseParams, nil)
  2471  	msg := &apc.LsoMsg{Props: apc.GetPropsSize}
  2472  	resEC, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
  2473  	tassert.CheckFatal(t, err)
  2474  	tlog.Logf("%d objects in %s after rebalance\n", len(resEC.Entries), bck)
  2475  	if len(resEC.Entries) != o.objCount {
  2476  		t.Errorf("Expected %d objects after rebalance, found %d", o.objCount, len(resEC.Entries))
  2477  	}
  2479  	for i := range o.objCount {
  2480  		objName := ecTestDir + fmt.Sprintf(o.pattern, i)
  2481  		props, err := api.HeadObject(baseParams, bck, objName, apc.FltPresent, false /*silent*/)
  2482  		if err != nil {
  2483  			t.Errorf("HEAD for %s failed: %v", objName, err)
  2484  		} else if props.EC.DataSlices == 0 || props.EC.ParitySlices == 0 {
  2485  			t.Errorf("%s has not EC info", objName)
  2486  		}
  2487  	}
  2488  }
// TestECAndRegularUnregisterWhileRebalancing runs the following scenario
// (implemented by ecAndRegularUnregisterWhileRebalancing) as a subtest for
// every entry of ecTests:
//  1. Create bucket
//  2. Choose 2 nodes, unregister the first one
//  3. Put N objects to EC-enabled bucket
//  4. Register the target back, rebalance kicks in
//  5. Start reading objects in a loop (nothing should fail)
//  6. Unregister the second target while rebalance is running
//  7. Wait until rebalance finishes (if any is running)
//  8. Stop reading loop and read all objects once more (nothing should fail)
//  9. Get the number of objects in the bucket (must be the same as at start)
func TestECAndRegularUnregisterWhileRebalancing(t *testing.T) {
	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true, RequiredDeployment: tools.ClusterTypeLocal})
	var (
		bckEC = cmn.Bck{
			Name:     testBucketName + "-EC",
			Provider: apc.AIS,
		}
		proxyURL   = tools.RandomProxyURL()
		baseParams = tools.BaseAPIParams(proxyURL)
		o          = ecOptions{
			minTargets:   5,
			objCount:     300,
			concurrency:  8,
			pattern:      "obj-reb-chk-%04d",
			silent:       true,
			objSizeLimit: ecObjLimit,
		}.init(t, proxyURL)
	)
	initMountpaths(t, proxyURL)
	// One subtest per EC configuration; a subtest is skipped with
	// cmn.ErrNotEnoughTargets when the cluster has too few active targets.
	// The shared ecOptions value is overwritten before each run, and the
	// bucket is (re)created inside every subtest.
	for _, test := range ecTests {
		t.Run(, func(t *testing.T) {
			if o.smap.CountActiveTs() <= {
				t.Skip(cmn.ErrNotEnoughTargets)
			}
			o.parityCnt = test.parity
			o.dataCnt =
			o.objSizeLimit = test.objSizeLimit
			newLocalBckWithProps(t, baseParams, bckEC, defaultECBckProps(o), o)
			// Deferred: runs when the subtest function returns, after the scenario below.
			defer tools.WaitForRebalAndResil(t, baseParams)
			ecAndRegularUnregisterWhileRebalancing(t, o, bckEC)
			// Make sure that the next test gets accurate (without any intermediate modifications) smap.
			o.smap = tools.GetClusterMap(t, proxyURL)
		})
	}
}
  2538  func ecAndRegularUnregisterWhileRebalancing(t *testing.T, o *ecOptions, bckEC cmn.Bck) {
  2539  	const startTimeout = 10 * time.Second
  2540  	var (
  2541  		proxyURL   = tools.RandomProxyURL()
  2542  		baseParams = tools.BaseAPIParams(proxyURL)
  2543  		smap       = o.smap
  2544  	)
  2545  	// select a target that loses its mpath(simulate drive death),
  2546  	// and that has mpaths changed (simulate mpath added)
  2547  	tgtList := smap.Tmap.ActiveNodes()
  2548  	tgtLost := tgtList[0]
  2549  	tgtGone := tgtList[1]
  2551  	tlog.Logf("Put %s in maintenance (no rebalance)\n", tgtLost.StringEx())
  2552  	args := &apc.ActValRmNode{DaemonID: tgtLost.ID(), SkipRebalance: true}
  2553  	_, err := api.StartMaintenance(baseParams, args)
  2554  	tassert.CheckFatal(t, err)
  2555  	_, err = tools.WaitForClusterState(proxyURL, "target removed",
  2556  		smap.Version, smap.CountActivePs(), smap.CountActiveTs()-1)
  2557  	tassert.CheckFatal(t, err)
  2558  	registered := false
  2560  	// FIXME: There are multiple defers calling JoinCluster, and it's very unclear what will happen when.
  2561  	// This is the first defer, so it will be called last. Hence, we wait for rebalance to complete here.
  2562  	// See:
  2563  	defer func() {
  2564  		if !registered {
  2565  			args := &apc.ActValRmNode{DaemonID: tgtLost.ID()}
  2566  			rebID, err := api.StopMaintenance(baseParams, args)
  2567  			tassert.CheckError(t, err)
  2568  			tools.WaitForRebalanceByID(t, baseParams, rebID)
  2569  		}
  2570  	}()
  2572  	// fill EC bucket
  2573  	wg := sync.WaitGroup{}
  2574  	wg.Add(o.objCount)
  2575  	for i := range o.objCount {
  2576  		go func(i int) {
  2577  			defer wg.Done()
  2578  			objName := fmt.Sprintf(o.pattern, i)
  2579  			createECObject(t, baseParams, bckEC, objName, i, o)
  2580  		}(i)
  2581  	}
  2582  	wg.Wait()
  2584  	if t.Failed() {
  2585  		t.FailNow()
  2586  	}
  2588  	msg := &apc.LsoMsg{}
  2589  	resECOld, err := api.ListObjects(baseParams, bckEC, msg, api.ListArgs{})
  2590  	tassert.CheckFatal(t, err)
  2591  	tlog.Logf("Created %d objects in %s - starting global rebalance...\n", len(resECOld.Entries), bckEC)
  2593  	tlog.Logf("Take %s out of maintenance mode ...\n", tgtLost.StringEx())
  2594  	args = &apc.ActValRmNode{DaemonID: tgtLost.ID()}
  2595  	_, err = api.StopMaintenance(baseParams, args)
  2596  	tassert.CheckFatal(t, err)
  2597  	registered = true
  2599  	stopCh := cos.NewStopCh()
  2600  	wg.Add(1)
  2601  	defer func() {
  2602  		stopCh.Close()
  2603  		wg.Wait()
  2604  	}()
  2605  	go func() {
  2606  		defer wg.Done()
  2607  		for {
  2608  			for _, obj := range resECOld.Entries {
  2609  				_, err := api.GetObject(baseParams, bckEC, obj.Name, nil)
  2610  				tassert.CheckError(t, err)
  2611  				select {
  2612  				case <-stopCh.Listen():
  2613  					return
  2614  				default:
  2615  				}
  2616  				time.Sleep(time.Millisecond) // do not flood targets...
  2617  			}
  2618  		}
  2619  	}()
  2620  	xargs := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: startTimeout}
  2621  	err = api.WaitForXactionNode(baseParams, &xargs, xactSnapRunning)
  2622  	tassert.CheckError(t, err)
  2624  	err = api.AbortXaction(baseParams, &xargs)
  2625  	tassert.CheckError(t, err)
  2626  	tools.WaitForRebalAndResil(t, baseParams)
  2627  	tassert.CheckError(t, err)
  2629  	tlog.Logf("Put %s in maintenance\n", tgtGone.StringEx())
  2630  	args = &apc.ActValRmNode{DaemonID: tgtGone.ID()}
  2631  	rebID, err := api.StartMaintenance(baseParams, args)
  2632  	tassert.CheckFatal(t, err)
  2633  	defer func() {
  2634  		args = &apc.ActValRmNode{DaemonID: tgtGone.ID()}
  2635  		rebID, _ := api.StopMaintenance(baseParams, args)
  2636  		tools.WaitForRebalanceByID(t, baseParams, rebID)
  2637  	}()
  2639  	stopCh.Close()
  2641  	tassert.CheckFatal(t, err)
  2642  	tools.WaitForRebalanceByID(t, baseParams, rebID)
  2643  	tlog.Logln("Reading objects")
  2644  	for _, obj := range resECOld.Entries {
  2645  		_, err := api.GetObject(baseParams, bckEC, obj.Name, nil)
  2646  		tassert.CheckError(t, err)
  2647  	}
  2648  	tlog.Logln("list objects after rebalance")
  2649  	resECNew, err := api.ListObjects(baseParams, bckEC, msg, api.ListArgs{})
  2650  	tassert.CheckFatal(t, err)
  2651  	tlog.Logf("%d objects in %s after rebalance\n",
  2652  		len(resECNew.Entries), bckEC)
  2653  	if len(resECNew.Entries) != len(resECOld.Entries) {
  2654  		t.Errorf("The number of objects before and after rebalance mismatches")
  2655  	}
  2657  	tlog.Logln("Test object readability after rebalance")
  2658  	for _, obj := range resECOld.Entries {
  2659  		_, err := api.GetObject(baseParams, bckEC, obj.Name, nil)
  2660  		tassert.CheckError(t, err)
  2661  	}
  2663  	tlog.Logln("list objects after reading")
  2664  	resECNew, err = api.ListObjects(baseParams, bckEC, msg, api.ListArgs{})
  2665  	tassert.CheckFatal(t, err)
  2666  	tlog.Logf("%d objects in %s after reading\n",
  2667  		len(resECNew.Entries), bckEC)
  2668  	if len(resECNew.Entries) != len(resECOld.Entries) {
  2669  		t.Errorf("Incorrect number of objects: %d (expected %d)",
  2670  			len(resECNew.Entries), len(resECOld.Entries))
  2671  	}
  2672  }
// ecMountpaths creates o.objCount EC objects, detaches up to o.parityCnt
// mountpaths (at most one randomly chosen mountpath per target), and then
// checks that every object listed before the detach can still be read.
// Detached mountpaths are re-attached by a deferred call on return, which
// also waits for resilvering.
func ecMountpaths(t *testing.T, o *ecOptions, proxyURL string, bck cmn.Bck) {
	// removedMpath records a detached mountpath together with its target.
	type removedMpath struct {
		si    *meta.Snode
		mpath string
	}
	baseParams := tools.BaseAPIParams(proxyURL)
	newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
	// Create all objects concurrently.
	wg := sync.WaitGroup{}
	wg.Add(o.objCount)
	for i := range o.objCount {
		go func(i int) {
			defer wg.Done()
			objName := fmt.Sprintf(o.pattern, i)
			createECObject(t, baseParams, bck, objName, i, o)
		}(i)
	}
	wg.Wait()
	if t.Failed() {
		t.FailNow()
	}
	msg := &apc.LsoMsg{Props: apc.GetPropsSize}
	objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
	tassert.CheckFatal(t, err)
	tlog.Logf("%d objects created, removing %d mountpaths\n", len(objList.Entries), o.parityCnt)
	allMpaths := tools.GetTargetsMountpaths(t, o.smap, baseParams)
	// Detached mountpaths keyed by "<target ID>/<mountpath>".
	removed := make(map[string]*removedMpath, o.parityCnt)
	// Registered before detaching anything, so that whatever has been detached
	// by the time the function returns gets attached back.
	defer func() {
		for _, rmMpath := range removed {
			err := api.AttachMountpath(baseParams,, rmMpath.mpath)
			tassert.CheckError(t, err)
		}
		tools.WaitForResilvering(t, baseParams, nil)
	}()
	// Choose `parity` random mpaths and detach them
	i := 0
	for tsi, paths := range allMpaths {
		mpath := paths[rand.Intn(len(paths))]
		uid := tsi.ID() + "/" + mpath
		if _, ok := removed[uid]; ok {
			continue
		}
		err := api.DetachMountpath(baseParams, tsi, mpath, true /*dont-resil*/)
		tassert.CheckFatal(t, err)
		rmMpath := &removedMpath{si: tsi, mpath: mpath}
		removed[uid] = rmMpath
		i++
		tlog.Logf("%d. Disabled %s : %s\n", i, tsi.StringEx(), mpath)
		if i >= o.parityCnt {
			break
		}
	}
	// All objects must remain readable (read errors are reported as non-fatal).
	for _, en := range objList.Entries {
		_, err := api.GetObject(baseParams, bck, en.Name, nil)
		tassert.CheckError(t, err)
	}
}
  2738  // Test EC metadata versioning.
  2739  func TestECGenerations(t *testing.T) {
  2740  	var (
  2741  		bck = cmn.Bck{
  2742  			Name:     testBucketName + "-obj-gens",
  2743  			Provider: apc.AIS,
  2744  		}
  2745  		proxyURL    = tools.RandomProxyURL()
  2746  		baseParams  = tools.BaseAPIParams(proxyURL)
  2747  		generations = 3
  2748  	)
  2750  	o := ecOptions{
  2751  		minTargets:   4,
  2752  		objCount:     10,
  2753  		concurrency:  4,
  2754  		pattern:      "obj-gen-%04d",
  2755  		silent:       testing.Short(),
  2756  		objSizeLimit: ecObjLimit,
  2757  	}.init(t, proxyURL)
  2758  	initMountpaths(t, proxyURL)
  2759  	lastWrite := make([]int64, o.objCount)
  2761  	for _, test := range ecTests {
  2762  		t.Run(, func(t *testing.T) {
  2763  			if o.smap.CountActiveTs() <= {
  2764  				t.Skip(cmn.ErrNotEnoughTargets)
  2765  			}
  2766  			o.parityCnt = test.parity
  2767  			o.dataCnt =
  2768  			o.objSizeLimit = test.objSizeLimit
  2769  			newLocalBckWithProps(t, baseParams, bck, defaultECBckProps(o), o)
  2771  			wg := sync.WaitGroup{}
  2772  			for gen := range generations {
  2773  				wg.Add(o.objCount)
  2774  				for i := range o.objCount {
  2775  					o.sema.Acquire()
  2776  					go func(i, gen int) {
  2777  						defer func() {
  2778  							o.sema.Release()
  2779  							wg.Done()
  2780  						}()
  2781  						objName := fmt.Sprintf(o.pattern, i)
  2782  						createDamageRestoreECFile(t, baseParams, bck, objName, i, o)
  2783  						if gen == generations-2 {
  2784  							lastWrite[i] = mono.NanoTime()
  2785  						}
  2786  					}(i, gen)
  2787  				}
  2788  				wg.Wait()
  2789  			}
  2791  			currentTime := mono.NanoTime()
  2792  			for i := range o.objCount {
  2793  				objName := ecTestDir + fmt.Sprintf(o.pattern, i)
  2794  				props, err := api.HeadObject(baseParams, bck, objName, apc.FltPresent, false /*silent*/)
  2795  				tassert.CheckError(t, err)
  2796  				if err == nil && props.EC.Generation > lastWrite[i] && props.EC.Generation < currentTime {
  2797  					t.Errorf("Object %s, generation %d expected between %d and %d",
  2798  						objName, props.EC.Generation, lastWrite[i], currentTime)
  2799  				}
  2800  			}
  2801  		})
  2802  	}
  2803  }