github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/promote_test.go (about)

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"fmt"
     9  	iofs "io/fs"
    10  	"math/rand"
    11  	"os"
    12  	"os/exec"
    13  	"path/filepath"
    14  	"strings"
    15  	"testing"
    16  	"time"
    17  
    18  	"github.com/NVIDIA/aistore/api"
    19  	"github.com/NVIDIA/aistore/api/apc"
    20  	"github.com/NVIDIA/aistore/cmn/cos"
    21  	"github.com/NVIDIA/aistore/core/meta"
    22  	"github.com/NVIDIA/aistore/tools"
    23  	"github.com/NVIDIA/aistore/tools/tassert"
    24  	"github.com/NVIDIA/aistore/tools/tlog"
    25  	"github.com/NVIDIA/aistore/xact"
    26  )
    27  
    28  // TODO: stress notFshare
    29  
    30  const subdir = "subdir" // to promote recursively
    31  
    32  type prmTests struct {
    33  	num          int
    34  	singleTarget bool
    35  	recurs       bool
    36  	deleteSrc    bool
    37  	overwriteDst bool
    38  	notFshare    bool
    39  }
    40  
    41  // flow: TestPromote (tests) => runProvider x (provider tests) => test.do(bck)
    42  func TestPromote(t *testing.T) {
    43  	tests := []prmTests{
    44  		// short and long
    45  		{num: 10000, singleTarget: false, recurs: false, deleteSrc: false, overwriteDst: false, notFshare: false},
    46  		{num: 10000, singleTarget: false, recurs: true, deleteSrc: true, overwriteDst: false, notFshare: false},
    47  		{num: 10, singleTarget: false, recurs: false, deleteSrc: true, overwriteDst: true, notFshare: false},
    48  		// long
    49  		{num: 10000, singleTarget: true, recurs: false, deleteSrc: false, overwriteDst: false, notFshare: false},
    50  		{num: 10000, singleTarget: true, recurs: true, deleteSrc: false, overwriteDst: true, notFshare: false},
    51  		{num: 10, singleTarget: true, recurs: false, deleteSrc: false, overwriteDst: false, notFshare: false},
    52  		{num: 10, singleTarget: false, recurs: true, deleteSrc: false, overwriteDst: false, notFshare: false},
    53  		{num: 10000, singleTarget: false, recurs: true, deleteSrc: true, overwriteDst: false, notFshare: true},
    54  		{num: 10000, singleTarget: true, recurs: true, deleteSrc: false, overwriteDst: true, notFshare: true},
    55  		{num: 10, singleTarget: false, recurs: true, deleteSrc: false, overwriteDst: false, notFshare: true},
    56  	}
    57  	// see also "filtering" below
    58  	if testing.Short() {
    59  		tests = tests[0:3]
    60  	}
    61  	for _, test := range tests {
    62  		var name string
    63  		if test.num < 32 {
    64  			name += "/few-files"
    65  		}
    66  		if test.singleTarget {
    67  			name += "/single-target"
    68  		}
    69  		if test.recurs {
    70  			name += "/recurs"
    71  		} else {
    72  			name += "/non-recurs"
    73  		}
    74  		if test.deleteSrc {
    75  			name += "/delete-src"
    76  		} else {
    77  			name += "/keep-src"
    78  		}
    79  		if test.overwriteDst {
    80  			name += "/overwrite-dst"
    81  		} else {
    82  			name += "/skip-existing-dst"
    83  		}
    84  		if test.notFshare {
    85  			name += "/execute-autonomously"
    86  		} else {
    87  			name += "/collaborate-on-fshare"
    88  		}
    89  		name = name[1:]
    90  		t.Run(name, func(t *testing.T) { runProviderTests(t, test.do) })
    91  	}
    92  }
    93  
    94  // generate ngen files in tempdir and tempdir/subdir, respectively
    95  var genfiles = `for f in {%d..%d}; do b=$RANDOM;
    96  for i in {1..3}; do echo $b; done > %s/$f.test;
    97  for i in {1..5}; do echo $b --- $b; done > %s/%s/$f.test.test;
    98  done`
    99  
   100  func (test *prmTests) generate(t *testing.T, from, to int, tempdir, subdir string) {
   101  	tlog.Logf("Generating %d (%d + %d) files...\n", test.num*2, test.num, test.num)
   102  	cmd := fmt.Sprintf(genfiles, from, to, tempdir, tempdir, subdir)
   103  	_, err := exec.Command("bash", "-c", cmd).CombinedOutput()
   104  	tassert.CheckFatal(t, err)
   105  }
   106  
   107  func (test *prmTests) do(t *testing.T, bck *meta.Bck) {
   108  	if bck.IsCloud() {
   109  		// NOTE: filtering out some test permutations to save time
   110  		if testing.Short() {
   111  			fmt := "%s is cloud bucket"
   112  			tools.ShortSkipf(t, fmt, bck)
   113  		}
   114  		if strings.Contains(t.Name(), "few-files") ||
   115  			strings.Contains(t.Name(), "single-target") ||
   116  			strings.Contains(t.Name(), "recurs") {
   117  			t.Skipf("skipping %s for Cloud bucket", t.Name())
   118  		}
   119  
   120  		// also, reducing the number of files to promote
   121  		test.num = min(test.num, 50)
   122  	}
   123  
   124  	var (
   125  		m          = ioContext{t: t, bck: bck.Clone()}
   126  		from       = 10000
   127  		to         = from + test.num - 1
   128  		baseParams = tools.BaseAPIParams()
   129  	)
   130  	m.saveCluState(m.proxyURL)
   131  
   132  	tempdir, err := os.MkdirTemp("", "prm")
   133  	tassert.CheckFatal(t, err)
   134  	subdirFQN := filepath.Join(tempdir, subdir)
   135  	err = cos.CreateDir(subdirFQN)
   136  	tassert.CheckFatal(t, err)
   137  
   138  	if m.bck.IsRemote() {
   139  		m.del()
   140  	}
   141  	t.Cleanup(func() {
   142  		_ = os.RemoveAll(tempdir)
   143  		if m.bck.IsRemote() {
   144  			m.del()
   145  		}
   146  	})
   147  	test.generate(t, from, to, tempdir, subdir)
   148  
   149  	// prepare request
   150  	args := apc.PromoteArgs{
   151  		SrcFQN:         tempdir,
   152  		Recursive:      test.recurs,
   153  		OverwriteDst:   test.overwriteDst,
   154  		DeleteSrc:      test.deleteSrc,
   155  		SrcIsNotFshare: test.notFshare,
   156  	}
   157  	var target *meta.Snode
   158  	if test.singleTarget {
   159  		target, _ = m.smap.GetRandTarget()
   160  		tlog.Logf("Promoting via %s\n", target.StringEx())
   161  		args.DaemonID = target.ID()
   162  	}
   163  
   164  	// (I) do
   165  	xid, err := api.Promote(baseParams, m.bck, &args)
   166  	tassert.CheckFatal(t, err)
   167  
   168  	// wait for the operation to finish and collect stats
   169  	locObjs, outObjs, inObjs := test.wait(t, xid, tempdir, target, &m)
   170  
   171  	// list
   172  	tlog.Logln("Listing and counting...")
   173  	list, err := api.ListObjects(baseParams, m.bck, nil, api.ListArgs{})
   174  	tassert.CheckFatal(t, err)
   175  
   176  	//
   177  	// run checks
   178  	//
   179  	cnt, cntsub := countFiles(t, tempdir)
   180  	if !test.deleteSrc {
   181  		tassert.Errorf(t, cnt == test.num && cntsub == test.num,
   182  			"delete-src == false: expected cnt (%d) == cntsub (%d) == num (%d) gererated",
   183  			cnt, cntsub, test.num)
   184  	}
   185  
   186  	// num promoted
   187  	expNum, s := test.num, ""
   188  	if test.recurs {
   189  		expNum = test.num * 2
   190  		s = " recursively"
   191  	}
   192  	tassert.Fatalf(t, len(list.Entries) == expNum, "expected to%s promote %d files, got %d", s, expNum, len(list.Entries))
   193  
   194  	// delete source
   195  	if test.deleteSrc {
   196  		if test.recurs {
   197  			tassert.Errorf(t, cnt == 0 && cntsub == 0,
   198  				"delete-src == true, recursive: expected cnt (%d) == cntsub (%d) == 0",
   199  				cnt, cntsub)
   200  		} else {
   201  			tassert.Errorf(t, cnt == 0 && cntsub == test.num,
   202  				"delete-src == true, non-recursive: expected cnt (%d) == 0 and cntsub (%d) == (%d)",
   203  				cnt, cntsub, test.num)
   204  		}
   205  	}
   206  	// vs xaction stats
   207  	if xid != "" {
   208  		if test.singleTarget {
   209  			tassert.Errorf(t, locObjs == int64(expNum),
   210  				"single-target promote: expected promoted-objs-num==%d, got %d", expNum, locObjs)
   211  		} else if !test.notFshare {
   212  			tassert.Errorf(t, int(locObjs) == expNum && int(inObjs) == 0 && int(outObjs) == 0,
   213  				"file share: expected each target to handle the entire content locally, got (loc, out, in) = (%d, %d, %d)",
   214  				locObjs, outObjs, inObjs)
   215  		}
   216  	}
   217  
   218  	// (II) do more when _not_ overwriting destination, namely:
   219  	// delete a few promoted objects, and then immediately
   220  	// promote them again from the original (non-deleted) source
   221  	if test.overwriteDst || test.deleteSrc {
   222  		return
   223  	}
   224  	tlog.Logln("Running test case _not_ to overwrite destination...")
   225  	l := len(list.Entries)
   226  	numDel := max(l/100, 2)
   227  	idx := rand.Intn(l)
   228  	if idx+numDel >= l {
   229  		if numDel >= l {
   230  			idx, numDel = 0, l
   231  		} else {
   232  			idx = l - numDel
   233  		}
   234  	}
   235  	tlog.Logf("Deleting %d random objects\n", numDel)
   236  	for i := range numDel {
   237  		name := list.Entries[idx+i].Name
   238  		err := api.DeleteObject(baseParams, m.bck, name)
   239  		tassert.CheckFatal(t, err)
   240  	}
   241  
   242  	// do
   243  	xid, err = api.Promote(baseParams, m.bck, &args)
   244  	tassert.CheckFatal(t, err)
   245  
   246  	locObjs, outObjs, inObjs = test.wait(t, xid, tempdir, target, &m)
   247  
   248  	// list
   249  	tlog.Logln("Listing and counting the 2nd time...")
   250  	list, err = api.ListObjects(baseParams, m.bck, nil, api.ListArgs{})
   251  	tassert.CheckFatal(t, err)
   252  
   253  	// num promoted
   254  	tassert.Errorf(t, len(list.Entries) == expNum, "expected to%s promote %d, got %d", s, test.num*2, len(list.Entries))
   255  
   256  	// xaction stats versus `numDel` - but note:
   257  	// other than the selected few objects that were deleted prior to promoting the 2nd time,
   258  	// all the rest already exists and is not expected to "show up" in the stats
   259  	if xid != "" {
   260  		if test.singleTarget {
   261  			tassert.Errorf(t, locObjs == int64(numDel),
   262  				"single-target promote: expected to \"undelete\" %d objects, got %d", expNum, locObjs)
   263  		} else if !test.notFshare {
   264  			tassert.Errorf(t, int(locObjs) == numDel && int(inObjs) == 0 && int(outObjs) == 0,
   265  				"file share: expected each target to handle the entire content locally, got (loc, out, in) = (%d, %d, %d)",
   266  				locObjs, outObjs, inObjs)
   267  		}
   268  	}
   269  }
   270  
   271  // wait for an xaction (if there's one) and then query all targets for stats
   272  func (test *prmTests) wait(t *testing.T, xid, tempdir string, target *meta.Snode, m *ioContext) (locObjs, outObjs, inObjs int64) {
   273  	time.Sleep(4 * time.Second)
   274  	xargs := xact.ArgsMsg{Kind: apc.ActPromote, Timeout: tools.RebalanceTimeout}
   275  	xname := fmt.Sprintf("%q", apc.ActPromote)
   276  	if xid != "" {
   277  		xargs.ID = xid
   278  		xname = fmt.Sprintf("x-%s[%s]", apc.ActPromote, xid)
   279  		tassert.Errorf(t, cos.IsValidUUID(xid), "expecting valid x-UUID %q", xid)
   280  	}
   281  
   282  	// wait "cases" 1. through 3.
   283  	if xid != "" && !test.singleTarget { // 1. cluster-wide xaction
   284  		tlog.Logf("Waiting for global %s(%s=>%s)\n", xname, tempdir, m.bck)
   285  		notifStatus, err := api.WaitForXactionIC(baseParams, &xargs)
   286  		tassert.CheckFatal(t, err)
   287  		if notifStatus != nil && (notifStatus.AbortedX || notifStatus.ErrMsg != "") {
   288  			tlog.Logf("Warning: notif-status: %+v\n", notifStatus)
   289  		}
   290  	} else if xid != "" && test.singleTarget { // 2. single-target xaction
   291  		xargs.DaemonID = target.ID()
   292  		tlog.Logf("Waiting for %s(%s=>%s) at %s\n", xname, tempdir, m.bck, target.StringEx())
   293  		err := api.WaitForXactionNode(baseParams, &xargs, xactSnapNotRunning)
   294  		tassert.CheckFatal(t, err)
   295  	} else { // 3. synchronous execution
   296  		tlog.Logf("Promoting without xaction (%s=>%s)\n", tempdir, m.bck)
   297  	}
   298  
   299  	// collect stats
   300  	xs, err := api.QueryXactionSnaps(baseParams, &xargs)
   301  	tassert.CheckFatal(t, err)
   302  	if xid != "" {
   303  		locObjs, outObjs, inObjs = xs.ObjCounts(xid)
   304  		tlog.Logf("%s[%s]: (loc, out, in) = (%d, %d, %d)\n", xname, xid, locObjs, outObjs, inObjs)
   305  		return
   306  	}
   307  	uuids := xs.GetUUIDs()
   308  	for _, xid := range uuids {
   309  		locObjs, outObjs, inObjs = xs.ObjCounts(xid)
   310  		tlog.Logf("%s[%s]: (loc, out, in) = (%d, %d, %d)\n", xname, xid, locObjs, outObjs, inObjs)
   311  	}
   312  	return 0, 0, 0
   313  }
   314  
   315  func countFiles(t *testing.T, dir string) (n, nsubdir int) {
   316  	f := func(path string, de iofs.DirEntry, err error) error {
   317  		if err == nil && de.Type().IsRegular() {
   318  			if filepath.Dir(path) == dir {
   319  				n++
   320  			} else {
   321  				nsubdir++
   322  			}
   323  		}
   324  		return nil
   325  	}
   326  	err := filepath.WalkDir(dir, f)
   327  	tassert.CheckFatal(t, err)
   328  	return
   329  }