github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/promote_test.go (about) 1 // Package integration_test. 2 /* 3 * Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package integration_test 6 7 import ( 8 "fmt" 9 iofs "io/fs" 10 "math/rand" 11 "os" 12 "os/exec" 13 "path/filepath" 14 "strings" 15 "testing" 16 "time" 17 18 "github.com/NVIDIA/aistore/api" 19 "github.com/NVIDIA/aistore/api/apc" 20 "github.com/NVIDIA/aistore/cmn/cos" 21 "github.com/NVIDIA/aistore/core/meta" 22 "github.com/NVIDIA/aistore/tools" 23 "github.com/NVIDIA/aistore/tools/tassert" 24 "github.com/NVIDIA/aistore/tools/tlog" 25 "github.com/NVIDIA/aistore/xact" 26 ) 27 28 // TODO: stress notFshare 29 30 const subdir = "subdir" // to promote recursively 31 32 type prmTests struct { 33 num int 34 singleTarget bool 35 recurs bool 36 deleteSrc bool 37 overwriteDst bool 38 notFshare bool 39 } 40 41 // flow: TestPromote (tests) => runProvider x (provider tests) => test.do(bck) 42 func TestPromote(t *testing.T) { 43 tests := []prmTests{ 44 // short and long 45 {num: 10000, singleTarget: false, recurs: false, deleteSrc: false, overwriteDst: false, notFshare: false}, 46 {num: 10000, singleTarget: false, recurs: true, deleteSrc: true, overwriteDst: false, notFshare: false}, 47 {num: 10, singleTarget: false, recurs: false, deleteSrc: true, overwriteDst: true, notFshare: false}, 48 // long 49 {num: 10000, singleTarget: true, recurs: false, deleteSrc: false, overwriteDst: false, notFshare: false}, 50 {num: 10000, singleTarget: true, recurs: true, deleteSrc: false, overwriteDst: true, notFshare: false}, 51 {num: 10, singleTarget: true, recurs: false, deleteSrc: false, overwriteDst: false, notFshare: false}, 52 {num: 10, singleTarget: false, recurs: true, deleteSrc: false, overwriteDst: false, notFshare: false}, 53 {num: 10000, singleTarget: false, recurs: true, deleteSrc: true, overwriteDst: false, notFshare: true}, 54 {num: 10000, singleTarget: true, recurs: true, deleteSrc: false, overwriteDst: true, notFshare: true}, 55 {num: 10, singleTarget: false, recurs: true, deleteSrc: false, overwriteDst: false, notFshare: true}, 56 } 57 // see also "filtering" below 58 if testing.Short() { 59 tests = tests[0:3] 60 } 61 for _, test := range tests { 62 var name string 63 if test.num < 32 { 64 name += "/few-files" 65 } 66 if test.singleTarget { 67 name += "/single-target" 68 } 69 if test.recurs { 70 name += "/recurs" 71 } else { 72 name += "/non-recurs" 73 } 74 if test.deleteSrc { 75 name += "/delete-src" 76 } else { 77 name += "/keep-src" 78 } 79 if test.overwriteDst { 80 name += "/overwrite-dst" 81 } else { 82 name += "/skip-existing-dst" 83 } 84 if test.notFshare { 85 name += "/execute-autonomously" 86 } else { 87 name += "/collaborate-on-fshare" 88 } 89 name = name[1:] 90 t.Run(name, func(t *testing.T) { runProviderTests(t, test.do) }) 91 } 92 } 93 94 // generate ngen files in tempdir and tempdir/subdir, respectively 95 var genfiles = `for f in {%d..%d}; do b=$RANDOM; 96 for i in {1..3}; do echo $b; done > %s/$f.test; 97 for i in {1..5}; do echo $b --- $b; done > %s/%s/$f.test.test; 98 done` 99 100 func (test *prmTests) generate(t *testing.T, from, to int, tempdir, subdir string) { 101 tlog.Logf("Generating %d (%d + %d) files...\n", test.num*2, test.num, test.num) 102 cmd := fmt.Sprintf(genfiles, from, to, tempdir, tempdir, subdir) 103 _, err := exec.Command("bash", "-c", cmd).CombinedOutput() 104 tassert.CheckFatal(t, err) 105 } 106 107 func (test *prmTests) do(t *testing.T, bck *meta.Bck) { 108 if bck.IsCloud() { 109 // NOTE: filtering out some test permutations to save time 110 if testing.Short() { 111 fmt := "%s is cloud bucket" 112 tools.ShortSkipf(t, fmt, bck) 113 } 114 if strings.Contains(t.Name(), "few-files") || 115 strings.Contains(t.Name(), "single-target") || 116 strings.Contains(t.Name(), "recurs") { 117 t.Skipf("skipping %s for Cloud bucket", t.Name()) 118 } 119 120 // also, reducing the number of files to promote 121 test.num = min(test.num, 50) 122 } 123 124 var ( 125 m = ioContext{t: t, bck: bck.Clone()} 126 from = 10000 127 to = from + test.num - 1 128 baseParams = tools.BaseAPIParams() 129 ) 130 m.saveCluState(m.proxyURL) 131 132 tempdir, err := os.MkdirTemp("", "prm") 133 tassert.CheckFatal(t, err) 134 subdirFQN := filepath.Join(tempdir, subdir) 135 err = cos.CreateDir(subdirFQN) 136 tassert.CheckFatal(t, err) 137 138 if m.bck.IsRemote() { 139 m.del() 140 } 141 t.Cleanup(func() { 142 _ = os.RemoveAll(tempdir) 143 if m.bck.IsRemote() { 144 m.del() 145 } 146 }) 147 test.generate(t, from, to, tempdir, subdir) 148 149 // prepare request 150 args := apc.PromoteArgs{ 151 SrcFQN: tempdir, 152 Recursive: test.recurs, 153 OverwriteDst: test.overwriteDst, 154 DeleteSrc: test.deleteSrc, 155 SrcIsNotFshare: test.notFshare, 156 } 157 var target *meta.Snode 158 if test.singleTarget { 159 target, _ = m.smap.GetRandTarget() 160 tlog.Logf("Promoting via %s\n", target.StringEx()) 161 args.DaemonID = target.ID() 162 } 163 164 // (I) do 165 xid, err := api.Promote(baseParams, m.bck, &args) 166 tassert.CheckFatal(t, err) 167 168 // wait for the operation to finish and collect stats 169 locObjs, outObjs, inObjs := test.wait(t, xid, tempdir, target, &m) 170 171 // list 172 tlog.Logln("Listing and counting...") 173 list, err := api.ListObjects(baseParams, m.bck, nil, api.ListArgs{}) 174 tassert.CheckFatal(t, err) 175 176 // 177 // run checks 178 // 179 cnt, cntsub := countFiles(t, tempdir) 180 if !test.deleteSrc { 181 tassert.Errorf(t, cnt == test.num && cntsub == test.num, 182 "delete-src == false: expected cnt (%d) == cntsub (%d) == num (%d) gererated", 183 cnt, cntsub, test.num) 184 } 185 186 // num promoted 187 expNum, s := test.num, "" 188 if test.recurs { 189 expNum = test.num * 2 190 s = " recursively" 191 } 192 tassert.Fatalf(t, len(list.Entries) == expNum, "expected to%s promote %d files, got %d", s, expNum, len(list.Entries)) 193 194 // delete source 195 if test.deleteSrc { 196 if test.recurs { 197 tassert.Errorf(t, cnt == 0 && cntsub == 0, 198 "delete-src == true, recursive: expected cnt (%d) == cntsub (%d) == 0", 199 cnt, cntsub) 200 } else { 201 tassert.Errorf(t, cnt == 0 && cntsub == test.num, 202 "delete-src == true, non-recursive: expected cnt (%d) == 0 and cntsub (%d) == (%d)", 203 cnt, cntsub, test.num) 204 } 205 } 206 // vs xaction stats 207 if xid != "" { 208 if test.singleTarget { 209 tassert.Errorf(t, locObjs == int64(expNum), 210 "single-target promote: expected promoted-objs-num==%d, got %d", expNum, locObjs) 211 } else if !test.notFshare { 212 tassert.Errorf(t, int(locObjs) == expNum && int(inObjs) == 0 && int(outObjs) == 0, 213 "file share: expected each target to handle the entire content locally, got (loc, out, in) = (%d, %d, %d)", 214 locObjs, outObjs, inObjs) 215 } 216 } 217 218 // (II) do more when _not_ overwriting destination, namely: 219 // delete a few promoted objects, and then immediately 220 // promote them again from the original (non-deleted) source 221 if test.overwriteDst || test.deleteSrc { 222 return 223 } 224 tlog.Logln("Running test case _not_ to overwrite destination...") 225 l := len(list.Entries) 226 numDel := max(l/100, 2) 227 idx := rand.Intn(l) 228 if idx+numDel >= l { 229 if numDel >= l { 230 idx, numDel = 0, l 231 } else { 232 idx = l - numDel 233 } 234 } 235 tlog.Logf("Deleting %d random objects\n", numDel) 236 for i := range numDel { 237 name := list.Entries[idx+i].Name 238 err := api.DeleteObject(baseParams, m.bck, name) 239 tassert.CheckFatal(t, err) 240 } 241 242 // do 243 xid, err = api.Promote(baseParams, m.bck, &args) 244 tassert.CheckFatal(t, err) 245 246 locObjs, outObjs, inObjs = test.wait(t, xid, tempdir, target, &m) 247 248 // list 249 tlog.Logln("Listing and counting the 2nd time...") 250 list, err = api.ListObjects(baseParams, m.bck, nil, api.ListArgs{}) 251 tassert.CheckFatal(t, err) 252 253 // num promoted 254 tassert.Errorf(t, len(list.Entries) == expNum, "expected to%s promote %d, got %d", s, test.num*2, len(list.Entries)) 255 256 // xaction stats versus `numDel` - but note: 257 // other than the selected few objects that were deleted prior to promoting the 2nd time, 258 // all the rest already exists and is not expected to "show up" in the stats 259 if xid != "" { 260 if test.singleTarget { 261 tassert.Errorf(t, locObjs == int64(numDel), 262 "single-target promote: expected to \"undelete\" %d objects, got %d", expNum, locObjs) 263 } else if !test.notFshare { 264 tassert.Errorf(t, int(locObjs) == numDel && int(inObjs) == 0 && int(outObjs) == 0, 265 "file share: expected each target to handle the entire content locally, got (loc, out, in) = (%d, %d, %d)", 266 locObjs, outObjs, inObjs) 267 } 268 } 269 } 270 271 // wait for an xaction (if there's one) and then query all targets for stats 272 func (test *prmTests) wait(t *testing.T, xid, tempdir string, target *meta.Snode, m *ioContext) (locObjs, outObjs, inObjs int64) { 273 time.Sleep(4 * time.Second) 274 xargs := xact.ArgsMsg{Kind: apc.ActPromote, Timeout: tools.RebalanceTimeout} 275 xname := fmt.Sprintf("%q", apc.ActPromote) 276 if xid != "" { 277 xargs.ID = xid 278 xname = fmt.Sprintf("x-%s[%s]", apc.ActPromote, xid) 279 tassert.Errorf(t, cos.IsValidUUID(xid), "expecting valid x-UUID %q", xid) 280 } 281 282 // wait "cases" 1. through 3. 283 if xid != "" && !test.singleTarget { // 1. cluster-wide xaction 284 tlog.Logf("Waiting for global %s(%s=>%s)\n", xname, tempdir, m.bck) 285 notifStatus, err := api.WaitForXactionIC(baseParams, &xargs) 286 tassert.CheckFatal(t, err) 287 if notifStatus != nil && (notifStatus.AbortedX || notifStatus.ErrMsg != "") { 288 tlog.Logf("Warning: notif-status: %+v\n", notifStatus) 289 } 290 } else if xid != "" && test.singleTarget { // 2. single-target xaction 291 xargs.DaemonID = target.ID() 292 tlog.Logf("Waiting for %s(%s=>%s) at %s\n", xname, tempdir, m.bck, target.StringEx()) 293 err := api.WaitForXactionNode(baseParams, &xargs, xactSnapNotRunning) 294 tassert.CheckFatal(t, err) 295 } else { // 3. synchronous execution 296 tlog.Logf("Promoting without xaction (%s=>%s)\n", tempdir, m.bck) 297 } 298 299 // collect stats 300 xs, err := api.QueryXactionSnaps(baseParams, &xargs) 301 tassert.CheckFatal(t, err) 302 if xid != "" { 303 locObjs, outObjs, inObjs = xs.ObjCounts(xid) 304 tlog.Logf("%s[%s]: (loc, out, in) = (%d, %d, %d)\n", xname, xid, locObjs, outObjs, inObjs) 305 return 306 } 307 uuids := xs.GetUUIDs() 308 for _, xid := range uuids { 309 locObjs, outObjs, inObjs = xs.ObjCounts(xid) 310 tlog.Logf("%s[%s]: (loc, out, in) = (%d, %d, %d)\n", xname, xid, locObjs, outObjs, inObjs) 311 } 312 return 0, 0, 0 313 } 314 315 func countFiles(t *testing.T, dir string) (n, nsubdir int) { 316 f := func(path string, de iofs.DirEntry, err error) error { 317 if err == nil && de.Type().IsRegular() { 318 if filepath.Dir(path) == dir { 319 n++ 320 } else { 321 nsubdir++ 322 } 323 } 324 return nil 325 } 326 err := filepath.WalkDir(dir, f) 327 tassert.CheckFatal(t, err) 328 return 329 }