// Package integration_test.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package integration_test

import (
	"context"
	"errors"
	"fmt"
	"math/rand"
	"net/http"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/NVIDIA/aistore/api"
	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/feat"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/fs"
	"github.com/NVIDIA/aistore/tools"
	"github.com/NVIDIA/aistore/tools/readers"
	"github.com/NVIDIA/aistore/tools/tassert"
	"github.com/NVIDIA/aistore/tools/tlog"
	"github.com/NVIDIA/aistore/tools/trand"
	"github.com/NVIDIA/aistore/xact"
	jsoniter "github.com/json-iterator/go"
)

// more tools

// fraction of the even per-target share that each target must have received
// for checkObjectDistribution to consider a rebalance successful
const rebalanceObjectDistributionTestCoef = 0.3

const (
	prefixDir     = "filter"
	largeFileSize = 4 * cos.MiB

	workerCnt = 10
)

const testMpath = "/tmp/ais/mountpath"

var (
	cliBck         cmn.Bck
	errObjectFound = errors.New("found") // to interrupt fs.Walk when object found
	fsOnce         sync.Once
)

// ioContext bundles the per-test state shared by the helpers below:
// the bucket under test, cluster snapshot, object names, and PUT/GET knobs.
type ioContext struct {
	t                   *testing.T
	smap                *meta.Smap    // cluster map captured by saveCluState
	controlCh           chan struct{} // signals "other tasks" mid-way through gets (see get)
	stopCh              chan struct{} // stops getsUntilStop
	objNames            []string      // names of the objects PUT by this test
	bck                 cmn.Bck
	fileSize            uint64
	proxyURL            string
	prefix              string
	otherTasksToTrigger int
	originalTargetCount int
	originalProxyCount  int
	num                 int // number of objects to PUT
	numGetsEachFile     int
	getErrIsFatal       bool
	silent              bool
	fixedSize           bool
	deleteRemoteBckObjs bool
	ordered             bool // true - object names make sequence, false - names are random

	numGetErrs atomic.Uint64
	numPutErrs int
}

// initAndSaveState initializes the context and snapshots the current
// cluster state (active target/proxy counts) for later comparison.
func (m *ioContext) initAndSaveState(cleanup bool) {
	m.init(cleanup)
	m.saveCluState(m.proxyURL)
}

// saveCluState records the cluster map and the active proxy/target counts.
func (m *ioContext) saveCluState(proxyURL string) {
	m.smap = tools.GetClusterMap(m.t, proxyURL)
	m.originalTargetCount = m.smap.CountActiveTs()
	m.originalProxyCount = m.smap.CountActivePs()
	tlog.Logf("targets: %d, proxies: %d\n", m.originalTargetCount, m.originalProxyCount)
}

// waitAndCheckCluState waits for a new cluster map version and verifies
// that the proxy/target counts recorded by saveCluState are restored.
func (m *ioContext) waitAndCheckCluState() {
	smap, err := tools.WaitForClusterState(
		m.proxyURL,
		"cluster state",
		m.smap.Version,
		m.originalProxyCount,
		m.originalTargetCount,
	)
	tassert.CheckFatal(m.t, err)
	m.checkCluState(smap)
}

// checkCluState fails the test if the given cluster map does not have the
// originally recorded number of active proxies and targets.
func (m *ioContext) checkCluState(smap *meta.Smap) {
	proxyCount := smap.CountActivePs()
	targetCount := smap.CountActiveTs()
	if targetCount != m.originalTargetCount ||
		proxyCount != m.originalProxyCount {
		m.t.Errorf(
			"cluster state is not preserved. targets (before: %d, now: %d); proxies: (before: %d, now: %d)",
			targetCount, m.originalTargetCount,
			proxyCount, m.originalProxyCount,
		)
	}
}

// init fills in defaults (proxy URL, file size, bucket name/provider, etc.),
// pre-cleans remote buckets, and - when cleanup is true - registers _cleanup
// to run when the test exits.
func (m *ioContext) init(cleanup bool) {
	m.proxyURL = tools.RandomProxyURL()
	if m.proxyURL == "" {
		// if random selection failed, use RO url
		m.proxyURL = tools.GetPrimaryURL()
	}
	if m.fileSize == 0 {
		m.fileSize = cos.KiB
	}
	if m.num > 0 {
		m.objNames = make([]string, 0, m.num)
	}
	if m.otherTasksToTrigger > 0 {
		m.controlCh = make(chan struct{}, m.otherTasksToTrigger)
	}
	if m.bck.Name == "" {
		m.bck.Name = trand.String(15)
	}
	if m.bck.Provider == "" {
		m.bck.Provider = apc.AIS
	}
	if m.numGetsEachFile == 0 {
		m.numGetsEachFile = 1
	}
	m.stopCh = make(chan struct{})

	if m.bck.IsRemote() {
		if m.deleteRemoteBckObjs {
			m.del(-1 /*delete all*/, 0 /* lsmsg.Flags */)
		} else {
			tools.EvictRemoteBucket(m.t, m.proxyURL, m.bck) // evict from AIStore
		}
	}

	if cleanup {
		// cleanup m.bck upon exit from the test
		m.t.Cleanup(m._cleanup)
	}
}

// _cleanup deletes the test objects and, for remote buckets, evicts the
// bucket so no cached copies remain in the cluster.
func (m *ioContext) _cleanup() {
	m.del()
	if m.bck.IsRemote() {
		// Ensure all local objects are removed.
		tools.EvictRemoteBucket(m.t, m.proxyURL, m.bck)
	}
}

// expectTargets skips the test unless the cluster has at least n targets.
func (m *ioContext) expectTargets(n int) {
	if m.originalTargetCount < n {
		m.t.Skipf("Must have %d or more targets in the cluster, have only %d", n, m.originalTargetCount)
	}
}

// expectProxies skips the test unless the cluster has at least n proxies.
func (m *ioContext) expectProxies(n int) {
	if m.originalProxyCount < n {
		m.t.Skipf("Must have %d or more proxies in the cluster, have only %d", n, m.originalProxyCount)
	}
}

// checkObjectDistribution verifies that, after rebalance, every target holds
// at least rebalanceObjectDistributionTestCoef of the even per-target share.
func (m *ioContext) checkObjectDistribution(t *testing.T) {
	var (
		requiredCount     = int64(rebalanceObjectDistributionTestCoef * (float64(m.num) / float64(m.originalTargetCount)))
		targetObjectCount = make(map[string]int64)
	)
	tlog.Logf("Checking if each target has a required number of object in bucket %s...\n", m.bck)
	baseParams := tools.BaseAPIParams(m.proxyURL)
	lst, err := api.ListObjects(baseParams, m.bck, &apc.LsoMsg{Props: apc.GetPropsLocation}, api.ListArgs{})
	tassert.CheckFatal(t, err)
	for _, obj := range lst.Entries {
		// count objects per target, keyed by target ID parsed from the location
		tname, _ := core.ParseObjLoc(obj.Location)
		tid := meta.N2ID(tname)
		targetObjectCount[tid]++
	}
	if len(targetObjectCount) != m.originalTargetCount {
		t.Fatalf("Rebalance error, %d/%d targets received no objects from bucket %s\n",
			m.originalTargetCount-len(targetObjectCount), m.originalTargetCount, m.bck)
	}
	for targetURL, objCount := range targetObjectCount {
		if objCount < requiredCount {
			t.Fatalf("Rebalance error, target %s didn't receive required number of objects\n", targetURL)
		}
	}
}

// puts PUTs m.num random objects into the bucket; for non-AIS buckets it
// delegates to remotePuts. The optional variadic flag makes PUT errors
// non-fatal (collected in m.numPutErrs instead).
func (m *ioContext) puts(ignoreErrs ...bool) {
	if !m.bck.IsAIS() {
		m.remotePuts(false /*evict*/)
		return
	}
	baseParams := tools.BaseAPIParams(m.proxyURL)
	p, err := api.HeadBucket(baseParams, m.bck, false /* don't add */)
	tassert.CheckFatal(m.t, err)

	var ignoreErr bool
	if len(ignoreErrs) > 0 {
		ignoreErr = ignoreErrs[0]
	}
	if !m.silent {
		var s, k string
		if m.fixedSize {
			s = fmt.Sprintf(" (size %d)", m.fileSize)
		} else if m.fileSize > 0 {
			s = fmt.Sprintf(" (approx. size %d)", m.fileSize)
		}
		if k = m.prefix; k != "" {
			k = "/" + k + "*"
		}
		tlog.Logf("PUT %d objects%s => %s%s\n", m.num, s, m.bck, k)
	}
	m.objNames, m.numPutErrs, err = tools.PutRandObjs(tools.PutObjectsArgs{
		ProxyURL:  m.proxyURL,
		Bck:       m.bck,
		ObjPath:   m.prefix,
		ObjCnt:    m.num,
		ObjSize:   m.fileSize,
		FixedSize: m.fixedSize,
		CksumType: p.Cksum.Type,
		WorkerCnt: 0, // TODO: Should we set something custom?
		IgnoreErr: ignoreErr,
		Ordered:   m.ordered,
	})
	tassert.CheckFatal(m.t, err)
}

// remotePuts by default empties remote bucket and puts new `m.num` objects
// into the bucket. If `override` parameter is set then the existing objects
// are updated with new ones (new version and checksum).
func (m *ioContext) remotePuts(evict bool, overrides ...bool) {
	var override bool
	if len(overrides) > 0 {
		override = overrides[0]
	}

	if !override {
		// Cleanup the remote bucket.
		m.del()
		m.objNames = m.objNames[:0]
	}

	m._remoteFill(m.num, evict, override)
}

// remoteRefill calculates number of missing objects and refills the bucket.
// It is expected that the number of missing objects is positive meaning that
// some of the objects were removed before calling remoteRefill.
func (m *ioContext) remoteRefill() {
	var (
		baseParams = tools.BaseAPIParams()
		msg        = &apc.LsoMsg{Prefix: m.prefix, Props: apc.GetPropsName}
	)

	objList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
	tassert.CheckFatal(m.t, err)

	// rebuild m.objNames from what is currently listed
	m.objNames = m.objNames[:0]
	for _, obj := range objList.Entries {
		m.objNames = append(m.objNames, obj.Name)
	}

	leftToFill := m.num - len(objList.Entries)
	tassert.Errorf(m.t, leftToFill > 0, "leftToFill %d", leftToFill)

	m._remoteFill(leftToFill, false /*evict*/, false /*override*/)
}

// _remoteFill PUTs objCnt objects into the (remote) bucket, at most 20
// concurrently; when override is set it reuses existing names from
// m.objNames, otherwise it generates new ones and appends them.
// Optionally evicts the bucket afterwards.
func (m *ioContext) _remoteFill(objCnt int, evict, override bool) {
	var (
		baseParams = tools.BaseAPIParams()
		errCh      = make(chan error, objCnt)
		wg         = cos.NewLimitedWaitGroup(20, 0)
	)
	if !m.silent {
		tlog.Logf("remote PUT %d objects (size %s) => %s\n", objCnt, cos.ToSizeIEC(int64(m.fileSize), 0), m.bck)
	}
	p, err := api.HeadBucket(baseParams, m.bck, false /* don't add */)
	tassert.CheckFatal(m.t, err)

	for i := range objCnt {
		r, err := readers.NewRand(int64(m.fileSize), p.Cksum.Type)
		tassert.CheckFatal(m.t, err)

		var objName string
		if override {
			objName = m.objNames[i]
		} else if m.ordered {
			objName = fmt.Sprintf("%s%d", m.prefix, i)
		} else {
			objName = fmt.Sprintf("%s%s-%d", m.prefix, trand.String(8), i)
		}
		wg.Add(1)
		go func() {
			defer wg.Done()
			tools.Put(m.proxyURL, m.bck, objName, r, errCh)
		}()
		if !override {
			m.objNames = append(m.objNames, objName)
		}
	}
	wg.Wait()
	tassert.SelectErr(m.t, errCh, "put", true)
	tlog.Logf("remote bucket %s: %d cached objects\n", m.bck, m.num)

	if evict {
		m.evict()
	}
}

// evict verifies that exactly m.num objects are listed and then evicts the
// remote bucket (removes the in-cluster copies, keeps the remote originals).
func (m *ioContext) evict() {
	var (
		baseParams = tools.BaseAPIParams()
		msg        = &apc.LsoMsg{Prefix: m.prefix, Props: apc.GetPropsName}
	)

	objList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
	tassert.CheckFatal(m.t, err)
	if len(objList.Entries) != m.num {
		m.t.Fatalf("list_objects err: %d != %d", len(objList.Entries), m.num)
	}

	tlog.Logf("evicting remote bucket %s...\n", m.bck)
	err = api.EvictRemoteBucket(baseParams, m.bck, false)
	tassert.CheckFatal(m.t, err)
}

// remotePrefetch cold-GETs (at most) the first prefetchCnt listed objects,
// one goroutine per object, to populate the in-cluster cache.
func (m *ioContext) remotePrefetch(prefetchCnt int) {
	var (
		baseParams = tools.BaseAPIParams()
		msg        = &apc.LsoMsg{Prefix: m.prefix, Props: apc.GetPropsName}
	)

	objList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
	tassert.CheckFatal(m.t, err)

	tlog.Logf("remote PREFETCH %d objects...\n", prefetchCnt)

	wg := &sync.WaitGroup{}
	for idx, obj := range objList.Entries {
		if idx >= prefetchCnt {
			break
		}

		wg.Add(1)
		go func(obj *cmn.LsoEnt) {
			_, err := api.GetObject(baseParams, m.bck, obj.Name, nil)
			tassert.CheckError(m.t, err)
			wg.Done()
		}(obj)
	}
	wg.Wait()
}

// isContextDeadline reports whether err is (or wraps the text of)
// context.DeadlineExceeded; the string check catches errors that embed the
// deadline message without wrapping the sentinel.
func isContextDeadline(err error) bool {
	if err == nil {
		return false
	}
	return err == context.DeadlineExceeded || strings.Contains(err.Error(), context.DeadlineExceeded.Error())
}

// bucket cleanup
// is called in a variety of ways including (post-test) t.Cleanup => _cleanup()
// and (pre-test) via deleteRemoteBckObjs

const maxDelObjErrCount = 100

// del removes objects from m.bck: with no opts it deletes everything
// (ignoring m.prefix); opts[0] >= 0 limits the count; opts[1], when present,
// overrides the list-objects flags (e.g., to HEAD the remote bucket).
// Deletions run in goroutines, at most 16 concurrently.
func (m *ioContext) del(opts ...int) {
	var (
		herr        *cmn.ErrHTTP
		toRemoveCnt = -1 // remove all or opts[0]
		baseParams  = tools.BaseAPIParams()
	)
	// checks, params
	exists, err := api.QueryBuckets(baseParams, cmn.QueryBcks(m.bck), apc.FltExists)
	if isContextDeadline(err) {
		if m.bck.IsRemote() {
			// retry once on deadline; if it still times out, assume not found
			time.Sleep(time.Second)
			tlog.Logf("Warning: 2nd attempt to query buckets %q\n", cmn.QueryBcks(m.bck))
			exists, err = api.QueryBuckets(baseParams, cmn.QueryBcks(m.bck), apc.FltExists)
			if isContextDeadline(err) {
				tlog.Logf("Error: failing to query buckets %q: %v - proceeding anyway...\n", cmn.QueryBcks(m.bck), err)
				exists, err = false, nil
			}
		}
	}
	tassert.CheckFatal(m.t, err)
	if !exists {
		return
	}

	// list
	lsmsg := &apc.LsoMsg{
		Prefix: m.prefix,
		Props:  apc.GetPropsName,
		Flags:  apc.LsBckPresent, // don't lookup unless overridden by the variadic (below)
	}
	if len(opts) > 0 {
		toRemoveCnt = opts[0]
		if len(opts) > 1 {
			lsmsg.Flags = uint64(opts[1]) // do HEAD(remote-bucket)
		}
	}
	if toRemoveCnt < 0 && m.prefix != "" {
		lsmsg.Prefix = "" // all means all
	}
	objList, err := api.ListObjects(baseParams, m.bck, lsmsg, api.ListArgs{})
	if err != nil {
		if errors.As(err, &herr) && herr.Status == http.StatusNotFound {
			return
		}
		emsg := err.Error()
		// ignore client timeout awaiting headers
		if strings.Contains(emsg, "awaiting") && strings.Contains(emsg, "headers") {
			return
		}
	}
	tassert.CheckFatal(m.t, err)

	// delete
	toRemove := objList.Entries
	if toRemoveCnt >= 0 {
		toRemove = toRemove[:toRemoveCnt]
	}
	l := len(toRemove)
	if l == 0 {
		return
	}
	tlog.Logf("deleting %d object%s from %s\n", l, cos.Plural(l), m.bck.Cname(""))
	var (
		errCnt atomic.Int64
		wg     = cos.NewLimitedWaitGroup(16, l)
	)
	for _, obj := range toRemove {
		if errCnt.Load() > maxDelObjErrCount {
			tassert.CheckFatal(m.t, errors.New("too many errors"))
			break
		}
		wg.Add(1)
		go func(obj *cmn.LsoEnt) {
			m._delOne(baseParams, obj, &errCnt)
			wg.Done()
		}(obj)
	}
	wg.Wait()
}

// _delOne deletes a single object, tolerating benign errors (object already
// gone, closed idle connection) and retrying once, after a short sleep, on
// known transient cloud failures; remaining errors bump errCnt.
func (m *ioContext) _delOne(baseParams api.BaseParams, obj *cmn.LsoEnt, errCnt *atomic.Int64) {
	err := api.DeleteObject(baseParams, m.bck, obj.Name)
	if err == nil {
		return
	}
	//
	// excepting benign (TODO: rid of strings.Contains)
	//
	const sleepRetry = 2 * time.Second
	e := strings.ToLower(err.Error())
	switch {
	case cmn.IsErrObjNought(err):
		return
	case strings.Contains(e, "server closed idle connection"):
		return // see (unexported) http.exportErrServerClosedIdle in the Go source
	case cos.IsErrConnectionNotAvail(err):
		// connection unavailable weighs heavily towards the abort threshold
		errCnt.Add(maxDelObjErrCount/10 - 1)
	// retry
	case m.bck.IsCloud() && (cos.IsErrConnectionReset(err) || strings.Contains(e, "reset by peer")):
		time.Sleep(sleepRetry)
		err = api.DeleteObject(baseParams, m.bck, obj.Name)
	case m.bck.IsCloud() && strings.Contains(e, "try again"):
		// aws-error[InternalError: We encountered an internal error. Please try again.]
		time.Sleep(sleepRetry)
		err = api.DeleteObject(baseParams, m.bck, obj.Name)
	case m.bck.IsCloud() && apc.ToScheme(m.bck.Provider) == apc.GSScheme &&
		strings.Contains(e, "gateway") && strings.Contains(e, "timeout"):
		// e.g:. "googleapi: Error 504: , gatewayTimeout" (where the gateway is in fact LB)
		time.Sleep(sleepRetry)
		err = api.DeleteObject(baseParams, m.bck, obj.Name)
	}

	if err == nil || cmn.IsErrObjNought(err) {
		return
	}
	errCnt.Inc()
	if m.bck.IsCloud() && errCnt.Load() < 5 {
		tlog.Logf("Warning: failed to cleanup %s: %v\n", m.bck.Cname(""), err)
	}
	tassert.CheckError(m.t, err)
}

// get performs a single GET (optionally with checksum validation) of the
// object at idx modulo len(m.objNames), counts errors, logs progress every
// 5000 requests, and at the halfway point signals otherTasksToTrigger tasks.
func (m *ioContext) get(baseParams api.BaseParams, idx, totalGets int, getArgs *api.GetArgs, validate bool) {
	var (
		err     error
		objName = m.objNames[idx%len(m.objNames)]
	)
	if validate {
		_, err = api.GetObjectWithValidation(baseParams, m.bck, objName, getArgs)
	} else {
		_, err = api.GetObject(baseParams, m.bck, objName, getArgs)
	}
	if err != nil {
		if m.getErrIsFatal {
			m.t.Error(err)
		}
		m.numGetErrs.Inc()
	}
	if m.getErrIsFatal && m.numGetErrs.Load() > 0 {
		return
	}
	if idx > 0 && idx%5000 == 0 && !m.silent {
		if totalGets > 0 {
			tlog.Logf(" %d/%d GET requests completed...\n", idx, totalGets)
		} else {
			tlog.Logf(" %d GET requests completed...\n", idx)
		}
	}

	// Tell other tasks they can begin to do work in parallel
	if totalGets > 0 && idx == totalGets/2 { // only for `m.gets(nil, false)`
		for range m.otherTasksToTrigger {
			m.controlCh <- struct{}{}
		}
	}
}

// gets GETs every object m.numGetsEachFile times, at most 20 concurrently.
func (m *ioContext) gets(getArgs *api.GetArgs, withValidation bool) {
	var (
		baseParams = tools.BaseAPIParams()
		totalGets  = m.num * m.numGetsEachFile
	)
	if !m.silent {
		if m.numGetsEachFile == 1 {
			tlog.Logf("GET %d objects from %s\n", m.num, m.bck)
		} else {
			tlog.Logf("GET %d objects %d times from %s\n", m.num, m.numGetsEachFile, m.bck)
		}
	}
	wg := cos.NewLimitedWaitGroup(20, 0)
	for i := range totalGets {
		wg.Add(1)
		go func(idx int) {
			m.get(baseParams, idx, totalGets, getArgs, withValidation)
			wg.Done()
		}(i)
	}
	wg.Wait()
}

// getsUntilStop keeps issuing GETs (at most 20 concurrently) until stopGets
// signals via m.stopCh; it throttles briefly every 5000 requests.
func (m *ioContext) getsUntilStop() {
	var (
		idx        = 0
		baseParams = tools.BaseAPIParams()
		wg         = cos.NewLimitedWaitGroup(20, 0)
	)
	for {
		select {
		case <-m.stopCh:
			wg.Wait()
			return
		default:
			wg.Add(1)
			go func(idx int) {
				defer wg.Done()
				m.get(baseParams, idx, 0, nil /*api.GetArgs*/, false /*validate*/)
			}(idx)
			idx++
			if idx%5000 == 0 {
				time.Sleep(500 * time.Millisecond) // prevents generating too many GET requests
			}
		}
	}
}

// stopGets signals getsUntilStop to stop.
func (m *ioContext) stopGets() {
	m.stopCh <- struct{}{}
}

// ensureNumCopies waits for the make-n-copies xaction to finish and then
// verifies that all m.num objects have exactly expectedCopies replicas
// (or more, when greaterOk is set) and a non-empty access time.
func (m *ioContext) ensureNumCopies(baseParams api.BaseParams, expectedCopies int, greaterOk bool) {
	m.t.Helper()
	time.Sleep(time.Second)
	xargs := xact.ArgsMsg{Kind: apc.ActMakeNCopies, Bck: m.bck, Timeout: tools.RebalanceTimeout}
	_, err := api.WaitForXactionIC(baseParams, &xargs)
	tassert.CheckFatal(m.t, err)

	// List Bucket - primarily for the copies
	msg := &apc.LsoMsg{Flags: apc.LsObjCached, Prefix: m.prefix}
	msg.AddProps(apc.GetPropsCopies, apc.GetPropsAtime, apc.GetPropsStatus)
	objectList, err := api.ListObjects(baseParams, m.bck, msg, api.ListArgs{})
	tassert.CheckFatal(m.t, err)

	total := 0
	// histogram: number of copies -> number of objects with that many copies
	copiesToNumObjects := make(map[int]int)
	for _, entry := range objectList.Entries {
		if entry.Atime == "" {
			m.t.Errorf("%s: access time is empty", m.bck.Cname(entry.Name))
		}
		total++
		if greaterOk && int(entry.Copies) > expectedCopies {
			copiesToNumObjects[expectedCopies]++
		} else {
			copiesToNumObjects[int(entry.Copies)]++
		}
	}
	tlog.Logf("objects (total, copies) = (%d, %v)\n", total, copiesToNumObjects)
	if total != m.num {
		m.t.Errorf("list_objects: expecting %d objects, got %d", m.num, total)
	}

	if len(copiesToNumObjects) != 1 {
		s, _ := jsoniter.MarshalIndent(copiesToNumObjects, "", " ")
		m.t.Errorf("some objects do not have expected number of copies: %s", s)
	}

	for copies := range copiesToNumObjects {
		if copies != expectedCopies {
			m.t.Errorf("Expecting %d objects all to have %d replicas, got: %d", total, expectedCopies, copies)
		}
	}
}

// ensureNoGetErrors fails the test if any GETs recorded errors.
func (m *ioContext) ensureNoGetErrors() {
	m.t.Helper()
	if m.numGetErrs.Load() > 0 {
		m.t.Fatalf("Number of get errors is non-zero: %d\n", m.numGetErrs.Load())
	}
}

func (m *ioContext) ensureNumMountpaths(target *meta.Snode, mpList *apc.MountpathList) {
	ensureNumMountpaths(m.t, target, mpList)
}

// ensureNumMountpaths polls (up to ~6s) until the target's mountpath counts
// (available/disabled/waiting-dd) match the expected list, erroring otherwise.
func ensureNumMountpaths(t *testing.T, target *meta.Snode, mpList *apc.MountpathList) {
	t.Helper()
	tname := target.StringEx()
	baseParams := tools.BaseAPIParams()
	mpl, err := api.GetMountpaths(baseParams, target)
	tassert.CheckFatal(t, err)
	for range 6 {
		if len(mpl.Available) == len(mpList.Available) &&
			len(mpl.Disabled) == len(mpList.Disabled) &&
			len(mpl.WaitingDD) == len(mpList.WaitingDD) {
			break
		}
		time.Sleep(time.Second)
	}
	if len(mpl.Available) != len(mpList.Available) {
		t.Errorf("%s ended up with %d mountpaths (dd=%v, disabled=%v), expecting: %d",
			tname, len(mpl.Available), mpl.WaitingDD, mpl.Disabled, len(mpList.Available))
	} else if len(mpl.Disabled) != len(mpList.Disabled) || len(mpl.WaitingDD) != len(mpList.WaitingDD) {
		t.Errorf("%s ended up with (dd=%v, disabled=%v) mountpaths, expecting (%v and %v), respectively",
			tname, mpl.WaitingDD, mpl.Disabled, mpList.WaitingDD, mpList.Disabled)
	}
}

// ensureNoDisabledMountpaths polls (up to ~6s) and fails the test if the
// target still has disabled or waiting-for-dd mountpaths at test start.
func ensureNoDisabledMountpaths(t *testing.T, target *meta.Snode, mpList *apc.MountpathList) {
	t.Helper()
	for range 6 {
		if len(mpList.WaitingDD) == 0 && len(mpList.Disabled) == 0 {
			break
		}
		time.Sleep(time.Second)
	}
	if len(mpList.WaitingDD) != 0 || len(mpList.Disabled) != 0 {
		t.Fatalf("%s: disabled mountpaths at the start of the %q (avail=%d, dd=%v, disabled=%v)\n",
			target.StringEx(), t.Name(), len(mpList.Available), mpList.WaitingDD, mpList.Disabled)
	}
}

// background: shuffle=on increases the chance to have still-running rebalance
// at the beginning of a new rename, rebalance, copy-bucket and similar
func ensurePrevRebalanceIsFinished(baseParams api.BaseParams, err error) bool {
	herr, ok := err.(*cmn.ErrHTTP)
	if !ok {
		return false
	}
	// TODO: improve checking for cmn.ErrLimitedCoexistence
	if !strings.Contains(herr.Message, "is currently running,") {
		return false
	}
	tlog.Logln("Warning: wait for unfinished rebalance(?)")
	time.Sleep(5 * time.Second)
	args := xact.ArgsMsg{Kind: apc.ActRebalance, Timeout: tools.RebalanceTimeout}
	_, _ = api.WaitForXactionIC(baseParams, &args)
	time.Sleep(5 * time.Second)
	return true
}

// startMaintenanceNoRebalance puts a random target into maintenance mode
// (skipping rebalance), waits for the cluster map to reflect one fewer
// active target, and returns that target.
func (m *ioContext) startMaintenanceNoRebalance() *meta.Snode {
	target, _ := m.smap.GetRandTarget()
	tlog.Logf("Put %s in maintenance\n", target.StringEx())
	args := &apc.ActValRmNode{DaemonID: target.ID(), SkipRebalance: true}
	_, err := api.StartMaintenance(tools.BaseAPIParams(m.proxyURL), args)
	tassert.CheckFatal(m.t, err)
	m.smap, err = tools.WaitForClusterState(
		m.proxyURL,
		"put target in maintenance",
		m.smap.Version,
		m.smap.CountActivePs(),
		m.smap.CountActiveTs()-1,
	)
	tassert.CheckFatal(m.t, err)
	return target
}

// stopMaintenance takes the target out of maintenance and, if that triggered
// a rebalance, waits for the rebalance xaction to start running on the node;
// returns the rebalance ID ("" if none was triggered).
func (m *ioContext) stopMaintenance(target *meta.Snode) string {
	tlog.Logf("Take %s out of maintenance mode...\n", target.StringEx())
	bp := tools.BaseAPIParams(m.proxyURL)
	rebID, err := api.StopMaintenance(bp, &apc.ActValRmNode{DaemonID: target.ID()})
	tassert.CheckFatal(m.t, err)
	if rebID == "" {
		return ""
	}
	tassert.Fatalf(m.t, xact.IsValidRebID(rebID), "invalid reb ID %q", rebID)

	xargs := xact.ArgsMsg{ID: rebID, Kind: apc.ActRebalance, Timeout: tools.RebalanceStartTimeout}
	api.WaitForXactionNode(bp, &xargs, xactSnapRunning)

	return rebID
}

// setNonDefaultBucketProps sets a randomized, non-default combination of
// mirror/checksum/extra bucket properties on m.bck.
func (m *ioContext) setNonDefaultBucketProps() {
	baseParams := tools.BaseAPIParams()
	copies := int64(rand.Intn(2)) // 0 or 1 extra copy
	props := &cmn.BpropsToSet{
		Mirror: &cmn.MirrorConfToSet{
			Enabled: apc.Ptr(copies > 0),
			Copies:  apc.Ptr[int64](copies),
		},
		Cksum: &cmn.CksumConfToSet{
			Type:            apc.Ptr(cos.ChecksumSHA512),
			EnableReadRange: apc.Ptr(true),
			ValidateWarmGet: apc.Ptr(true),
			ValidateColdGet: apc.Ptr(false),
		},
		Extra: &cmn.ExtraToSet{
			AWS: &cmn.ExtraPropsAWSToSet{CloudRegion: apc.Ptr("us-notheast")},
		},
	}
	_, err := api.SetBucketProps(baseParams, m.bck, props)
	tassert.CheckFatal(m.t, err)
}

// runProviderTests runs f as a subtest against a matrix of bucket flavors:
// local, remote, remote-AIS, local-with-cloud-backend, 3-way mirrored,
// and EC(2,2); buckets created here are destroyed on cleanup.
func runProviderTests(t *testing.T, f func(*testing.T, *meta.Bck)) {
	tests := []struct {
		name       string
		bck        cmn.Bck
		backendBck cmn.Bck
		skipArgs   tools.SkipTestArgs
		props      *cmn.BpropsToSet
	}{
		{
			name: "local",
			bck:  cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
		},
		{
			name: "remote",
			bck:  cliBck,
			skipArgs: tools.SkipTestArgs{
				Long:      true,
				RemoteBck: true,
			},
		},
		{
			name: "remote_ais",
			bck: cmn.Bck{
				Name:     trand.String(10),
				Provider: apc.AIS, Ns: cmn.Ns{UUID: tools.RemoteCluster.UUID},
			},
			skipArgs: tools.SkipTestArgs{
				RequiresRemoteCluster: true,
				Long:                  true,
			},
		},
		{
			name:       "backend",
			bck:        cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
			backendBck: cliBck,
			skipArgs: tools.SkipTestArgs{
				Long:      true,
				RemoteBck: true,
			},
		},
		{
			name: "local_3_copies",
			bck:  cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
			props: &cmn.BpropsToSet{
				Mirror: &cmn.MirrorConfToSet{
					Enabled: apc.Ptr(true),
					Copies:  apc.Ptr[int64](3),
				},
			},
			skipArgs: tools.SkipTestArgs{Long: true},
		},
		{
			name: "local_ec_2_2",
			bck:  cmn.Bck{Name: trand.String(10), Provider: apc.AIS},
			props: &cmn.BpropsToSet{
				EC: &cmn.ECConfToSet{
					DataSlices:   apc.Ptr(2),
					ParitySlices: apc.Ptr(2),
					ObjSizeLimit: apc.Ptr[int64](0),
				},
			},
			skipArgs: tools.SkipTestArgs{Long: true},
		},
	}
	for i := range tests {
		test := tests[i]
		t.Run(test.name, func(t *testing.T) {
			if test.backendBck.IsEmpty() {
				test.skipArgs.Bck = test.bck
			} else {
				test.skipArgs.Bck = test.backendBck
				if !test.backendBck.IsCloud() {
					t.Skipf("backend bucket must be a Cloud bucket (have %q)", test.backendBck)
				}
			}
			tools.CheckSkip(t, &test.skipArgs)

			baseParams := tools.BaseAPIParams()

			// mirror/EC flavors need enough mountpaths/targets - skip otherwise
			if test.props != nil && test.props.Mirror != nil {
				skip := tools.SkipTestArgs{
					MinMountpaths: int(*test.props.Mirror.Copies),
				}
				tools.CheckSkip(t, &skip)
			}
			if test.props != nil && test.props.EC != nil {
				skip := tools.SkipTestArgs{
					MinTargets: *test.props.EC.DataSlices + *test.props.EC.ParitySlices + 1,
				}
				tools.CheckSkip(t, &skip)
			}

			if test.bck.IsAIS() || test.bck.IsRemoteAIS() {
				err := api.CreateBucket(baseParams, test.bck, test.props)
				tassert.CheckFatal(t, err)

				if !test.backendBck.IsEmpty() {
					tools.SetBackendBck(t, baseParams, test.bck, test.backendBck)
				}
				t.Cleanup(func() {
					api.DestroyBucket(baseParams, test.bck)
				})
			}

			p, err := api.HeadBucket(baseParams, test.bck, false /* don't add */)
			tassert.CheckFatal(t, err)

			bck := meta.CloneBck(&test.bck)
			bck.Props = p

			f(t, bck)
		})
	}
}

// numberOfFilesWithPrefix counts the names in fileNames that start with namePrefix.
func numberOfFilesWithPrefix(fileNames []string, namePrefix string) int {
	numFiles := 0
	for _, fileName := range fileNames {
		if strings.HasPrefix(fileName, namePrefix) {
			numFiles++
		}
	}
	return numFiles
}

// prefixCreateFiles PUTs 100 randomly-named objects plus a few fixed
// "dir/..." names (virtual-directory corner cases) under prefixDir, and
// returns the names relative to prefixDir.
func prefixCreateFiles(t *testing.T, proxyURL string, bck cmn.Bck, cksumType string) []string {
	const (
		objCnt   = 100
		fileSize = cos.KiB
	)

	// Create specific files to test corner cases.
	var (
		extraNames = []string{"dir/obj01", "dir/obj02", "dir/obj03", "dir1/dir2/obj04", "dir1/dir2/obj05"}
		fileNames  = make([]string, 0, objCnt)
		wg         = &sync.WaitGroup{}
		errCh      = make(chan error, objCnt+len(extraNames))
	)

	for range objCnt {
		fileName := trand.String(20)
		keyName := fmt.Sprintf("%s/%s", prefixDir, fileName)

		// NOTE: Since this test is to test prefix fetch, the reader type is ignored, always use rand reader.
		r, err := readers.NewRand(fileSize, cksumType)
		if err != nil {
			t.Fatal(err)
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			tools.Put(proxyURL, bck, keyName, r, errCh)
		}()
		fileNames = append(fileNames, fileName)
	}

	for _, fName := range extraNames {
		keyName := fmt.Sprintf("%s/%s", prefixDir, fName)
		// NOTE: Since this test is to test prefix fetch, the reader type is ignored, always use rand reader.
		r, err := readers.NewRand(fileSize, cksumType)
		if err != nil {
			t.Fatal(err)
		}

		wg.Add(1)
		go func() {
			defer wg.Done()
			tools.Put(proxyURL, bck, keyName, r, errCh)
		}()
		fileNames = append(fileNames, fName)
	}

	wg.Wait()
	tassert.SelectErr(t, errCh, "put", false)
	return fileNames
}

// prefixLookupDefault lists objects by each single-letter prefix a..z and
// compares the returned counts against the expected counts from fileNames.
func prefixLookupDefault(t *testing.T, proxyURL string, bck cmn.Bck, fileNames []string) {
	tlog.Logf("Looking up for files in alphabetic order\n")

	var (
		letters    = "abcdefghijklmnopqrstuvwxyz"
		baseParams = tools.BaseAPIParams(proxyURL)
	)
	for i := range len(letters) {
		key := letters[i : i+1]
		lookFor := fmt.Sprintf("%s/%s", prefixDir, key)
		msg := &apc.LsoMsg{Prefix: lookFor}
		objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
		if err != nil {
			t.Errorf("List files with prefix failed, err = %v", err)
			return
		}

		numFiles := len(objList.Entries)
		realNumFiles := numberOfFilesWithPrefix(fileNames, key)

		if numFiles == realNumFiles {
			if numFiles != 0 {
				tlog.Logf("Found %v files starting with %q\n", numFiles, key)
			}
		} else {
			t.Errorf("Expected number of files with prefix %q is %v but found %v files", key, realNumFiles, numFiles)
			tlog.Logf("Objects returned:\n")
			for id, oo := range objList.Entries {
				tlog.Logf("    %d[%d]. %s\n", i, id, oo.Name)
			}
		}
	}
}

// prefixLookupCornerCases exercises directory-like prefixes ("dir", "dir/",
// "dir1", "dir1/") with virtual-directory optimization disabled for the
// duration of the test, comparing listed counts against expected counts.
func prefixLookupCornerCases(t *testing.T, proxyURL string, bck cmn.Bck, objNames []string) {
	tlog.Logf("Testing corner cases\n")

	tools.SetClusterConfig(t, cos.StrKVs{"features": feat.DontOptimizeVirtualDir.String()})
	t.Cleanup(func() {
		tools.SetClusterConfig(t, cos.StrKVs{"features": "0"})
	})

	tests := []struct {
		title  string
		prefix string
	}{
		{"Entire list (dir)", "dir"},
		{"dir/", "dir/"},
		{"dir1", "dir1"},
		{"dir1/", "dir1/"},
	}
	baseParams := tools.BaseAPIParams(proxyURL)
	for idx, test := range tests {
		p := fmt.Sprintf("%s/%s", prefixDir, test.prefix)

		// expected count: objNames whose full name starts with the prefix
		objCount := 0
		for _, objName := range objNames {
			fullObjName := fmt.Sprintf("%s/%s", prefixDir, objName)
			if strings.HasPrefix(fullObjName, p) {
				objCount++
			}
		}

		tlog.Logf("%d. Prefix: %s [%s]\n", idx, test.title, p)
		msg := &apc.LsoMsg{Prefix: p}
		objList, err := api.ListObjects(baseParams, bck, msg, api.ListArgs{})
		if err != nil {
			t.Errorf("List files with prefix failed, err = %v", err)
			return
		}

		if len(objList.Entries) != objCount {
			t.Errorf("Expected number of objects with prefix %q is %d but found %d",
				test.prefix, objCount, len(objList.Entries))
			tlog.Logf("Objects returned:\n")
			for id, oo := range objList.Entries {
				tlog.Logf("    %d[%d]. %s\n", idx, id, oo.Name)
			}
		}
	}
}

// prefixLookup runs both the default and the corner-case prefix listings.
func prefixLookup(t *testing.T, proxyURL string, bck cmn.Bck, fileNames []string) {
	prefixLookupDefault(t, proxyURL, bck, fileNames)
	prefixLookupCornerCases(t, proxyURL, bck, fileNames)
}

// prefixCleanup deletes all objects created under prefixDir, at most 40
// concurrently, and fails the test on the first reported DELETE error.
func prefixCleanup(t *testing.T, proxyURL string, bck cmn.Bck, fileNames []string) {
	var (
		wg    = cos.NewLimitedWaitGroup(40, 0)
		errCh = make(chan error, len(fileNames))
	)

	for _, fileName := range fileNames {
		keyName := fmt.Sprintf("%s/%s", prefixDir, fileName)
		wg.Add(1)
		go func() {
			defer wg.Done()
			tools.Del(proxyURL, bck, keyName, nil, errCh, true)
		}()
	}
	wg.Wait()

	select {
	case e := <-errCh:
		tlog.Logf("Failed to DEL: %s\n", e)
		t.Fail()
	default:
	}
}

// initFS configures the local (test) filesystem layer: copies the backend
// config from the primary daemon and registers the content-type resolvers.
// Errors fetching daemon config are logged but do not abort.
func initFS() {
	proxyURL := tools.GetPrimaryURL()
	primary, err := tools.GetPrimaryProxy(proxyURL)
	if err != nil {
		tlog.Logf("Error: %v", err)
	}
	baseParams := tools.BaseAPIParams(proxyURL)
	cfg, err := api.GetDaemonConfig(baseParams, primary)
	if err != nil {
		tlog.Logf("Error: %v", err)
	}

	config := cmn.GCO.BeginUpdate()
	config.TestFSP.Count = 1
	config.Backend = cfg.Backend
	cmn.GCO.CommitUpdate(config)

	fs.CSM.Reg(fs.ObjectType, &fs.ObjectContentResolver{})
	fs.CSM.Reg(fs.WorkfileType, &fs.WorkfileContentResolver{})
	fs.CSM.Reg(fs.ECSliceType, &fs.ECSliceContentResolver{})
	fs.CSM.Reg(fs.ECMetaType, &fs.ECMetaContentResolver{})
}

// initMountpaths (local deployments only) initializes the fs layer once and
// registers every available mountpath of every target with it, so that tests
// can walk target disks directly.
func initMountpaths(t *testing.T, proxyURL string) {
	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeLocal})
	fsOnce.Do(initFS)
	baseParams := tools.BaseAPIParams(proxyURL)
	fs.TestNew(nil)
	smap := tools.GetClusterMap(t, proxyURL)
	for _, target := range smap.Tmap {
		mpathList, err := api.GetMountpaths(baseParams, target)
		tassert.CheckFatal(t, err)
		ensureNoDisabledMountpaths(t, target, mpathList)

		for _, mpath := range mpathList.Available {
			fs.Add(mpath, target.ID())
		}
	}
}

// findObjOnDisk walks the bucket's object content on the local mountpaths
// and returns the FQN of the object with the given name ("" if not found);
// the walk is short-circuited via errObjectFound once a match is hit.
func findObjOnDisk(bck cmn.Bck, objName string) (fqn string) {
	fsWalkFunc := func(path string, de fs.DirEntry) error {
		if fqn != "" {
			return filepath.SkipDir
		}
		if de.IsDir() {
			return nil
		}

		ct, err := core.NewCTFromFQN(path, nil)
		if err != nil {
			return nil
		}
		if ct.ObjectName() == objName {
			fqn = path
			return errObjectFound
		}
		return nil
	}

	fs.WalkBck(&fs.WalkBckOpts{
		WalkOpts: fs.WalkOpts{
			Bck:      bck,
			CTs:      []string{fs.ObjectType},
			Callback: fsWalkFunc,
			Sorted:   true, // false is unsupported and asserts
		},
	})
	return fqn
}

// detectNewBucket returns the first bucket (by name) present in newList but
// not in oldList; errors if no such bucket exists.
func detectNewBucket(oldList, newList cmn.Bcks) (cmn.Bck, error) {
	for _, nbck := range newList {
		found := false
		for _, obck := range oldList {
			if obck.Name == nbck.Name {
				found = true
				break
			}
		}
		if !found {
			return nbck, nil
		}
	}
	return cmn.Bck{}, fmt.Errorf("new bucket is not found (old: %v, new: %v)", oldList, newList)
}

// xaction is running
func xactSnapRunning(snaps xact.MultiSnap) (running, resetProbeFreq bool) {
	tid, _, err := snaps.RunningTarget("")
	debug.AssertNoErr(err)
	running = tid != ""
	resetProbeFreq = !running // e.g. idle
	return
}

// finished = did start in the past (use check above to confirm) and currently not running
func xactSnapNotRunning(snaps xact.MultiSnap) (bool, bool) {
	running, resetProbeFreq := xactSnapRunning(snaps)
	return !running, resetProbeFreq
}