github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/archive_test.go (about) 1 // Package integration_test. 2 /* 3 * Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package integration_test 6 7 import ( 8 "archive/tar" 9 "fmt" 10 "math/rand" 11 "net/url" 12 "os" 13 "path" 14 "path/filepath" 15 "testing" 16 "time" 17 18 "github.com/NVIDIA/aistore/api" 19 "github.com/NVIDIA/aistore/api/apc" 20 "github.com/NVIDIA/aistore/cmn" 21 "github.com/NVIDIA/aistore/cmn/archive" 22 "github.com/NVIDIA/aistore/cmn/atomic" 23 "github.com/NVIDIA/aistore/cmn/cos" 24 "github.com/NVIDIA/aistore/cmn/mono" 25 "github.com/NVIDIA/aistore/core/meta" 26 "github.com/NVIDIA/aistore/tools" 27 "github.com/NVIDIA/aistore/tools/readers" 28 "github.com/NVIDIA/aistore/tools/tarch" 29 "github.com/NVIDIA/aistore/tools/tassert" 30 "github.com/NVIDIA/aistore/tools/tlog" 31 "github.com/NVIDIA/aistore/tools/trand" 32 "github.com/NVIDIA/aistore/xact" 33 ) 34 35 // 36 // GET from 37 // 38 39 func TestGetFromArch(t *testing.T) { 40 const tmpDir = "/tmp" 41 runProviderTests(t, func(t *testing.T, bck *meta.Bck) { 42 var ( 43 m = ioContext{ 44 t: t, 45 bck: bck.Clone(), 46 } 47 baseParams = tools.BaseAPIParams(m.proxyURL) 48 errCh = make(chan error, m.num) 49 numArchived = 100 50 randomNames = make([]string, numArchived) 51 subtests = []struct { 52 ext string // one of archive.FileExtensions 53 nested bool // subdirs 54 autodetect bool // auto-detect by magic 55 mime bool // specify mime type 56 }{ 57 { 58 ext: archive.ExtTar, nested: false, autodetect: false, mime: false, 59 }, 60 { 61 ext: archive.ExtTarGz, nested: false, autodetect: false, mime: false, 62 }, 63 { 64 ext: archive.ExtZip, nested: false, autodetect: false, mime: false, 65 }, 66 { 67 ext: archive.ExtTarLz4, nested: false, autodetect: false, mime: false, 68 }, 69 { 70 ext: archive.ExtTar, nested: true, autodetect: true, mime: false, 71 }, 72 { 73 ext: archive.ExtTarGz, nested: true, autodetect: true, mime: false, 74 }, 75 { 76 ext: archive.ExtZip, nested: true, autodetect: true, mime: false, 77 }, 78 { 79 ext: archive.ExtTarLz4, nested: true, autodetect: true, mime: false, 80 }, 81 { 82 ext: archive.ExtTar, nested: true, autodetect: true, mime: true, 83 }, 84 { 85 ext: archive.ExtTarGz, nested: true, autodetect: true, mime: true, 86 }, 87 { 88 ext: archive.ExtZip, nested: true, autodetect: true, mime: true, 89 }, 90 { 91 ext: archive.ExtTarLz4, nested: true, autodetect: true, mime: true, 92 }, 93 } 94 ) 95 if testing.Short() { 96 numArchived = 10 97 } 98 var ( 99 sparsePrint atomic.Int64 100 corruptAutoDetectOnce atomic.Int64 101 ) 102 for _, test := range subtests { 103 tname := fmt.Sprintf("%s/nested=%t/detect=%t/mime=%t", test.ext, test.nested, test.autodetect, test.mime) 104 for _, tf := range []tar.Format{tar.FormatUnknown, tar.FormatGNU, tar.FormatPAX} { 105 tarFormat := tf 106 t.Run(path.Join(tname, "format-"+tarFormat.String()), func(t *testing.T) { 107 var ( 108 err error 109 fsize = rand.Intn(10*cos.KiB) + 1 110 archName = tmpDir + "/" + cos.GenTie() + test.ext 111 dirs = []string{"a", "b", "c", "a/b", "a/c", "b/c", "a/b/c", "a/c/b", "b/a/c"} 112 ) 113 for i := range numArchived { 114 j := rand.Int() 115 randomNames[i] = fmt.Sprintf("%d.txt", j) 116 if test.nested { 117 k := j % len(dirs) 118 dir := dirs[k] 119 randomNames[i] = dir + "/" + randomNames[i] 120 } 121 if j%3 == 0 { 122 randomNames[i] = "/" + randomNames[i] 123 } 124 } 125 err = tarch.CreateArchRandomFiles( 126 archName, 127 tarFormat, 128 test.ext, 129 numArchived, 130 fsize, 131 false, // duplication 132 nil, // record extensions 133 randomNames, // pregenerated filenames 134 ) 135 tassert.CheckFatal(t, err) 136 137 objname := filepath.Base(archName) 138 if test.autodetect { 139 objname = objname[0 : len(objname)-len(test.ext)] 140 } 141 142 var ( 143 reader readers.Reader 144 corrupted bool 145 ) 146 if test.autodetect && corruptAutoDetectOnce.Inc() == 1 { 147 corrupted = true 148 tlog.Logf("============== damaging %s - overwriting w/ random data\n", archName) 149 reader, err = readers.NewRandFile(filepath.Dir(archName), 150 filepath.Base(archName), 1024, cos.ChecksumNone) 151 } else { 152 reader, err = readers.NewExistingFile(archName, cos.ChecksumNone) 153 } 154 tassert.CheckFatal(t, err) 155 defer os.Remove(archName) 156 157 tools.Put(m.proxyURL, m.bck, objname, reader, errCh) 158 tassert.SelectErr(t, errCh, "put", true) 159 160 for _, randomName := range randomNames { 161 var mime string 162 if test.mime { 163 mime = "application/x-" + test.ext[1:] 164 } 165 getArgs := api.GetArgs{ 166 Query: url.Values{ 167 apc.QparamArchpath: []string{randomName}, 168 apc.QparamArchmime: []string{mime}, 169 }, 170 } 171 oah, err := api.GetObject(baseParams, m.bck, objname, &getArgs) 172 if sparsePrint.Inc()%13 == 0 { 173 tlog.Logf("%s?%s=%s(%dB)\n", m.bck.Cname(objname), apc.QparamArchpath, 174 randomName, oah.Size()) 175 } 176 if corrupted { 177 tassert.Errorf(t, err != nil, "expecting error reading corrupted arch %q", archName) 178 break 179 } 180 if err != nil { 181 tlog.Logf("Error reading %s?%s=%s(%dB), where randomName=%q, objname=%q, mime=%q, archName=%q\n", 182 m.bck.Cname(objname), apc.QparamArchpath, randomName, oah.Size(), 183 randomName, objname, mime, archName) 184 tassert.CheckFatal(t, err) 185 } 186 } 187 }) 188 } 189 } 190 }) 191 } 192 193 // archive multple obj-s with an option to append if exists 194 func TestArchMultiObj(t *testing.T) { 195 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 196 runProviderTests(t, func(t *testing.T, bck *meta.Bck) { 197 testArch(t, bck) 198 }) 199 } 200 201 func testArch(t *testing.T, bck *meta.Bck) { 202 var ( 203 numPuts = 100 204 m = ioContext{ 205 t: t, 206 bck: bck.Clone(), 207 num: numPuts, 208 prefix: "archive/", 209 ordered: true, 210 } 211 proxyURL = tools.RandomProxyURL(t) 212 baseParams = tools.BaseAPIParams(proxyURL) 213 numArchs = 15 214 numInArch = min(m.num/2, 7) 215 fmtRange = "%s{%d..%d}" 216 subtests = []struct { 217 ext string // one of archive.FileExtensions (same as: supported arch formats) 218 list bool 219 inclSrcBckName bool 220 abrt bool 221 apnd bool 222 }{ 223 { 224 ext: archive.ExtTar, list: true, 225 }, 226 { 227 ext: archive.ExtTar, list: false, inclSrcBckName: true, 228 }, 229 { 230 ext: archive.ExtTar, list: false, 231 }, 232 { 233 ext: archive.ExtTar, list: true, apnd: true, 234 }, 235 { 236 ext: archive.ExtTarLz4, list: true, 237 }, 238 } 239 subtestsLong = []struct { 240 ext string // one of archive.FileExtensions (same as: supported arch formats) 241 list bool 242 inclSrcBckName bool 243 abrt bool 244 apnd bool 245 }{ 246 { 247 ext: archive.ExtTgz, list: true, 248 }, 249 { 250 ext: archive.ExtTgz, list: false, inclSrcBckName: true, 251 }, 252 { 253 ext: archive.ExtTgz, list: true, inclSrcBckName: true, apnd: true, 254 }, 255 { 256 ext: archive.ExtTgz, list: false, apnd: true, 257 }, 258 { 259 ext: archive.ExtZip, list: true, 260 }, 261 { 262 ext: archive.ExtZip, list: false, inclSrcBckName: true, 263 }, 264 { 265 ext: archive.ExtZip, list: true, 266 }, 267 { 268 ext: archive.ExtTarLz4, list: false, 269 }, 270 } 271 ) 272 if testing.Short() { 273 numArchs = 2 274 } else { // test-long 275 subtests = append(subtests, subtestsLong...) 276 } 277 for _, test := range subtests { 278 var ( 279 abrt string 280 listOrRange = "list" 281 ) 282 if !test.list { 283 listOrRange = "range" 284 } 285 tm := mono.NanoTime() 286 if tm&0x3 == 0 && !test.apnd { 287 test.abrt = true 288 abrt = "/abort" 289 } 290 tname := fmt.Sprintf("%s/%s%s", test.ext, listOrRange, abrt) 291 if test.inclSrcBckName { 292 tname += "/incl-src=" + m.bck.Name 293 } 294 if test.apnd { 295 tname += "/append" 296 } 297 t.Run(tname, func(t *testing.T) { 298 if m.bck.IsRemote() { 299 m.num = numPuts >> 1 300 } 301 m.init(true /*cleanup*/) 302 m.fileSize = min(m.fileSize+m.fileSize/3, 32*cos.KiB) 303 m.puts() 304 if m.bck.IsRemote() { 305 defer m.del(-1) 306 } 307 bckTo := cmn.Bck{Name: trand.String(10), Provider: apc.AIS} 308 tools.CreateBucket(t, proxyURL, bckTo, nil, true /*cleanup*/) 309 310 if test.list { 311 for i := range numArchs { 312 archName := fmt.Sprintf("test_lst_%02d%s", i, test.ext) 313 list := make([]string, 0, numInArch) 314 for range numInArch { 315 list = append(list, m.objNames[rand.Intn(m.num)]) 316 } 317 go func(archName string, list []string, i int) { 318 msg := cmn.ArchiveBckMsg{ 319 ToBck: bckTo, 320 ArchiveMsg: apc.ArchiveMsg{ArchName: archName}, 321 } 322 msg.ListRange.ObjNames = list 323 msg.InclSrcBname = test.inclSrcBckName 324 325 xids, err := api.ArchiveMultiObj(baseParams, m.bck, &msg) 326 tassert.CheckFatal(t, err) 327 tlog.Logf("[%s] %2d: arch list %d objects %s => %s\n", xids, i, len(list), m.bck, bckTo) 328 }(archName, list, i) 329 } 330 } else { 331 for i := range numArchs { 332 archName := fmt.Sprintf("test_rng_%02d%s", i, test.ext) 333 start := rand.Intn(m.num - numInArch) 334 go func(archName string, start, i int) { 335 msg := cmn.ArchiveBckMsg{ 336 ToBck: bckTo, 337 ArchiveMsg: apc.ArchiveMsg{ArchName: archName}, 338 } 339 msg.ListRange.Template = fmt.Sprintf(fmtRange, m.prefix, start, start+numInArch-1) 340 msg.InclSrcBname = test.inclSrcBckName 341 342 xids, err := api.ArchiveMultiObj(baseParams, m.bck, &msg) 343 tassert.CheckFatal(t, err) 344 tlog.Logf("[%s] %2d: arch range %s %s => %s\n", 345 xids, i, msg.ListRange.Template, m.bck, bckTo) 346 }(archName, start, i) 347 } 348 } 349 350 flt := xact.ArgsMsg{Kind: apc.ActArchive, Bck: m.bck} 351 if test.abrt { 352 time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second) 353 tlog.Logln("Aborting...") 354 api.AbortXaction(baseParams, &flt) 355 } 356 357 var lstToAppend *cmn.LsoRes 358 for ii := range 2 { 359 api.WaitForXactionIdle(baseParams, &flt) 360 361 tlog.Logf("List %s\n", bckTo) 362 msg := &apc.LsoMsg{Prefix: "test_"} 363 msg.AddProps(apc.GetPropsName, apc.GetPropsSize) 364 objList, err := api.ListObjects(baseParams, bckTo, msg, api.ListArgs{}) 365 tassert.CheckFatal(t, err) 366 for _, en := range objList.Entries { 367 tlog.Logf("%s: %dB\n", en.Name, en.Size) 368 } 369 num := len(objList.Entries) 370 if num < numArchs && ii == 0 { 371 tlog.Logf("Warning: expected %d, have %d - retrying...\n", numArchs, num) 372 time.Sleep(7 * time.Second) // TODO: ditto 373 continue 374 } 375 tassert.Errorf(t, num == numArchs || test.abrt, "expected %d, have %d", numArchs, num) 376 lstToAppend = objList 377 break 378 } 379 380 msg := &apc.LsoMsg{Prefix: "test_"} 381 msg.AddProps(apc.GetPropsName, apc.GetPropsSize) 382 msg.SetFlag(apc.LsArchDir) 383 objList, err := api.ListObjects(baseParams, bckTo, msg, api.ListArgs{}) 384 tassert.CheckFatal(t, err) 385 num := len(objList.Entries) 386 expectedNum := numArchs + numArchs*numInArch 387 388 tassert.Errorf(t, num == expectedNum || test.abrt, "expected %d, have %d", expectedNum, num) 389 390 // multi-object APPEND 391 if test.apnd { 392 for _, e := range lstToAppend.Entries { 393 start := rand.Intn(m.num - numInArch) 394 go func(archName string, start int) { 395 msg := cmn.ArchiveBckMsg{ 396 ToBck: bckTo, 397 ArchiveMsg: apc.ArchiveMsg{ArchName: archName}, 398 } 399 msg.ListRange.Template = fmt.Sprintf(fmtRange, m.prefix, start, start+numInArch-1) 400 msg.InclSrcBname = test.inclSrcBckName 401 402 msg.AppendIfExists = true // here 403 404 xids, err := api.ArchiveMultiObj(baseParams, m.bck, &msg) 405 tassert.CheckFatal(t, err) 406 tlog.Logf("[%s] APPEND %s/%s => %s/%s\n", 407 xids, m.bck, msg.ListRange.Template, bckTo, archName) 408 }(e.Name, start) 409 } 410 411 time.Sleep(10 * time.Second) 412 flt := xact.ArgsMsg{Kind: apc.ActArchive, Bck: m.bck} 413 api.WaitForXactionIdle(baseParams, &flt) 414 } 415 416 var ( 417 objName string 418 mime = "application/x-" + test.ext[1:] 419 ) 420 for _, en := range objList.Entries { 421 if !en.IsInsideArch() { 422 objName = en.Name 423 continue 424 } 425 if rand.Intn(3) > 0 { 426 continue 427 } 428 429 getArgs := api.GetArgs{ 430 Query: url.Values{ 431 apc.QparamArchpath: []string{en.Name}, 432 apc.QparamArchmime: []string{mime}, 433 }, 434 } 435 oah, err := api.GetObject(baseParams, bckTo, objName, &getArgs) 436 if err != nil { 437 t.Errorf("%s?%s=%s(%dB): %v", bckTo.Cname(objName), apc.QparamArchpath, en.Name, oah.Size(), err) 438 } 439 } 440 }) 441 } 442 } 443 444 // exercises `api.ArchiveMultiObj` followed by api.PutApndArch(local rand-reader) 445 func TestAppendToArch(t *testing.T) { 446 var ( 447 bckFrom = cmn.Bck{Name: trand.String(10), Provider: apc.AIS} 448 bckTo = cmn.Bck{Name: trand.String(10), Provider: apc.AIS} 449 m = ioContext{ 450 t: t, 451 bck: bckFrom, 452 num: 10, 453 prefix: "archive/", 454 ordered: true, 455 } 456 proxyURL = tools.RandomProxyURL(t) 457 baseParams = tools.BaseAPIParams(proxyURL) 458 numArchs = m.num 459 numAdd = m.num 460 numInArch = min(m.num/2, 7) 461 objPattern = "test_lst_%04d%s" 462 archPath = "extra/newfile%04d" 463 subtests = []struct { 464 ext string // one of archive.FileExtensions (same as: supported arch formats) 465 multi bool // false - append a single file, true - append a list of objects 466 }{ 467 { 468 ext: archive.ExtTar, multi: false, 469 }, 470 { 471 ext: archive.ExtTar, multi: true, 472 }, 473 { 474 ext: archive.ExtTgz, multi: false, 475 }, 476 { 477 ext: archive.ExtTgz, multi: true, 478 }, 479 { 480 ext: archive.ExtTarLz4, multi: false, 481 }, 482 } 483 subtestsLong = []struct { 484 ext string // one of archive.FileExtensions (same as: supported arch formats) 485 multi bool // false - append a single file, true - append a list of objects 486 }{ 487 { 488 ext: archive.ExtZip, multi: false, 489 }, 490 { 491 ext: archive.ExtZip, multi: true, 492 }, 493 { 494 ext: archive.ExtTarLz4, multi: true, 495 }, 496 } 497 ) 498 if !testing.Short() { // test-long, and see one other Skip below 499 subtests = append(subtests, subtestsLong...) 500 } 501 for _, test := range subtests { 502 tname := fmt.Sprintf("%s/multi=%t", test.ext, test.multi) 503 t.Run(tname, func(t *testing.T) { 504 tools.CreateBucket(t, proxyURL, bckFrom, nil, true /*cleanup*/) 505 tools.CreateBucket(t, proxyURL, bckTo, nil, true /*cleanup*/) 506 m.init(true /*cleanup*/) 507 m.fileSize = min(m.fileSize+m.fileSize/3, 32*cos.KiB) 508 m.puts() 509 510 if testing.Short() && test.ext != archive.ExtTar { 511 // skip all multi-object appends 512 if test.multi { 513 tools.ShortSkipf(t) 514 } 515 // reduce 516 numArchs = 2 517 numAdd = 3 518 } 519 520 for i := range numArchs { 521 archName := fmt.Sprintf(objPattern, i, test.ext) 522 list := make([]string, 0, numInArch) 523 for range numInArch { 524 list = append(list, m.objNames[rand.Intn(m.num)]) 525 } 526 go func(archName string, list []string) { 527 msg := cmn.ArchiveBckMsg{ 528 ToBck: bckTo, 529 ArchiveMsg: apc.ArchiveMsg{ArchName: archName}, 530 } 531 msg.ListRange.ObjNames = list 532 533 _, err := api.ArchiveMultiObj(baseParams, m.bck, &msg) 534 tassert.CheckFatal(t, err) 535 }(archName, list) 536 } 537 538 wargs := xact.ArgsMsg{Kind: apc.ActArchive, Bck: m.bck} 539 api.WaitForXactionIdle(baseParams, &wargs) 540 541 lsmsg := &apc.LsoMsg{Prefix: "test_lst"} 542 lsmsg.AddProps(apc.GetPropsName, apc.GetPropsSize) 543 objList, err := api.ListObjects(baseParams, bckTo, lsmsg, api.ListArgs{}) 544 tassert.CheckFatal(t, err) 545 num := len(objList.Entries) 546 tassert.Errorf(t, num == numArchs, "expected %d, have %d", numArchs, num) 547 548 var sparsePrint atomic.Int64 549 for i := range numArchs { 550 archName := fmt.Sprintf(objPattern, i, test.ext) 551 if test.multi { 552 tlog.Logf("APPEND multi-obj %s => %s/%s\n", bckFrom, bckTo, archName) 553 list := make([]string, 0, numAdd) 554 for range numAdd { 555 list = append(list, m.objNames[rand.Intn(m.num)]) 556 } 557 msg := cmn.ArchiveBckMsg{ 558 ToBck: bckTo, 559 ArchiveMsg: apc.ArchiveMsg{ArchName: archName}, 560 } 561 msg.AppendIfExists = true 562 msg.ListRange.ObjNames = list 563 go func() { 564 _, err = api.ArchiveMultiObj(baseParams, bckFrom, &msg) 565 tassert.CheckError(t, err) 566 }() 567 } else { 568 for j := range numAdd { 569 reader, _ := readers.NewRand(fileSize, cos.ChecksumNone) 570 putArgs := api.PutArgs{ 571 BaseParams: baseParams, 572 Bck: bckTo, 573 ObjName: archName, 574 Reader: reader, 575 Size: fileSize, 576 } 577 archpath := fmt.Sprintf(archPath, j) + cos.GenTie() 578 appendArchArgs := api.PutApndArchArgs{ 579 PutArgs: putArgs, 580 ArchPath: archpath, 581 Flags: apc.ArchAppend, // existence required 582 } 583 if sparsePrint.Inc()%13 == 0 { 584 tlog.Logf("APPEND local rand => %s/%s/%s\n", bckTo, archName, archpath) 585 } 586 err = api.PutApndArch(&appendArchArgs) 587 tassert.CheckError(t, err) 588 } 589 } 590 } 591 if test.multi { 592 wargs := xact.ArgsMsg{Kind: apc.ActArchive, Bck: m.bck} 593 api.WaitForXactionIdle(baseParams, &wargs) 594 } 595 596 lsmsg.SetFlag(apc.LsArchDir) 597 objList, err = api.ListObjects(baseParams, bckTo, lsmsg, api.ListArgs{}) 598 tassert.CheckError(t, err) 599 num = len(objList.Entries) 600 expectedNum := numArchs + numArchs*(numInArch+numAdd) 601 602 tassert.Errorf(t, num == expectedNum, "expected %d, have %d", expectedNum, num) 603 }) 604 } 605 }