github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/dsort_test.go (about) 1 //nolint:dupl // copy-paste benign and can wait 2 // Package integration_test. 3 /* 4 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package integration_test 7 8 import ( 9 "archive/tar" 10 "bytes" 11 "fmt" 12 "net/http" 13 "os" 14 "path/filepath" 15 rdebug "runtime/debug" 16 "strconv" 17 "strings" 18 "sync" 19 "testing" 20 "time" 21 22 "github.com/NVIDIA/aistore/api" 23 "github.com/NVIDIA/aistore/api/apc" 24 "github.com/NVIDIA/aistore/cmn" 25 "github.com/NVIDIA/aistore/cmn/archive" 26 "github.com/NVIDIA/aistore/cmn/cos" 27 "github.com/NVIDIA/aistore/cmn/debug" 28 "github.com/NVIDIA/aistore/core/meta" 29 "github.com/NVIDIA/aistore/ext/dsort" 30 "github.com/NVIDIA/aistore/ext/dsort/shard" 31 "github.com/NVIDIA/aistore/sys" 32 "github.com/NVIDIA/aistore/tools" 33 "github.com/NVIDIA/aistore/tools/docker" 34 "github.com/NVIDIA/aistore/tools/readers" 35 "github.com/NVIDIA/aistore/tools/tarch" 36 "github.com/NVIDIA/aistore/tools/tassert" 37 "github.com/NVIDIA/aistore/tools/tlog" 38 "github.com/NVIDIA/aistore/tools/trand" 39 "github.com/NVIDIA/aistore/xact" 40 jsoniter "github.com/json-iterator/go" 41 ) 42 43 const ( 44 dsortDescAllPrefix = apc.ActDsort + "-test-integration" 45 46 scopeConfig = "config" 47 scopeSpec = "spec" 48 ) 49 50 const ( 51 startingDS = "starting dsort" 52 ) 53 54 var ( 55 dsortDescCurPrefix = fmt.Sprintf("%s-%d-", dsortDescAllPrefix, os.Getpid()) 56 57 dsorterTypes = []string{dsort.GeneralType, dsort.MemType} 58 dsortPhases = []string{dsort.ExtractionPhase, dsort.SortingPhase, dsort.CreationPhase} 59 dsortAlgorithms = []string{dsort.Alphanumeric, dsort.Shuffle} 60 dsortSettingScopes = []string{scopeConfig, scopeSpec} 61 ) 62 63 type ( 64 dsortTestSpec struct { 65 p bool // determines if the tests should be ran in parallel mode 66 types []string 67 tarFormats []tar.Format 68 phases []string 69 reactions []string 70 scopes []string 71 algs []string 72 } 73 74 dsortFramework struct { 75 m *ioContext 76 77 dsorterType string 78 79 outputBck cmn.Bck 80 inputPrefix string 81 outputPrefix string 82 83 inputTempl apc.ListRange 84 outputTempl string 85 orderFileURL string 86 shardCnt int 87 shardCntToSkip int 88 filesPerShard int 89 fileSz int // in a shard 90 shardSize int 91 outputShardCnt int 92 recordDuplicationsCnt int 93 recordExts []string 94 95 inputShards []string 96 97 tarFormat tar.Format 98 inputExt string 99 outputExt string 100 alg *dsort.Algorithm 101 missingKeys bool 102 outputShardSize string 103 maxMemUsage string 104 dryRun bool 105 106 missingShards string 107 duplicatedRecords string 108 109 baseParams api.BaseParams 110 managerUUID string 111 } 112 113 shardRecords struct { 114 name string 115 recordNames []string 116 } 117 ) 118 119 func generateDsortDesc() string { 120 return dsortDescCurPrefix + time.Now().Format(time.RFC3339Nano) 121 } 122 123 //nolint:gocritic // ignoring (dsortTestSpec) hugeParam 124 func runDsortTest(t *testing.T, dts dsortTestSpec, f any) { 125 if dts.p { 126 t.Parallel() 127 } 128 129 for _, dsorterType := range dts.types { 130 dsorterType := dsorterType // pin 131 t.Run(dsorterType, func(t *testing.T) { 132 if dts.p { 133 t.Parallel() 134 } 135 136 if len(dts.tarFormats) > 0 { 137 g := f.(func(dsorterType string, tarFormat tar.Format, t *testing.T)) 138 for _, tf := range dts.tarFormats { 139 tarFormat := tf // pin 140 t.Run("format-"+tarFormat.String(), func(t *testing.T) { 141 if dts.p { 142 t.Parallel() 143 } 144 g(dsorterType, tarFormat, t) 145 }) 146 } 147 } else if len(dts.phases) > 0 { 148 g := f.(func(dsorterType, phase string, t *testing.T)) 149 for _, phase := range dts.phases { 150 phase := phase // pin 151 t.Run(phase, func(t *testing.T) { 152 if dts.p { 153 t.Parallel() 154 } 155 g(dsorterType, phase, t) 156 }) 157 } 158 } else if len(dts.reactions) > 0 { 159 for _, reaction := range dts.reactions { 160 reaction := reaction // pin 161 t.Run(reaction, func(t *testing.T) { 162 if dts.p { 163 t.Parallel() 164 } 165 166 if len(dts.scopes) > 0 { 167 for _, scope := range dts.scopes { 168 scope := scope // pin 169 t.Run(scope, func(t *testing.T) { 170 if dts.p { 171 t.Parallel() 172 } 173 174 g := f.(func(dsorterType, reaction, scope string, t *testing.T)) 175 g(dsorterType, reaction, scope, t) 176 }) 177 } 178 } else { 179 g := f.(func(dsorterType, reaction string, t *testing.T)) 180 g(dsorterType, reaction, t) 181 } 182 }) 183 } 184 } else if len(dts.algs) > 0 { 185 g := f.(func(dsorterType, alg string, t *testing.T)) 186 for _, alg := range dts.algs { 187 alg := alg // pin 188 t.Run(alg, func(t *testing.T) { 189 if dts.p { 190 t.Parallel() 191 } 192 g(dsorterType, alg, t) 193 }) 194 } 195 } else { 196 g := f.(func(dsorterType string, t *testing.T)) 197 g(dsorterType, t) 198 } 199 }) 200 } 201 } 202 203 //////////////////// 204 // dsortFramework // 205 //////////////////// 206 207 func (df *dsortFramework) job() string { 208 if df.managerUUID == "" { 209 return "dsort[-]" 210 } 211 return "dsort[" + df.managerUUID + "]" 212 } 213 214 func (df *dsortFramework) init() { 215 if df.inputTempl.Template == "" { 216 df.inputTempl = apc.ListRange{Template: fmt.Sprintf("input-{0..%d}", df.shardCnt-1)} 217 } 218 if df.outputTempl == "" { 219 df.outputTempl = "output-{00000..10000}" 220 } 221 if df.inputExt == "" { 222 df.inputExt = dsort.DefaultExt 223 } 224 225 // Assumption is that all prefixes end with dash: "-" 226 df.inputPrefix = df.inputTempl.Template[:strings.Index(df.inputTempl.Template, "-")+1] 227 df.outputPrefix = df.outputTempl[:strings.Index(df.outputTempl, "-")+1] 228 229 if df.fileSz == 0 { 230 df.fileSz = cos.KiB 231 } 232 233 df.shardSize = df.filesPerShard * df.fileSz 234 if df.outputShardSize == "-1" { 235 df.outputShardSize = "" 236 pt, err := cos.ParseBashTemplate(df.outputTempl) 237 cos.AssertNoErr(err) 238 df.outputShardCnt = int(pt.Count()) 239 } else { 240 outputShardSize := int64(10 * df.filesPerShard * df.fileSz) 241 df.outputShardSize = cos.ToSizeIEC(outputShardSize, 0) 242 df.outputShardCnt = (df.shardCnt * df.shardSize) / int(outputShardSize) 243 } 244 245 if df.alg == nil { 246 df.alg = &dsort.Algorithm{} 247 } 248 249 df.baseParams = tools.BaseAPIParams(df.m.proxyURL) 250 } 251 252 func (df *dsortFramework) gen() dsort.RequestSpec { 253 return dsort.RequestSpec{ 254 Description: generateDsortDesc(), 255 InputBck: df.m.bck, 256 OutputBck: df.outputBck, 257 InputExtension: df.inputExt, 258 OutputExtension: df.outputExt, 259 InputFormat: df.inputTempl, 260 OutputFormat: df.outputTempl, 261 OutputShardSize: df.outputShardSize, 262 Algorithm: *df.alg, 263 OrderFileURL: df.orderFileURL, 264 ExtractConcMaxLimit: 10, 265 CreateConcMaxLimit: 10, 266 MaxMemUsage: df.maxMemUsage, 267 DsorterType: df.dsorterType, 268 DryRun: df.dryRun, 269 270 Config: cmn.DsortConf{ 271 MissingShards: df.missingShards, 272 DuplicatedRecords: df.duplicatedRecords, 273 }, 274 } 275 } 276 277 func (df *dsortFramework) start() { 278 var ( 279 err error 280 spec = df.gen() 281 ) 282 df.managerUUID, err = api.StartDsort(df.baseParams, &spec) 283 tassert.CheckFatal(df.m.t, err) 284 } 285 286 func (df *dsortFramework) createInputShards() { 287 const tmpDir = "/tmp" 288 var ( 289 wg = cos.NewLimitedWaitGroup(sys.NumCPU(), 0) 290 errCh = make(chan error, df.shardCnt) 291 292 mu = &sync.Mutex{} // to collect inputShards (obj names) 293 ) 294 debug.Assert(len(df.inputShards) == 0) 295 296 tlog.Logf("creating %d shards...\n", df.shardCnt) 297 for i := df.shardCntToSkip; i < df.shardCnt; i++ { 298 wg.Add(1) 299 go func(i int) { 300 defer wg.Done() 301 var ( 302 err error 303 duplication = i < df.recordDuplicationsCnt 304 path = fmt.Sprintf("%s/%s/%s%d", tmpDir, df.m.bck.Name, df.inputPrefix, i) 305 tarName string 306 ) 307 if df.alg.Kind == dsort.Content { 308 tarName = path + archive.ExtTar 309 } else { 310 tarName = path + df.inputExt 311 } 312 if df.alg.Kind == dsort.Content { 313 err = tarch.CreateArchCustomFiles(tarName, df.tarFormat, df.inputExt, df.filesPerShard, 314 df.fileSz, df.alg.ContentKeyType, df.alg.Ext, df.missingKeys) 315 } else if df.inputExt == archive.ExtTar { 316 err = tarch.CreateArchRandomFiles(tarName, df.tarFormat, df.inputExt, df.filesPerShard, 317 df.fileSz, duplication, df.recordExts, nil) 318 } else { 319 err = tarch.CreateArchRandomFiles(tarName, df.tarFormat, df.inputExt, df.filesPerShard, 320 df.fileSz, duplication, nil, nil) 321 } 322 tassert.CheckFatal(df.m.t, err) 323 324 reader, err := readers.NewExistingFile(tarName, cos.ChecksumNone) 325 tassert.CheckFatal(df.m.t, err) 326 327 objName := filepath.Base(tarName) 328 tools.Put(df.m.proxyURL, df.m.bck, objName, reader, errCh) 329 330 mu.Lock() 331 df.inputShards = append(df.inputShards, objName) 332 mu.Unlock() 333 334 os.Remove(tarName) 335 }(i) 336 } 337 wg.Wait() 338 close(errCh) 339 for err := range errCh { 340 tassert.CheckFatal(df.m.t, err) 341 } 342 tlog.Logf("%s: done creating shards\n", df.job()) 343 } 344 345 func (df *dsortFramework) checkOutputShards(zeros int) { 346 var ( 347 lastValue any 348 lastName string 349 inversions int 350 idx int 351 baseParams = tools.BaseAPIParams(df.m.proxyURL) 352 records = make(map[string]int, 100) 353 354 realOutputShardCnt int 355 skipped int 356 ) 357 tlog.Logf("%s: checking that files are sorted...\n", df.job()) 358 outer: 359 for i := range df.outputShardCnt { 360 var ( 361 buffer bytes.Buffer 362 shardName = fmt.Sprintf("%s%0*d%s", df.outputPrefix, zeros, i, df.inputExt) 363 getArgs = api.GetArgs{Writer: &buffer} 364 bucket = df.m.bck 365 ) 366 if df.outputBck.Name != "" { 367 bucket = df.outputBck 368 } 369 370 _, err := api.GetObject(baseParams, bucket, shardName, &getArgs) 371 if err != nil { 372 herr, ok := err.(*cmn.ErrHTTP) 373 if ok && herr.Status == http.StatusNotFound && shard.IsCompressed(df.inputExt) && i > 0 { 374 // check for NotFound a few more, and break; see also 'skipped == 0' check below 375 switch skipped { 376 case 0: 377 tlog.Logf("%s: computed output shard count (%d) vs compression: [%s] is the first not-found\n", 378 df.job(), df.outputShardCnt, shardName) 379 fallthrough 380 case 1, 2, 3: 381 skipped++ 382 continue 383 default: 384 break outer 385 } 386 } 387 tassert.CheckFatal(df.m.t, err) 388 } 389 390 tassert.Fatalf(df.m.t, skipped == 0, "%s: got out of order shard %s (not-found >= %d)", df.job(), shardName, skipped) 391 392 realOutputShardCnt++ 393 394 if df.alg.Kind == dsort.Content { 395 files, err := tarch.GetFilesFromArchBuffer(cos.Ext(shardName), buffer, df.alg.Ext) 396 tassert.CheckFatal(df.m.t, err) 397 for _, file := range files { 398 if file.Ext == df.alg.Ext { 399 if strings.TrimSuffix(file.Name, filepath.Ext(file.Name)) != 400 strings.TrimSuffix(lastName, filepath.Ext(lastName)) { 401 // custom files should go AFTER the regular files 402 df.m.t.Fatalf("%s: names out of order (shard: %s, lastName: %s, curName: %s)", 403 df.job(), shardName, lastName, file.Name) 404 } 405 406 switch df.alg.ContentKeyType { 407 case shard.ContentKeyInt: 408 intValue, err := strconv.ParseInt(string(file.Content), 10, 64) 409 tassert.CheckFatal(df.m.t, err) 410 if lastValue != nil && intValue < lastValue.(int64) { 411 df.m.t.Fatalf("%s: int values are not in correct order (shard: %s, lastIntValue: %d, curIntValue: %d)", df.job(), shardName, lastValue.(int64), intValue) 412 } 413 lastValue = intValue 414 case shard.ContentKeyFloat: 415 floatValue, err := strconv.ParseFloat(string(file.Content), 64) 416 tassert.CheckFatal(df.m.t, err) 417 if lastValue != nil && floatValue < lastValue.(float64) { 418 df.m.t.Fatalf("%s: string values are not in correct order (shard: %s, lastStringValue: %f, curStringValue: %f)", df.job(), shardName, lastValue.(float64), floatValue) 419 } 420 lastValue = floatValue 421 case shard.ContentKeyString: 422 stringValue := string(file.Content) 423 if lastValue != nil && stringValue < lastValue.(string) { 424 df.m.t.Fatalf("%s: string values are not in correct order (shard: %s, lastStringValue: %s, curStringValue: %s)", df.job(), shardName, lastValue.(string), stringValue) 425 } 426 lastValue = stringValue 427 default: 428 df.m.t.Fail() 429 } 430 } else { 431 lastName = file.Name 432 } 433 } 434 } else { 435 files, err := tarch.GetFileInfosFromArchBuffer(buffer, df.inputExt) 436 tassert.CheckFatal(df.m.t, err) 437 if len(files) == 0 { 438 df.m.t.Fatalf("%s: number of files inside shard is 0", df.job()) 439 } 440 441 for _, file := range files { 442 if df.alg.Kind == "" || df.alg.Kind == dsort.Alphanumeric { 443 if lastName > file.Name() && canonicalName(lastName) != canonicalName(file.Name()) { 444 df.m.t.Fatalf("%s: names out of order (shard: %s, lastName: %s, curName: %s)", 445 df.job(), shardName, lastName, file.Name()) 446 } 447 } else if df.alg.Kind == dsort.Shuffle { 448 if lastName > file.Name() { 449 inversions++ 450 } 451 } 452 if file.Size() != int64(df.fileSz) { 453 df.m.t.Fatalf("%s: file sizes has changed (expected: %d, got: %d)", 454 df.job(), df.fileSz, file.Size()) 455 } 456 lastName = file.Name() 457 458 // For each record object see if they we weren't split (they should 459 // be one after another). 460 recordCanonicalName := canonicalName(file.Name()) 461 prevIdx, ok := records[recordCanonicalName] 462 if ok && prevIdx != idx-1 { 463 df.m.t.Errorf("%s: record object %q was splitted", df.job(), file.Name()) 464 } 465 records[recordCanonicalName] = idx 466 467 // Check if the record objects are in the correct order. 468 if len(df.recordExts) > 0 { 469 ext := cos.Ext(file.Name()) 470 expectedExt := df.recordExts[idx%len(df.recordExts)] 471 if ext != expectedExt { 472 df.m.t.Errorf("%s: record objects %q order has been disrupted: %s != %s", 473 df.job(), file.Name(), ext, expectedExt, 474 ) 475 } 476 } 477 idx++ 478 } 479 } 480 } 481 482 if shard.IsCompressed(df.inputExt) { 483 tlog.Logf("%s: computed output shard count (%d) vs resulting compressed (%d)\n", 484 df.job(), df.outputShardCnt, realOutputShardCnt) 485 } 486 if df.alg.Kind == dsort.Shuffle { 487 if inversions == 0 { 488 df.m.t.Fatalf("%s: shuffle sorting did not create any inversions", df.job()) 489 } 490 } 491 } 492 493 func canonicalName(recordName string) string { 494 return strings.TrimSuffix(recordName, cos.Ext(recordName)) 495 } 496 497 func (df *dsortFramework) checkReactionResult(reaction string, expectedProblemsCnt int) { 498 tlog.Logf("%s: checking metrics and \"reaction\"\n", df.job()) 499 all, err := api.MetricsDsort(df.baseParams, df.managerUUID) 500 tassert.CheckFatal(df.m.t, err) 501 if len(all) != df.m.originalTargetCount { 502 df.m.t.Errorf("%s: number of metrics %d is not same as number of targets %d", df.job(), 503 len(all), df.m.originalTargetCount) 504 } 505 506 switch reaction { 507 case cmn.IgnoreReaction: 508 for target, jmetrics := range all { 509 metrics := jmetrics.Metrics 510 if len(metrics.Warnings) != 0 { 511 df.m.t.Errorf("%s: target %q has %s warnings: %s", df.job(), target, apc.ActDsort, metrics.Warnings) 512 } 513 if len(metrics.Errors) != 0 { 514 df.m.t.Errorf("%s: target %q has %s errors: %s", df.job(), target, apc.ActDsort, metrics.Errors) 515 } 516 } 517 case cmn.WarnReaction: 518 totalWarnings := 0 519 for target, jmetrics := range all { 520 metrics := jmetrics.Metrics 521 totalWarnings += len(metrics.Warnings) 522 523 if len(metrics.Errors) != 0 { 524 df.m.t.Errorf("%s: target %q has %s errors: %s", df.job(), target, apc.ActDsort, metrics.Errors) 525 } 526 } 527 528 if totalWarnings != expectedProblemsCnt { 529 df.m.t.Errorf("%s: number of total warnings %d is different than number of deleted shards: %d", df.job(), totalWarnings, expectedProblemsCnt) 530 } 531 case cmn.AbortReaction: 532 totalErrors := 0 533 for target, jmetrics := range all { 534 metrics := jmetrics.Metrics 535 if !metrics.Aborted.Load() { 536 df.m.t.Errorf("%s: %s was not aborted by target: %s", df.job(), apc.ActDsort, target) 537 } 538 totalErrors += len(metrics.Errors) 539 } 540 541 if totalErrors == 0 { 542 df.m.t.Errorf("%s: expected errors on abort, got nothing", df.job()) 543 } 544 } 545 } 546 547 func (df *dsortFramework) getRecordNames(bck cmn.Bck) []shardRecords { 548 allShardRecords := make([]shardRecords, 0, 10) 549 550 list, err := api.ListObjects(df.baseParams, bck, nil, api.ListArgs{}) 551 tassert.CheckFatal(df.m.t, err) 552 553 if len(list.Entries) == 0 { 554 df.m.t.Errorf("number of objects in bucket %q is 0", bck) 555 } 556 for _, obj := range list.Entries { 557 var ( 558 buffer bytes.Buffer 559 getArgs = api.GetArgs{Writer: &buffer} 560 ) 561 _, err := api.GetObject(df.baseParams, bck, obj.Name, &getArgs) 562 tassert.CheckFatal(df.m.t, err) 563 564 files, err := tarch.GetFileInfosFromArchBuffer(buffer, archive.ExtTar) 565 tassert.CheckFatal(df.m.t, err) 566 567 shard := shardRecords{ 568 name: obj.Name, 569 recordNames: make([]string, len(files)), 570 } 571 for idx, file := range files { 572 shard.recordNames[idx] = file.Name() 573 } 574 allShardRecords = append(allShardRecords, shard) 575 } 576 577 return allShardRecords 578 } 579 580 func (df *dsortFramework) checkMetrics(expectAbort bool) map[string]*dsort.JobInfo { 581 tlog.Logf("%s: checking metrics\n", df.job()) 582 all, err := api.MetricsDsort(df.baseParams, df.managerUUID) 583 tassert.CheckFatal(df.m.t, err) 584 if len(all) != df.m.originalTargetCount { 585 df.m.t.Errorf("%s: number of metrics %d is not same as number of targets %d", 586 df.job(), len(all), df.m.originalTargetCount) 587 } 588 for target, jmetrics := range all { 589 m := jmetrics.Metrics 590 if expectAbort && !m.Aborted.Load() { 591 df.m.t.Errorf("%s: %s was not aborted by target: %s", df.job(), apc.ActDsort, target) 592 } else if !expectAbort && m.Aborted.Load() { 593 df.m.t.Errorf("%s: %s was aborted by target: %s", df.job(), apc.ActDsort, target) 594 } 595 } 596 return all 597 } 598 599 // helper for dispatching i-th dsort job 600 func dispatchDsortJob(m *ioContext, dsorterType string, i int) { 601 df := &dsortFramework{ 602 m: m, 603 dsorterType: dsorterType, 604 inputTempl: apc.ListRange{Template: fmt.Sprintf("input%d-{0..999}", i)}, 605 outputTempl: fmt.Sprintf("output%d-{00000..01000}", i), 606 shardCnt: 500, 607 filesPerShard: 50, 608 maxMemUsage: "99%", 609 } 610 611 df.init() 612 df.createInputShards() 613 614 tlog.Logln(startingDS) 615 df.start() 616 617 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 618 tassert.CheckFatal(m.t, err) 619 tlog.Logf("%s: finished\n", df.job()) 620 621 df.checkMetrics(false /* expectAbort */) 622 df.checkOutputShards(5) 623 } 624 625 func waitForDsortPhase(t *testing.T, proxyURL, managerUUID, phaseName string, callback func()) { 626 tlog.Logf("waiting for %s phase...\n", phaseName) 627 baseParams := tools.BaseAPIParams(proxyURL) 628 for { 629 all, err := api.MetricsDsort(baseParams, managerUUID) 630 if err != nil { // in case of error call callback anyway 631 t.Error(err) 632 callback() 633 break 634 } 635 636 phase := true 637 for _, jmetrics := range all { 638 metrics := jmetrics.Metrics 639 switch phaseName { 640 case dsort.ExtractionPhase: 641 phase = phase && (metrics.Extraction.Running || metrics.Extraction.Finished) 642 case dsort.SortingPhase: 643 phase = phase && (metrics.Sorting.Running || metrics.Sorting.Finished) 644 case dsort.CreationPhase: 645 phase = phase && (metrics.Creation.Running || metrics.Creation.Finished) 646 default: 647 t.Fatal(phaseName) 648 } 649 } 650 651 if phase { 652 callback() 653 break 654 } 655 time.Sleep(100 * time.Millisecond) 656 } 657 } 658 659 // 660 // tests 661 // 662 663 func TestDsort(t *testing.T) { 664 for _, ext := range []string{archive.ExtTar, archive.ExtTarLz4, archive.ExtZip} { 665 for _, lr := range []string{"list", "range"} { 666 t.Run(ext+"/"+lr, func(t *testing.T) { 667 testDsort(t, ext, lr) 668 }) 669 } 670 } 671 } 672 673 func testDsort(t *testing.T, ext, lr string) { 674 runDsortTest( 675 // Include empty ("") type - in this case type must be selected automatically. 676 t, dsortTestSpec{p: true, types: append(dsorterTypes, "")}, 677 func(dsorterType string, t *testing.T) { 678 var ( 679 m = &ioContext{ 680 t: t, 681 } 682 df = &dsortFramework{ 683 m: m, 684 inputExt: ext, 685 dsorterType: dsorterType, 686 shardCnt: 500, 687 filesPerShard: 100, 688 maxMemUsage: "99%", 689 } 690 ) 691 if testing.Short() { 692 df.shardCnt /= 10 693 } 694 695 // Initialize ioContext 696 m.initAndSaveState(true /*cleanup*/) 697 m.expectTargets(1) 698 699 // Create ais bucket 700 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 701 702 df.init() 703 df.createInputShards() 704 705 if lr == "list" { 706 // iterate list 707 df.inputTempl.ObjNames = df.inputShards 708 df.inputTempl.Template = "" 709 df.missingShards = cmn.AbortReaction // (when shards are explicitly enumerated...) 710 } 711 712 tlog.Logln(startingDS) 713 df.start() 714 715 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 716 tassert.CheckFatal(t, err) 717 tlog.Logf("%s: finished\n", df.job()) 718 719 df.checkMetrics(false /* expectAbort */) 720 df.checkOutputShards(5) 721 }, 722 ) 723 } 724 725 func TestDsortNonExistingBuckets(t *testing.T) { 726 runDsortTest( 727 t, dsortTestSpec{p: true, types: dsorterTypes}, 728 func(dsorterType string, t *testing.T) { 729 var ( 730 m = &ioContext{ 731 t: t, 732 } 733 df = &dsortFramework{ 734 m: m, 735 dsorterType: dsorterType, 736 outputBck: cmn.Bck{ 737 Name: trand.String(15), 738 Provider: apc.AIS, 739 }, 740 shardCnt: 500, 741 filesPerShard: 100, 742 maxMemUsage: "99%", 743 } 744 ) 745 746 // Initialize ioContext 747 m.initAndSaveState(true /*cleanup*/) 748 m.expectTargets(3) 749 750 df.init() 751 752 // Create ais:// output 753 tools.CreateBucket(t, m.proxyURL, df.outputBck, nil, true /*cleanup*/) 754 755 tlog.Logln(startingDS) 756 spec := df.gen() 757 tlog.Logf("dsort %s(-) => %s\n", m.bck, df.outputBck) 758 if _, err := api.StartDsort(df.baseParams, &spec); err == nil { 759 t.Error("expected dsort to fail when input bucket doesn't exist") 760 } 761 762 // Now destroy output bucket and create input bucket 763 tools.DestroyBucket(t, m.proxyURL, df.outputBck) 764 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 765 766 tlog.Logf("dsort %s => %s(-)\n", m.bck, df.outputBck) 767 if _, err := api.StartDsort(df.baseParams, &spec); err != nil { 768 t.Errorf("expected dsort to create output bucket on the fly, got: %v", err) 769 } 770 }, 771 ) 772 } 773 774 func TestDsortEmptyBucket(t *testing.T) { 775 runDsortTest( 776 t, dsortTestSpec{p: true, types: dsorterTypes, reactions: cmn.SupportedReactions}, 777 func(dsorterType, reaction string, t *testing.T) { 778 var ( 779 m = &ioContext{ 780 t: t, 781 } 782 df = &dsortFramework{ 783 m: m, 784 dsorterType: dsorterType, 785 shardCnt: 100, 786 filesPerShard: 10, 787 maxMemUsage: "99%", 788 missingShards: reaction, 789 } 790 ) 791 792 // Initialize ioContext 793 m.initAndSaveState(true /*cleanup*/) 794 m.expectTargets(3) 795 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 796 797 df.init() 798 799 tlog.Logln(startingDS) 800 df.start() 801 802 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 803 tassert.CheckFatal(t, err) 804 tlog.Logf("%s: finished\n", df.job()) 805 806 df.checkMetrics(reaction == cmn.AbortReaction /*expectAbort*/) 807 df.checkReactionResult(reaction, df.shardCnt) 808 }, 809 ) 810 } 811 812 func TestDsortOutputBucket(t *testing.T) { 813 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 814 815 runDsortTest( 816 t, dsortTestSpec{p: true, types: dsorterTypes}, 817 func(dsorterType string, t *testing.T) { 818 var ( 819 m = &ioContext{ 820 t: t, 821 } 822 df = &dsortFramework{ 823 m: m, 824 dsorterType: dsorterType, 825 outputBck: cmn.Bck{ 826 Name: trand.String(15), 827 Provider: apc.AIS, 828 }, 829 shardCnt: 500, 830 filesPerShard: 100, 831 maxMemUsage: "99%", 832 } 833 ) 834 835 m.initAndSaveState(true /*cleanup*/) 836 m.expectTargets(3) 837 // Create ais buckets 838 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 839 840 // Create local output bucket 841 tools.CreateBucket(t, m.proxyURL, df.outputBck, nil, true /*cleanup*/) 842 843 df.init() 844 df.createInputShards() 845 846 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 847 df.start() 848 849 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 850 tassert.CheckFatal(t, err) 851 tlog.Logf("%s: finished\n", df.job()) 852 853 df.checkMetrics(false /* expectAbort */) 854 df.checkOutputShards(5) 855 }, 856 ) 857 } 858 859 // TestDsortParallel runs multiple dSorts in parallel 860 func TestDsortParallel(t *testing.T) { 861 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 862 863 runDsortTest( 864 t, dsortTestSpec{p: false, types: dsorterTypes}, 865 func(dsorterType string, t *testing.T) { 866 var ( 867 m = &ioContext{ 868 t: t, 869 } 870 dSortsCount = 5 871 ) 872 873 m.initAndSaveState(true /*cleanup*/) 874 m.expectTargets(3) 875 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 876 877 wg := &sync.WaitGroup{} 878 for i := range dSortsCount { 879 wg.Add(1) 880 go func(i int) { 881 defer wg.Done() 882 dispatchDsortJob(m, dsorterType, i) 883 }(i) 884 } 885 wg.Wait() 886 }, 887 ) 888 } 889 890 // TestDsortChain runs multiple dSorts one after another 891 func TestDsortChain(t *testing.T) { 892 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 893 894 runDsortTest( 895 t, dsortTestSpec{p: true, types: dsorterTypes}, 896 func(dsorterType string, t *testing.T) { 897 var ( 898 m = &ioContext{ 899 t: t, 900 } 901 dSortsCount = 5 902 ) 903 904 m.initAndSaveState(true /*cleanup*/) 905 m.expectTargets(3) 906 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 907 908 for i := range dSortsCount { 909 dispatchDsortJob(m, dsorterType, i) 910 } 911 }, 912 ) 913 } 914 915 func TestDsortShuffle(t *testing.T) { 916 runDsortTest( 917 t, dsortTestSpec{p: true, types: dsorterTypes}, 918 func(dsorterType string, t *testing.T) { 919 var ( 920 m = &ioContext{ 921 t: t, 922 } 923 df = &dsortFramework{ 924 m: m, 925 dsorterType: dsorterType, 926 alg: &dsort.Algorithm{Kind: dsort.Shuffle}, 927 shardCnt: 500, 928 filesPerShard: 10, 929 maxMemUsage: "99%", 930 } 931 ) 932 933 m.initAndSaveState(true /*cleanup*/) 934 m.expectTargets(3) 935 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 936 937 df.init() 938 df.createInputShards() 939 940 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 941 df.start() 942 943 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 944 tassert.CheckFatal(t, err) 945 tlog.Logf("%s: finished\n", df.job()) 946 947 df.checkMetrics(false /* expectAbort */) 948 df.checkOutputShards(5) 949 }, 950 ) 951 } 952 953 func TestDsortDisk(t *testing.T) { 954 runDsortTest( 955 t, dsortTestSpec{p: true, types: dsorterTypes}, 956 func(dsorterType string, t *testing.T) { 957 var ( 958 m = &ioContext{ 959 t: t, 960 } 961 df = &dsortFramework{ 962 m: m, 963 dsorterType: dsorterType, 964 outputTempl: "output-%d", 965 shardCnt: 100, 966 filesPerShard: 10, 967 maxMemUsage: "1KB", 968 } 969 ) 970 971 m.initAndSaveState(true /*cleanup*/) 972 m.expectTargets(3) 973 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 974 975 df.init() 976 df.createInputShards() 977 tlog.Logf("starting dsort with spilling to disk... (%d/%d)\n", df.shardCnt, df.filesPerShard) 978 df.start() 979 980 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 981 tassert.CheckFatal(t, err) 982 tlog.Logf("%s: finished\n", df.job()) 983 984 all := df.checkMetrics(false /* expectAbort */) 985 for target, jmetrics := range all { 986 metrics := jmetrics.Metrics 987 if metrics.Extraction.ExtractedToDiskCnt == 0 && metrics.Extraction.ExtractedCnt > 0 { 988 t.Errorf("target %s did not extract any files do disk", target) 989 } 990 } 991 992 df.checkOutputShards(0) 993 }, 994 ) 995 } 996 997 func TestDsortCompressionDisk(t *testing.T) { 998 for _, ext := range []string{archive.ExtTgz, archive.ExtTarLz4, archive.ExtZip} { 999 t.Run(ext, func(t *testing.T) { 1000 runDsortTest( 1001 t, dsortTestSpec{p: true, types: dsorterTypes}, 1002 func(dsorterType string, t *testing.T) { 1003 var ( 1004 m = &ioContext{ 1005 t: t, 1006 } 1007 df = &dsortFramework{ 1008 m: m, 1009 dsorterType: dsorterType, 1010 shardCnt: 200, 1011 filesPerShard: 50, 1012 inputExt: ext, 1013 maxMemUsage: "1KB", 1014 } 1015 ) 1016 1017 m.initAndSaveState(true /*cleanup*/) 1018 m.expectTargets(3) 1019 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1020 1021 df.init() 1022 df.createInputShards() 1023 1024 tlog.Logf("starting dsort: %d/%d, %s\n", 1025 df.shardCnt, df.filesPerShard, df.inputExt) 1026 df.start() 1027 1028 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1029 tassert.CheckFatal(t, err) 1030 tlog.Logf("%s: finished\n", df.job()) 1031 1032 df.checkMetrics(false /* expectAbort */) 1033 df.checkOutputShards(5) 1034 }, 1035 ) 1036 }) 1037 } 1038 } 1039 1040 func TestDsortMemDisk(t *testing.T) { 1041 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1042 1043 var ( 1044 m = &ioContext{ 1045 t: t, 1046 } 1047 df = &dsortFramework{ 1048 m: m, 1049 dsorterType: dsort.GeneralType, 1050 shardCnt: 500, 1051 fileSz: cos.MiB, 1052 filesPerShard: 5, 1053 } 1054 mem sys.MemStat 1055 ) 1056 1057 m.initAndSaveState(true /*cleanup*/) 1058 m.expectTargets(3) 1059 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1060 1061 df.init() 1062 df.createInputShards() 1063 1064 // Try to free all memory to get estimated actual used memory size 1065 rdebug.FreeOSMemory() 1066 1067 // Get current memory 1068 err := mem.Get() 1069 tassert.CheckFatal(t, err) 1070 df.maxMemUsage = cos.ToSizeIEC(int64(mem.ActualUsed+500*cos.MiB), 2) 1071 1072 tlog.Logf("starting dsort with memory and disk (max mem usage: %s)... (%d/%d)\n", df.maxMemUsage, 1073 df.shardCnt, df.filesPerShard) 1074 df.start() 1075 1076 _, err = tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1077 tassert.CheckFatal(t, err) 1078 tlog.Logf("%s: finished\n", df.job()) 1079 1080 all := df.checkMetrics(false /* expectAbort */) 1081 var ( 1082 extractedToDisk int64 1083 extractedTotal int64 1084 ) 1085 for _, jmetrics := range all { 1086 metrics := jmetrics.Metrics 1087 extractedToDisk += metrics.Extraction.ExtractedToDiskCnt 1088 extractedTotal += metrics.Extraction.ExtractedCnt 1089 } 1090 1091 if extractedToDisk == 0 { 1092 t.Error("all extractions by all targets were done exclusively into memory") 1093 } 1094 if extractedToDisk == extractedTotal { 1095 t.Error("all extractions by all targets were done exclusively into disk") 1096 } 1097 1098 df.checkOutputShards(5) 1099 } 1100 1101 func TestDsortMinMemCompression(t *testing.T) { 1102 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1103 for _, ext := range []string{archive.ExtTarGz, archive.ExtTarLz4, archive.ExtZip} { 1104 for _, maxMem := range []string{"10%", "1%"} { 1105 t.Run(ext+"/mem="+maxMem, func(t *testing.T) { 1106 minMemCompression(t, ext, maxMem) 1107 }) 1108 } 1109 } 1110 } 1111 1112 func minMemCompression(t *testing.T, ext, maxMem string) { 1113 var ( 1114 m = &ioContext{ 1115 t: t, 1116 } 1117 df = &dsortFramework{ 1118 m: m, 1119 dsorterType: dsort.GeneralType, 1120 shardCnt: 500, 1121 fileSz: cos.MiB, 1122 filesPerShard: 5, 1123 inputExt: ext, 1124 maxMemUsage: maxMem, 1125 } 1126 mem sys.MemStat 1127 ) 1128 1129 m.initAndSaveState(true /*cleanup*/) 1130 m.expectTargets(3) 1131 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1132 1133 df.init() 1134 df.createInputShards() 1135 1136 // Try to free all memory to get estimated actual used memory size 1137 rdebug.FreeOSMemory() 1138 1139 // Get current memory 1140 err := mem.Get() 1141 tassert.CheckFatal(t, err) 1142 df.maxMemUsage = cos.ToSizeIEC(int64(mem.ActualUsed+300*cos.MiB), 2) 1143 1144 tlog.Logf("starting dsort with memory, disk, and compression (max mem usage: %s) ... %d/%d, %s\n", 1145 df.maxMemUsage, df.shardCnt, df.filesPerShard, df.inputExt) 1146 df.start() 1147 1148 _, err = tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1149 tassert.CheckFatal(t, err) 1150 tlog.Logf("%s: finished\n", df.job()) 1151 1152 all := df.checkMetrics(false /*expectAbort*/) 1153 var ( 1154 extractedToDisk int64 1155 extractedTotal int64 1156 ) 1157 for _, jmetrics := range all { 1158 metrics := jmetrics.Metrics 1159 extractedToDisk += metrics.Extraction.ExtractedToDiskCnt 1160 extractedTotal += metrics.Extraction.ExtractedCnt 1161 } 1162 1163 if extractedToDisk == 0 { 1164 t.Error("all extractions by all targets were done exclusively into memory") 1165 } 1166 if extractedToDisk == extractedTotal { 1167 t.Error("all extractions by all targets were done exclusively into disk") 1168 } 1169 1170 df.checkOutputShards(5) 1171 } 1172 1173 func TestDsortZipLz4(t *testing.T) { 1174 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1175 1176 for _, ext := range []string{archive.ExtZip, archive.ExtTarLz4} { 1177 t.Run(ext, func(t *testing.T) { 1178 runDsortTest( 1179 t, dsortTestSpec{p: true, types: dsorterTypes}, 1180 func(dsorterType string, t *testing.T) { 1181 var ( 1182 err error 1183 m = &ioContext{ 1184 t: t, 1185 } 1186 df = &dsortFramework{ 1187 m: m, 1188 dsorterType: dsorterType, 1189 shardCnt: 500, 1190 filesPerShard: 100, 1191 inputExt: ext, 1192 maxMemUsage: "99%", 1193 } 1194 ) 1195 1196 m.initAndSaveState(true /*cleanup*/) 1197 m.expectTargets(3) 1198 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1199 1200 df.init() 1201 df.createInputShards() 1202 1203 tlog.Logf("starting dsort: %d/%d, %s\n", df.shardCnt, df.filesPerShard, df.inputExt) 1204 df.start() 1205 1206 _, err = tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1207 tassert.CheckFatal(t, err) 1208 tlog.Logf("%s: finished\n", df.job()) 1209 1210 df.checkMetrics(false /* expectAbort */) 1211 df.checkOutputShards(5) 1212 }, 1213 ) 1214 }) 1215 } 1216 } 1217 1218 func TestDsortMaxMemCompression(t *testing.T) { 1219 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1220 for _, ext := range []string{archive.ExtTgz, archive.ExtTarLz4, archive.ExtZip} { 1221 t.Run(ext, func(t *testing.T) { 1222 runDsortTest( 1223 t, dsortTestSpec{p: true, types: dsorterTypes}, 1224 func(dsorterType string, t *testing.T) { 1225 var ( 1226 err error 1227 m = &ioContext{ 1228 t: t, 1229 } 1230 df = &dsortFramework{ 1231 m: m, 1232 dsorterType: dsorterType, 1233 shardCnt: 500, 1234 filesPerShard: 50, 1235 inputExt: ext, 1236 maxMemUsage: "99%", 1237 } 1238 ) 1239 1240 m.initAndSaveState(true /*cleanup*/) 1241 m.expectTargets(3) 1242 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1243 1244 df.init() 1245 df.createInputShards() 1246 1247 tlog.Logf("starting dsort: %d/%d, %s\n", df.shardCnt, df.filesPerShard, df.inputExt) 1248 df.start() 1249 1250 _, err = tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1251 tassert.CheckFatal(t, err) 1252 tlog.Logf("%s: finished\n", df.job()) 1253 1254 df.checkMetrics(false /* expectAbort */) 1255 df.checkOutputShards(5) 1256 }, 1257 ) 1258 }) 1259 } 1260 } 1261 1262 func TestDsortContent(t *testing.T) { 1263 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1264 1265 runDsortTest( 1266 t, dsortTestSpec{p: true, types: dsorterTypes}, 1267 func(dsorterType string, t *testing.T) { 1268 cases := []struct { 1269 extension string 1270 contentKeyType string 1271 missingKeys bool 1272 }{ 1273 {".loss", shard.ContentKeyInt, false}, 1274 {".cls", shard.ContentKeyFloat, false}, 1275 {".smth", shard.ContentKeyString, false}, 1276 1277 {".loss", shard.ContentKeyInt, true}, 1278 {".cls", shard.ContentKeyFloat, true}, 1279 {".smth", shard.ContentKeyString, true}, 1280 } 1281 1282 for _, entry := range cases { 1283 entry := entry // pin 1284 test := fmt.Sprintf("%s-%v", entry.contentKeyType, entry.missingKeys) 1285 t.Run(test, func(t *testing.T) { 1286 t.Parallel() 1287 1288 var ( 1289 m = &ioContext{ 1290 t: t, 1291 } 1292 df = &dsortFramework{ 1293 m: m, 1294 dsorterType: dsorterType, 1295 alg: &dsort.Algorithm{ 1296 Kind: dsort.Content, 1297 Ext: entry.extension, 1298 ContentKeyType: entry.contentKeyType, 1299 }, 1300 missingKeys: entry.missingKeys, 1301 shardCnt: 500, 1302 filesPerShard: 100, 1303 maxMemUsage: "90%", 1304 } 1305 ) 1306 1307 m.initAndSaveState(true /*cleanup*/) 1308 m.expectTargets(3) 1309 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1310 1311 df.init() 1312 df.createInputShards() 1313 1314 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 1315 df.start() 1316 1317 aborted, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1318 tassert.CheckFatal(t, err) 1319 if entry.missingKeys && !aborted { 1320 t.Errorf("%s was not aborted", apc.ActDsort) 1321 } 1322 1323 tlog.Logf("%s: checking metrics\n", df.job()) 1324 all, err := api.MetricsDsort(df.baseParams, df.managerUUID) 1325 tassert.CheckFatal(t, err) 1326 if len(all) != m.originalTargetCount { 1327 t.Errorf("number of metrics %d is not same as the number of targets %d", 1328 len(all), m.originalTargetCount) 1329 } 1330 1331 for target, jmetrics := range all { 1332 metrics := jmetrics.Metrics 1333 if entry.missingKeys && !metrics.Aborted.Load() { 1334 t.Errorf("%s was not aborted by target: %s", target, apc.ActDsort) 1335 } 1336 } 1337 1338 if !entry.missingKeys { 1339 df.checkOutputShards(5) 1340 } 1341 }) 1342 } 1343 }, 1344 ) 1345 } 1346 1347 func TestDsortAbort(t *testing.T) { 1348 runDsortTest( 1349 t, dsortTestSpec{p: true, types: dsorterTypes}, 1350 func(dsorterType string, t *testing.T) { 1351 for _, asXaction := range []bool{false, true} { 1352 test := dsorterType + "/" + fmt.Sprintf("as-xaction=%t", asXaction) 1353 t.Run(test, func(t *testing.T) { 1354 var ( 1355 err error 1356 m = &ioContext{ 1357 t: t, 1358 } 1359 df = &dsortFramework{ 1360 m: m, 1361 dsorterType: dsorterType, 1362 shardCnt: 500, 1363 filesPerShard: 10, 1364 } 1365 ) 1366 1367 m.initAndSaveState(false /*cleanup*/) 1368 m.expectTargets(3) 1369 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1370 1371 df.init() 1372 df.createInputShards() 1373 1374 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 1375 df.start() 1376 1377 if asXaction { 1378 tlog.Logf("aborting dsort[%s] via api.AbortXaction\n", df.managerUUID) 1379 err = api.AbortXaction(df.baseParams, &xact.ArgsMsg{ID: df.managerUUID}) 1380 } else { 1381 tlog.Logf("aborting dsort[%s] via api.AbortDsort\n", df.managerUUID) 1382 err = api.AbortDsort(df.baseParams, df.managerUUID) 1383 } 1384 tassert.CheckFatal(t, err) 1385 1386 _, err = tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1387 tassert.CheckFatal(t, err) 1388 1389 df.checkMetrics(true /* expectAbort */) 1390 }) 1391 } 1392 }, 1393 ) 1394 } 1395 1396 func TestDsortAbortDuringPhases(t *testing.T) { 1397 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1398 1399 runDsortTest( 1400 t, dsortTestSpec{p: true, types: dsorterTypes, phases: dsortPhases}, 1401 func(dsorterType, phase string, t *testing.T) { 1402 for _, asXaction := range []bool{false, true} { 1403 test := dsorterType + "/" + fmt.Sprintf("as-xaction=%t", asXaction) 1404 t.Run(test, func(t *testing.T) { 1405 var ( 1406 m = &ioContext{ 1407 t: t, 1408 } 1409 df = &dsortFramework{ 1410 m: m, 1411 dsorterType: dsorterType, 1412 shardCnt: 500, 1413 filesPerShard: 200, 1414 } 1415 ) 1416 1417 if phase == dsort.SortingPhase && asXaction { 1418 t.Skipf("skipping %s", t.Name()) // TODO -- FIXME: remove 1419 } 1420 1421 m.initAndSaveState(true /*cleanup*/) 1422 m.expectTargets(3) 1423 1424 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1425 1426 df.init() 1427 df.createInputShards() 1428 1429 tlog.Logf("starting dsort (abort on: %s)...\n", phase) 1430 df.start() 1431 1432 waitForDsortPhase(t, m.proxyURL, df.managerUUID, phase, func() { 1433 var err error 1434 if asXaction { 1435 tlog.Logf("aborting dsort[%s] via api.AbortXaction\n", df.managerUUID) 1436 err = api.AbortXaction(df.baseParams, &xact.ArgsMsg{ID: df.managerUUID}) 1437 } else { 1438 tlog.Logf("aborting dsort[%s] via api.AbortDsort\n", df.managerUUID) 1439 err = api.AbortDsort(df.baseParams, df.managerUUID) 1440 } 1441 tassert.CheckFatal(t, err) 1442 }) 1443 1444 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1445 tassert.CheckFatal(t, err) 1446 1447 df.checkMetrics(true /* expectAbort */) 1448 }) 1449 } 1450 }, 1451 ) 1452 } 1453 1454 func TestDsortKillTargetDuringPhases(t *testing.T) { 1455 t.Skip("test is flaky, run it only when necessary") 1456 1457 runDsortTest( 1458 t, dsortTestSpec{p: false, types: dsorterTypes, phases: dsortPhases}, 1459 func(dsorterType, phase string, t *testing.T) { 1460 var ( 1461 m = &ioContext{ 1462 t: t, 1463 } 1464 df = &dsortFramework{ 1465 m: m, 1466 dsorterType: dsorterType, 1467 outputTempl: "output-{0..100000}", 1468 shardCnt: 1000, 1469 filesPerShard: 500, 1470 } 1471 target *meta.Snode 1472 ) 1473 1474 m.initAndSaveState(true /*cleanup*/) 1475 m.expectTargets(3) 1476 1477 df.init() 1478 1479 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1480 1481 df.createInputShards() 1482 1483 tlog.Logf("starting dsort (abort on: %s)...\n", phase) 1484 df.start() 1485 1486 waitForDsortPhase(t, m.proxyURL, df.managerUUID, phase, func() { 1487 // It may require calling AbortXaction(rebalance) & 1488 // WaitForRebalAndResil() before unregistering 1489 target = m.startMaintenanceNoRebalance() 1490 }) 1491 1492 aborted, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1493 tassert.CheckError(t, err) 1494 if !aborted { 1495 t.Errorf("%s was not aborted", apc.ActDsort) 1496 } 1497 1498 tlog.Logf("%s: checking metrics\n", df.job()) 1499 all, err := api.MetricsDsort(df.baseParams, df.managerUUID) 1500 tassert.CheckError(t, err) 1501 if len(all) == m.originalTargetCount { 1502 t.Errorf("number of metrics %d is same as number of original targets %d", 1503 len(all), m.originalTargetCount) 1504 } 1505 1506 for target, jmetrics := range all { 1507 metrics := jmetrics.Metrics 1508 if !metrics.Aborted.Load() { 1509 t.Errorf("%s was not aborted by target: %s", apc.ActDsort, target) 1510 } 1511 } 1512 1513 rebID := m.stopMaintenance(target) 1514 tools.WaitForRebalanceByID(t, df.baseParams, rebID) 1515 }, 1516 ) 1517 } 1518 1519 func TestDsortManipulateMountpathDuringPhases(t *testing.T) { 1520 t.Skipf("skipping %s", t.Name()) 1521 1522 runDsortTest( 1523 t, dsortTestSpec{p: false, types: dsorterTypes, phases: dsortPhases}, 1524 func(dsorterType, phase string, t *testing.T) { 1525 for _, adding := range []bool{false, true} { 1526 t.Run(strconv.FormatBool(adding), func(t *testing.T) { 1527 var ( 1528 m = &ioContext{ 1529 t: t, 1530 } 1531 df = &dsortFramework{ 1532 m: m, 1533 dsorterType: dsorterType, 1534 outputTempl: "output-{0..100000}", 1535 shardCnt: 500, 1536 filesPerShard: 200, 1537 } 1538 1539 mountpaths = make(map[*meta.Snode]string) 1540 ) 1541 1542 m.initAndSaveState(true /*cleanup*/) 1543 m.expectTargets(3) 1544 1545 // Initialize `df.baseParams` 1546 df.init() 1547 1548 targets := m.smap.Tmap.ActiveNodes() 1549 for idx, target := range targets { 1550 if adding { 1551 mpath := fmt.Sprintf("%s-%d", testMpath, idx) 1552 if docker.IsRunning() { 1553 err := docker.CreateMpathDir(0, mpath) 1554 tassert.CheckFatal(t, err) 1555 } else { 1556 err := cos.CreateDir(mpath) 1557 tassert.CheckFatal(t, err) 1558 } 1559 1560 mountpaths[target] = mpath 1561 } else { 1562 targetMountpaths, err := api.GetMountpaths(df.baseParams, target) 1563 tassert.CheckFatal(t, err) 1564 mountpaths[target] = targetMountpaths.Available[0] 1565 } 1566 } 1567 1568 t.Cleanup(func() { 1569 // Wait for any resilver that might be still running. 1570 tools.WaitForResilvering(t, df.baseParams, nil) 1571 1572 for target, mpath := range mountpaths { 1573 if adding { 1574 tlog.Logf("removing mountpath %q from %s...\n", mpath, target.ID()) 1575 err := api.DetachMountpath(df.baseParams, target, mpath, true /*dont-resil*/) 1576 tassert.CheckError(t, err) 1577 err = os.RemoveAll(mpath) 1578 tassert.CheckError(t, err) 1579 } else { 1580 tlog.Logf("adding mountpath %q to %s...\n", mpath, target.ID()) 1581 err := api.AttachMountpath(df.baseParams, target, mpath) 1582 tassert.CheckError(t, err) 1583 } 1584 } 1585 1586 tools.WaitForResilvering(t, df.baseParams, nil) 1587 }) 1588 1589 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1590 1591 df.createInputShards() 1592 1593 tlog.Logf("starting dsort (abort on: %s)...\n", phase) 1594 df.start() 1595 1596 waitForDsortPhase(t, m.proxyURL, df.managerUUID, phase, func() { 1597 for target, mpath := range mountpaths { 1598 if adding { 1599 tlog.Logf("adding new mountpath %q to %s...\n", mpath, target.ID()) 1600 err := api.AttachMountpath(df.baseParams, target, mpath) 1601 tassert.CheckFatal(t, err) 1602 } else { 1603 tlog.Logf("removing mountpath %q from %s...\n", mpath, target.ID()) 1604 err := api.DetachMountpath(df.baseParams, target, 1605 mpath, false /*dont-resil*/) 1606 tassert.CheckFatal(t, err) 1607 } 1608 } 1609 }) 1610 1611 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1612 tassert.CheckError(t, err) 1613 1614 df.checkMetrics(true /*expectAbort*/) 1615 }) 1616 } 1617 }, 1618 ) 1619 } 1620 1621 func TestDsortAddTarget(t *testing.T) { 1622 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1623 1624 runDsortTest( 1625 t, dsortTestSpec{p: false, types: dsorterTypes}, 1626 func(dsorterType string, t *testing.T) { 1627 var ( 1628 m = &ioContext{ 1629 t: t, 1630 } 1631 df = &dsortFramework{ 1632 m: m, 1633 dsorterType: dsorterType, 1634 outputTempl: "output-{0..100000}", 1635 shardCnt: 1000, 1636 filesPerShard: 200, 1637 } 1638 ) 1639 1640 m.initAndSaveState(true /*cleanup*/) 1641 m.expectTargets(3) 1642 1643 df.init() 1644 1645 target := m.startMaintenanceNoRebalance() 1646 1647 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1648 1649 df.createInputShards() 1650 1651 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 1652 df.start() 1653 1654 defer tools.WaitForRebalAndResil(t, df.baseParams) 1655 1656 waitForDsortPhase(t, m.proxyURL, df.managerUUID, dsort.ExtractionPhase, func() { 1657 m.stopMaintenance(target) 1658 }) 1659 1660 aborted, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1661 tassert.CheckFatal(t, err) 1662 if !aborted { 1663 t.Errorf("%s was not aborted", apc.ActDsort) 1664 } 1665 1666 tlog.Logf("%s: checking metrics\n", df.job()) 1667 allMetrics, err := api.MetricsDsort(df.baseParams, df.managerUUID) 1668 tassert.CheckFatal(t, err) 1669 if len(allMetrics) != m.originalTargetCount-1 { 1670 t.Errorf("number of metrics %d is different than number of targets when %s started %d", 1671 len(allMetrics), apc.ActDsort, m.originalTargetCount-1) 1672 } 1673 }, 1674 ) 1675 } 1676 1677 func TestDsortMetricsAfterFinish(t *testing.T) { 1678 runDsortTest( 1679 t, dsortTestSpec{p: true, types: dsorterTypes}, 1680 func(dsorterType string, t *testing.T) { 1681 var ( 1682 m = &ioContext{ 1683 t: t, 1684 } 1685 df = &dsortFramework{ 1686 m: m, 1687 dsorterType: dsorterType, 1688 outputTempl: "output-{0..1000}", 1689 shardCnt: 50, 1690 filesPerShard: 10, 1691 } 1692 ) 1693 1694 m.initAndSaveState(true /*cleanup*/) 1695 m.expectTargets(3) 1696 1697 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1698 1699 df.init() 1700 df.createInputShards() 1701 1702 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 1703 df.start() 1704 1705 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1706 tassert.CheckFatal(t, err) 1707 tlog.Logf("%s: finished\n", df.job()) 1708 1709 df.checkMetrics(false /* expectAbort */) 1710 df.checkOutputShards(0) 1711 1712 tlog.Logln("checking if metrics are still accessible after some time..") 1713 time.Sleep(2 * time.Second) 1714 1715 // Check if metrics can be fetched after some time 1716 df.checkMetrics(false /* expectAbort */) 1717 }, 1718 ) 1719 } 1720 1721 func TestDsortSelfAbort(t *testing.T) { 1722 runDsortTest( 1723 t, dsortTestSpec{p: true, types: dsorterTypes}, 1724 func(dsorterType string, t *testing.T) { 1725 var ( 1726 m = &ioContext{ 1727 t: t, 1728 } 1729 df = &dsortFramework{ 1730 m: m, 1731 dsorterType: dsorterType, 1732 shardCnt: 500, 1733 filesPerShard: 100, 1734 missingShards: cmn.AbortReaction, 1735 } 1736 ) 1737 1738 m.initAndSaveState(true /*cleanup*/) 1739 m.expectTargets(3) 1740 1741 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1742 1743 df.init() 1744 1745 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 1746 df.start() 1747 1748 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1749 tassert.CheckFatal(t, err) 1750 tlog.Logf("%s: finished\n", df.job()) 1751 1752 // Wait a while for all targets to abort 1753 time.Sleep(2 * time.Second) 1754 1755 df.checkMetrics(true /* expectAbort */) 1756 }, 1757 ) 1758 } 1759 1760 func TestDsortOnOOM(t *testing.T) { 1761 t.Skip("test can take more than couple minutes, run it only when necessary") 1762 1763 runDsortTest( 1764 t, dsortTestSpec{p: false, types: dsorterTypes}, 1765 func(dsorterType string, t *testing.T) { 1766 var ( 1767 m = &ioContext{ 1768 t: t, 1769 } 1770 df = &dsortFramework{ 1771 m: m, 1772 dsorterType: dsorterType, 1773 filesPerShard: 200, 1774 fileSz: 10 * cos.MiB, 1775 maxMemUsage: "80%", 1776 } 1777 mem sys.MemStat 1778 ) 1779 1780 err := mem.Get() 1781 tassert.CheckFatal(t, err) 1782 1783 // Calculate number of shards to cause OOM and overestimate it to make sure 1784 // that if dsort doesn't prevent it, it will happen. Notice that maxMemUsage 1785 // is 80% so dsort should never go above this number in memory usage. 1786 df.shardCnt = int(float64(mem.ActualFree/uint64(df.fileSz)/uint64(df.filesPerShard)) * 1.4) 1787 1788 m.initAndSaveState(true /*cleanup*/) 1789 m.expectTargets(3) 1790 1791 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1792 1793 df.init() 1794 df.createInputShards() 1795 1796 tlog.Logf("starting dsort: %d/%d\n", df.shardCnt, df.filesPerShard) 1797 df.start() 1798 1799 _, err = tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1800 tassert.CheckFatal(t, err) 1801 tlog.Logf("%s: finished\n", df.job()) 1802 1803 df.checkMetrics(false /* expectAbort */) 1804 df.checkOutputShards(5) 1805 }, 1806 ) 1807 } 1808 1809 func TestDsortMissingShards(t *testing.T) { 1810 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1811 for _, ext := range []string{archive.ExtTar, archive.ExtTarLz4} { 1812 t.Run(ext, func(t *testing.T) { 1813 runDsortTest( 1814 t, dsortTestSpec{ 1815 p: false, 1816 types: dsorterTypes, 1817 reactions: cmn.SupportedReactions, 1818 scopes: dsortSettingScopes, 1819 }, 1820 func(dsorterType, reaction, scope string, t *testing.T) { 1821 if scope != scopeConfig { 1822 t.Parallel() 1823 } 1824 1825 var ( 1826 m = &ioContext{ 1827 t: t, 1828 } 1829 df = &dsortFramework{ 1830 m: m, 1831 dsorterType: dsorterType, 1832 outputTempl: "output-{0..100000}", 1833 shardCnt: 500, 1834 shardCntToSkip: 50, 1835 filesPerShard: 200, 1836 inputExt: ext, 1837 } 1838 ) 1839 1840 m.initAndSaveState(true /*cleanup*/) 1841 m.expectTargets(3) 1842 1843 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1844 1845 switch scope { 1846 case scopeConfig: 1847 defer tools.SetClusterConfig(t, 1848 cos.StrKVs{"distributed_sort.missing_shards": cmn.IgnoreReaction}) 1849 tools.SetClusterConfig(t, cos.StrKVs{"distributed_sort.missing_shards": reaction}) 1850 1851 tlog.Logf("changed `missing_shards` config to: %s\n", reaction) 1852 case scopeSpec: 1853 df.missingShards = reaction 1854 tlog.Logf("set `missing_shards` in request spec to: %s\n", reaction) 1855 default: 1856 cos.AssertMsg(false, scope) 1857 } 1858 1859 df.init() 1860 df.createInputShards() 1861 1862 tlog.Logf("starting dsort: %d/%d, %s\n", df.shardCnt, df.filesPerShard, df.inputExt) 1863 df.start() 1864 1865 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1866 tassert.CheckFatal(t, err) 1867 tlog.Logf("%s: finished\n", df.job()) 1868 1869 df.checkReactionResult(reaction, df.shardCntToSkip) 1870 }, 1871 ) 1872 }) 1873 } 1874 } 1875 1876 func TestDsortDuplications(t *testing.T) { 1877 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 1878 for _, ext := range []string{archive.ExtTar, archive.ExtTarLz4, archive.ExtTarGz, archive.ExtZip} { // all supported formats 1879 t.Run(ext, func(t *testing.T) { 1880 runDsortTest( 1881 t, dsortTestSpec{ 1882 p: false, 1883 types: dsorterTypes, 1884 reactions: cmn.SupportedReactions, 1885 scopes: dsortSettingScopes, 1886 }, 1887 func(dsorterType, reaction, scope string, t *testing.T) { 1888 if scope != scopeConfig { 1889 t.Parallel() 1890 } 1891 var ( 1892 m = &ioContext{ 1893 t: t, 1894 } 1895 df = &dsortFramework{ 1896 m: m, 1897 dsorterType: dsorterType, 1898 outputTempl: "output-{0..100000}", 1899 shardCnt: 500, 1900 filesPerShard: 200, 1901 recordDuplicationsCnt: 50, 1902 inputExt: ext, 1903 } 1904 ) 1905 m.initAndSaveState(false /*cleanup*/) 1906 m.expectTargets(3) 1907 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1908 1909 switch scope { 1910 case scopeConfig: 1911 defer tools.SetClusterConfig(t, 1912 cos.StrKVs{"distributed_sort.duplicated_records": cmn.AbortReaction}) 1913 tools.SetClusterConfig(t, cos.StrKVs{"distributed_sort.duplicated_records": reaction}) 1914 1915 tlog.Logf("changed `duplicated_records` config to: %s\n", reaction) 1916 case scopeSpec: 1917 df.duplicatedRecords = reaction 1918 tlog.Logf("set `duplicated_records` in request spec to: %s\n", reaction) 1919 default: 1920 cos.AssertMsg(false, scope) 1921 } 1922 1923 df.init() 1924 df.createInputShards() 1925 1926 tlog.Logf("starting dsort: %d/%d, %s\n", df.shardCnt, df.filesPerShard, df.inputExt) 1927 df.start() 1928 1929 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 1930 tassert.CheckFatal(t, err) 1931 tlog.Logf("%s: finished\n", df.job()) 1932 1933 df.checkReactionResult(reaction, df.recordDuplicationsCnt) 1934 }, 1935 ) 1936 }) 1937 } 1938 } 1939 1940 func TestDsortOrderFile(t *testing.T) { 1941 runDsortTest( 1942 t, dsortTestSpec{p: true, types: dsorterTypes}, 1943 func(dsorterType string, t *testing.T) { 1944 var ( 1945 err error 1946 m = &ioContext{ 1947 t: t, 1948 } 1949 df = &dsortFramework{ 1950 m: m, 1951 dsorterType: dsorterType, 1952 outputBck: cmn.Bck{ 1953 Name: trand.String(15), 1954 Provider: apc.AIS, 1955 }, 1956 shardCnt: 100, 1957 filesPerShard: 10, 1958 } 1959 1960 orderFileName = "orderFileName" 1961 ekm = make(map[string]string, 10) 1962 shardFmts = []string{ 1963 "shard-%d-suf", 1964 "input-%d-pref", 1965 "smth-%d", 1966 } 1967 proxyURL = tools.RandomProxyURL() 1968 baseParams = tools.BaseAPIParams(proxyURL) 1969 ) 1970 1971 m.initAndSaveState(true /*cleanup*/) 1972 m.expectTargets(3) 1973 1974 // Set URL for order file (points to the object in cluster). 1975 df.orderFileURL = fmt.Sprintf( 1976 "%s/%s/%s/%s/%s?%s=%s", 1977 proxyURL, apc.Version, apc.Objects, m.bck.Name, orderFileName, 1978 apc.QparamProvider, apc.AIS, 1979 ) 1980 1981 df.init() 1982 1983 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 1984 1985 // Create local output bucket 1986 tools.CreateBucket(t, m.proxyURL, df.outputBck, nil, true /*cleanup*/) 1987 1988 df.createInputShards() 1989 1990 // Generate content for the orderFile 1991 tlog.Logln("generating and putting order file into cluster...") 1992 var ( 1993 buffer bytes.Buffer 1994 shardRecords = df.getRecordNames(m.bck) 1995 ) 1996 for _, shard := range shardRecords { 1997 for idx, recordName := range shard.recordNames { 1998 buffer.WriteString(fmt.Sprintf("%s\t%s\n", recordName, shardFmts[idx%len(shardFmts)])) 1999 ekm[recordName] = shardFmts[idx%len(shardFmts)] 2000 } 2001 } 2002 args := api.PutArgs{ 2003 BaseParams: baseParams, 2004 Bck: m.bck, 2005 ObjName: orderFileName, 2006 Reader: readers.NewBytes(buffer.Bytes()), 2007 } 2008 _, err = api.PutObject(&args) 2009 tassert.CheckFatal(t, err) 2010 2011 tlog.Logln(startingDS) 2012 spec := df.gen() 2013 managerUUID, err := api.StartDsort(baseParams, &spec) 2014 tassert.CheckFatal(t, err) 2015 2016 _, err = tools.WaitForDsortToFinish(m.proxyURL, managerUUID) 2017 tassert.CheckFatal(t, err) 2018 tlog.Logf("%s: finished\n", df.job()) 2019 2020 allMetrics, err := api.MetricsDsort(baseParams, managerUUID) 2021 tassert.CheckFatal(t, err) 2022 if len(allMetrics) != m.originalTargetCount { 2023 t.Errorf("number of metrics %d is not same as number of targets %d", len(allMetrics), m.originalTargetCount) 2024 } 2025 2026 tlog.Logln("checking if all records are in specified shards...") 2027 shardRecords = df.getRecordNames(df.outputBck) 2028 for _, shard := range shardRecords { 2029 for _, recordName := range shard.recordNames { 2030 match := false 2031 // Some shard with specified format contains the record 2032 for i := range 30 { 2033 match = match || fmt.Sprintf(ekm[recordName], i) == shard.name 2034 } 2035 if !match { 2036 t.Errorf("record %q was not part of any shard with format %q but was in shard %q", 2037 recordName, ekm[recordName], shard.name) 2038 } 2039 } 2040 } 2041 }, 2042 ) 2043 } 2044 2045 func TestDsortOrderJSONFile(t *testing.T) { 2046 runDsortTest( 2047 t, dsortTestSpec{p: true, types: dsorterTypes}, 2048 func(dsorterType string, t *testing.T) { 2049 var ( 2050 err error 2051 m = &ioContext{ 2052 t: t, 2053 } 2054 df = &dsortFramework{ 2055 m: m, 2056 dsorterType: dsorterType, 2057 outputBck: cmn.Bck{ 2058 Name: trand.String(15), 2059 Provider: apc.AIS, 2060 }, 2061 shardCnt: 100, 2062 filesPerShard: 10, 2063 } 2064 2065 orderFileName = "order_file_name.json" 2066 ekm = make(map[string]string, 10) 2067 shardFmts = []string{ 2068 "shard-%d-suf", 2069 "input-%d-pref", 2070 "smth-%d", 2071 } 2072 proxyURL = tools.RandomProxyURL() 2073 baseParams = tools.BaseAPIParams(proxyURL) 2074 ) 2075 2076 m.initAndSaveState(true /*cleanup*/) 2077 m.expectTargets(3) 2078 2079 // Set URL for order file (points to the object in cluster). 2080 df.orderFileURL = fmt.Sprintf( 2081 "%s/%s/%s/%s/%s?%s=%s", 2082 proxyURL, apc.Version, apc.Objects, m.bck.Name, orderFileName, 2083 apc.QparamProvider, apc.AIS, 2084 ) 2085 2086 df.init() 2087 2088 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 2089 2090 // Create local output bucket 2091 tools.CreateBucket(t, m.proxyURL, df.outputBck, nil, true /*cleanup*/) 2092 2093 df.createInputShards() 2094 2095 // Generate content for the orderFile 2096 tlog.Logln("generating and putting order file into cluster...") 2097 var ( 2098 content = make(map[string][]string, 10) 2099 shardRecords = df.getRecordNames(m.bck) 2100 ) 2101 for _, shard := range shardRecords { 2102 for idx, recordName := range shard.recordNames { 2103 shardFmt := shardFmts[idx%len(shardFmts)] 2104 content[shardFmt] = append(content[shardFmt], recordName) 2105 ekm[recordName] = shardFmts[idx%len(shardFmts)] 2106 } 2107 } 2108 jsonBytes, err := jsoniter.Marshal(content) 2109 tassert.CheckFatal(t, err) 2110 args := api.PutArgs{ 2111 BaseParams: baseParams, 2112 Bck: m.bck, 2113 ObjName: orderFileName, 2114 Reader: readers.NewBytes(jsonBytes), 2115 } 2116 _, err = api.PutObject(&args) 2117 tassert.CheckFatal(t, err) 2118 2119 tlog.Logln(startingDS) 2120 spec := df.gen() 2121 managerUUID, err := api.StartDsort(baseParams, &spec) 2122 tassert.CheckFatal(t, err) 2123 2124 _, err = tools.WaitForDsortToFinish(m.proxyURL, managerUUID) 2125 tassert.CheckFatal(t, err) 2126 tlog.Logf("%s: finished\n", df.job()) 2127 2128 allMetrics, err := api.MetricsDsort(baseParams, managerUUID) 2129 tassert.CheckFatal(t, err) 2130 if len(allMetrics) != m.originalTargetCount { 2131 t.Errorf("number of metrics %d is not same as number of targets %d", 2132 len(allMetrics), m.originalTargetCount) 2133 } 2134 2135 tlog.Logln("checking if all records are in specified shards...") 2136 shardRecords = df.getRecordNames(df.outputBck) 2137 for _, shard := range shardRecords { 2138 for _, recordName := range shard.recordNames { 2139 match := false 2140 // Some shard with specified format contains the record 2141 for i := range 30 { 2142 match = match || fmt.Sprintf(ekm[recordName], i) == shard.name 2143 } 2144 if !match { 2145 t.Errorf("record %q was not part of any shard with format %q but was in shard %q", 2146 recordName, ekm[recordName], shard.name) 2147 } 2148 } 2149 } 2150 }, 2151 ) 2152 } 2153 2154 func TestDsortDryRun(t *testing.T) { 2155 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 2156 2157 runDsortTest( 2158 t, dsortTestSpec{p: true, types: dsorterTypes}, 2159 func(dsorterType string, t *testing.T) { 2160 var ( 2161 m = &ioContext{ 2162 t: t, 2163 } 2164 df = &dsortFramework{ 2165 m: m, 2166 dsorterType: dsorterType, 2167 shardCnt: 500, 2168 filesPerShard: 100, 2169 dryRun: true, 2170 } 2171 ) 2172 2173 m.initAndSaveState(true /*cleanup*/) 2174 m.expectTargets(3) 2175 2176 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 2177 2178 df.init() 2179 df.createInputShards() 2180 2181 tlog.Logln(startingDS) 2182 df.start() 2183 2184 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 2185 tassert.CheckFatal(t, err) 2186 tlog.Logf("%s: finished\n", df.job()) 2187 2188 df.checkMetrics(false /* expectAbort */) 2189 }, 2190 ) 2191 } 2192 2193 func TestDsortDryRunDisk(t *testing.T) { 2194 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 2195 2196 runDsortTest( 2197 t, dsortTestSpec{p: true, types: dsorterTypes}, 2198 func(dsorterType string, t *testing.T) { 2199 var ( 2200 m = &ioContext{ 2201 t: t, 2202 } 2203 df = &dsortFramework{ 2204 m: m, 2205 dsorterType: dsorterType, 2206 shardCnt: 500, 2207 filesPerShard: 100, 2208 dryRun: true, 2209 } 2210 ) 2211 2212 m.initAndSaveState(true /*cleanup*/) 2213 m.expectTargets(3) 2214 2215 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 2216 2217 df.init() 2218 df.createInputShards() 2219 2220 tlog.Logln(startingDS) 2221 df.start() 2222 2223 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 2224 tassert.CheckFatal(t, err) 2225 tlog.Logf("%s: finished\n", df.job()) 2226 2227 df.checkMetrics(false /* expectAbort */) 2228 }, 2229 ) 2230 } 2231 2232 func TestDsortLongerExt(t *testing.T) { 2233 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 2234 2235 runDsortTest( 2236 t, dsortTestSpec{p: true, types: dsorterTypes, algs: dsortAlgorithms}, 2237 func(dsorterType, alg string, t *testing.T) { 2238 var ( 2239 m = &ioContext{ 2240 t: t, 2241 } 2242 df = &dsortFramework{ 2243 m: m, 2244 dsorterType: dsorterType, 2245 outputTempl: "output-%05d", 2246 shardCnt: 200, 2247 filesPerShard: 10, 2248 maxMemUsage: "99%", 2249 alg: &dsort.Algorithm{Kind: alg}, 2250 recordExts: []string{".txt", ".json.info", ".info", ".json"}, 2251 } 2252 ) 2253 2254 m.initAndSaveState(true /*cleanup*/) 2255 m.expectTargets(3) 2256 2257 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 2258 2259 df.init() 2260 df.createInputShards() 2261 2262 tlog.Logln(startingDS) 2263 df.start() 2264 2265 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 2266 tassert.CheckFatal(t, err) 2267 tlog.Logf("%s: finished\n", df.job()) 2268 2269 df.checkMetrics(false /*expectAbort*/) 2270 df.checkOutputShards(5) 2271 }, 2272 ) 2273 } 2274 2275 func TestDsortAutomaticallyCalculateOutputShards(t *testing.T) { 2276 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 2277 2278 runDsortTest( 2279 t, dsortTestSpec{p: true, types: dsorterTypes}, 2280 func(dsorterType string, t *testing.T) { 2281 var ( 2282 m = &ioContext{ 2283 t: t, 2284 } 2285 df = &dsortFramework{ 2286 m: m, 2287 dsorterType: dsorterType, 2288 shardCnt: 200, 2289 filesPerShard: 10, 2290 maxMemUsage: "99%", 2291 outputShardSize: "-1", 2292 outputTempl: "output-{0..10}", 2293 } 2294 ) 2295 2296 m.initAndSaveState(true /*cleanup*/) 2297 m.expectTargets(3) 2298 2299 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 2300 2301 df.init() 2302 df.createInputShards() 2303 2304 tlog.Logln(startingDS) 2305 df.start() 2306 2307 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 2308 tassert.CheckFatal(t, err) 2309 tlog.Logf("%s: finished\n", df.job()) 2310 2311 df.checkMetrics(false /*expectAbort*/) 2312 df.checkOutputShards(0) 2313 }, 2314 ) 2315 } 2316 2317 func TestDsortWithTarFormats(t *testing.T) { 2318 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 2319 2320 runDsortTest( 2321 // Include empty ("") type - in this case type must be selected automatically. 2322 t, dsortTestSpec{p: true, types: append(dsorterTypes, ""), 2323 tarFormats: []tar.Format{tar.FormatUnknown, tar.FormatGNU, tar.FormatPAX}}, 2324 func(dsorterType string, tarFormat tar.Format, t *testing.T) { 2325 var ( 2326 m = &ioContext{ 2327 t: t, 2328 } 2329 df = &dsortFramework{ 2330 m: m, 2331 dsorterType: dsorterType, 2332 shardCnt: 500, 2333 filesPerShard: 100, 2334 maxMemUsage: "1B", 2335 tarFormat: tarFormat, 2336 recordExts: []string{".txt"}, 2337 } 2338 ) 2339 2340 // Initialize ioContext 2341 m.initAndSaveState(true /*cleanup*/) 2342 m.expectTargets(1) 2343 2344 // Create ais bucket 2345 tools.CreateBucket(t, m.proxyURL, m.bck, nil, true /*cleanup*/) 2346 2347 df.init() 2348 df.createInputShards() 2349 2350 tlog.Logln(startingDS) 2351 df.start() 2352 2353 _, err := tools.WaitForDsortToFinish(m.proxyURL, df.managerUUID) 2354 tassert.CheckFatal(t, err) 2355 tlog.Logf("%s: finished\n", df.job()) 2356 2357 df.checkMetrics(false /* expectAbort */) 2358 df.checkOutputShards(5) 2359 }, 2360 ) 2361 }