github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/bench/tools/aisloader/run.go

// Package aisloader
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */

// AIS loader (aisloader) is a tool to measure storage performance. It's a load
// generator that can be used to benchmark and stress-test AIStore
// or any S3-compatible backend.
// In fact, aisloader can list, write, and read S3(*) buckets _directly_, which
// makes it a convenient and easy-to-use benchmark tool to compare storage
// performance with aistore in front of S3 vs _without_ it.
//
// (*) aisloader can be further easily extended to work directly with any
// Cloud storage including, but not limited to, aistore-supported GCP and Azure.
//
// In addition, `aisloader` generates synthetic workloads that mimic training and
// inference workloads - a capability that allows running benchmarks in isolation,
// avoiding compute-side bottlenecks and the associated complexity (of analyzing those).
//
// For usage, run: `aisloader`, or `aisloader usage`, or `aisloader --help`,
// or see examples.go.

package aisloader

import (
	"bufio"
	"errors"
	"flag"
	"fmt"
	"io"
	"math"
	"math/rand"
	"os"
	"os/signal"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"text/tabwriter"
	"time"

	"github.com/NVIDIA/aistore/api"
	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/api/authn"
	"github.com/NVIDIA/aistore/api/env"
	"github.com/NVIDIA/aistore/bench/tools/aisloader/namegetter"
	"github.com/NVIDIA/aistore/bench/tools/aisloader/stats"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/mono"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/ext/etl"
	"github.com/NVIDIA/aistore/hk"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/NVIDIA/aistore/stats/statsd"
	"github.com/NVIDIA/aistore/tools/readers"
	"github.com/NVIDIA/aistore/tools/tetl"
	"github.com/NVIDIA/aistore/xact"
	"github.com/OneOfOne/xxhash"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	jsoniter "github.com/json-iterator/go"
)

const (
	myName           = "loader"
	randomObjNameLen = 32

	wo2FreeSize  = 4096
	wo2FreeDelay = 3*time.Second + time.Millisecond

	ua = "aisloader"

	defaultClusterIP   = "localhost"
	defaultClusterIPv4 = "127.0.0.1"
)

type (
	params struct {
		seed              int64 // random seed; UnixNano() if omitted
		putSizeUpperBound int64
		minSize           int64
		maxSize           int64
		readOff           int64 // read offset
		readLen           int64 // read length
		loaderCnt         uint64
		maxputs           uint64
		putShards         uint64
		statsdPort        int
		statsShowInterval int
		putPct            int // % of PUTs; the rest are GETs
		numWorkers        int
		batchSize         int // batch size used for bootstrapping (list) and delete
		loaderIDHashLen   uint
		numEpochs         uint

		duration DurationExt // stop after running for at least this long

		bp   api.BaseParams
		smap *meta.Smap

		bck    cmn.Bck
		bProps cmn.Bprops

		loaderID             string // used with multiple loader instances generating objects in parallel
		proxyURL             string
		readerType           string
		tmpDir               string // used only when usingFile
		statsOutput          string
		cksumType            string
		statsdIP             string
		bPropsStr            string
		putSizeUpperBoundStr string // stop after writing this amount of data
		minSizeStr           string
		maxSizeStr           string
		readOffStr           string // read offset
		readLenStr           string // read length
		subDir               string
		tokenFile            string
		fileList             string // local file that contains object names (an alternative to running list-objects)

		etlName     string // name of an ETL to apply to each object; omitted when etlSpecPath is specified
		etlSpecPath string // path to an ETL spec to apply to each object

		cleanUp BoolExt // cleanup, i.e., remove and destroy everything created during the bench

		statsdProbe   bool
		getLoaderID   bool
		randomObjName bool
		randomProxy   bool
		uniqueGETs    bool
		skipList      bool // when true, skip listing objects before running 100% PUT workload (see also fileList)
		verifyHash    bool // verify xxhash during GET
		getConfig     bool // when true, execute control plane requests (read cluster configuration)
		jsonFormat    bool
		stoppable     bool // when true, terminate by Ctrl-C
		dryRun        bool // print configuration and parameters that aisloader will use at runtime
		traceHTTP     bool // trace HTTP latencies as per httpLatencies & https://golang.org/pkg/net/http/httptrace
		latest        bool // check in-cluster metadata and possibly GET the latest object version from the associated remote bucket
		cached        bool // list in-cluster objects - only those objects from a remote bucket that are present ("cached")
	}

	// sts records accumulated PUT/GET information.
	sts struct {
		put       stats.HTTPReq
		get       stats.HTTPReq
		getConfig stats.HTTPReq
		statsd    stats.Metrics
	}

	jsonStats struct {
		Start      time.Time     `json:"start_time"`   // time when the current stats window started
		Cnt        int64         `json:"count,string"` // total number of requests
		Bytes      int64         `json:"bytes,string"` // total bytes by all requests
		Errs       int64         `json:"errors"`       // number of failed requests
		Latency    int64         `json:"latency"`      // average request latency in nanoseconds
		Duration   time.Duration `json:"duration"`
		MinLatency int64         `json:"min_latency"`
		MaxLatency int64         `json:"max_latency"`
		Throughput int64         `json:"throughput,string"`
	}
)

var (
	runParams        *params
	rnd              *rand.Rand
	intervalStats    sts
	accumulatedStats sts
	bucketObjsNames  namegetter.ObjectNameGetter
	statsPrintHeader = "%-10s%-6s%-22s\t%-22s\t%-36s\t%-22s\t%-10s\n"
	statsdC          *statsd.Client
	getPending       int64
	putPending       int64
	traceHTTPSig     atomic.Bool

	flagUsage   bool
	flagVersion bool
	flagQuiet   bool

	etlInitSpec *etl.InitSpecMsg
	etlName     string

	useRandomObjName bool
	objNameCnt       atomic.Uint64

	suffixIDMaskLen uint
	suffixID        uint64

	numGets atomic.Int64

	gmm      *memsys.MMSA
	stopping atomic.Bool

	ip          string
	port        string
	envEndpoint string

	s3svc *s3.Client // s3 client - see s3ListObjects

	s3Endpoint     string
	s3Profile      string
	s3UsePathStyle bool

	loggedUserToken string
)

var (
	workCh  chan *workOrder
	resCh   chan *workOrder
	wo2Free []*workOrder
)

var _version, _buildtime string

// main function
func Start(version, buildtime string) (err error) {
	_version, _buildtime = version, buildtime

	// global and parsed/validated
	runParams = &params{}

	// discard flags of imported packages;
	// define and add aisloader's own flags;
	// parse flags
	f := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
	addCmdLine(f, runParams)

	// validate and finish initialization
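	// (see _init below: it parses size and range options, seeds the RNG,
	// derives the loader's suffixID and mask used in object naming, loads
	// ETL specs, parses bucket props, and resolves the aistore or S3 endpoint)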
	if err = _init(runParams); err != nil {
		return err
	}

	// print arguments unless quiet
	if !flagQuiet && !runParams.getLoaderID {
		printArguments(f)
	}

	if runParams.getLoaderID {
		fmt.Printf("0x%x\n", suffixID)
		if useRandomObjName {
			fmt.Printf("Warning: loaderID 0x%x used only for StatsD, not for object names!\n", suffixID)
		}
		return nil
	}

	// If none of duration, epochs, or PUT-size upper bound is specified, the run is a no-op.
	// Note that `stoppable` prevents it from being a no-op;
	// a no-op run can still be used as a cleanup-only run (no PUTs, no GETs).
	if runParams.duration.Val == 0 {
		if runParams.putSizeUpperBound == 0 && runParams.numEpochs == 0 && !runParams.stoppable {
			if runParams.cleanUp.Val {
				cleanup()
			}
			return nil
		}

		runParams.duration.Val = time.Duration(math.MaxInt64)
	}

	if runParams.readerType == readers.TypeFile {
		if err := cos.CreateDir(runParams.tmpDir + "/" + myName); err != nil {
			return fmt.Errorf("failed to create local test directory %q, err = %s", runParams.tmpDir, err.Error())
		}
	}

	// usage is currently limited to selecting a random proxy (gateway)
	// to access aistore (done for every I/O request)
	if runParams.randomProxy {
		runParams.smap, err = api.GetClusterMap(runParams.bp)
		if err != nil {
			return fmt.Errorf("failed to get cluster map: %v", err)
		}
	}
	loggedUserToken = authn.LoadToken(runParams.tokenFile)
	runParams.bp.Token = loggedUserToken
	runParams.bp.UA = ua

	var created bool
	if !runParams.getConfig {
		if err := setupBucket(runParams, &created); err != nil {
			return err
		}
	}

	if isDirectS3() {
		if err := initS3Svc(); err != nil {
			return err
		}
	} else if s3UsePathStyle {
		return errors.New("cannot use '-s3-use-path-style' without '-s3endpoint'")
	}

	// list objects, or maybe not
	if created {
		if runParams.putPct < 100 {
			return errors.New("new bucket, expecting 100% PUT")
		}
		bucketObjsNames = &namegetter.RandomNameGetter{}
		bucketObjsNames.Init([]string{}, rnd)
	} else if !runParams.getConfig && !runParams.skipList {
		if err := listObjects(); err != nil {
			return err
		}

		objsLen := bucketObjsNames.Len()
		if runParams.putPct == 0 && objsLen == 0 {
			return errors.New("nothing to read, the bucket is empty")
		}

		fmt.Printf("Found %s existing object%s\n\n", cos.FormatBigNum(objsLen), cos.Plural(objsLen))
	} else {
		bucketObjsNames = &namegetter.RandomNameGetter{}
		bucketObjsNames.Init([]string{}, rnd)
	}

	printRunParams(runParams)
	if runParams.dryRun { // dry-run: print the configuration and exit
		os.Exit(0)
	}

	if runParams.cleanUp.Val {
		v := "destroyed"
		if !runParams.bck.IsAIS() {
			v = "emptied"
		}
		fmt.Printf("BEWARE: cleanup is enabled, bucket %s will be %s upon termination!\n", runParams.bck, v)
		time.Sleep(time.Second)
	}

	host, err := os.Hostname()
	if err != nil {
		return fmt.Errorf("failed to get hostname: %s", err.Error())
	}
	prefixC := fmt.Sprintf("aisloader.%s-%x", host, suffixID)
	statsdC, err = statsd.New(runParams.statsdIP, runParams.statsdPort, prefixC, runParams.statsdProbe)
	if err != nil {
		fmt.Println("Failed to connect to StatsD server")
		time.Sleep(time.Second)
	}
	defer statsdC.Close()

	// init housekeeper and memsys;
	// empty config to use memsys constants;
	// alternatively: "memsys": { "min_free": "2gb", ... }
	hk.Init()
	go hk.DefaultHK.Run()
	hk.WaitStarted()

	config := &cmn.Config{}
	config.Log.Level = "3"
	memsys.Init(prefixC, prefixC, config)
	gmm = memsys.PageMM()
	gmm.RegWithHK()

	if etlInitSpec != nil {
		fmt.Println(now(), "Starting ETL...")
		etlName, err = api.ETLInit(runParams.bp, etlInitSpec)
		if err != nil {
			return fmt.Errorf("failed to initialize ETL: %v", err)
		}
		fmt.Println(now(), etlName, "started")

		defer func() {
			fmt.Println(now(), "Stopping ETL", etlName)
			if err := api.ETLStop(runParams.bp, etlName); err != nil {
				fmt.Printf("%s Failed to stop ETL %s: %v\n", now(), etlName, err)
				return
			}
			fmt.Println(now(), etlName, "stopped")
		}()
	}

	workCh = make(chan *workOrder, runParams.numWorkers)
	resCh = make(chan *workOrder, runParams.numWorkers)
	wg := &sync.WaitGroup{}
	for range runParams.numWorkers {
		wg.Add(1)
		go worker(workCh, resCh, wg, &numGets)
	}
	if runParams.putPct != 0 {
		wo2Free = make([]*workOrder, 0, wo2FreeSize)
	}

	timer := time.NewTimer(runParams.duration.Val)

	var statsTicker *time.Ticker
	if runParams.statsShowInterval == 0 {
		statsTicker = time.NewTicker(math.MaxInt64)
	} else {
		statsTicker = time.NewTicker(time.Second * time.Duration(runParams.statsShowInterval))
	}

	tsStart := time.Now()
	intervalStats = newStats(tsStart)
	accumulatedStats = newStats(tsStart)

	statsWriter := os.Stdout

	if runParams.statsOutput != "" {
		f, err := cos.CreateFile(runParams.statsOutput)
		if err != nil {
			return fmt.Errorf("failed to create stats output file %q: %v", runParams.statsOutput, err)
		}

		statsWriter = f
	}

	osSigChan := make(chan os.Signal, 2)
	if runParams.stoppable {
		signal.Notify(osSigChan, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM)
	} else {
		signal.Notify(osSigChan, syscall.SIGHUP)
	}

	preWriteStats(statsWriter, runParams.jsonFormat)

	// get the workers started
	for range runParams.numWorkers {
		if err = postNewWorkOrder(); err != nil {
			break
		}
	}
	if err != nil {
		goto Done
	}

MainLoop:
	for {
		if runParams.putSizeUpperBound != 0 &&
			accumulatedStats.put.TotalBytes() >= runParams.putSizeUpperBound {
			break
		}

		if runParams.numEpochs > 0 { // if defined
			if numGets.Load() > int64(runParams.numEpochs)*int64(bucketObjsNames.Len()) {
				break
			}
		}

		// Prioritize stats reporting; otherwise, under load, we would keep dropping stats intervals.
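		// (The non-blocking select below drains a pending tick before the main
		// blocking select that follows, where the ticker competes with the
		// timer, results, and signals - and could otherwise be starved by a
		// steady stream of completed work orders.)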
		select {
		case <-statsTicker.C:
			accumulatedStats.aggregate(&intervalStats)
			writeStats(statsWriter, runParams.jsonFormat, false /* final */, &intervalStats, &accumulatedStats)
			sendStatsdStats(&intervalStats)
			intervalStats = newStats(time.Now())
		default:
		}

		select {
		case <-timer.C:
			break MainLoop
		case wo := <-resCh:
			completeWorkOrder(wo, false)
			if runParams.statsShowInterval == 0 && runParams.putSizeUpperBound != 0 {
				accumulatedStats.aggregate(&intervalStats)
				intervalStats = newStats(time.Now())
			}
			if err := postNewWorkOrder(); err != nil {
				fmt.Fprintln(os.Stderr, err.Error())
				break MainLoop
			}
		case <-statsTicker.C:
			accumulatedStats.aggregate(&intervalStats)
			writeStats(statsWriter, runParams.jsonFormat, false /* final */, &intervalStats, &accumulatedStats)
			sendStatsdStats(&intervalStats)
			intervalStats = newStats(time.Now())
		case sig := <-osSigChan:
			switch sig {
			case syscall.SIGHUP:
				msg := "Detailed latency info is "
				if traceHTTPSig.Toggle() {
					msg += "disabled"
				} else {
					msg += "enabled"
				}
				fmt.Println(msg)
			default:
				if runParams.stoppable {
					break MainLoop
				}
			}
		}
	}

Done:
	timer.Stop()
	statsTicker.Stop()
	close(workCh)
	wg.Wait() // wait until all workers complete their work

	// process leftover work orders
	close(resCh)
	for wo := range resCh {
		completeWorkOrder(wo, true)
	}

	finalizeStats(statsWriter)
	fmt.Printf("Stats written to %s\n", statsWriter.Name())
	if runParams.cleanUp.Val {
		cleanup()
	}

	fmt.Printf("\nActual run duration: %v\n", time.Since(tsStart))

	return err
}

func addCmdLine(f *flag.FlagSet, p *params) {
	f.BoolVar(&flagUsage, "usage", false, "show command-line options, usage, and examples")
	f.BoolVar(&flagVersion, "version", false, "show aisloader version")
	f.BoolVar(&flagQuiet, "quiet", false, "when starting to run, do not print command line arguments, default settings, and usage examples")
	f.DurationVar(&cargs.Timeout, "timeout", 10*time.Minute, "client HTTP timeout - used in LIST/GET/PUT/DELETE")
	f.IntVar(&p.statsShowInterval, "statsinterval", 10, "interval in seconds to print performance counters; 0 - disabled")
	f.StringVar(&p.bck.Name, "bucket", "", "bucket name or bucket URI; if empty, a bucket with a random name will be created")
	f.StringVar(&p.bck.Provider, "provider", apc.AIS,
		"\"ais\" for an AIS bucket; \"aws\", \"azure\", \"gcp\" for Amazon, Azure, and Google clouds, respectively")

	f.StringVar(&ip, "ip", defaultClusterIP, "AIS proxy/gateway IP address or hostname")
	f.StringVar(&port, "port", "8080", "AIS proxy/gateway port")

	//
	// s3 direct (NOTE: with no aistore in-between)
	//
	f.StringVar(&s3Endpoint, "s3endpoint", "", "S3 endpoint to read/write s3 bucket directly (with no aistore)")
	f.StringVar(&s3Profile, "s3profile", "", "S3 config profile other than the default, referencing alternative credentials")
	f.BoolVar(&s3UsePathStyle, "s3-use-path-style", false, "use older path-style addressing (as opposed to virtual-hosted style), e.g., https://s3.amazonaws.com/BUCKET/KEY; can only be used with the 's3endpoint' option")

	DurationExtVar(f, &p.duration, "duration", time.Minute,
		"benchmark duration (0 - run forever or until Ctrl-C).\n"+
			"If duration is not specified and totalputsize > 0, aisloader runs until totalputsize is reached;\n"+
			"otherwise, it runs until the first of duration and totalputsize is reached")
	f.IntVar(&p.numWorkers, "numworkers", 10, "number of goroutine workers operating on AIS in parallel")
	f.IntVar(&p.putPct, "pctput", 0, "percentage of PUTs in the aisloader-generated workload")
	f.StringVar(&p.tmpDir, "tmpdir", "/tmp/ais", "local directory to store temporary files")
	f.StringVar(&p.putSizeUpperBoundStr, "totalputsize", "0",
		"stop PUT workload once cumulative PUT size reaches or exceeds this value (can contain a standard multiplicative suffix K, MB, GiB, etc.; 0 - unlimited)")
	BoolExtVar(f, &p.cleanUp, "cleanup", "when true, remove bucket upon benchmark termination (must be specified for aistore buckets)")
	f.BoolVar(&p.verifyHash, "verifyhash", false,
		"when true, checksum-validate GET: recompute the object checksum and validate it against the one received with the GET metadata")

	f.StringVar(&p.minSizeStr, "minsize", "", "minimum object size (with or without multiplicative suffix K, MB, GiB, etc.)")
	f.StringVar(&p.maxSizeStr, "maxsize", "", "maximum object size (with or without multiplicative suffix K, MB, GiB, etc.)")
	f.StringVar(&p.readerType, "readertype", readers.TypeSG,
		fmt.Sprintf("[advanced usage only] type of reader: %s(default) | %s | %s | %s", readers.TypeSG, readers.TypeFile, readers.TypeRand, readers.TypeTar))
	f.StringVar(&p.loaderID, "loaderid", "0", "ID to identify a loader among multiple concurrent instances")
	f.StringVar(&p.statsdIP, "statsdip", "localhost", "StatsD IP address or hostname")
	f.StringVar(&p.tokenFile, "tokenfile", "", "authentication token (FQN)") // see also: AIS_AUTHN_TOKEN_FILE
	f.IntVar(&p.statsdPort, "statsdport", 8125, "StatsD UDP port")
	f.BoolVar(&p.statsdProbe, "statsdprobe", false, "when enabled, test-probe the StatsD server prior to running")
	f.IntVar(&p.batchSize, "batchsize", 100, "batch size to list and delete")
	f.StringVar(&p.bPropsStr, "bprops", "", "JSON string formatted as per the SetBucketProps API and containing bucket properties to apply")
	f.Int64Var(&p.seed, "seed", 0, "random seed to achieve deterministic reproducible results (0 - use current time in nanoseconds)")
	f.BoolVar(&p.jsonFormat, "json", false, "when true, print output in JSON format")
	f.StringVar(&p.readOffStr, "readoff", "", "read range offset (can contain multiplicative suffix K, MB, GiB, etc.)")
	f.StringVar(&p.readLenStr, "readlen", "", "read range length (can contain multiplicative suffix; 0 - GET full object)")
	f.Uint64Var(&p.maxputs, "maxputs", 0, "maximum number of objects to PUT")
	f.UintVar(&p.numEpochs, "epochs", 0, "number of \"epochs\" to run, whereby each epoch entails a full pass through the entire listed bucket")
	f.BoolVar(&p.skipList, "skiplist", false, "when true, skip listing objects in a bucket before running 100% PUT workload")
	f.StringVar(&p.fileList, "filelist", "", "local or locally accessible text file containing object names (for subsequent reading)")

	//
	// object naming
	//
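	// (an object name is either randomObjNameLen random characters or a consecutive
	//  number with an optional loader-specific hex suffix; the exact scheme is
	//  decided in _init based on the naming options that follow)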
	f.Uint64Var(&p.loaderCnt, "loadernum", 0,
		"total number of aisloaders running concurrently and generating combined load; if defined, must be greater than the loaderid and cannot be used together with loaderidhashlen")
	f.BoolVar(&p.getLoaderID, "getloaderid", false,
		"when true, print the stored/computed unique loaderID (aisloader identifier) and exit")
	f.UintVar(&p.loaderIDHashLen, "loaderidhashlen", 0,
		"size (in bits) of the generated aisloader identifier; cannot be used together with loadernum")
	f.BoolVar(&p.randomObjName, "randomname", true,
		"when true, generate object names of 32 random characters; this option is ignored when loadernum is defined")
	f.BoolVar(&p.randomProxy, "randomproxy", false,
		"when true, select a random gateway (\"proxy\") to execute each I/O request")
	f.StringVar(&p.subDir, "subdir", "", "virtual destination directory for all aisloader-generated objects")
	f.Uint64Var(&p.putShards, "putshards", 0, "spread generated objects over this many subdirectories (max 100k)")
	f.BoolVar(&p.uniqueGETs, "uniquegets", true,
		"when true, GET objects randomly but uniformly, making sure *not* to GET some objects more frequently than others")

	//
	// advanced usage
	//
	f.BoolVar(&p.getConfig, "getconfig", false,
		"when true, generate control plane load by reading AIS proxy configuration (that is, exercise the control path instead of reading/writing data)")
	f.StringVar(&p.statsOutput, "stats-output", "", "filename to log statistics (empty string translates as standard output (default))")
	f.BoolVar(&p.stoppable, "stoppable", false, "when true, stop upon Ctrl-C")
	f.BoolVar(&p.dryRun, "dry-run", false, "when true, show the configuration and parameters that aisloader would use for the benchmark")
	f.BoolVar(&p.traceHTTP, "trace-http", false, "when true, trace HTTP latencies") // see httpLatencies
	f.StringVar(&p.cksumType, "cksum-type", cos.ChecksumXXHash, "checksum type to use for PUT object requests")
	f.BoolVar(&p.latest, "latest", false, "when true, check in-cluster metadata and possibly GET the latest object version from the associated remote bucket")
	f.BoolVar(&p.cached, "cached", false, "list in-cluster objects - only those objects from a remote bucket that are present (\"cached\")")

	// ETL
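	// (when either of the two flags below is set, Start initializes the ETL via
	//  api.ETLInit before the run and stops it via api.ETLStop on exit - see above)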
	f.StringVar(&p.etlName, "etl", "", "name of an ETL to apply to each object on GET request; one of '', 'tar2tf', 'md5', 'echo'")
	f.StringVar(&p.etlSpecPath, "etl-spec", "", "path to an ETL spec to be applied to each object on GET request")

	// temporarily replace the flag.Usage callback:
	// with this many flags, the actual parsing error would otherwise quickly disappear from view
	orig := f.Usage
	f.Usage = func() {
		fmt.Println("Run `aisloader` (for inline help), `aisloader version` (for version), or see 'docs/aisloader.md' for details and usage examples.")
	}
	f.Parse(os.Args[1:])
	f.Usage = orig

	if len(os.Args[1:]) == 0 {
		printUsage(f)
		os.Exit(0)
	}

	os.Args = []string{os.Args[0]}
	flag.Parse() // called so that imported packages don't complain

	if flagUsage || (f.NArg() != 0 && (f.Arg(0) == "usage" || f.Arg(0) == "help")) {
		printUsage(f)
		os.Exit(0)
	}
	if flagVersion || (f.NArg() != 0 && f.Arg(0) == "version") {
		fmt.Printf("version %s (build %s)\n", _version, _buildtime)
		os.Exit(0)
	}
}

// validate command line and finish initialization
func _init(p *params) (err error) {
	// '--s3endpoint' takes precedence
	if s3Endpoint == "" {
		if ep := os.Getenv(env.AWS.Endpoint); ep != "" {
			s3Endpoint = ep
		}
	}
	if p.bck.Name != "" {
		if p.cleanUp.Val && isDirectS3() {
			return errors.New("direct S3 access via '-s3endpoint': option '-cleanup' is not supported yet")
		}
		if !p.cleanUp.IsSet && !isDirectS3() {
			fmt.Println("\nNote: `-cleanup` is a required option. Beware! When -cleanup=true the bucket will be destroyed upon completion of the benchmark.")
			fmt.Println("      The option must be specified in the command line, e.g.: `--cleanup=false`")
			os.Exit(1)
		}
	}

	if p.seed == 0 {
		p.seed = mono.NanoTime()
	}
	rnd = rand.New(rand.NewSource(p.seed))

	if p.putSizeUpperBoundStr != "" {
		if p.putSizeUpperBound, err = cos.ParseSize(p.putSizeUpperBoundStr, cos.UnitsIEC); err != nil {
			return fmt.Errorf("failed to parse total PUT size %s: %v", p.putSizeUpperBoundStr, err)
		}
	}

	if p.minSizeStr != "" {
		if p.minSize, err = cos.ParseSize(p.minSizeStr, cos.UnitsIEC); err != nil {
			return fmt.Errorf("failed to parse min size %s: %v", p.minSizeStr, err)
		}
	} else {
		p.minSize = cos.MiB
	}

	if p.maxSizeStr != "" {
		if p.maxSize, err = cos.ParseSize(p.maxSizeStr, cos.UnitsIEC); err != nil {
			return fmt.Errorf("failed to parse max size %s: %v", p.maxSizeStr, err)
		}
	} else {
		p.maxSize = cos.GiB
	}

	if !p.duration.IsSet {
		if p.putSizeUpperBound != 0 || p.numEpochs != 0 {
			// the user specified putSizeUpperBound or numEpochs but not duration:
			// override the default 1 minute and run until the other threshold is reached
			p.duration.Val = time.Duration(math.MaxInt64)
		} else {
			fmt.Printf("\nDuration not specified - running for %v\n\n", p.duration.Val)
		}
	}

	// sanity check
	if p.maxSize < p.minSize {
		return fmt.Errorf("invalid option: min and max size (%d, %d), respectively", p.minSize, p.maxSize)
	}

	if p.putPct < 0 || p.putPct > 100 {
		return fmt.Errorf("invalid option: PUT percent %d", p.putPct)
	}

	if p.skipList {
		if p.fileList != "" {
			fmt.Println("Warning: '-skiplist' is redundant (implied) when '-filelist' is specified")
		} else if p.putPct != 100 {
			return errors.New("invalid option: '-skiplist' is only valid for 100% PUT workloads")
		}
	}

	// direct s3 access vs other command line
	if isDirectS3() {
		if p.randomProxy {
			return errors.New("command line options '-s3endpoint' and '-randomproxy' are mutually exclusive")
		}
		if ip != "" && ip != defaultClusterIP && ip != defaultClusterIPv4 {
			return errors.New("command line options '-s3endpoint' and '-ip' are mutually exclusive")
		}
		if port != "" && port != "8080" { // TODO: ditto
			return errors.New("command line options '-s3endpoint' and '-port' are mutually exclusive")
		}
		if p.traceHTTP {
			return errors.New("direct S3 access via '-s3endpoint': HTTP tracing is not supported yet")
		}
		if p.cleanUp.Val {
			return errors.New("direct S3 access via '-s3endpoint': '-cleanup' option is not supported yet")
		}
		if p.verifyHash {
			return errors.New("direct S3 access via '-s3endpoint': '-verifyhash' option is not supported yet")
		}
		if p.readOffStr != "" || p.readLenStr != "" {
			return errors.New("direct S3 access via '-s3endpoint': read range is not supported yet")
		}
	}

	if p.statsShowInterval < 0 {
		return fmt.Errorf("invalid option: stats show interval %d", p.statsShowInterval)
	}

	if p.readOffStr != "" {
		if p.readOff, err = cos.ParseSize(p.readOffStr, cos.UnitsIEC); err != nil {
			return fmt.Errorf("failed to parse read offset %s: %v", p.readOffStr, err)
		}
	}
	if p.readLenStr != "" {
		if p.readLen, err = cos.ParseSize(p.readLenStr, cos.UnitsIEC); err != nil {
			return fmt.Errorf("failed to parse read length %s: %v", p.readLenStr, err)
		}
	}

	if p.loaderID == "" {
		return errors.New("loaderID can't be empty")
	}

	loaderID, parseErr := strconv.ParseUint(p.loaderID, 10, 64)
	if p.loaderCnt == 0 && p.loaderIDHashLen == 0 {
		if p.randomObjName {
			useRandomObjName = true
			if parseErr != nil {
				return errors.New("loaderID as string is only allowed when using loaderIDHashLen")
			}
			// no need to set suffixIDMaskLen since useRandomObjName == true
			suffixID = loaderID
		} else {
			// stats will be using loaderID;
			// but since suffixIDMaskLen == 0, object names will be just consecutive numbers
			suffixID = loaderID
			suffixIDMaskLen = 0
		}
	} else {
		if p.loaderCnt > 0 && p.loaderIDHashLen > 0 {
			return errors.New("loadernum and loaderidhashlen can't both be greater than 0")
		}

		if p.loaderIDHashLen > 0 {
			if p.loaderIDHashLen > 63 {
				return errors.New("loaderidhashlen has to be greater than 0 and smaller than 64")
			}

			suffixIDMaskLen = cos.CeilAlign(p.loaderIDHashLen, 4)
			suffixID = getIDFromString(p.loaderID, suffixIDMaskLen)
		} else {
			// p.loaderCnt > 0
			if parseErr != nil {
				return errors.New("loaderid has to be a number when using loadernum")
			}
			if loaderID > p.loaderCnt {
				return errors.New("loaderid has to be smaller than loadernum")
			}

			suffixIDMaskLen = loaderMaskFromTotalLoaders(p.loaderCnt)
			suffixID = loaderID
		}
	}

	if p.subDir != "" {
		p.subDir = filepath.Clean(p.subDir)
		if p.subDir[0] == '/' {
			return errors.New("object name prefix can't start with /")
		}
	}

	if p.putShards > 100000 {
		return errors.New("putshards should not exceed 100000")
	}

	if err := cos.ValidateCksumType(p.cksumType); err != nil {
		return err
	}

	if p.etlName != "" && p.etlSpecPath != "" {
		return errors.New("flags 'etl' and 'etl-spec' can't both be set")
	}

	if p.etlSpecPath != "" {
		fh, err := os.Open(p.etlSpecPath)
		if err != nil {
			return err
		}
		etlSpec, err := io.ReadAll(fh)
		fh.Close()
		if err != nil {
			return err
		}
		etlInitSpec, err = tetl.SpecToInitMsg(etlSpec)
		if err != nil {
			return err
		}
	}

	if p.etlName != "" {
		etlSpec, err := tetl.GetTransformYaml(p.etlName)
		if err != nil {
			return err
		}
		etlInitSpec, err = tetl.SpecToInitMsg(etlSpec)
		if err != nil {
			return err
		}
	}

	if p.bPropsStr != "" {
		var bprops cmn.Bprops
		jsonStr := strings.TrimRight(p.bPropsStr, ",")
		if !strings.HasPrefix(jsonStr, "{") {
			jsonStr = "{" + jsonStr + "}"
		}

		if err := jsoniter.Unmarshal([]byte(jsonStr), &bprops); err != nil {
			return fmt.Errorf("failed to parse bucket properties: %v", err)
		}

		p.bProps = bprops
		if p.bProps.EC.Enabled {
			// fill in EC defaults
			if p.bProps.EC.ParitySlices == 0 {
				p.bProps.EC.ParitySlices = 1
			}
			if p.bProps.EC.DataSlices == 0 {
				p.bProps.EC.DataSlices = 1
			}

			if p.bProps.EC.ParitySlices < 1 || p.bProps.EC.ParitySlices > 32 {
				return fmt.Errorf(
					"invalid number of parity slices: %d, it must be between 1 and 32",
					p.bProps.EC.ParitySlices)
			}
			if p.bProps.EC.DataSlices < 1 || p.bProps.EC.DataSlices > 32 {
				return fmt.Errorf(
					"invalid number of data slices: %d, it must be between 1 and 32",
					p.bProps.EC.DataSlices)
			}
		}

		if p.bProps.Mirror.Enabled {
			// fill in mirror defaults
			if p.bProps.Mirror.Burst == 0 {
				p.bProps.Mirror.Burst = 512
			}
			if p.bProps.Mirror.Copies == 0 {
				p.bProps.Mirror.Copies = 2
			}
			if p.bProps.Mirror.Copies != 2 {
				return fmt.Errorf(
					"invalid number of mirror copies: %d, it must equal 2",
					p.bProps.Mirror.Copies)
			}
		}
	}

	var useHTTPS bool
	if !isDirectS3() {
		// AIS endpoint: http://ip:port _or_ AIS_ENDPOINT environment
		aisEndpoint := "http://" + ip + ":" + port

		// see also: tlsArgs
		envEndpoint = os.Getenv(env.AIS.Endpoint)
		if envEndpoint != "" {
			if ip != "" && ip != defaultClusterIP && ip != defaultClusterIPv4 {
				return fmt.Errorf("'%s=%s' environment and '--ip=%s' command-line are mutually exclusive",
					env.AIS.Endpoint, envEndpoint, ip)
			}
			aisEndpoint = envEndpoint
		}

		traceHTTPSig.Store(p.traceHTTP)

		scheme, address := cmn.ParseURLScheme(aisEndpoint)
		if scheme == "" {
			scheme = "http"
		}
		if scheme != "http" && scheme != "https" {
			return fmt.Errorf("invalid aistore endpoint %q: unknown URI scheme %q", aisEndpoint, scheme)
		}

		// TODO: validate against cluster map (see api.GetClusterMap below)
		p.proxyURL = scheme + "://" + address
		useHTTPS = scheme == "https"
	}

	p.bp = api.BaseParams{URL: p.proxyURL}
	if useHTTPS {
		// environment to override client config
		cmn.EnvToTLS(&sargs)
		p.bp.Client = cmn.NewClientTLS(cargs, sargs)
	} else {
		p.bp.Client = cmn.NewClient(cargs)
	}

	// NOTE: the auth token is assigned when we execute the very first API call
	return nil
}

func isDirectS3() bool {
	debug.Assert(flag.Parsed())
	return s3Endpoint != ""
}
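
// loaderMaskFromTotalLoaders computes the bit-width of the loader-ID suffix.
// A worked example, assuming cos.FastLog2Ceil and cos.CeilAlign do what their
// names suggest: totalLoaders=10 => ceil(log2(10))=4 => aligned up to a multiple
// of 4 => 4 bits (one hex digit); totalLoaders=100 => 7 => 8 bits (two hex digits).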
func loaderMaskFromTotalLoaders(totalLoaders uint64) uint {
	// take the first larger power of 2, then take the first larger-or-equal number
	// divisible by 4 - this makes the loaderID more visible in the hex object name
	return cos.CeilAlign(cos.FastLog2Ceil(totalLoaders), 4)
}

func printArguments(set *flag.FlagSet) {
	w := tabwriter.NewWriter(os.Stdout, 0, 8, 1, '\t', 0)

	fmt.Fprintf(w, "==== COMMAND LINE ARGUMENTS ====\n")
	fmt.Fprintf(w, "=========== DEFAULTS ===========\n")
	set.VisitAll(func(f *flag.Flag) {
		if f.Value.String() == f.DefValue {
			_, _ = fmt.Fprintf(w, "%s:\t%s\n", f.Name, f.Value.String())
		}
	})
	fmt.Fprintf(w, "============ CUSTOM ============\n")
	set.VisitAll(func(f *flag.Flag) {
		if f.Value.String() != f.DefValue {
			_, _ = fmt.Fprintf(w, "%s:\t%s\n", f.Name, f.Value.String())
		}
	})
	fmt.Fprintf(w, "HTTP trace:\t%v\n", runParams.traceHTTP)
	fmt.Fprintf(w, "=================================\n\n")
	w.Flush()
}

// newStats returns a new stats object with the given time as the starting point
func newStats(t time.Time) sts {
	return sts{
		put:       stats.NewHTTPReq(t),
		get:       stats.NewHTTPReq(t),
		getConfig: stats.NewHTTPReq(t),
		statsd:    stats.NewStatsdMetrics(t),
	}
}

// aggregate adds another sts to self
func (s *sts) aggregate(other *sts) {
	s.get.Aggregate(other.get)
	s.put.Aggregate(other.put)
	s.getConfig.Aggregate(other.getConfig)
}

func setupBucket(runParams *params, created *bool) error {
	if strings.Contains(runParams.bck.Name, apc.BckProviderSeparator) {
		bck, objName, err := cmn.ParseBckObjectURI(runParams.bck.Name, cmn.ParseURIOpts{})
		if err != nil {
			return err
		}
		if objName != "" {
			return fmt.Errorf("expecting bucket name or a bucket URI with no object name in it: %s => [%v, %s]",
				runParams.bck, bck, objName)
		}
		if runParams.bck.Provider != apc.AIS /*cmdline default*/ && runParams.bck.Provider != bck.Provider {
			return fmt.Errorf("redundant and different bucket provider: %q vs %q in %s",
				runParams.bck.Provider, bck.Provider, bck)
		}
		runParams.bck = bck
	}

	const cachedText = "--cached option (to list \"cached\" objects only) "

	if isDirectS3() {
		if apc.ToScheme(runParams.bck.Provider) != apc.S3Scheme {
			return fmt.Errorf("option --s3endpoint requires an s3 bucket (have %s)", runParams.bck)
		}
		if runParams.cached {
			return errors.New(cachedText + "cannot be used together with --s3endpoint (direct S3 access)")
		}
	}
	if runParams.putPct == 100 && runParams.cached {
		return errors.New(cachedText + "is incompatible with 100% PUT workload")
	}
	if runParams.bck.Provider != apc.AIS {
		return nil
	}
	if runParams.cached && !runParams.bck.IsRemote() {
		return fmt.Errorf(cachedText+"applies to remote buckets (have %s)", runParams.bck.Cname(""))
	}

	//
	// ais:// or ais://@remais
	//
	if runParams.bck.Name == "" {
		runParams.bck.Name = cos.CryptoRandS(8)
		fmt.Printf("New bucket name %q\n", runParams.bck.Name)
	}
	exists, err := api.QueryBuckets(runParams.bp, cmn.QueryBcks(runParams.bck), apc.FltPresent)
	if err != nil {
		return fmt.Errorf("%s not found: %v", runParams.bck, err)
	}
	if !exists {
		if err := api.CreateBucket(runParams.bp, runParams.bck, nil); err != nil {
			return fmt.Errorf("failed to create %s: %v", runParams.bck, err)
		}
		*created = true
	}
	if runParams.bPropsStr == "" {
		return nil
	}
	propsToUpdate := cmn.BpropsToSet{}
	// update bucket props if bPropsStr is set
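	// (read-modify-write: read the current props, compare the requested EC and
	//  mirror settings against them, and apply only the sections that differ)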
	oldProps, err := api.HeadBucket(runParams.bp, runParams.bck, true /* don't add */)
	if err != nil {
		return fmt.Errorf("failed to read bucket %s properties: %v", runParams.bck, err)
	}
	change := false
	if runParams.bProps.EC.Enabled != oldProps.EC.Enabled {
		propsToUpdate.EC = &cmn.ECConfToSet{
			Enabled:      apc.Ptr(runParams.bProps.EC.Enabled),
			ObjSizeLimit: apc.Ptr[int64](runParams.bProps.EC.ObjSizeLimit),
			DataSlices:   apc.Ptr(runParams.bProps.EC.DataSlices),
			ParitySlices: apc.Ptr(runParams.bProps.EC.ParitySlices),
		}
		change = true
	}
	if runParams.bProps.Mirror.Enabled != oldProps.Mirror.Enabled {
		propsToUpdate.Mirror = &cmn.MirrorConfToSet{
			Enabled: apc.Ptr(runParams.bProps.Mirror.Enabled),
			Copies:  apc.Ptr[int64](runParams.bProps.Mirror.Copies),
			Burst:   apc.Ptr(runParams.bProps.Mirror.Burst),
		}
		change = true
	}
	if change {
		if _, err = api.SetBucketProps(runParams.bp, runParams.bck, &propsToUpdate); err != nil {
			return fmt.Errorf("failed to update bucket %s properties: %v", runParams.bck, err)
		}
	}
	return nil
}

func getIDFromString(val string, hashLen uint) uint64 {
	hash := xxhash.Checksum64S(cos.UnsafeB(val), cos.MLCG32)
	// keep just the low hashLen bits
	hash <<= 64 - hashLen
	hash >>= 64 - hashLen
	return hash
}

func sendStatsdStats(s *sts) {
	s.statsd.SendAll(statsdC)
}

func cleanup() {
	stopping.Store(true)
	time.Sleep(time.Second)
	fmt.Println(now() + " Cleaning up...")
	if bucketObjsNames != nil {
		// `bucketObjsNames` has actually been assigned to/initialized
		var (
			w       = runParams.numWorkers
			objsLen = bucketObjsNames.Len()
			n       = objsLen / w
			wg      = &sync.WaitGroup{}
		)
		for i := range w {
			wg.Add(1)
			go cleanupObjs(bucketObjsNames.Names()[i*n:(i+1)*n], wg)
		}
		if objsLen%w != 0 {
			wg.Add(1)
			go cleanupObjs(bucketObjsNames.Names()[n*w:], wg)
		}
		wg.Wait()
	}

	if runParams.bck.IsAIS() {
		api.DestroyBucket(runParams.bp, runParams.bck)
	}
	fmt.Println(now() + " Done")
}

func cleanupObjs(objs []string, wg *sync.WaitGroup) {
	defer wg.Done()

	t := len(objs)
	if t == 0 {
		return
	}

	// Delete objects only if it's not an AIS bucket (for an AIS bucket we go
	// ahead and remove the bucket itself)
	if !runParams.bck.IsAIS() {
		b := min(t, runParams.batchSize)
		n := t / b
		for i := range n {
			xid, err := api.DeleteMultiObj(runParams.bp, runParams.bck, objs[i*b:(i+1)*b], "" /*template*/)
			if err != nil {
				fmt.Println("delete err ", err)
			}
			args := xact.ArgsMsg{ID: xid, Kind: apc.ActDeleteObjects}
			if _, err = api.WaitForXactionIC(runParams.bp, &args); err != nil {
				fmt.Println("wait for xaction err ", err)
			}
		}

		if t%b != 0 {
			xid, err := api.DeleteMultiObj(runParams.bp, runParams.bck, objs[n*b:], "" /*template*/)
			if err != nil {
				fmt.Println("delete err ", err)
			}
			args := xact.ArgsMsg{ID: xid, Kind: apc.ActDeleteObjects}
			if _, err = api.WaitForXactionIC(runParams.bp, &args); err != nil {
				fmt.Println("wait for xaction err ", err)
			}
		}
	}

	if runParams.readerType == readers.TypeFile {
		for _, obj := range objs {
			if err := os.Remove(runParams.tmpDir + "/" + obj); err != nil {
				fmt.Println("delete local file err ", err)
			}
		}
	}
}

func objNamesFromFile() (names []string, err error) {
	var fh *os.File
	if fh, err = os.Open(runParams.fileList); err != nil {
		return
	}
	names = make([]string, 0, 1024)
	scanner := bufio.NewScanner(fh)
	regex := regexp.MustCompile(`\s`)
	for scanner.Scan() {
		n := strings.TrimSpace(scanner.Text())
		if strings.Contains(n, " ") || regex.MatchString(n) {
			continue
		}
		names = append(names, n)
	}
	fh.Close()
	return
}

func listObjects() error {
	var (
		names []string
		err   error
	)
	switch {
	case runParams.fileList != "":
		names, err = objNamesFromFile()
	case isDirectS3():
		names, err = s3ListObjects()
	default:
		names, err = listObjectNames(runParams.bp, runParams.bck, runParams.subDir, runParams.cached)
	}
	if err != nil {
		return err
	}

	if !runParams.uniqueGETs {
		bucketObjsNames = &namegetter.RandomNameGetter{}
	} else {
		bucketObjsNames = &namegetter.RandomUniqueNameGetter{}

		// permutation strategies appear to always perform better (though they use more memory)
		if runParams.putPct == 0 {
			bucketObjsNames = &namegetter.PermutationUniqueNameGetter{}

			// numbers from benchmarks: aisloader/tests/objnamegetter_test.go;
			// above 50k names, the overhead of a new goroutine and WaitGroup becomes smaller than the benefits
			if len(names) > 50000 {
				bucketObjsNames = &namegetter.PermutationUniqueImprovedNameGetter{}
			}
		}
	}
	bucketObjsNames.Init(names, rnd)
	return err
}
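
// Example invocations (illustrative only - flag names as defined in addCmdLine above;
// see examples.go and docs/aisloader.md for the complete, maintained set of examples):
//
//	aisloader -bucket=ais://abc -duration=2m -pctput=100 -minsize=4KB -maxsize=1MB -cleanup=true
//	aisloader -bucket=s3://abc -s3endpoint=https://s3.amazonaws.com -pctput=0 -duration=5m -cleanup=false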