//go:build aws

// Package backend contains implementation of various backend providers.
/*
 * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 */
package backend

import (
	"compress/gzip"
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	aiss3 "github.com/NVIDIA/aistore/ais/s3"
	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/fs"
	"github.com/NVIDIA/aistore/memsys"
	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/aws/aws-sdk-go-v2/service/s3/types"
)

// NOTE currently implemented main assumption/requirement:
// - one bucket, one inventory (for this same bucket), and one statically defined .csv

// TODO:
// - LsoMsg.StartAfter (a.k.a. ListObjectsV2Input.StartAfter); see also "expecting to resume" below

// constant and tunables (see also: ais/s3/inventory)
const numBlobWorkers = 10

const invTag = "bucket-inventory"

const invBusyTimeout = 10 * time.Second

const (
	invMaxLine = cos.KiB >> 1 // line buf
	invSwapSGL = invMaxLine

	invMaxPage = 8 * apc.MaxPageSizeAWS
	invPageSGL = max(invMaxPage*invMaxLine, 2*cos.MiB)
)

// NOTE: hardcoding two groups of constants - cannot find any of them in https://github.com/aws/aws-sdk-go-v2
// Generally, instead of reading inventory manifest line by line (and worrying about duplicated constants)
// it'd be much nicer to have an official JSON.

const (
	invManifest = "manifest.json"
	invSchema   = "fileSchema" // e.g. "fileSchema" : "Bucket, Key, Size, ETag"
	invKey      = "\"key\""
)

// canonical schema
const (
	invSchemaBucket = "Bucket" // must be the first field, always present
	invBucketPos    = 0
	invSchemaKey    = "Key" // must be the second mandatory field
	invKeyPos       = 1
)

// invT describes one remote inventory object (either a .csv.gz data file
// or a manifest.json) found under the inventory prefix.
type invT struct {
	oname string    // full object name (key) in the bucket
	mtime time.Time // object's LastModified, used to pick the latest
	size  int64     // object size (populated for the .csv.gz only)
}

// list inventories, read and parse manifest, return schema and unique oname
//
// initInventory lists up to apc.MaxPageSizeAWS objects under `prefix`,
// selects the most recent inventory .csv.gz and the most recent manifest.json,
// reads the manifest to extract `fileSchema` into ctx.Schema, and returns the
// raw listing (for subsequent cleanup), the two selected objects, and an
// (http status, error) pair on failure.
func (s3bp *s3bp) initInventory(cloudBck *cmn.Bck, svc *s3.Client, ctx *core.LsoInvCtx, prefix string) (*s3.ListObjectsV2Output,
	invT, invT, int, error) {
	var (
		csv      invT
		manifest invT
		bn       = aws.String(cloudBck.Name)
		params   = &s3.ListObjectsV2Input{Bucket: bn}
	)

	params.Prefix = aws.String(prefix)
	params.MaxKeys = aws.Int32(apc.MaxPageSizeAWS) // no more than 1000 manifests

	// 1. ls inventory
	resp, err := svc.ListObjectsV2(context.Background(), params)
	if err != nil {
		ecode, e := awsErrorToAISError(err, cloudBck, "")
		return nil, csv, manifest, ecode, e
	}
	// single pass: track the newest .csv.gz and the newest manifest.json separately
	for _, obj := range resp.Contents {
		name := *obj.Key
		if cos.Ext(name) == aiss3.InvSrcExt {
			mtime := *(obj.LastModified)
			if csv.mtime.IsZero() || mtime.After(csv.mtime) {
				csv.mtime = mtime
				csv.oname = name
				csv.size = *(obj.Size)
			}
			continue
		}
		if filepath.Base(name) == invManifest {
			mtime := *(obj.LastModified)
			if manifest.mtime.IsZero() || mtime.After(manifest.mtime) {
				manifest.mtime = mtime
				manifest.oname = name
			}
		}
	}
	if csv.oname == "" {
		// no inventory data file at all => not found
		what := prefix
		if ctx.ID == "" {
			what = cos.Either(ctx.Name, aiss3.InvName)
		}
		return nil, csv, manifest, http.StatusNotFound, cos.NewErrNotFound(cloudBck, invTag+":"+what)
	}
	if csv.mtime.After(manifest.mtime) {
		// manifest lags behind the data file; proceed but warn
		a, b := cos.FormatTime(manifest.mtime, cos.StampSec), cos.FormatTime(csv.mtime, cos.StampSec)
		nlog.Warningln("using an older manifest:", manifest.oname, a, "to parse:", csv.oname, b)
	}

	// 2. read the manifest and extract `fileSchema` --> ctx
	schema, ecode, err := s3bp._getManifest(cloudBck, svc, manifest.oname, csv.oname)
	if err != nil {
		return nil, csv, manifest, ecode, err
	}

	ctx.Schema = schema
	return resp, csv, manifest, 0, nil
}

// cleanupOldInventory deletes stale inventory objects (older than ~23h relative
// to the latest csv) from the remote bucket, keeping the current csv and manifest.
// Deletion failures are logged and skipped (best effort).
func cleanupOldInventory(cloudBck *cmn.Bck, svc *s3.Client, lsV2resp *s3.ListObjectsV2Output, csv, manifest invT) {
	var (
		num int
		bn  = aws.String(cloudBck.Name)
	)
	for _, obj := range lsV2resp.Contents {
		name := *obj.Key
		mtime := *(obj.LastModified)
		// NOTE(review): the signed Sub() below is negative for objects newer than csv.mtime,
		// so this condition skips everything except objects older by >= 23h; the _sinceAbs
		// check that follows appears unreachable as a result - confirm intent.
		if name == csv.oname || name == manifest.oname || csv.mtime.Sub(mtime) < 23*time.Hour {
			continue
		}
		if _sinceAbs(csv.mtime, mtime) < 23*time.Hour {
			continue
		}
		if _, errN := svc.DeleteObject(context.Background(), &s3.DeleteObjectInput{Bucket: bn, Key: obj.Key}); errN != nil {
			ecode, e := awsErrorToAISError(errN, cloudBck, name)
			nlog.Errorln("delete", name, e, ecode)
			continue
		}
		num++
	}
	if num > 0 {
		nlog.Infoln("cleanup: removed", num, "older", invTag, "file"+cos.Plural(num))
	}
}

// checkInvLom checks whether the locally cached inventory (ctx.Lom) is in sync
// with the remote inventory's `latest` timestamp. Returns (zero time, true) when
// the local copy is current and can be (re)used; otherwise returns the local
// mtime (or zero if the file does not exist) and false, meaning a new download
// is required.
func checkInvLom(latest time.Time, ctx *core.LsoInvCtx) (time.Time, bool) {
	finfo, err := os.Stat(ctx.Lom.FQN)
	if err != nil {
		debug.Assert(os.IsNotExist(err), err)
		nlog.Infoln(invTag, "does not exist, getting a new one for the timestamp:", latest)
		return time.Time{}, false
	}
	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
		nlog.Infoln(core.T.String(), "checking", ctx.Lom.String(), ctx.Lom.FQN, ctx.Lom.HrwFQN)
	}
	mtime := finfo.ModTime()
	abs := _sinceAbs(mtime, latest)
	if abs < time.Second {
		// local copy matches `latest` within a second => reuse it
		debug.Assert(ctx.Size == 0 || ctx.Size == finfo.Size())
		ctx.Size = finfo.Size()

		// start (or rather, keep) using this one
		errN := ctx.Lom.Load(true, true)
		debug.AssertNoErr(errN)
		debug.Assert(ctx.Lom.SizeBytes() == finfo.Size(), ctx.Lom.SizeBytes(), finfo.Size())
		// TODO -- FIXME: revisit
		// debug.Assert(_sinceAbs(mtime, ctx.Lom.Atime()) < time.Second, mtime.String(), ctx.Lom.Atime().String())
		return time.Time{}, true
	}

	nlog.Infoln(invTag, ctx.Lom.Cname(), "is likely being updated: [", mtime.String(), latest.String(), abs, "]")
	return mtime, false
}

// get+unzip and write lom
//
// getInventory downloads the remote .csv.gz inventory via the blob downloader
// (x-blob), streaming its SGL chunks through `reader` into a gzip.Reader, and
// writes the decompressed csv to a workfile that is then renamed into ctx.Lom.
// On success the lom's mtime is set to the remote csv's mtime so that
// checkInvLom can later compare the two.
func (s3bp *s3bp) getInventory(cloudBck *cmn.Bck, ctx *core.LsoInvCtx, csv invT) error {
	lom := &core.LOM{ObjName: csv.oname}
	if err := lom.InitBck(cloudBck); err != nil {
		return err
	}
	lom.SetSize(csv.size)

	wfqn := fs.CSM.Gen(ctx.Lom, fs.WorkfileType, "")
	wfh, err := ctx.Lom.CreateFile(wfqn)
	if err != nil {
		return _errInv("create-file", err)
	}

	var (
		// r bridges the blob downloader's per-chunk WriteSGL callback
		// to a sequential io.Reader consumed by gzip
		r = &reader{
			workCh: make(chan *memsys.SGL, 1),
			doneCh: make(chan *memsys.SGL, 1),
		}
		uzw = &unzipWriter{
			r:   r,
			wfh: wfh,
		}
		params = &core.BlobParams{
			Lom:      lom,
			Msg:      &apc.BlobMsg{NumWorkers: numBlobWorkers},
			WriteSGL: uzw.writeSGL,
		}
		xblob core.Xact
		gzr   *gzip.Reader
	)
	// run x-blob-downloader with default (num-readers, chunk-size) tunables
	xblob, err = s3bp.t.GetColdBlob(params, lom.ObjAttrs())
	if err == nil {
		if cmn.Rom.FastV(4, cos.SmoduleBackend) {
			nlog.Infoln("started", xblob.String(), "->", wfqn)
		}
		gzr, err = gzip.NewReader(r)
	}
	if err != nil {
		// NOTE(review): when gzip.NewReader fails the already-started xblob is not
		// aborted on this path - confirm whether the xaction terminates on its own
		wfh.Close()
		cos.RemoveFile(wfqn)
		return _errInv("blob-gunzip", err)
	}

	// decompress: gzr reads multi-SGL input via r; uzw writes to the workfile
	buf, slab := s3bp.mm.AllocSize(memsys.DefaultBuf2Size)
	ctx.Size, err = cos.CopyBuffer(uzw, gzr, buf)

	slab.Free(buf)
	wfh.Close()
	gzr.Close()

	// finalize (NOTE a lighter version of FinalizeObj - no redundancy, no locks)
	if err == nil {
		lom := ctx.Lom
		if err = lom.RenameFrom(wfqn); err == nil {
			// stamp the local file with the remote csv's mtime (see checkInvLom)
			if err = os.Chtimes(lom.FQN, csv.mtime, csv.mtime); err == nil {
				nlog.Infoln("new", invTag+":", lom.Cname(), ctx.Schema)

				lom.SetSize(ctx.Size)
				lom.SetAtimeUnix(csv.mtime.UnixNano())
				if errN := lom.PersistMain(); errN != nil {
					debug.AssertNoErr(errN) // (unlikely)
					nlog.Errorln("failed to persist", lom.Cname(), "err:", err, "- proceeding anyway...")
				} else if cmn.Rom.FastV(4, cos.SmoduleBackend) {
					nlog.Infoln("done", xblob.String(), "->", lom.Cname(), ctx.Size)
				}
				return nil
			}
		}
	}

	// otherwise
	if nerr := cos.RemoveFile(wfqn); nerr != nil && !os.IsNotExist(nerr) {
		nlog.Errorf("get-inv (%v), nested fail to remove (%v)", err, nerr)
	}
	if abrt := xblob.AbortErr(); abrt != nil {
		return _errInv("get-inv-abort", abrt)
	}
	return _errInv("get-inv-gzr-uzw-fail", err)
}

// listInventory serves one ListObjects page out of the locally cached, already
// downloaded inventory csv. It refills ctx.SGL from ctx.Lmfh as needed, parses
// csv lines according to ctx.Schema, fills lst.Entries (applying msg.Prefix and
// continuation-token handling), and peeks (without advancing) at the next line
// to produce lst.ContinuationToken.
func (*s3bp) listInventory(cloudBck *cmn.Bck, ctx *core.LsoInvCtx, msg *apc.LsoMsg, lst *cmn.LsoRes) (err error) {
	var (
		custom cos.StrKVs
		i      int64
	)
	msg.PageSize = calcPageSize(msg.PageSize, invMaxPage)
	// pre-grow the entries slice to the page size; trimmed to `i` at the end
	for j := len(lst.Entries); j < int(msg.PageSize); j++ {
		lst.Entries = append(lst.Entries, &cmn.LsoEnt{})
	}
	lst.ContinuationToken = ""

	// when little remains: read some more unless eof
	sgl := ctx.SGL
	if sgl.Len() < 2*invSwapSGL && !ctx.EOF {
		_, err = io.CopyN(sgl, ctx.Lmfh, invPageSGL-sgl.Len()-256)
		if err != nil {
			ctx.EOF = err == io.EOF
			if !ctx.EOF {
				nlog.Errorln("Warning: error reading csv", err)
				return err
			}
			if sgl.Len() == 0 {
				return err
			}
		}
	}

	if msg.WantProp(apc.GetPropsCustom) {
		custom = make(cos.StrKVs, 2)
	}

	skip := msg.ContinuationToken != "" // (tentatively)
	lbuf := make([]byte, invMaxLine)    // reuse for all read lines

	// avoid having line split across SGLs
	for i < msg.PageSize && (sgl.Len() > invSwapSGL || ctx.EOF) {
		lbuf, err = sgl.NextLine(lbuf, true)
		if err != nil {
			break
		}

		line := strings.Split(string(lbuf), ",")
		debug.Assert(strings.Contains(line[invBucketPos], cloudBck.Name), line)

		objName := cmn.UnquoteCEV(line[invKeyPos])

		if skip {
			// first line of a continued page must equal the previously returned token
			skip = false
			if objName != msg.ContinuationToken {
				nlog.Errorln("Warning: expecting to resume from the previously returned:",
					msg.ContinuationToken, "vs", objName)
			}
		}

		// prefix
		if msg.IsFlagSet(apc.LsNoRecursion) {
			if _, errN := cmn.HandleNoRecurs(msg.Prefix, objName); errN != nil {
				continue
			}
		} else if msg.Prefix != "" && !strings.HasPrefix(objName, msg.Prefix) {
			continue
		}

		// next entry
		entry := lst.Entries[i]
		i++
		entry.Name = objName

		// optional fields per schema (Size, ETag, LastModified)
		clear(custom)
		for i := invKeyPos + 1; i < len(ctx.Schema); i++ {
			switch types.InventoryOptionalField(ctx.Schema[i]) {
			case types.InventoryOptionalFieldSize:
				size := cmn.UnquoteCEV(line[i])
				entry.Size, err = strconv.ParseInt(size, 10, 64)
				if err != nil {
					nlog.Errorln(ctx.Lom.String(), "failed to parse size", size, err)
				}
			case types.InventoryOptionalFieldETag:
				if custom != nil {
					custom[cmn.ETag] = cmn.UnquoteCEV(line[i])
				}
			case types.InventoryOptionalFieldLastModifiedDate:
				if custom != nil {
					custom[cmn.LastModified] = cmn.UnquoteCEV(line[i])
				}
			}
		}
		if len(custom) > 0 {
			entry.Custom = cmn.CustomMD2S(custom)
		}
	}

	lst.Entries = lst.Entries[:i]

	// set next continuation token
	// (peek at the next line without advancing the read offset)
	lbuf, err = sgl.NextLine(lbuf, false /*advance roff*/)
	if err == nil {
		line := strings.Split(string(lbuf), ",")
		debug.Assert(strings.Contains(line[invBucketPos], cloudBck.Name), line)
		lst.ContinuationToken = cmn.UnquoteCEV(line[invKeyPos])
	}
	return err
}

// GET, parse, and validate inventory manifest
// (see "hardcoding" comment above)
// with JSON-tagged manifest structure (that'd include `json:"fileSchema"`)
// it'd then make sense to additionally validate: format == csv and source bucket == destination bucket == this bucket
func (s3bp *s3bp) _getManifest(cloudBck *cmn.Bck, svc *s3.Client, mname, csvname string) (schema []string, _ int, _ error) {
	input := s3.GetObjectInput{Bucket: aws.String(cloudBck.Name), Key: aws.String(mname)}
	obj, err := svc.GetObject(context.Background(), &input)
	if err != nil {
		ecode, e := awsErrorToAISError(err, cloudBck, mname)
		return nil, ecode, e
	}

	// read the entire (small) manifest into an SGL
	sgl := s3bp.mm.NewSGL(0)
	_, err = io.Copy(sgl, obj.Body)
	cos.Close(obj.Body)

	if err != nil {
		sgl.Free()
		return nil, 0, err
	}

	var (
		fileSchema string
		size       int64
		lbuf       = make([]byte, invMaxLine)
		cname      = cloudBck.Cname(mname)
	)
	// NOTE(review): `size` is never assigned below, so `size == 0` is always true
	// and the loop scans the manifest to EOF; the "compressed size" logged further
	// down is therefore always 0 - confirm whether size parsing was intended.
	for fileSchema == "" || size == 0 {
		lbuf, err = sgl.NextLine(lbuf, true)
		if err != nil {
			if err == io.EOF {
				err = nil
			}
			break
		}
		if len(lbuf) < len(invSchema)+10 {
			continue // too short to contain `"fileSchema" : "..."`
		}
		line := strings.Split(string(lbuf), ":")
		if len(line) < 2 {
			continue
		}
		if strings.Contains(line[0], invSchema) {
			debug.Assert(fileSchema == "", fileSchema)
			s := strings.TrimSpace(line[1])
			fileSchema = cmn.UnquoteCEV(strings.TrimSuffix(s, ","))
		} else if strings.Contains(line[0], invKey) {
			// cross-check: manifest should reference the same csv.gz we selected
			s := strings.TrimSpace(line[1])
			oname := cmn.UnquoteCEV(strings.TrimSuffix(s, ","))
			if oname != csvname {
				nlog.Warningln("manifested object", oname, "vs latest csv.gz", csvname)
			}
		}
	}

	// parse, validate
	if err != nil || fileSchema == "" {
		err = _parseErr(cname, sgl, lbuf, err)
	} else {
		if cmn.Rom.FastV(4, cos.SmoduleBackend) {
			nlog.Infoln("parsed manifest", cname, fileSchema, "compressed size", size)
		}
		// e.g. "Bucket, Key, Size, ETag"
		schema = strings.Split(fileSchema, ", ")
		if len(schema) < 2 {
			err = _parseErr(cname, sgl, lbuf, errors.New("invalid schema '"+fileSchema+"'"))
		} else if schema[invBucketPos] != invSchemaBucket || schema[invKeyPos] != invSchemaKey {
			err = _parseErr(cname, sgl, lbuf,
				errors.New("unexpected schema '"+fileSchema+"': expecting Bucket followed by Key"))
		}
	}

	sgl.Free()
	return schema, 0, err
}

//
// internal
//

// _parseErr builds a descriptive manifest-parse error that includes the
// beginning of the manifest body (for debugging) and the nested error, if any.
func _parseErr(cname string, sgl *memsys.SGL, lbuf []byte, err error) error {
	out := fmt.Sprintf("failed to parse %s for %q", cname, invSchema)
	if s := _bhead(sgl, lbuf); s != "" {
		out += ": [" + s + "]"
	}
	if err != nil {
		out += ", err: " + err.Error()
	}
	return errors.New(out)
}

// _bhead rewinds the SGL and returns a printable head of its content
// (up to invMaxLine bytes) for inclusion in error messages.
func _bhead(sgl *memsys.SGL, lbuf []byte) (s string) {
	sgl.Rewind()
	n, _ := sgl.Read(lbuf)
	if n > 0 {
		s = cos.BHead(lbuf, invMaxLine)
	}
	return s
}

// _errInv prefixes an error with the bucket-inventory tag and operation tag.
func _errInv(tag string, err error) error {
	return fmt.Errorf("%s: %s: %v", invTag, tag, err)
}

// _sinceAbs returns the absolute difference between two timestamps.
func _sinceAbs(t1, t2 time.Time) time.Duration {
	if t1.After(t2) {
		return t1.Sub(t2)
	}
	return t2.Sub(t1)
}

//
// chunk reader; serial reader; unzip unzipWriter
//

type (
	// reader adapts the blob downloader's chunked SGL output into a
	// sequential io.Reader: SGLs arrive via workCh (see unzipWriter.writeSGL)
	// and are handed back through doneCh once fully consumed.
	reader struct {
		sgl    *memsys.SGL      // currently consumed chunk (nil => fetch next)
		workCh chan *memsys.SGL // incoming chunks from the blob downloader
		doneCh chan *memsys.SGL // fully-read chunks returned for recycling
	}
	// unzipWriter feeds SGL chunks to `r` and, on the gzip output side,
	// writes decompressed bytes to the workfile.
	unzipWriter struct {
		r   *reader
		wfh *os.File
	}
)

/////////////////
// unzipWriter //
/////////////////

// callback of the type `core.WriteSGL`
// Blocks until the paired reader has fully consumed the chunk
// (back-pressure: one chunk in flight at a time).
func (uzw *unzipWriter) writeSGL(sgl *memsys.SGL) error {
	uzw.r.workCh <- sgl
	<-uzw.r.doneCh // block here
	return nil
}

// Write implements io.Writer: decompressed bytes go straight to the workfile.
func (uzw *unzipWriter) Write(p []byte) (int, error) {
	return uzw.wfh.Write(p)
}

////////////
// reader //
////////////

// Read implements io.Reader over the stream of SGL chunks arriving on workCh.
// A nil chunk on workCh signals end of stream (io.EOF to the caller).
func (r *reader) Read(b []byte) (n int, err error) {
	if r.sgl == nil {
		goto next
	}
read:
	n, err = r.sgl.Read(b)
	if err == nil {
		debug.Assert(n > 0)
		if r.sgl.Len() == 0 {
			r.doneCh <- r.sgl // recycle
			r.sgl = nil
		}
		return n, nil
	}
	if err == io.EOF {
		// done reading multi-SGL input
		debug.Assert(r.sgl.Len() == 0)
		debug.Assert(n > 0)
		err = nil
	}
	r.doneCh <- r.sgl // return on: sgl is fully read (EOF above) or any error
	r.sgl = nil
	return n, err

next: // (nil indicates EOF or error)
	r.sgl = <-r.workCh

	if r.sgl == nil {
		// user done as well
		close(r.workCh)
		close(r.doneCh)
		return 0, io.EOF
	}
	debug.Assert(r.sgl.Len() > 0)
	goto read
}