github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/backend/awsinv.go (about)

     1  //go:build aws
     2  
     3  // Package backend contains implementation of various backend providers.
     4  /*
     5   * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
     6   */
     7  package backend
     8  
     9  import (
    10  	"compress/gzip"
    11  	"context"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"net/http"
    16  	"os"
    17  	"path/filepath"
    18  	"strconv"
    19  	"strings"
    20  	"time"
    21  
    22  	aiss3 "github.com/NVIDIA/aistore/ais/s3"
    23  	"github.com/NVIDIA/aistore/api/apc"
    24  	"github.com/NVIDIA/aistore/cmn"
    25  	"github.com/NVIDIA/aistore/cmn/cos"
    26  	"github.com/NVIDIA/aistore/cmn/debug"
    27  	"github.com/NVIDIA/aistore/cmn/nlog"
    28  	"github.com/NVIDIA/aistore/core"
    29  	"github.com/NVIDIA/aistore/fs"
    30  	"github.com/NVIDIA/aistore/memsys"
    31  	"github.com/aws/aws-sdk-go-v2/aws"
    32  	"github.com/aws/aws-sdk-go-v2/service/s3"
    33  	"github.com/aws/aws-sdk-go-v2/service/s3/types"
    34  )
    35  
    36  // NOTE currently implemented main assumption/requirement:
    37  // - one bucket, one inventory (for this same bucket), and one statically defined .csv
    38  
    39  // TODO:
    40  // - LsoMsg.StartAfter (a.k.a. ListObjectsV2Input.StartAfter); see also "expecting to resume" below
    41  
// constant and tunables (see also: ais/s3/inventory)

// number of concurrent workers x-blob-downloader uses to fetch the .csv.gz
const numBlobWorkers = 10

// tag used in log messages and error annotations throughout this file
const invTag = "bucket-inventory"

// NOTE(review): not referenced in this file - presumably the wait applied
// when the inventory is busy being downloaded/updated elsewhere; confirm at call site
const invBusyTimeout = 10 * time.Second

const (
	invMaxLine = cos.KiB >> 1 // line buf: max expected length of a single csv line (512B)
	invSwapSGL = invMaxLine   // low watermark: refill/swap threshold for the SGL read buffer

	invMaxPage = 8 * apc.MaxPageSizeAWS                // max entries per inventory-backed list page
	invPageSGL = max(invMaxPage*invMaxLine, 2*cos.MiB) // capacity of the SGL that buffers csv lines
)
    56  
// NOTE: hardcoding two groups of constants - cannot find any of them in https://github.com/aws/aws-sdk-go-v2
// Generally, instead of reading inventory manifest line by line (and worrying about duplicated constants)
// it'd be much nicer to have an official JSON.

const (
	invManifest = "manifest.json" // basename of the per-inventory manifest object
	invSchema   = "fileSchema"    // e.g. "fileSchema" : "Bucket, Key, Size, ETag"
	invKey      = "\"key\""       // quoted JSON key naming the inventory's data object (.csv.gz)
)

// canonical schema: the first two comma-separated csv fields are mandatory
// and their positions are fixed (validated by _getManifest)
const (
	invSchemaBucket = "Bucket" // must be the first field, always present
	invBucketPos    = 0
	invSchemaKey    = "Key" // must be the second mandatory field
	invKeyPos       = 1
)
    74  
// invT identifies a single inventory-related object (either the .csv.gz
// inventory proper or a manifest.json) in the remote bucket.
type invT struct {
	oname string    // object name (key) in the remote bucket
	mtime time.Time // remote last-modified timestamp
	size  int64     // size in bytes (set for the csv inventory only)
}
    80  
// list inventories, read and parse manifest, return schema and unique oname
//
// initInventory lists the inventory destination under `prefix`, selects the
// most recent .csv.gz and the most recent manifest.json (by LastModified),
// parses the manifest's `fileSchema`, and stores it in ctx.Schema.
// Returns the raw ListObjectsV2 response (reused by cleanupOldInventory),
// the chosen csv and manifest descriptors, and (HTTP status, error) on failure.
func (s3bp *s3bp) initInventory(cloudBck *cmn.Bck, svc *s3.Client, ctx *core.LsoInvCtx, prefix string) (*s3.ListObjectsV2Output,
	invT, invT, int, error) {
	var (
		csv      invT
		manifest invT
		bn       = aws.String(cloudBck.Name)
		params   = &s3.ListObjectsV2Input{Bucket: bn}
	)

	params.Prefix = aws.String(prefix)
	params.MaxKeys = aws.Int32(apc.MaxPageSizeAWS) // no more than 1000 manifests

	// 1. ls inventory
	resp, err := svc.ListObjectsV2(context.Background(), params)
	if err != nil {
		ecode, e := awsErrorToAISError(err, cloudBck, "")
		return nil, csv, manifest, ecode, e
	}
	for _, obj := range resp.Contents {
		name := *obj.Key
		// track the latest inventory csv (by remote mtime)
		if cos.Ext(name) == aiss3.InvSrcExt {
			mtime := *(obj.LastModified)
			if csv.mtime.IsZero() || mtime.After(csv.mtime) {
				csv.mtime = mtime
				csv.oname = name
				csv.size = *(obj.Size)
			}
			continue
		}
		// and, separately, the latest manifest.json
		if filepath.Base(name) == invManifest {
			mtime := *(obj.LastModified)
			if manifest.mtime.IsZero() || mtime.After(manifest.mtime) {
				manifest.mtime = mtime
				manifest.oname = name
			}
		}
	}
	// no csv at all => nothing to list from
	if csv.oname == "" {
		what := prefix
		if ctx.ID == "" {
			what = cos.Either(ctx.Name, aiss3.InvName)
		}
		return nil, csv, manifest, http.StatusNotFound, cos.NewErrNotFound(cloudBck, invTag+":"+what)
	}
	// csv newer than the newest manifest: likely an inventory run in progress;
	// proceed with the (older) manifest but warn
	if csv.mtime.After(manifest.mtime) {
		a, b := cos.FormatTime(manifest.mtime, cos.StampSec), cos.FormatTime(csv.mtime, cos.StampSec)
		nlog.Warningln("using an older manifest:", manifest.oname, a, "to parse:", csv.oname, b)
	}

	// 2. read the manifest and extract `fileSchema` --> ctx
	schema, ecode, err := s3bp._getManifest(cloudBck, svc, manifest.oname, csv.oname)
	if err != nil {
		return nil, csv, manifest, ecode, err
	}

	ctx.Schema = schema
	return resp, csv, manifest, 0, nil
}
   140  
   141  func cleanupOldInventory(cloudBck *cmn.Bck, svc *s3.Client, lsV2resp *s3.ListObjectsV2Output, csv, manifest invT) {
   142  	var (
   143  		num int
   144  		bn  = aws.String(cloudBck.Name)
   145  	)
   146  	for _, obj := range lsV2resp.Contents {
   147  		name := *obj.Key
   148  		mtime := *(obj.LastModified)
   149  		if name == csv.oname || name == manifest.oname || csv.mtime.Sub(mtime) < 23*time.Hour {
   150  			continue
   151  		}
   152  		if _sinceAbs(csv.mtime, mtime) < 23*time.Hour {
   153  			continue
   154  		}
   155  		if _, errN := svc.DeleteObject(context.Background(), &s3.DeleteObjectInput{Bucket: bn, Key: obj.Key}); errN != nil {
   156  			ecode, e := awsErrorToAISError(errN, cloudBck, name)
   157  			nlog.Errorln("delete", name, e, ecode)
   158  			continue
   159  		}
   160  		num++
   161  	}
   162  	if num > 0 {
   163  		nlog.Infoln("cleanup: removed", num, "older", invTag, "file"+cos.Plural(num))
   164  	}
   165  }
   166  
// checkInvLom decides whether the locally cached inventory file (ctx.Lom) is
// in-sync with the latest remote inventory timestamp.
// Returns (zero time, true) when the local copy is usable as-is; otherwise
// (local mtime or zero time, false) to trigger downloading a fresh copy.
func checkInvLom(latest time.Time, ctx *core.LsoInvCtx) (time.Time, bool) {
	finfo, err := os.Stat(ctx.Lom.FQN)
	if err != nil {
		// only "not exists" is expected here
		debug.Assert(os.IsNotExist(err), err)
		nlog.Infoln(invTag, "does not exist, getting a new one for the timestamp:", latest)
		return time.Time{}, false
	}
	if cmn.Rom.FastV(5, cos.SmoduleBackend) {
		nlog.Infoln(core.T.String(), "checking", ctx.Lom.String(), ctx.Lom.FQN, ctx.Lom.HrwFQN)
	}
	mtime := finfo.ModTime()
	abs := _sinceAbs(mtime, latest)
	// local mtime matches remote timestamp (sub-second tolerance) => reuse
	if abs < time.Second {
		debug.Assert(ctx.Size == 0 || ctx.Size == finfo.Size())
		ctx.Size = finfo.Size()

		// start (or rather, keep) using this one
		errN := ctx.Lom.Load(true, true)
		debug.AssertNoErr(errN)
		debug.Assert(ctx.Lom.SizeBytes() == finfo.Size(), ctx.Lom.SizeBytes(), finfo.Size())
		// TODO -- FIXME: revisit
		// debug.Assert(_sinceAbs(mtime, ctx.Lom.Atime()) < time.Second, mtime.String(), ctx.Lom.Atime().String())
		return time.Time{}, true
	}

	nlog.Infoln(invTag, ctx.Lom.Cname(), "is likely being updated: [", mtime.String(), latest.String(), abs, "]")
	return mtime, false
}
   195  
   196  // get+unzip and write lom
   197  func (s3bp *s3bp) getInventory(cloudBck *cmn.Bck, ctx *core.LsoInvCtx, csv invT) error {
   198  	lom := &core.LOM{ObjName: csv.oname}
   199  	if err := lom.InitBck(cloudBck); err != nil {
   200  		return err
   201  	}
   202  	lom.SetSize(csv.size)
   203  
   204  	wfqn := fs.CSM.Gen(ctx.Lom, fs.WorkfileType, "")
   205  	wfh, err := ctx.Lom.CreateFile(wfqn)
   206  	if err != nil {
   207  		return _errInv("create-file", err)
   208  	}
   209  
   210  	var (
   211  		r = &reader{
   212  			workCh: make(chan *memsys.SGL, 1),
   213  			doneCh: make(chan *memsys.SGL, 1),
   214  		}
   215  		uzw = &unzipWriter{
   216  			r:   r,
   217  			wfh: wfh,
   218  		}
   219  		params = &core.BlobParams{
   220  			Lom:      lom,
   221  			Msg:      &apc.BlobMsg{NumWorkers: numBlobWorkers},
   222  			WriteSGL: uzw.writeSGL,
   223  		}
   224  		xblob core.Xact
   225  		gzr   *gzip.Reader
   226  	)
   227  	// run x-blob-downloader with default (num-readers, chunk-size) tunables
   228  	xblob, err = s3bp.t.GetColdBlob(params, lom.ObjAttrs())
   229  	if err == nil {
   230  		if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   231  			nlog.Infoln("started", xblob.String(), "->", wfqn)
   232  		}
   233  		gzr, err = gzip.NewReader(r)
   234  	}
   235  	if err != nil {
   236  		wfh.Close()
   237  		cos.RemoveFile(wfqn)
   238  		return _errInv("blob-gunzip", err)
   239  	}
   240  
   241  	buf, slab := s3bp.mm.AllocSize(memsys.DefaultBuf2Size)
   242  	ctx.Size, err = cos.CopyBuffer(uzw, gzr, buf)
   243  
   244  	slab.Free(buf)
   245  	wfh.Close()
   246  	gzr.Close()
   247  
   248  	// finalize (NOTE a lighter version of FinalizeObj - no redundancy, no locks)
   249  	if err == nil {
   250  		lom := ctx.Lom
   251  		if err = lom.RenameFrom(wfqn); err == nil {
   252  			if err = os.Chtimes(lom.FQN, csv.mtime, csv.mtime); err == nil {
   253  				nlog.Infoln("new", invTag+":", lom.Cname(), ctx.Schema)
   254  
   255  				lom.SetSize(ctx.Size)
   256  				lom.SetAtimeUnix(csv.mtime.UnixNano())
   257  				if errN := lom.PersistMain(); errN != nil {
   258  					debug.AssertNoErr(errN) // (unlikely)
   259  					nlog.Errorln("failed to persist", lom.Cname(), "err:", err, "- proceeding anyway...")
   260  				} else if cmn.Rom.FastV(4, cos.SmoduleBackend) {
   261  					nlog.Infoln("done", xblob.String(), "->", lom.Cname(), ctx.Size)
   262  				}
   263  				return nil
   264  			}
   265  		}
   266  	}
   267  
   268  	// otherwise
   269  	if nerr := cos.RemoveFile(wfqn); nerr != nil && !os.IsNotExist(nerr) {
   270  		nlog.Errorf("get-inv (%v), nested fail to remove (%v)", err, nerr)
   271  	}
   272  	if abrt := xblob.AbortErr(); abrt != nil {
   273  		return _errInv("get-inv-abort", abrt)
   274  	}
   275  	return _errInv("get-inv-gzr-uzw-fail", err)
   276  }
   277  
// listInventory fills up to msg.PageSize entries of lst from the locally
// cached inventory csv: lines are buffered in ctx.SGL and refilled from the
// open file handle ctx.Lmfh; optional per-entry fields are decoded per the
// schema previously parsed by initInventory (ctx.Schema).
// The next line (not consumed) becomes lst.ContinuationToken.
func (*s3bp) listInventory(cloudBck *cmn.Bck, ctx *core.LsoInvCtx, msg *apc.LsoMsg, lst *cmn.LsoRes) (err error) {
	var (
		custom cos.StrKVs
		i      int64
	)
	msg.PageSize = calcPageSize(msg.PageSize, invMaxPage)
	// grow the (reused) entries slice up to the page size
	for j := len(lst.Entries); j < int(msg.PageSize); j++ {
		lst.Entries = append(lst.Entries, &cmn.LsoEnt{})
	}
	lst.ContinuationToken = ""

	// when little remains: read some more unless eof
	sgl := ctx.SGL
	if sgl.Len() < 2*invSwapSGL && !ctx.EOF {
		// top up the SGL from the csv file, leaving 256B of slack
		_, err = io.CopyN(sgl, ctx.Lmfh, invPageSGL-sgl.Len()-256)
		if err != nil {
			ctx.EOF = err == io.EOF
			if !ctx.EOF {
				nlog.Errorln("Warning: error reading csv", err)
				return err
			}
			// EOF with leftover buffered data: keep going below
			if sgl.Len() == 0 {
				return err
			}
		}
	}

	if msg.WantProp(apc.GetPropsCustom) {
		custom = make(cos.StrKVs, 2)
	}

	skip := msg.ContinuationToken != "" // (tentatively)
	lbuf := make([]byte, invMaxLine)    // reuse for all read lines

	// avoid having line split across SGLs
	for i < msg.PageSize && (sgl.Len() > invSwapSGL || ctx.EOF) {
		lbuf, err = sgl.NextLine(lbuf, true)
		if err != nil {
			break
		}

		// csv line: Bucket, Key[, optional fields per schema]
		line := strings.Split(string(lbuf), ",")
		debug.Assert(strings.Contains(line[invBucketPos], cloudBck.Name), line)

		objName := cmn.UnquoteCEV(line[invKeyPos])

		// resuming a paged listing: first line must equal the continuation token
		if skip {
			skip = false
			if objName != msg.ContinuationToken {
				nlog.Errorln("Warning: expecting to resume from the previously returned:",
					msg.ContinuationToken, "vs", objName)
			}
		}

		// prefix
		if msg.IsFlagSet(apc.LsNoRecursion) {
			if _, errN := cmn.HandleNoRecurs(msg.Prefix, objName); errN != nil {
				continue
			}
		} else if msg.Prefix != "" && !strings.HasPrefix(objName, msg.Prefix) {
			continue
		}

		// next entry
		entry := lst.Entries[i]
		i++
		entry.Name = objName

		// optional schema fields: Size, ETag, LastModified
		// (note: the loop var `i` below intentionally shadows the outer entry counter)
		clear(custom)
		for i := invKeyPos + 1; i < len(ctx.Schema); i++ {
			switch types.InventoryOptionalField(ctx.Schema[i]) {
			case types.InventoryOptionalFieldSize:
				size := cmn.UnquoteCEV(line[i])
				entry.Size, err = strconv.ParseInt(size, 10, 64)
				if err != nil {
					nlog.Errorln(ctx.Lom.String(), "failed to parse size", size, err)
				}
			case types.InventoryOptionalFieldETag:
				if custom != nil {
					custom[cmn.ETag] = cmn.UnquoteCEV(line[i])
				}
			case types.InventoryOptionalFieldLastModifiedDate:
				if custom != nil {
					custom[cmn.LastModified] = cmn.UnquoteCEV(line[i])
				}
			}
		}
		if len(custom) > 0 {
			entry.Custom = cmn.CustomMD2S(custom)
		}
	}

	lst.Entries = lst.Entries[:i]

	// set next continuation token: peek at the next line without advancing roff
	lbuf, err = sgl.NextLine(lbuf, false /*advance roff*/)
	if err == nil {
		line := strings.Split(string(lbuf), ",")
		debug.Assert(strings.Contains(line[invBucketPos], cloudBck.Name), line)
		lst.ContinuationToken = cmn.UnquoteCEV(line[invKeyPos])
	}
	return err
}
   381  
// GET, parse, and validate inventory manifest
// (see "hardcoding" comment above)
// with JSON-tagged manifest structure (that'd include `json:"fileSchema"`)
// it'd then make sense to additionally validate: format == csv and source bucket == destination bucket == this bucket
//
// _getManifest GETs manifest.json, scans it line by line for `fileSchema` and
// the manifested data-object key (cross-checked against csvname), and returns
// the schema split into fields. Returns (nil, ecode, err) on failure.
func (s3bp *s3bp) _getManifest(cloudBck *cmn.Bck, svc *s3.Client, mname, csvname string) (schema []string, _ int, _ error) {
	input := s3.GetObjectInput{Bucket: aws.String(cloudBck.Name), Key: aws.String(mname)}
	obj, err := svc.GetObject(context.Background(), &input)
	if err != nil {
		ecode, e := awsErrorToAISError(err, cloudBck, mname)
		return nil, ecode, e
	}

	// buffer the whole (small) manifest
	sgl := s3bp.mm.NewSGL(0)
	_, err = io.Copy(sgl, obj.Body)
	cos.Close(obj.Body)

	if err != nil {
		sgl.Free()
		return nil, 0, err
	}

	var (
		fileSchema string
		size       int64
		lbuf       = make([]byte, invMaxLine)
		cname      = cloudBck.Cname(mname)
	)
	// NOTE(review): `size` is never assigned in this loop, so the condition
	// below keeps scanning until EOF and the "compressed size" logged further
	// down is always 0 - looks like a missing parse of the manifest's "size"
	// field; confirm against the S3 inventory manifest format
	for fileSchema == "" || size == 0 {
		lbuf, err = sgl.NextLine(lbuf, true)
		if err != nil {
			if err == io.EOF {
				err = nil
			}
			break
		}
		// too short to contain `"fileSchema" : "..."`
		if len(lbuf) < len(invSchema)+10 {
			continue
		}
		line := strings.Split(string(lbuf), ":")
		if len(line) < 2 {
			continue
		}
		if strings.Contains(line[0], invSchema) {
			debug.Assert(fileSchema == "", fileSchema)
			s := strings.TrimSpace(line[1])
			fileSchema = cmn.UnquoteCEV(strings.TrimSuffix(s, ","))
		} else if strings.Contains(line[0], invKey) {
			// warn if the manifest points at a different csv than the latest one
			s := strings.TrimSpace(line[1])
			oname := cmn.UnquoteCEV(strings.TrimSuffix(s, ","))
			if oname != csvname {
				nlog.Warningln("manifested object", oname, "vs latest csv.gz", csvname)
			}
		}
	}

	// parse, validate
	if err != nil || fileSchema == "" {
		err = _parseErr(cname, sgl, lbuf, err)
	} else {
		if cmn.Rom.FastV(4, cos.SmoduleBackend) {
			nlog.Infoln("parsed manifest", cname, fileSchema, "compressed size", size)
		}
		// e.g. "Bucket, Key, Size, ETag"
		schema = strings.Split(fileSchema, ", ")
		if len(schema) < 2 {
			err = _parseErr(cname, sgl, lbuf, errors.New("invalid schema '"+fileSchema+"'"))
		} else if schema[invBucketPos] != invSchemaBucket || schema[invKeyPos] != invSchemaKey {
			err = _parseErr(cname, sgl, lbuf,
				errors.New("unexpected schema '"+fileSchema+"': expecting Bucket followed by Key"))
		}
	}

	sgl.Free()
	return schema, 0, err
}
   457  
   458  //
   459  // internal
   460  //
   461  
   462  func _parseErr(cname string, sgl *memsys.SGL, lbuf []byte, err error) error {
   463  	out := fmt.Sprintf("failed to parse %s for %q", cname, invSchema)
   464  	if s := _bhead(sgl, lbuf); s != "" {
   465  		out += ": [" + s + "]"
   466  	}
   467  	if err != nil {
   468  		out += ", err: " + err.Error()
   469  	}
   470  	return errors.New(out)
   471  }
   472  
   473  func _bhead(sgl *memsys.SGL, lbuf []byte) (s string) {
   474  	sgl.Rewind()
   475  	n, _ := sgl.Read(lbuf)
   476  	if n > 0 {
   477  		s = cos.BHead(lbuf, invMaxLine)
   478  	}
   479  	return s
   480  }
   481  
   482  func _errInv(tag string, err error) error {
   483  	return fmt.Errorf("%s: %s: %v", invTag, tag, err)
   484  }
   485  
   486  func _sinceAbs(t1, t2 time.Time) time.Duration {
   487  	if t1.After(t2) {
   488  		return t1.Sub(t2)
   489  	}
   490  	return t2.Sub(t1)
   491  }
   492  
   493  //
   494  // chunk reader; serial reader; unzip unzipWriter
   495  //
   496  
type (
	// reader presents the sequence of SGL chunks produced by the blob
	// downloader (handed over via unzipWriter.writeSGL) as one contiguous
	// io.Reader - the input of gzip.Reader in getInventory.
	reader struct {
		sgl    *memsys.SGL      // chunk currently being drained; nil in between chunks
		workCh chan *memsys.SGL // filled chunks from the downloader; nil SGL signals end of input
		doneCh chan *memsys.SGL // fully-consumed chunks returned for recycling
	}
	// unzipWriter couples the hand-off reader with the workfile that
	// receives the decompressed bytes.
	unzipWriter struct {
		r   *reader
		wfh *os.File // open workfile (destination of decompressed csv)
	}
)
   508  
   509  /////////////////
   510  // unzipWriter //
   511  /////////////////
   512  
// callback of the type `core.WriteSGL`: hands a filled SGL to the reader side
// and blocks until it has been fully consumed - thereby serializing the blob
// downloader against the gunzip pipeline.
func (uzw *unzipWriter) writeSGL(sgl *memsys.SGL) error {
	uzw.r.workCh <- sgl
	<-uzw.r.doneCh // block here
	return nil
}
   519  
   520  func (uzw *unzipWriter) Write(p []byte) (int, error) {
   521  	return uzw.wfh.Write(p)
   522  }
   523  
   524  ////////////
   525  // reader //
   526  ////////////
   527  
// Read implements io.Reader over the stream of SGLs arriving on workCh.
// State machine: r.sgl is the chunk currently being drained (nil between
// chunks); a drained or errored chunk is sent back on doneCh to unblock the
// producer (writeSGL); a nil SGL received on workCh marks end of input.
func (r *reader) Read(b []byte) (n int, err error) {
	if r.sgl == nil {
		goto next
	}
read:
	n, err = r.sgl.Read(b)
	if err == nil {
		debug.Assert(n > 0)
		// chunk fully drained => recycle it and wait for the next one
		if r.sgl.Len() == 0 {
			r.doneCh <- r.sgl // recycle
			r.sgl = nil
		}
		return n, nil
	}
	if err == io.EOF {
		// done reading multi-SGL input
		debug.Assert(r.sgl.Len() == 0)
		debug.Assert(n > 0)
		err = nil
	}
	r.doneCh <- r.sgl // return on: sgl is fully read (EOF above) or any error
	r.sgl = nil
	return n, err

next: // (nil indicates EOF or error)
	r.sgl = <-r.workCh

	if r.sgl == nil {
		// user done as well
		close(r.workCh)
		close(r.doneCh)
		return 0, io.EOF
	}
	debug.Assert(r.sgl.Len() > 0)
	goto read
}