github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/objattrs.go (about)

     1  // Package cmn provides common constants, types, and utilities for AIS clients
     2  // and AIStore.
     3  /*
     4   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package cmn
     7  
     8  import (
     9  	"fmt"
    10  	"net/http"
    11  	"strconv"
    12  	"strings"
    13  
    14  	"github.com/NVIDIA/aistore/api/apc"
    15  	"github.com/NVIDIA/aistore/cmn/cos"
    16  	"github.com/NVIDIA/aistore/cmn/debug"
    17  )
    18  
// LOM custom metadata stored under `lomCustomMD`.
// The constants below are keys (and, for WebObjMD, a value) used in custom metadata.
const (
	// source of the cold-GET and download; the values include all
	// 3rd party backend providers
	SourceObjMD = "source"

	// the downloader's source is "web"
	WebObjMD = "web"

	// system-supported custom attrs
	// NOTE: for provider specific HTTP headers, see cmn/cos/const_http.go

	VersionObjMD = "version" // "generation" for GCP, "version" for AWS but only if the bucket is versioned, etc.
	CRC32CObjMD  = cos.ChecksumCRC32C
	MD5ObjMD     = cos.ChecksumMD5
	ETag         = cos.HdrETag

	// NOTE(review): presumably the original URL of a downloaded object - confirm against the users of this key
	OrigURLObjMD = "orig_url"

	// additional backend
	// NOTE(review): presumably the backend-reported last-modified time - confirm
	LastModified = "LastModified"
)
    41  
// object properties
// NOTE: embeds system `ObjAttrs` that in turn includes custom user-defined metadata (`ObjAttrs.CustomMD`)
// NOTE: compare with `apc.LsoMsg`
type ObjectProps struct {
	// bucket that contains the object
	Bck Bck `json:"bucket"`
	// embedded system attributes: checksum, custom metadata, version, atime, size
	ObjAttrs
	Name     string `json:"name"`
	Location string `json:"location"` // see also `GetPropsLocation`
	// mirroring info; both fields are omitted from JSON when empty
	Mirror struct {
		Paths  []string `json:"paths,omitempty"`
		Copies int      `json:"copies,omitempty"`
	} `json:"mirror"`
	// NOTE(review): presumably erasure-coding properties (data/parity slice counts, etc.) - confirm
	EC struct {
		Generation   int64 `json:"generation"`
		DataSlices   int   `json:"data"`
		ParitySlices int   `json:"parity"`
		IsECCopy     bool  `json:"replicated"` // NOTE: serialized as "replicated"
	} `json:"ec"`
	Present bool `json:"present"`
}
    62  
// ObjAttrs groups system object attributes: checksum, custom metadata, version,
// access time, and size. All fields are omitted from JSON when empty.
// The type implements `cos.OAH` (see the interface guard below).
//
// see also apc.HdrObjAtime et al. @ api/apc/const.go (and note that naming must be consistent)
type ObjAttrs struct {
	Cksum    *cos.Cksum `json:"checksum,omitempty"`  // object checksum (cloned)
	CustomMD cos.StrKVs `json:"custom-md,omitempty"` // custom metadata: ETag, MD5, CRC, user-defined ...
	Ver      string     `json:"version,omitempty"`   // object version
	Atime    int64      `json:"atime,omitempty"`     // access time (nanoseconds since UNIX epoch)
	Size     int64      `json:"size,omitempty"`      // object size (bytes)
}

// interface guard: compile-time check that *ObjAttrs satisfies cos.OAH
var _ cos.OAH = (*ObjAttrs)(nil)
    74  
    75  func (oa *ObjAttrs) String() string {
    76  	return fmt.Sprintf("%dB, v%q, %s, %+v", oa.Size, oa.Ver, oa.Cksum, oa.CustomMD)
    77  }
    78  
    79  func (oa *ObjAttrs) SizeBytes(_ ...bool) int64 { return oa.Size }
    80  func (oa *ObjAttrs) Version(_ ...bool) string  { return oa.Ver }
    81  func (oa *ObjAttrs) AtimeUnix() int64          { return oa.Atime }
    82  func (oa *ObjAttrs) Checksum() *cos.Cksum      { return oa.Cksum }
    83  func (oa *ObjAttrs) SetCksum(ty, val string)   { oa.Cksum = cos.NewCksum(ty, val) }
    84  
    85  func (oa *ObjAttrs) SetSize(size int64) {
    86  	debug.Assert(oa.Size == 0)
    87  	oa.Size = size
    88  }
    89  
    90  //
    91  // custom metadata
    92  //
    93  
    94  func CustomMD2S(md cos.StrKVs) string { return fmt.Sprintf("%+v", md) }
    95  
    96  func S2CustomMD(custom, version string) (md cos.StrKVs) {
    97  	if len(custom) < 8 || !strings.HasPrefix(custom, "map[") { // Sprintf above
    98  		return nil
    99  	}
   100  	s := custom[4 : len(custom)-1]
   101  	lst := strings.Split(s, " ")
   102  	md = make(cos.StrKVs, len(lst))
   103  	md[VersionObjMD] = version
   104  	parseCustom(md, lst, SourceObjMD)
   105  	parseCustom(md, lst, CRC32CObjMD)
   106  	parseCustom(md, lst, MD5ObjMD)
   107  	parseCustom(md, lst, ETag)
   108  	return md
   109  }
   110  
   111  func parseCustom(md cos.StrKVs, lst []string, key string) {
   112  	keyX := key + ":"
   113  	for _, kv := range lst {
   114  		if strings.HasPrefix(kv, keyX) {
   115  			md[key] = kv[len(keyX):]
   116  			return
   117  		}
   118  	}
   119  }
   120  
   121  func (oa *ObjAttrs) GetCustomMD() cos.StrKVs   { return oa.CustomMD }
   122  func (oa *ObjAttrs) SetCustomMD(md cos.StrKVs) { oa.CustomMD = md }
   123  
   124  func (oa *ObjAttrs) GetCustomKey(key string) (val string, exists bool) {
   125  	val, exists = oa.CustomMD[key]
   126  	return
   127  }
   128  
   129  func (oa *ObjAttrs) SetCustomKey(k, v string) {
   130  	debug.Assert(k != "")
   131  	if oa.CustomMD == nil {
   132  		oa.CustomMD = make(cos.StrKVs, 6)
   133  	}
   134  	oa.CustomMD[k] = v
   135  }
   136  
   137  func (oa *ObjAttrs) DelCustomKeys(keys ...string) {
   138  	for _, key := range keys {
   139  		delete(oa.CustomMD, key)
   140  	}
   141  }
   142  
   143  // clone OAH => ObjAttrs (see also lom.CopyAttrs)
   144  func (oa *ObjAttrs) CopyFrom(oah cos.OAH, skipCksum bool) {
   145  	oa.Atime = oah.AtimeUnix()
   146  	oa.Size = oah.SizeBytes()
   147  	oa.Ver = oah.Version()
   148  	if !skipCksum {
   149  		oa.Cksum = oah.Checksum().Clone()
   150  	}
   151  	for k, v := range oah.GetCustomMD() {
   152  		oa.SetCustomKey(k, v)
   153  	}
   154  }
   155  
   156  //
   157  // to and from HTTP header converters (as in: HEAD /object)
   158  //
   159  
   160  // may set headers:
   161  // - standard cos.HdrContentLength ("Content-Length") & cos.HdrETag ("ETag")
   162  // - atime, version, etc. - all the rest "ais-" prefixed
   163  func ToHeader(oah cos.OAH, hdr http.Header, size int64, cksums ...*cos.Cksum) {
   164  	var cksum *cos.Cksum
   165  	if len(cksums) > 0 {
   166  		// - range checksum, or
   167  		// - archived file checksum, or
   168  		// - object checksum (when read range is _not_ checksummed)
   169  		cksum = cksums[0]
   170  	} else {
   171  		cksum = oah.Checksum()
   172  	}
   173  	if !cksum.IsEmpty() {
   174  		hdr.Set(apc.HdrObjCksumType, cksum.Ty())
   175  		hdr.Set(apc.HdrObjCksumVal, cksum.Val())
   176  	}
   177  	if at := oah.AtimeUnix(); at != 0 {
   178  		hdr.Set(apc.HdrObjAtime, cos.UnixNano2S(at))
   179  	}
   180  	if size > 0 {
   181  		hdr.Set(cos.HdrContentLength, strconv.FormatInt(size, 10))
   182  	}
   183  	if v := oah.Version(true); v != "" {
   184  		hdr.Set(apc.HdrObjVersion, v)
   185  	}
   186  	custom := oah.GetCustomMD()
   187  	for k, v := range custom {
   188  		hdr.Add(apc.HdrObjCustomMD, k+"="+v)
   189  		if k == ETag {
   190  			// TODO: redundant vs CustomMD - maybe extend cos.OAH to include get/set(ETag)
   191  			hdr.Set(cos.HdrETag, v)
   192  		}
   193  	}
   194  }
   195  
   196  // NOTE: returning checksum separately for subsequent validation
   197  func (oa *ObjAttrs) FromHeader(hdr http.Header) (cksum *cos.Cksum) {
   198  	if ty := hdr.Get(apc.HdrObjCksumType); ty != "" {
   199  		val := hdr.Get(apc.HdrObjCksumVal)
   200  		cksum = cos.NewCksum(ty, val)
   201  	}
   202  
   203  	if at := hdr.Get(apc.HdrObjAtime); at != "" {
   204  		atime, err := cos.S2UnixNano(at)
   205  		debug.AssertNoErr(err)
   206  		oa.Atime = atime
   207  	}
   208  	if sz := hdr.Get(cos.HdrContentLength); sz != "" {
   209  		size, err := strconv.ParseInt(sz, 10, 64)
   210  		debug.AssertNoErr(err)
   211  		oa.Size = size
   212  	}
   213  	if v := hdr.Get(apc.HdrObjVersion); v != "" {
   214  		oa.Ver = v
   215  	}
   216  	custom := hdr[http.CanonicalHeaderKey(apc.HdrObjCustomMD)]
   217  	for _, v := range custom {
   218  		entry := strings.SplitN(v, "=", 2)
   219  		debug.Assert(len(entry) == 2)
   220  		oa.SetCustomKey(entry[0], entry[1])
   221  	}
   222  	return
   223  }
   224  
   225  func (oa *ObjAttrs) FromLsoEntry(e *LsoEnt) {
   226  	oa.Size = e.Size
   227  	oa.Ver = e.Version
   228  
   229  	// entry.Custom = cmn.CustomMD2S(custom)
   230  	_ = CustomMD2S(nil)
   231  }
   232  
   233  // local <=> remote equality in the context of cold-GET and download. This function
   234  // decides whether we need to go ahead and re-read the object from its remote location.
   235  //
   236  // Other than a "binary" size and version checks, rest logic goes as follows: objects are
   237  // considered equal if they have a) the same version and at least one matching checksum, or
   238  // b) the same remote "source" and at least one matching checksum, or c) two matching checksums.
   239  // (See also note below.)
   240  //
   241  // Note that mismatch in any given checksum type immediately renders inequality and return
   242  // from the function.
   243  func (oa *ObjAttrs) Equal(rem cos.OAH) (eq bool) {
   244  	var (
   245  		ver      string
   246  		md5      string
   247  		etag     string
   248  		cksumVal string
   249  		count    int
   250  		sameEtag bool
   251  	)
   252  	// size check
   253  	if remSize := rem.SizeBytes(true); oa.Size != 0 && remSize != 0 && oa.Size != remSize {
   254  		return false
   255  	}
   256  
   257  	// version check
   258  	if remVer := rem.Version(true); oa.Ver != "" && remVer != "" {
   259  		if oa.Ver != remVer {
   260  			return false
   261  		}
   262  		ver = oa.Ver
   263  		// NOTE: ais own version is, currently, a nonunique sequence number - not counting
   264  		if remSrc, _ := rem.GetCustomKey(SourceObjMD); remSrc != apc.AIS {
   265  			count++
   266  		}
   267  	} else if remMeta, ok := rem.GetCustomKey(VersionObjMD); ok && remMeta != "" {
   268  		if locMeta, ok := oa.GetCustomKey(VersionObjMD); ok && locMeta != "" {
   269  			if remMeta != locMeta {
   270  				return false
   271  			}
   272  			count++
   273  			ver = locMeta
   274  		}
   275  	}
   276  
   277  	// checksum check
   278  	if a, b := rem.Checksum(), oa.Cksum; !a.IsEmpty() && !b.IsEmpty() && a.Ty() == b.Ty() {
   279  		if !a.Equal(b) {
   280  			return false
   281  		}
   282  		cksumVal = a.Val()
   283  		count++
   284  	}
   285  
   286  	// custom MD: ETag check
   287  	if remMeta, ok := rem.GetCustomKey(ETag); ok && remMeta != "" {
   288  		if locMeta, ok := oa.GetCustomKey(ETag); ok && locMeta != "" {
   289  			if remMeta != locMeta {
   290  				return false
   291  			}
   292  			etag = locMeta
   293  			if ver != locMeta && cksumVal != locMeta { // against double-counting
   294  				count++
   295  				sameEtag = true
   296  			}
   297  		}
   298  	}
   299  	// custom MD: CRC check
   300  	if remMeta, ok := rem.GetCustomKey(CRC32CObjMD); ok && remMeta != "" {
   301  		if locMeta, ok := oa.GetCustomKey(CRC32CObjMD); ok && locMeta != "" {
   302  			if remMeta != locMeta {
   303  				return false
   304  			}
   305  			if cksumVal != locMeta {
   306  				count++
   307  			}
   308  		}
   309  	}
   310  
   311  	// custom MD: MD5 check iff count < 2
   312  	// (ETag ambiguity, see: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.htm)
   313  	if !sameEtag {
   314  		if remMeta, ok := rem.GetCustomKey(MD5ObjMD); ok && remMeta != "" {
   315  			if locMeta, ok := oa.GetCustomKey(MD5ObjMD); ok && locMeta != "" {
   316  				if remMeta != locMeta {
   317  					return
   318  				}
   319  				md5 = locMeta
   320  				if etag != md5 && cksumVal != md5 {
   321  					count++ //  (ditto)
   322  				}
   323  			}
   324  		}
   325  	}
   326  
   327  	switch {
   328  	case count >= 2: // e.g., equal because they have the same (version & md5, where version != md5)
   329  		return true
   330  	case count == 0:
   331  		return false
   332  	default:
   333  		// same version or ETag from the same (remote) backend
   334  		// (arguably, must be configurable)
   335  		if remMeta, ok := rem.GetCustomKey(SourceObjMD); ok && remMeta != "" {
   336  			if locMeta, ok := oa.GetCustomKey(SourceObjMD); ok && locMeta != "" {
   337  				if (ver != "" || etag != "") && remMeta == locMeta {
   338  					return true
   339  				}
   340  			}
   341  		}
   342  	}
   343  	return eq
   344  }