github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/objattrs.go (about) 1 // Package cmn provides common constants, types, and utilities for AIS clients 2 // and AIStore. 3 /* 4 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 5 */ 6 package cmn 7 8 import ( 9 "fmt" 10 "net/http" 11 "strconv" 12 "strings" 13 14 "github.com/NVIDIA/aistore/api/apc" 15 "github.com/NVIDIA/aistore/cmn/cos" 16 "github.com/NVIDIA/aistore/cmn/debug" 17 ) 18 19 // LOM custom metadata stored under `lomCustomMD`. 20 const ( 21 // source of the cold-GET and download; the values include all 22 // 3rd party backend providers 23 SourceObjMD = "source" 24 25 // downloader' source is "web" 26 WebObjMD = "web" 27 28 // system-supported custom attrs 29 // NOTE: for provider specific HTTP headers, see cmn/cos/const_http.go 30 31 VersionObjMD = "version" // "generation" for GCP, "version" for AWS but only if the bucket is versioned, etc. 32 CRC32CObjMD = cos.ChecksumCRC32C 33 MD5ObjMD = cos.ChecksumMD5 34 ETag = cos.HdrETag 35 36 OrigURLObjMD = "orig_url" 37 38 // additional backend 39 LastModified = "LastModified" 40 ) 41 42 // object properties 43 // NOTE: embeds system `ObjAttrs` that in turn includes custom user-defined 44 // NOTE: compare with `apc.LsoMsg` 45 type ObjectProps struct { 46 Bck Bck `json:"bucket"` 47 ObjAttrs 48 Name string `json:"name"` 49 Location string `json:"location"` // see also `GetPropsLocation` 50 Mirror struct { 51 Paths []string `json:"paths,omitempty"` 52 Copies int `json:"copies,omitempty"` 53 } `json:"mirror"` 54 EC struct { 55 Generation int64 `json:"generation"` 56 DataSlices int `json:"data"` 57 ParitySlices int `json:"parity"` 58 IsECCopy bool `json:"replicated"` 59 } `json:"ec"` 60 Present bool `json:"present"` 61 } 62 63 // see also apc.HdrObjAtime et al. @ api/apc/const.go (and note that naming must be consistent) 64 type ObjAttrs struct { 65 Cksum *cos.Cksum `json:"checksum,omitempty"` // object checksum (cloned) 66 CustomMD cos.StrKVs `json:"custom-md,omitempty"` // custom metadata: ETag, MD5, CRC, user-defined ... 67 Ver string `json:"version,omitempty"` // object version 68 Atime int64 `json:"atime,omitempty"` // access time (nanoseconds since UNIX epoch) 69 Size int64 `json:"size,omitempty"` // object size (bytes) 70 } 71 72 // interface guard 73 var _ cos.OAH = (*ObjAttrs)(nil) 74 75 func (oa *ObjAttrs) String() string { 76 return fmt.Sprintf("%dB, v%q, %s, %+v", oa.Size, oa.Ver, oa.Cksum, oa.CustomMD) 77 } 78 79 func (oa *ObjAttrs) SizeBytes(_ ...bool) int64 { return oa.Size } 80 func (oa *ObjAttrs) Version(_ ...bool) string { return oa.Ver } 81 func (oa *ObjAttrs) AtimeUnix() int64 { return oa.Atime } 82 func (oa *ObjAttrs) Checksum() *cos.Cksum { return oa.Cksum } 83 func (oa *ObjAttrs) SetCksum(ty, val string) { oa.Cksum = cos.NewCksum(ty, val) } 84 85 func (oa *ObjAttrs) SetSize(size int64) { 86 debug.Assert(oa.Size == 0) 87 oa.Size = size 88 } 89 90 // 91 // custom metadata 92 // 93 94 func CustomMD2S(md cos.StrKVs) string { return fmt.Sprintf("%+v", md) } 95 96 func S2CustomMD(custom, version string) (md cos.StrKVs) { 97 if len(custom) < 8 || !strings.HasPrefix(custom, "map[") { // Sprintf above 98 return nil 99 } 100 s := custom[4 : len(custom)-1] 101 lst := strings.Split(s, " ") 102 md = make(cos.StrKVs, len(lst)) 103 md[VersionObjMD] = version 104 parseCustom(md, lst, SourceObjMD) 105 parseCustom(md, lst, CRC32CObjMD) 106 parseCustom(md, lst, MD5ObjMD) 107 parseCustom(md, lst, ETag) 108 return md 109 } 110 111 func parseCustom(md cos.StrKVs, lst []string, key string) { 112 keyX := key + ":" 113 for _, kv := range lst { 114 if strings.HasPrefix(kv, keyX) { 115 md[key] = kv[len(keyX):] 116 return 117 } 118 } 119 } 120 121 func (oa *ObjAttrs) GetCustomMD() cos.StrKVs { return oa.CustomMD } 122 func (oa *ObjAttrs) SetCustomMD(md cos.StrKVs) { oa.CustomMD = md } 123 124 func (oa *ObjAttrs) GetCustomKey(key string) (val string, exists bool) { 125 val, exists = oa.CustomMD[key] 126 return 127 } 128 129 func (oa *ObjAttrs) SetCustomKey(k, v string) { 130 debug.Assert(k != "") 131 if oa.CustomMD == nil { 132 oa.CustomMD = make(cos.StrKVs, 6) 133 } 134 oa.CustomMD[k] = v 135 } 136 137 func (oa *ObjAttrs) DelCustomKeys(keys ...string) { 138 for _, key := range keys { 139 delete(oa.CustomMD, key) 140 } 141 } 142 143 // clone OAH => ObjAttrs (see also lom.CopyAttrs) 144 func (oa *ObjAttrs) CopyFrom(oah cos.OAH, skipCksum bool) { 145 oa.Atime = oah.AtimeUnix() 146 oa.Size = oah.SizeBytes() 147 oa.Ver = oah.Version() 148 if !skipCksum { 149 oa.Cksum = oah.Checksum().Clone() 150 } 151 for k, v := range oah.GetCustomMD() { 152 oa.SetCustomKey(k, v) 153 } 154 } 155 156 // 157 // to and from HTTP header converters (as in: HEAD /object) 158 // 159 160 // may set headers: 161 // - standard cos.HdrContentLength ("Content-Length") & cos.HdrETag ("ETag") 162 // - atime, version, etc. - all the rest "ais-" prefixed 163 func ToHeader(oah cos.OAH, hdr http.Header, size int64, cksums ...*cos.Cksum) { 164 var cksum *cos.Cksum 165 if len(cksums) > 0 { 166 // - range checksum, or 167 // - archived file checksum, or 168 // - object checksum (when read range is _not_ checksummed) 169 cksum = cksums[0] 170 } else { 171 cksum = oah.Checksum() 172 } 173 if !cksum.IsEmpty() { 174 hdr.Set(apc.HdrObjCksumType, cksum.Ty()) 175 hdr.Set(apc.HdrObjCksumVal, cksum.Val()) 176 } 177 if at := oah.AtimeUnix(); at != 0 { 178 hdr.Set(apc.HdrObjAtime, cos.UnixNano2S(at)) 179 } 180 if size > 0 { 181 hdr.Set(cos.HdrContentLength, strconv.FormatInt(size, 10)) 182 } 183 if v := oah.Version(true); v != "" { 184 hdr.Set(apc.HdrObjVersion, v) 185 } 186 custom := oah.GetCustomMD() 187 for k, v := range custom { 188 hdr.Add(apc.HdrObjCustomMD, k+"="+v) 189 if k == ETag { 190 // TODO: redundant vs CustomMD - maybe extend cos.OAH to include get/set(ETag) 191 hdr.Set(cos.HdrETag, v) 192 } 193 } 194 } 195 196 // NOTE: returning checksum separately for subsequent validation 197 func (oa *ObjAttrs) FromHeader(hdr http.Header) (cksum *cos.Cksum) { 198 if ty := hdr.Get(apc.HdrObjCksumType); ty != "" { 199 val := hdr.Get(apc.HdrObjCksumVal) 200 cksum = cos.NewCksum(ty, val) 201 } 202 203 if at := hdr.Get(apc.HdrObjAtime); at != "" { 204 atime, err := cos.S2UnixNano(at) 205 debug.AssertNoErr(err) 206 oa.Atime = atime 207 } 208 if sz := hdr.Get(cos.HdrContentLength); sz != "" { 209 size, err := strconv.ParseInt(sz, 10, 64) 210 debug.AssertNoErr(err) 211 oa.Size = size 212 } 213 if v := hdr.Get(apc.HdrObjVersion); v != "" { 214 oa.Ver = v 215 } 216 custom := hdr[http.CanonicalHeaderKey(apc.HdrObjCustomMD)] 217 for _, v := range custom { 218 entry := strings.SplitN(v, "=", 2) 219 debug.Assert(len(entry) == 2) 220 oa.SetCustomKey(entry[0], entry[1]) 221 } 222 return 223 } 224 225 func (oa *ObjAttrs) FromLsoEntry(e *LsoEnt) { 226 oa.Size = e.Size 227 oa.Ver = e.Version 228 229 // entry.Custom = cmn.CustomMD2S(custom) 230 _ = CustomMD2S(nil) 231 } 232 233 // local <=> remote equality in the context of cold-GET and download. This function 234 // decides whether we need to go ahead and re-read the object from its remote location. 235 // 236 // Other than a "binary" size and version checks, rest logic goes as follows: objects are 237 // considered equal if they have a) the same version and at least one matching checksum, or 238 // b) the same remote "source" and at least one matching checksum, or c) two matching checksums. 239 // (See also note below.) 240 // 241 // Note that mismatch in any given checksum type immediately renders inequality and return 242 // from the function. 243 func (oa *ObjAttrs) Equal(rem cos.OAH) (eq bool) { 244 var ( 245 ver string 246 md5 string 247 etag string 248 cksumVal string 249 count int 250 sameEtag bool 251 ) 252 // size check 253 if remSize := rem.SizeBytes(true); oa.Size != 0 && remSize != 0 && oa.Size != remSize { 254 return false 255 } 256 257 // version check 258 if remVer := rem.Version(true); oa.Ver != "" && remVer != "" { 259 if oa.Ver != remVer { 260 return false 261 } 262 ver = oa.Ver 263 // NOTE: ais own version is, currently, a nonunique sequence number - not counting 264 if remSrc, _ := rem.GetCustomKey(SourceObjMD); remSrc != apc.AIS { 265 count++ 266 } 267 } else if remMeta, ok := rem.GetCustomKey(VersionObjMD); ok && remMeta != "" { 268 if locMeta, ok := oa.GetCustomKey(VersionObjMD); ok && locMeta != "" { 269 if remMeta != locMeta { 270 return false 271 } 272 count++ 273 ver = locMeta 274 } 275 } 276 277 // checksum check 278 if a, b := rem.Checksum(), oa.Cksum; !a.IsEmpty() && !b.IsEmpty() && a.Ty() == b.Ty() { 279 if !a.Equal(b) { 280 return false 281 } 282 cksumVal = a.Val() 283 count++ 284 } 285 286 // custom MD: ETag check 287 if remMeta, ok := rem.GetCustomKey(ETag); ok && remMeta != "" { 288 if locMeta, ok := oa.GetCustomKey(ETag); ok && locMeta != "" { 289 if remMeta != locMeta { 290 return false 291 } 292 etag = locMeta 293 if ver != locMeta && cksumVal != locMeta { // against double-counting 294 count++ 295 sameEtag = true 296 } 297 } 298 } 299 // custom MD: CRC check 300 if remMeta, ok := rem.GetCustomKey(CRC32CObjMD); ok && remMeta != "" { 301 if locMeta, ok := oa.GetCustomKey(CRC32CObjMD); ok && locMeta != "" { 302 if remMeta != locMeta { 303 return false 304 } 305 if cksumVal != locMeta { 306 count++ 307 } 308 } 309 } 310 311 // custom MD: MD5 check iff count < 2 312 // (ETag ambiguity, see: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.htm) 313 if !sameEtag { 314 if remMeta, ok := rem.GetCustomKey(MD5ObjMD); ok && remMeta != "" { 315 if locMeta, ok := oa.GetCustomKey(MD5ObjMD); ok && locMeta != "" { 316 if remMeta != locMeta { 317 return 318 } 319 md5 = locMeta 320 if etag != md5 && cksumVal != md5 { 321 count++ // (ditto) 322 } 323 } 324 } 325 } 326 327 switch { 328 case count >= 2: // e.g., equal because they have the same (version & md5, where version != md5) 329 return true 330 case count == 0: 331 return false 332 default: 333 // same version or ETag from the same (remote) backend 334 // (arguably, must be configurable) 335 if remMeta, ok := rem.GetCustomKey(SourceObjMD); ok && remMeta != "" { 336 if locMeta, ok := oa.GetCustomKey(SourceObjMD); ok && locMeta != "" { 337 if (ver != "" || etag != "") && remMeta == locMeta { 338 return true 339 } 340 } 341 } 342 } 343 return eq 344 }