// github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/etag/etag.go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

// Package etag provides an implementation of S3 ETags.
//
// Each S3 object has an associated ETag that can be
// used to e.g. quickly compare objects or check whether
// the content of an object has changed.
//
// In general, an S3 ETag is an MD5 checksum of the object
// content. However, there are many exceptions to this rule.
//
// # Single-part Upload
//
// In case of a basic single-part PUT operation - without server
// side encryption or object compression - the ETag of an object
// is its content MD5.
//
// # Multi-part Upload
//
// The ETag of an object does not correspond to its content MD5
// when the object is uploaded in multiple parts via the S3
// multipart API. Instead, S3 first computes an MD5 of each part:
//
//	e1 := MD5(part-1)
//	e2 := MD5(part-2)
//	...
//	eN := MD5(part-N)
//
// Then, the ETag of the object is computed as MD5 of all individual
// part checksums. S3 also encodes the number of parts into the ETag
// by appending a -<number-of-parts> at the end:
//
//	ETag := MD5(e1 || e2 || e3 ... || eN) || -N
//
//	For example: ceb8853ddc5086cc4ab9e149f8f09c88-5
//
// However, this scheme is only used for multipart objects that are
// not encrypted.
//
// # Server-side Encryption
//
// S3 specifies three types of server-side-encryption - SSE-C, SSE-S3
// and SSE-KMS - with different semantics w.r.t. ETags.
// In case of SSE-S3, the ETag of an object is computed the same way as
// for singlepart or multipart plaintext objects, respectively. In particular,
// the ETag of a singlepart SSE-S3 object is its content MD5.
//
// In case of SSE-C and SSE-KMS, the ETag of an object is computed
// differently. For singlepart uploads the ETag is not the content
// MD5 of the object. For multipart uploads the ETag is also not
// the MD5 of the individual part checksums but it still contains
// the number of parts as suffix.
//
// Instead, the ETag is kind of unpredictable for S3 clients when
// an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3
// computes the ETag as MD5 of the encrypted content but there is
// no way to verify this assumption since the encryption happens
// inside AWS S3.
// Therefore, S3 clients must not make any assumption about ETags
// in case of SSE-C or SSE-KMS except that the ETag is well-formed.
//
// To put all of this into a simple rule:
//
//	SSE-S3 : ETag == MD5
//	SSE-C  : ETag != MD5
//	SSE-KMS: ETag != MD5
//
// # Encrypted ETags
//
// An S3 implementation has to remember the content MD5 of objects
// in case of SSE-S3. However, storing the ETag of an encrypted
// object in plaintext may reveal some information about the object.
// For example, two objects with the same ETag are identical with
// a very high probability.
//
// Therefore, an S3 implementation may encrypt an ETag before storing
// it. In this case, the stored ETag may not be a well-formed S3 ETag.
// For example, it can be larger due to a checksum added by authenticated
// encryption schemes. Such an ETag must be decrypted before being sent
// to an S3 client.
//
// # S3 Clients
//
// There are many different S3 client implementations. Most of them
// access the ETag by looking for the HTTP response header key "Etag".
// However, some of them assume that the header key has to be "ETag"
// (case-sensitive) and will fail otherwise.
// Further, some clients require that the ETag value is a double-quoted
// string. Therefore, this package provides dedicated functions for
// adding and extracting the ETag to/from HTTP headers.
package etag

import (
	"bytes"
	"crypto/hmac"
	"crypto/md5"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"net/http"
	"strconv"
	"strings"

	"github.com/minio/minio/internal/fips"
	"github.com/minio/minio/internal/hash/sha256"
	xhttp "github.com/minio/minio/internal/http"
	"github.com/minio/sio"
)

// ETag is a single S3 ETag.
//
// An S3 ETag sometimes corresponds to the MD5 of
// the S3 object content. However, when an object
// is encrypted, compressed or uploaded using
// the S3 multipart API then its ETag is not
// necessarily the MD5 of the object content.
//
// An ETag holds the binary (not hex-encoded) value.
// A multipart ETag consists of 16 binary bytes that
// are followed by the textual '-<part-number>' suffix.
//
// For a more detailed description of S3 ETags
// take a look at the package documentation.
type ETag []byte

// String returns the string representation of the ETag.
//
// The returned string is a hex representation of the
// binary ETag with an optional '-<part-number>' suffix.
142 func (e ETag) String() string { 143 if e.IsMultipart() { 144 return hex.EncodeToString(e[:16]) + string(e[16:]) 145 } 146 return hex.EncodeToString(e) 147 } 148 149 // IsEncrypted reports whether the ETag is encrypted. 150 func (e ETag) IsEncrypted() bool { 151 // An encrypted ETag must be at least 32 bytes long. 152 // It contains the encrypted ETag value + an authentication 153 // code generated by the AEAD cipher. 154 // 155 // Here is an incorrect implementation of IsEncrypted: 156 // 157 // return len(e) > 16 && !bytes.ContainsRune(e, '-') 158 // 159 // An encrypted ETag may contain some random bytes - e.g. 160 // and nonce value. This nonce value may contain a '-' 161 // just by its nature of being randomly generated. 162 // The above implementation would incorrectly consider 163 // such an ETag (with a nonce value containing a '-') 164 // as non-encrypted. 165 166 return len(e) >= 32 // We consider all ETags longer than 32 bytes as encrypted 167 } 168 169 // IsMultipart reports whether the ETag belongs to an 170 // object that has been uploaded using the S3 multipart 171 // API. 172 // An S3 multipart ETag has a -<part-number> suffix. 173 func (e ETag) IsMultipart() bool { 174 return len(e) > 16 && !e.IsEncrypted() && bytes.ContainsRune(e, '-') 175 } 176 177 // Parts returns the number of object parts that are 178 // referenced by this ETag. It returns 1 if the object 179 // has been uploaded using the S3 singlepart API. 180 // 181 // Parts may panic if the ETag is an invalid multipart 182 // ETag. 183 func (e ETag) Parts() int { 184 if !e.IsMultipart() { 185 return 1 186 } 187 188 n := bytes.IndexRune(e, '-') 189 parts, err := strconv.Atoi(string(e[n+1:])) 190 if err != nil { 191 panic(err) // malformed ETag 192 } 193 return parts 194 } 195 196 // Format returns an ETag that is formatted as specified 197 // by AWS S3. 
198 // 199 // An AWS S3 ETag is 16 bytes long and, in case of a multipart 200 // upload, has a `-N` suffix encoding the number of object parts. 201 // An ETag is not AWS S3 compatible when encrypted. When sending 202 // an ETag back to an S3 client it has to be formatted to be 203 // AWS S3 compatible. 204 // 205 // Therefore, Format returns the last 16 bytes of an encrypted 206 // ETag. 207 // 208 // In general, a caller has to distinguish the following cases: 209 // - The object is a multipart object. In this case, 210 // Format returns the ETag unmodified. 211 // - The object is a SSE-KMS or SSE-C encrypted single- 212 // part object. In this case, Format returns the last 213 // 16 bytes of the encrypted ETag which will be a random 214 // value. 215 // - The object is a SSE-S3 encrypted single-part object. 216 // In this case, the caller has to decrypt the ETag first 217 // before calling Format. 218 // S3 clients expect that the ETag of an SSE-S3 encrypted 219 // single-part object is equal to the object's content MD5. 220 // Formatting the SSE-S3 ETag before decryption will result 221 // in a random-looking ETag which an S3 client will not accept. 222 // 223 // Hence, a caller has to check: 224 // 225 // if method == SSE-S3 { 226 // ETag, err := Decrypt(key, ETag) 227 // if err != nil { 228 // } 229 // } 230 // ETag = ETag.Format() 231 func (e ETag) Format() ETag { 232 if !e.IsEncrypted() { 233 return e 234 } 235 return e[len(e)-16:] 236 } 237 238 var _ Tagger = ETag{} // compiler check 239 240 // ETag returns the ETag itself. 241 // 242 // By providing this method ETag implements 243 // the Tagger interface. 244 func (e ETag) ETag() ETag { return e } 245 246 // FromContentMD5 decodes and returns the Content-MD5 247 // as ETag, if set. If no Content-MD5 header is set 248 // it returns an empty ETag and no error. 
249 func FromContentMD5(h http.Header) (ETag, error) { 250 v, ok := h["Content-Md5"] 251 if !ok { 252 return nil, nil 253 } 254 if v[0] == "" { 255 return nil, errors.New("etag: content-md5 is set but contains no value") 256 } 257 b, err := base64.StdEncoding.Strict().DecodeString(v[0]) 258 if err != nil { 259 return nil, err 260 } 261 if len(b) != md5.Size { 262 return nil, errors.New("etag: invalid content-md5") 263 } 264 return ETag(b), nil 265 } 266 267 // ContentMD5Requested - for http.request.header is not request Content-Md5 268 func ContentMD5Requested(h http.Header) bool { 269 _, ok := h[xhttp.ContentMD5] 270 return ok 271 } 272 273 // Multipart computes an S3 multipart ETag given a list of 274 // S3 singlepart ETags. It returns nil if the list of 275 // ETags is empty. 276 // 277 // Any encrypted or multipart ETag will be ignored and not 278 // used to compute the returned ETag. 279 func Multipart(etags ...ETag) ETag { 280 if len(etags) == 0 { 281 return nil 282 } 283 284 var n int64 285 h := md5.New() 286 for _, etag := range etags { 287 if !etag.IsMultipart() && !etag.IsEncrypted() { 288 h.Write(etag) 289 n++ 290 } 291 } 292 etag := append(h.Sum(nil), '-') 293 return strconv.AppendInt(etag, n, 10) 294 } 295 296 // Set adds the ETag to the HTTP headers. It overwrites any 297 // existing ETag entry. 298 // 299 // Due to legacy S3 clients, that make incorrect assumptions 300 // about HTTP headers, Set should be used instead of 301 // http.Header.Set(...). Otherwise, some S3 clients will not 302 // able to extract the ETag. 303 func Set(etag ETag, h http.Header) { 304 // Some (broken) S3 clients expect the ETag header to 305 // literally "ETag" - not "Etag". Further, some clients 306 // expect an ETag in double quotes. Therefore, we set the 307 // ETag directly as map entry instead of using http.Header.Set 308 h["ETag"] = []string{`"` + etag.String() + `"`} 309 } 310 311 // Get extracts and parses an ETag from the given HTTP headers. 
312 // It returns an error when the HTTP headers do not contain 313 // an ETag entry or when the ETag is malformed. 314 // 315 // Get only accepts AWS S3 compatible ETags - i.e. no 316 // encrypted ETags - and therefore is stricter than Parse. 317 func Get(h http.Header) (ETag, error) { 318 const strict = true 319 if v := h.Get("Etag"); v != "" { 320 return parse(v, strict) 321 } 322 v, ok := h["ETag"] 323 if !ok || len(v) == 0 { 324 return nil, errors.New("etag: HTTP header does not contain an ETag") 325 } 326 return parse(v[0], strict) 327 } 328 329 // Equal returns true if and only if the two ETags are 330 // identical. 331 func Equal(a, b ETag) bool { return bytes.Equal(a, b) } 332 333 // Decrypt decrypts the ETag with the given key. 334 // 335 // If the ETag is not encrypted, Decrypt returns 336 // the ETag unmodified. 337 func Decrypt(key []byte, etag ETag) (ETag, error) { 338 const HMACContext = "SSE-etag" 339 340 if !etag.IsEncrypted() { 341 return etag, nil 342 } 343 mac := hmac.New(sha256.New, key) 344 mac.Write([]byte(HMACContext)) 345 decryptionKey := mac.Sum(nil) 346 347 plaintext := make([]byte, 0, 16) 348 etag, err := sio.DecryptBuffer(plaintext, etag, sio.Config{ 349 Key: decryptionKey, 350 CipherSuites: fips.DARECiphers(), 351 }) 352 if err != nil { 353 return nil, err 354 } 355 return etag, nil 356 } 357 358 // Parse parses s as an S3 ETag, returning the result. 359 // The string can be an encrypted, singlepart 360 // or multipart S3 ETag. It returns an error if s is 361 // not a valid textual representation of an ETag. 362 func Parse(s string) (ETag, error) { 363 const strict = false 364 return parse(s, strict) 365 } 366 367 // parse parse s as an S3 ETag, returning the result. 368 // It operates in one of two modes: 369 // - strict 370 // - non-strict 371 // 372 // In strict mode, parse only accepts ETags that 373 // are AWS S3 compatible. 
In particular, an AWS 374 // S3 ETag always consists of a 128 bit checksum 375 // value and an optional -<part-number> suffix. 376 // Therefore, s must have the following form in 377 // strict mode: <32-hex-characters>[-<integer>] 378 // 379 // In non-strict mode, parse also accepts ETags 380 // that are not AWS S3 compatible - e.g. encrypted 381 // ETags. 382 func parse(s string, strict bool) (ETag, error) { 383 // An S3 ETag may be a double-quoted string. 384 // Therefore, we remove double quotes at the 385 // start and end, if any. 386 if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) { 387 s = s[1 : len(s)-1] 388 } 389 390 // An S3 ETag may be a multipart ETag that 391 // contains a '-' followed by a number. 392 // If the ETag does not a '-' is either 393 // a singlepart or encrypted ETag. 394 n := strings.IndexRune(s, '-') 395 if n == -1 { 396 etag, err := hex.DecodeString(s) 397 if err != nil { 398 return nil, err 399 } 400 if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long 401 return nil, fmt.Errorf("etag: invalid length %d", len(etag)) 402 } 403 return ETag(etag), nil 404 } 405 406 prefix, suffix := s[:n], s[n:] 407 if len(prefix) != 32 { 408 return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix)) 409 } 410 if len(suffix) <= 1 { 411 return nil, errors.New("etag: suffix is not a part number") 412 } 413 414 etag, err := hex.DecodeString(prefix) 415 if err != nil { 416 return nil, err 417 } 418 partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1] 419 if err != nil { 420 return nil, err 421 } 422 if strict && (partNumber == 0 || partNumber > 10000) { 423 return nil, fmt.Errorf("etag: invalid part number %d", partNumber) 424 } 425 return ETag(append(etag, suffix...)), nil 426 }