storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/etag/etag.go (about) 1 // MinIO Cloud Storage, (C) 2021 MinIO, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package etag provides an implementation of S3 ETags. 16 // 17 // Each S3 object has an associated ETag that can be 18 // used to e.g. quickly compare objects or check whether 19 // the content of an object has changed. 20 // 21 // In general, an S3 ETag is an MD5 checksum of the object 22 // content. However, there are many exceptions to this rule. 23 // 24 // 25 // Single-part Upload 26 // 27 // In case of a basic single-part PUT operation - without server 28 // side encryption or object compression - the ETag of an object 29 // is its content MD5. 30 // 31 // 32 // Multi-part Upload 33 // 34 // The ETag of an object does not correspond to its content MD5 35 // when the object is uploaded in multiple parts via the S3 36 // multipart API. Instead, S3 first computes a MD5 of each part: 37 // e1 := MD5(part-1) 38 // e2 := MD5(part-2) 39 // ... 40 // eN := MD5(part-N) 41 // 42 // Then, the ETag of the object is computed as MD5 of all individual 43 // part checksums. S3 also encodes the number of parts into the ETag 44 // by appending a -<number-of-parts> at the end: 45 // ETag := MD5(e1 || e2 || e3 ... || eN) || -N 46 // 47 // For example: ceb8853ddc5086cc4ab9e149f8f09c88-5 48 // 49 // However, this scheme is only used for multipart objects that are 50 // not encrypted. 51 // 52 // Server-side Encryption 53 // 54 // S3 specifies three types of server-side-encryption - SSE-C, SSE-S3 55 // and SSE-KMS - with different semantics w.r.t. ETags. 56 // In case of SSE-S3, the ETag of an object is computed the same as 57 // for single resp. multipart plaintext objects. In particular, 58 // the ETag of a singlepart SSE-S3 object is its content MD5. 59 // 60 // In case of SSE-C and SSE-KMS, the ETag of an object is computed 61 // differently. For singlepart uploads the ETag is not the content 62 // MD5 of the object. For multipart uploads the ETag is also not 63 // the MD5 of the individual part checksums but it still contains 64 // the number of parts as suffix. 65 // 66 // Instead, the ETag is kind of unpredictable for S3 clients when 67 // an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3 68 // computes the ETag as MD5 of the encrypted content but there is 69 // no way to verify this assumption since the encryption happens 70 // inside AWS S3. 71 // Therefore, S3 clients must not make any assumption about ETags 72 // in case of SSE-C or SSE-KMS except that the ETag is well-formed. 73 // 74 // To put all of this into a simple rule: 75 // SSE-S3 : ETag == MD5 76 // SSE-C : ETag != MD5 77 // SSE-KMS: ETag != MD5 78 // 79 // 80 // Encrypted ETags 81 // 82 // An S3 implementation has to remember the content MD5 of objects 83 // in case of SSE-S3. However, storing the ETag of an encrypted 84 // object in plaintext may reveal some information about the object. 85 // For example, two objects with the same ETag are identical with 86 // a very high probability. 87 // 88 // Therefore, an S3 implementation may encrypt an ETag before storing 89 // it. In this case, the stored ETag may not be a well-formed S3 ETag. 90 // For example, it can be larger due to a checksum added by authenticated 91 // encryption schemes. Such an ETag must be decrypted before sent to an 92 // S3 client. 93 // 94 // 95 // S3 Clients 96 // 97 // There are many different S3 client implementations. Most of them 98 // access the ETag by looking for the HTTP response header key "Etag". 99 // However, some of them assume that the header key has to be "ETag" 100 // (case-sensitive) and will fail otherwise. 101 // Further, some clients require that the ETag value is a double-quoted 102 // string. Therefore, this package provides dedicated functions for 103 // adding and extracing the ETag to/from HTTP headers. 104 package etag 105 106 import ( 107 "bytes" 108 "crypto/md5" 109 "encoding/base64" 110 "encoding/hex" 111 "errors" 112 "fmt" 113 "net/http" 114 "strconv" 115 "strings" 116 ) 117 118 // ETag is a single S3 ETag. 119 // 120 // An S3 ETag sometimes corresponds to the MD5 of 121 // the S3 object content. However, when an object 122 // is encrypted, compressed or uploaded using 123 // the S3 multipart API then its ETag is not 124 // necessarily the MD5 of the object content. 125 // 126 // For a more detailed description of S3 ETags 127 // take a look at the package documentation. 128 type ETag []byte 129 130 // String returns the string representation of the ETag. 131 // 132 // The returned string is a hex representation of the 133 // binary ETag with an optional '-<part-number>' suffix. 134 func (e ETag) String() string { 135 if e.IsMultipart() { 136 return hex.EncodeToString(e[:16]) + string(e[16:]) 137 } 138 return hex.EncodeToString(e) 139 } 140 141 // IsEncrypted reports whether the ETag is encrypted. 142 func (e ETag) IsEncrypted() bool { 143 return len(e) > 16 && !bytes.ContainsRune(e, '-') 144 } 145 146 // IsMultipart reports whether the ETag belongs to an 147 // object that has been uploaded using the S3 multipart 148 // API. 149 // An S3 multipart ETag has a -<part-number> suffix. 150 func (e ETag) IsMultipart() bool { 151 return len(e) > 16 && bytes.ContainsRune(e, '-') 152 } 153 154 // Parts returns the number of object parts that are 155 // referenced by this ETag. It returns 1 if the object 156 // has been uploaded using the S3 singlepart API. 157 // 158 // Parts may panic if the ETag is an invalid multipart 159 // ETag. 160 func (e ETag) Parts() int { 161 if !e.IsMultipart() { 162 return 1 163 } 164 165 n := bytes.IndexRune(e, '-') 166 parts, err := strconv.Atoi(string(e[n+1:])) 167 if err != nil { 168 panic(err) // malformed ETag 169 } 170 return parts 171 } 172 173 var _ Tagger = ETag{} // compiler check 174 175 // ETag returns the ETag itself. 176 // 177 // By providing this method ETag implements 178 // the Tagger interface. 179 func (e ETag) ETag() ETag { return e } 180 181 // FromContentMD5 decodes and returns the Content-MD5 182 // as ETag, if set. If no Content-MD5 header is set 183 // it returns an empty ETag and no error. 184 func FromContentMD5(h http.Header) (ETag, error) { 185 v, ok := h["Content-Md5"] 186 if !ok { 187 return nil, nil 188 } 189 if v[0] == "" { 190 return nil, errors.New("etag: content-md5 is set but contains no value") 191 } 192 b, err := base64.StdEncoding.Strict().DecodeString(v[0]) 193 if err != nil { 194 return nil, err 195 } 196 if len(b) != md5.Size { 197 return nil, errors.New("etag: invalid content-md5") 198 } 199 return ETag(b), nil 200 } 201 202 // Multipart computes an S3 multipart ETag given a list of 203 // S3 singlepart ETags. It returns nil if the list of 204 // ETags is empty. 205 // 206 // Any encrypted or multipart ETag will be ignored and not 207 // used to compute the returned ETag. 208 func Multipart(etags ...ETag) ETag { 209 if len(etags) == 0 { 210 return nil 211 } 212 213 var n int64 214 h := md5.New() 215 for _, etag := range etags { 216 if !etag.IsMultipart() && !etag.IsEncrypted() { 217 h.Write(etag) 218 n++ 219 } 220 } 221 etag := append(h.Sum(nil), '-') 222 return strconv.AppendInt(etag, n, 10) 223 } 224 225 // Set adds the ETag to the HTTP headers. It overwrites any 226 // existing ETag entry. 227 // 228 // Due to legacy S3 clients, that make incorrect assumptions 229 // about HTTP headers, Set should be used instead of 230 // http.Header.Set(...). Otherwise, some S3 clients will not 231 // able to extract the ETag. 232 func Set(etag ETag, h http.Header) { 233 // Some (broken) S3 clients expect the ETag header to 234 // literally "ETag" - not "Etag". Further, some clients 235 // expect an ETag in double quotes. Therefore, we set the 236 // ETag directly as map entry instead of using http.Header.Set 237 h["ETag"] = []string{`"` + etag.String() + `"`} 238 } 239 240 // Get extracts and parses an ETag from the given HTTP headers. 241 // It returns an error when the HTTP headers do not contain 242 // an ETag entry or when the ETag is malformed. 243 // 244 // Get only accepts AWS S3 compatible ETags - i.e. no 245 // encrypted ETags - and therefore is stricter than Parse. 246 func Get(h http.Header) (ETag, error) { 247 const strict = true 248 if v := h.Get("Etag"); v != "" { 249 return parse(v, strict) 250 } 251 v, ok := h["ETag"] 252 if !ok || len(v) == 0 { 253 return nil, errors.New("etag: HTTP header does not contain an ETag") 254 } 255 return parse(v[0], strict) 256 } 257 258 // Equal returns true if and only if the two ETags are 259 // identical. 260 func Equal(a, b ETag) bool { return bytes.Equal(a, b) } 261 262 // Parse parses s as an S3 ETag, returning the result. 263 // The string can be an encrypted, singlepart 264 // or multipart S3 ETag. It returns an error if s is 265 // not a valid textual representation of an ETag. 266 func Parse(s string) (ETag, error) { 267 const strict = false 268 return parse(s, strict) 269 } 270 271 // parse parse s as an S3 ETag, returning the result. 272 // It operates in one of two modes: 273 // - strict 274 // - non-strict 275 // 276 // In strict mode, parse only accepts ETags that 277 // are AWS S3 compatible. In particular, an AWS 278 // S3 ETag always consists of a 128 bit checksum 279 // value and an optional -<part-number> suffix. 280 // Therefore, s must have the following form in 281 // strict mode: <32-hex-characters>[-<integer>] 282 // 283 // In non-strict mode, parse also accepts ETags 284 // that are not AWS S3 compatible - e.g. encrypted 285 // ETags. 286 func parse(s string, strict bool) (ETag, error) { 287 // An S3 ETag may be a double-quoted string. 288 // Therefore, we remove double quotes at the 289 // start and end, if any. 290 if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) { 291 s = s[1 : len(s)-1] 292 } 293 294 // An S3 ETag may be a multipart ETag that 295 // contains a '-' followed by a number. 296 // If the ETag does not a '-' is is either 297 // a singlepart or encrypted ETag. 298 n := strings.IndexRune(s, '-') 299 if n == -1 { 300 etag, err := hex.DecodeString(s) 301 if err != nil { 302 return nil, err 303 } 304 if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long 305 return nil, fmt.Errorf("etag: invalid length %d", len(etag)) 306 } 307 return ETag(etag), nil 308 } 309 310 prefix, suffix := s[:n], s[n:] 311 if len(prefix) != 32 { 312 return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix)) 313 } 314 if len(suffix) <= 1 { 315 return nil, errors.New("etag: suffix is not a part number") 316 } 317 318 etag, err := hex.DecodeString(prefix) 319 if err != nil { 320 return nil, err 321 } 322 partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1] 323 if err != nil { 324 return nil, err 325 } 326 if strict && (partNumber == 0 || partNumber > 10000) { 327 return nil, fmt.Errorf("etag: invalid part number %d", partNumber) 328 } 329 return ETag(append(etag, suffix...)), nil 330 }