github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/etag/etag.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  // Package etag provides an implementation of S3 ETags.
    19  //
    20  // Each S3 object has an associated ETag that can be
    21  // used to e.g. quickly compare objects or check whether
    22  // the content of an object has changed.
    23  //
    24  // In general, an S3 ETag is an MD5 checksum of the object
    25  // content. However, there are many exceptions to this rule.
    26  //
    27  // # Single-part Upload
    28  //
    29  // In case of a basic single-part PUT operation - without server
    30  // side encryption or object compression - the ETag of an object
    31  // is its content MD5.
    32  //
    33  // # Multi-part Upload
    34  //
    35  // The ETag of an object does not correspond to its content MD5
    36  // when the object is uploaded in multiple parts via the S3
    37  // multipart API. Instead, S3 first computes an MD5 of each part:
    38  //
    39  //	 e1 := MD5(part-1)
    40  //	 e2 := MD5(part-2)
    41  //	...
    42  //	 eN := MD5(part-N)
    43  //
    44  // Then, the ETag of the object is computed as MD5 of all individual
    45  // part checksums. S3 also encodes the number of parts into the ETag
    46  // by appending a -<number-of-parts> at the end:
    47  //
    48  //	ETag := MD5(e1 || e2 || e3 ... || eN) || -N
    49  //
    50  //	For example: ceb8853ddc5086cc4ab9e149f8f09c88-5
    51  //
    52  // However, this scheme is only used for multipart objects that are
    53  // not encrypted.
    54  //
    55  // # Server-side Encryption
    56  //
    57  // S3 specifies three types of server-side-encryption - SSE-C, SSE-S3
    58  // and SSE-KMS - with different semantics w.r.t. ETags.
    59  // In case of SSE-S3, the ETag of an object is computed the same as
    60  // for singlepart or multipart plaintext objects, respectively. In particular,
    61  // the ETag of a singlepart SSE-S3 object is its content MD5.
    62  //
    63  // In case of SSE-C and SSE-KMS, the ETag of an object is computed
    64  // differently. For singlepart uploads the ETag is not the content
    65  // MD5 of the object. For multipart uploads the ETag is also not
    66  // the MD5 of the individual part checksums but it still contains
    67  // the number of parts as suffix.
    68  //
    69  // Instead, the ETag is kind of unpredictable for S3 clients when
    70  // an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3
    71  // computes the ETag as MD5 of the encrypted content but there is
    72  // no way to verify this assumption since the encryption happens
    73  // inside AWS S3.
    74  // Therefore, S3 clients must not make any assumption about ETags
    75  // in case of SSE-C or SSE-KMS except that the ETag is well-formed.
    76  //
    77  // To put all of this into a simple rule:
    78  //
    79  //	SSE-S3 : ETag == MD5
    80  //	SSE-C  : ETag != MD5
    81  //	SSE-KMS: ETag != MD5
    82  //
    83  // # Encrypted ETags
    84  //
    85  // An S3 implementation has to remember the content MD5 of objects
    86  // in case of SSE-S3. However, storing the ETag of an encrypted
    87  // object in plaintext may reveal some information about the object.
    88  // For example, two objects with the same ETag are identical with
    89  // a very high probability.
    90  //
    91  // Therefore, an S3 implementation may encrypt an ETag before storing
    92  // it. In this case, the stored ETag may not be a well-formed S3 ETag.
    93  // For example, it can be larger due to a checksum added by authenticated
    94  // encryption schemes. Such an ETag must be decrypted before it is sent to an
    95  // S3 client.
    96  //
    97  // # S3 Clients
    98  //
    99  // There are many different S3 client implementations. Most of them
   100  // access the ETag by looking for the HTTP response header key "Etag".
   101  // However, some of them assume that the header key has to be "ETag"
   102  // (case-sensitive) and will fail otherwise.
   103  // Further, some clients require that the ETag value is a double-quoted
   104  // string. Therefore, this package provides dedicated functions for
   105  // adding and extracting the ETag to/from HTTP headers.
   106  package etag
   107  
   108  import (
   109  	"bytes"
   110  	"crypto/hmac"
   111  	"crypto/md5"
   112  	"encoding/base64"
   113  	"encoding/hex"
   114  	"errors"
   115  	"fmt"
   116  	"net/http"
   117  	"strconv"
   118  	"strings"
   119  
   120  	"github.com/minio/minio/internal/fips"
   121  	"github.com/minio/minio/internal/hash/sha256"
   122  	xhttp "github.com/minio/minio/internal/http"
   123  	"github.com/minio/sio"
   124  )
   125  
   126  // ETag is a single S3 ETag.
   127  //
   128  // An S3 ETag sometimes corresponds to the MD5 of
   129  // the S3 object content. However, when an object
   130  // is encrypted, compressed or uploaded using
   131  // the S3 multipart API then its ETag is not
   132  // necessarily the MD5 of the object content.
   133  //
   134  // For a more detailed description of S3 ETags
   135  // take a look at the package documentation.
   136  type ETag []byte
   137  
   138  // String returns the string representation of the ETag.
   139  //
   140  // The returned string is a hex representation of the
   141  // binary ETag with an optional '-<part-number>' suffix.
   142  func (e ETag) String() string {
   143  	if e.IsMultipart() {
   144  		return hex.EncodeToString(e[:16]) + string(e[16:])
   145  	}
   146  	return hex.EncodeToString(e)
   147  }
   148  
   149  // IsEncrypted reports whether the ETag is encrypted.
   150  func (e ETag) IsEncrypted() bool {
   151  	// An encrypted ETag must be at least 32 bytes long.
   152  	// It contains the encrypted ETag value + an authentication
   153  	// code generated by the AEAD cipher.
   154  	//
   155  	// Here is an incorrect implementation of IsEncrypted:
   156  	//
   157  	//   return len(e) > 16 && !bytes.ContainsRune(e, '-')
   158  	//
   159  	// An encrypted ETag may contain some random bytes - e.g.
   160  	// and nonce value. This nonce value may contain a '-'
   161  	// just by its nature of being randomly generated.
   162  	// The above implementation would incorrectly consider
   163  	// such an ETag (with a nonce value containing a '-')
   164  	// as non-encrypted.
   165  
   166  	return len(e) >= 32 // We consider all ETags longer than 32 bytes as encrypted
   167  }
   168  
   169  // IsMultipart reports whether the ETag belongs to an
   170  // object that has been uploaded using the S3 multipart
   171  // API.
   172  // An S3 multipart ETag has a -<part-number> suffix.
   173  func (e ETag) IsMultipart() bool {
   174  	return len(e) > 16 && !e.IsEncrypted() && bytes.ContainsRune(e, '-')
   175  }
   176  
   177  // Parts returns the number of object parts that are
   178  // referenced by this ETag. It returns 1 if the object
   179  // has been uploaded using the S3 singlepart API.
   180  //
   181  // Parts may panic if the ETag is an invalid multipart
   182  // ETag.
   183  func (e ETag) Parts() int {
   184  	if !e.IsMultipart() {
   185  		return 1
   186  	}
   187  
   188  	n := bytes.IndexRune(e, '-')
   189  	parts, err := strconv.Atoi(string(e[n+1:]))
   190  	if err != nil {
   191  		panic(err) // malformed ETag
   192  	}
   193  	return parts
   194  }
   195  
   196  // Format returns an ETag that is formatted as specified
   197  // by AWS S3.
   198  //
   199  // An AWS S3 ETag is 16 bytes long and, in case of a multipart
   200  // upload, has a `-N` suffix encoding the number of object parts.
   201  // An ETag is not AWS S3 compatible when encrypted. When sending
   202  // an ETag back to an S3 client it has to be formatted to be
   203  // AWS S3 compatible.
   204  //
   205  // Therefore, Format returns the last 16 bytes of an encrypted
   206  // ETag.
   207  //
   208  // In general, a caller has to distinguish the following cases:
   209  //   - The object is a multipart object. In this case,
   210  //     Format returns the ETag unmodified.
   211  //   - The object is a SSE-KMS or SSE-C encrypted single-
   212  //     part object. In this case, Format returns the last
   213  //     16 bytes of the encrypted ETag which will be a random
   214  //     value.
   215  //   - The object is a SSE-S3 encrypted single-part object.
   216  //     In this case, the caller has to decrypt the ETag first
   217  //     before calling Format.
   218  //     S3 clients expect that the ETag of an SSE-S3 encrypted
   219  //     single-part object is equal to the object's content MD5.
   220  //     Formatting the SSE-S3 ETag before decryption will result
   221  //     in a random-looking ETag which an S3 client will not accept.
   222  //
   223  // Hence, a caller has to check:
   224  //
   225  //	if method == SSE-S3 {
   226  //	   ETag, err := Decrypt(key, ETag)
   227  //	   if err != nil {
   228  //	   }
   229  //	}
   230  //	ETag = ETag.Format()
   231  func (e ETag) Format() ETag {
   232  	if !e.IsEncrypted() {
   233  		return e
   234  	}
   235  	return e[len(e)-16:]
   236  }
   237  
   238  var _ Tagger = ETag{} // compiler check
   239  
   240  // ETag returns the ETag itself.
   241  //
   242  // By providing this method ETag implements
   243  // the Tagger interface.
   244  func (e ETag) ETag() ETag { return e }
   245  
   246  // FromContentMD5 decodes and returns the Content-MD5
   247  // as ETag, if set. If no Content-MD5 header is set
   248  // it returns an empty ETag and no error.
   249  func FromContentMD5(h http.Header) (ETag, error) {
   250  	v, ok := h["Content-Md5"]
   251  	if !ok {
   252  		return nil, nil
   253  	}
   254  	if v[0] == "" {
   255  		return nil, errors.New("etag: content-md5 is set but contains no value")
   256  	}
   257  	b, err := base64.StdEncoding.Strict().DecodeString(v[0])
   258  	if err != nil {
   259  		return nil, err
   260  	}
   261  	if len(b) != md5.Size {
   262  		return nil, errors.New("etag: invalid content-md5")
   263  	}
   264  	return ETag(b), nil
   265  }
   266  
   267  // ContentMD5Requested - for http.request.header is not request Content-Md5
   268  func ContentMD5Requested(h http.Header) bool {
   269  	_, ok := h[xhttp.ContentMD5]
   270  	return ok
   271  }
   272  
   273  // Multipart computes an S3 multipart ETag given a list of
   274  // S3 singlepart ETags. It returns nil if the list of
   275  // ETags is empty.
   276  //
   277  // Any encrypted or multipart ETag will be ignored and not
   278  // used to compute the returned ETag.
   279  func Multipart(etags ...ETag) ETag {
   280  	if len(etags) == 0 {
   281  		return nil
   282  	}
   283  
   284  	var n int64
   285  	h := md5.New()
   286  	for _, etag := range etags {
   287  		if !etag.IsMultipart() && !etag.IsEncrypted() {
   288  			h.Write(etag)
   289  			n++
   290  		}
   291  	}
   292  	etag := append(h.Sum(nil), '-')
   293  	return strconv.AppendInt(etag, n, 10)
   294  }
   295  
   296  // Set adds the ETag to the HTTP headers. It overwrites any
   297  // existing ETag entry.
   298  //
   299  // Due to legacy S3 clients, that make incorrect assumptions
   300  // about HTTP headers, Set should be used instead of
   301  // http.Header.Set(...). Otherwise, some S3 clients will not
   302  // able to extract the ETag.
   303  func Set(etag ETag, h http.Header) {
   304  	// Some (broken) S3 clients expect the ETag header to
   305  	// literally "ETag" - not "Etag". Further, some clients
   306  	// expect an ETag in double quotes. Therefore, we set the
   307  	// ETag directly as map entry instead of using http.Header.Set
   308  	h["ETag"] = []string{`"` + etag.String() + `"`}
   309  }
   310  
   311  // Get extracts and parses an ETag from the given HTTP headers.
   312  // It returns an error when the HTTP headers do not contain
   313  // an ETag entry or when the ETag is malformed.
   314  //
   315  // Get only accepts AWS S3 compatible ETags - i.e. no
   316  // encrypted ETags - and therefore is stricter than Parse.
   317  func Get(h http.Header) (ETag, error) {
   318  	const strict = true
   319  	if v := h.Get("Etag"); v != "" {
   320  		return parse(v, strict)
   321  	}
   322  	v, ok := h["ETag"]
   323  	if !ok || len(v) == 0 {
   324  		return nil, errors.New("etag: HTTP header does not contain an ETag")
   325  	}
   326  	return parse(v[0], strict)
   327  }
   328  
   329  // Equal returns true if and only if the two ETags are
   330  // identical.
   331  func Equal(a, b ETag) bool { return bytes.Equal(a, b) }
   332  
   333  // Decrypt decrypts the ETag with the given key.
   334  //
   335  // If the ETag is not encrypted, Decrypt returns
   336  // the ETag unmodified.
   337  func Decrypt(key []byte, etag ETag) (ETag, error) {
   338  	const HMACContext = "SSE-etag"
   339  
   340  	if !etag.IsEncrypted() {
   341  		return etag, nil
   342  	}
   343  	mac := hmac.New(sha256.New, key)
   344  	mac.Write([]byte(HMACContext))
   345  	decryptionKey := mac.Sum(nil)
   346  
   347  	plaintext := make([]byte, 0, 16)
   348  	etag, err := sio.DecryptBuffer(plaintext, etag, sio.Config{
   349  		Key:          decryptionKey,
   350  		CipherSuites: fips.DARECiphers(),
   351  	})
   352  	if err != nil {
   353  		return nil, err
   354  	}
   355  	return etag, nil
   356  }
   357  
   358  // Parse parses s as an S3 ETag, returning the result.
   359  // The string can be an encrypted, singlepart
   360  // or multipart S3 ETag. It returns an error if s is
   361  // not a valid textual representation of an ETag.
   362  func Parse(s string) (ETag, error) {
   363  	const strict = false
   364  	return parse(s, strict)
   365  }
   366  
   367  // parse parse s as an S3 ETag, returning the result.
   368  // It operates in one of two modes:
   369  //   - strict
   370  //   - non-strict
   371  //
   372  // In strict mode, parse only accepts ETags that
   373  // are AWS S3 compatible. In particular, an AWS
   374  // S3 ETag always consists of a 128 bit checksum
   375  // value and an optional -<part-number> suffix.
   376  // Therefore, s must have the following form in
   377  // strict mode:  <32-hex-characters>[-<integer>]
   378  //
   379  // In non-strict mode, parse also accepts ETags
   380  // that are not AWS S3 compatible - e.g. encrypted
   381  // ETags.
   382  func parse(s string, strict bool) (ETag, error) {
   383  	// An S3 ETag may be a double-quoted string.
   384  	// Therefore, we remove double quotes at the
   385  	// start and end, if any.
   386  	if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) {
   387  		s = s[1 : len(s)-1]
   388  	}
   389  
   390  	// An S3 ETag may be a multipart ETag that
   391  	// contains a '-' followed by a number.
   392  	// If the ETag does not a '-' is either
   393  	// a singlepart or encrypted ETag.
   394  	n := strings.IndexRune(s, '-')
   395  	if n == -1 {
   396  		etag, err := hex.DecodeString(s)
   397  		if err != nil {
   398  			return nil, err
   399  		}
   400  		if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long
   401  			return nil, fmt.Errorf("etag: invalid length %d", len(etag))
   402  		}
   403  		return ETag(etag), nil
   404  	}
   405  
   406  	prefix, suffix := s[:n], s[n:]
   407  	if len(prefix) != 32 {
   408  		return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix))
   409  	}
   410  	if len(suffix) <= 1 {
   411  		return nil, errors.New("etag: suffix is not a part number")
   412  	}
   413  
   414  	etag, err := hex.DecodeString(prefix)
   415  	if err != nil {
   416  		return nil, err
   417  	}
   418  	partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1]
   419  	if err != nil {
   420  		return nil, err
   421  	}
   422  	if strict && (partNumber == 0 || partNumber > 10000) {
   423  		return nil, fmt.Errorf("etag: invalid part number %d", partNumber)
   424  	}
   425  	return ETag(append(etag, suffix...)), nil
   426  }