storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/etag/etag.go (about)

     1  // MinIO Cloud Storage, (C) 2021 MinIO, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package etag provides an implementation of S3 ETags.
    16  //
    17  // Each S3 object has an associated ETag that can be
    18  // used to e.g. quickly compare objects or check whether
    19  // the content of an object has changed.
    20  //
    21  // In general, an S3 ETag is an MD5 checksum of the object
    22  // content. However, there are many exceptions to this rule.
    23  //
    24  //
    25  // Single-part Upload
    26  //
    27  // In case of a basic single-part PUT operation - without server
    28  // side encryption or object compression - the ETag of an object
    29  // is its content MD5.
    30  //
    31  //
    32  // Multi-part Upload
    33  //
    34  // The ETag of an object does not correspond to its content MD5
    35  // when the object is uploaded in multiple parts via the S3
    36  // multipart API. Instead, S3 first computes an MD5 of each part:
    37  //   e1 := MD5(part-1)
    38  //   e2 := MD5(part-2)
    39  //  ...
    40  //   eN := MD5(part-N)
    41  //
    42  // Then, the ETag of the object is computed as MD5 of all individual
    43  // part checksums. S3 also encodes the number of parts into the ETag
    44  // by appending a -<number-of-parts> at the end:
    45  //   ETag := MD5(e1 || e2 || e3 ... || eN) || -N
    46  //
    47  //   For example: ceb8853ddc5086cc4ab9e149f8f09c88-5
    48  //
    49  // However, this scheme is only used for multipart objects that are
    50  // not encrypted.
    51  //
    52  // Server-side Encryption
    53  //
    54  // S3 specifies three types of server-side-encryption - SSE-C, SSE-S3
    55  // and SSE-KMS - with different semantics w.r.t. ETags.
    56  // In case of SSE-S3, the ETag of an object is computed the same as
    57  // for single resp. multipart plaintext objects. In particular,
    58  // the ETag of a singlepart SSE-S3 object is its content MD5.
    59  //
    60  // In case of SSE-C and SSE-KMS, the ETag of an object is computed
    61  // differently. For singlepart uploads the ETag is not the content
    62  // MD5 of the object. For multipart uploads the ETag is also not
    63  // the MD5 of the individual part checksums but it still contains
    64  // the number of parts as suffix.
    65  //
    66  // Instead, the ETag is kind of unpredictable for S3 clients when
    67  // an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3
    68  // computes the ETag as MD5 of the encrypted content but there is
    69  // no way to verify this assumption since the encryption happens
    70  // inside AWS S3.
    71  // Therefore, S3 clients must not make any assumption about ETags
    72  // in case of SSE-C or SSE-KMS except that the ETag is well-formed.
    73  //
    74  // To put all of this into a simple rule:
    75  //    SSE-S3 : ETag == MD5
    76  //    SSE-C  : ETag != MD5
    77  //    SSE-KMS: ETag != MD5
    78  //
    79  //
    80  // Encrypted ETags
    81  //
    82  // An S3 implementation has to remember the content MD5 of objects
    83  // in case of SSE-S3. However, storing the ETag of an encrypted
    84  // object in plaintext may reveal some information about the object.
    85  // For example, two objects with the same ETag are identical with
    86  // a very high probability.
    87  //
    88  // Therefore, an S3 implementation may encrypt an ETag before storing
    89  // it. In this case, the stored ETag may not be a well-formed S3 ETag.
    90  // For example, it can be larger due to a checksum added by authenticated
    91  // encryption schemes. Such an ETag must be decrypted before sent to an
    92  // S3 client.
    93  //
    94  //
    95  // S3 Clients
    96  //
    97  // There are many different S3 client implementations. Most of them
    98  // access the ETag by looking for the HTTP response header key "Etag".
    99  // However, some of them assume that the header key has to be "ETag"
   100  // (case-sensitive) and will fail otherwise.
   101  // Further, some clients require that the ETag value is a double-quoted
   102  // string. Therefore, this package provides dedicated functions for
   103  // adding and extracting the ETag to/from HTTP headers.
   104  package etag
   105  
   106  import (
   107  	"bytes"
   108  	"crypto/md5"
   109  	"encoding/base64"
   110  	"encoding/hex"
   111  	"errors"
   112  	"fmt"
   113  	"net/http"
   114  	"strconv"
   115  	"strings"
   116  )
   117  
   118  // ETag is a single S3 ETag.
   119  //
   120  // An S3 ETag sometimes corresponds to the MD5 of
   121  // the S3 object content. However, when an object
   122  // is encrypted, compressed or uploaded using
   123  // the S3 multipart API then its ETag is not
   124  // necessarily the MD5 of the object content.
   125  //
   126  // For a more detailed description of S3 ETags
   127  // take a look at the package documentation.
   128  type ETag []byte
   129  
   130  // String returns the string representation of the ETag.
   131  //
   132  // The returned string is a hex representation of the
   133  // binary ETag with an optional '-<part-number>' suffix.
   134  func (e ETag) String() string {
   135  	if e.IsMultipart() {
   136  		return hex.EncodeToString(e[:16]) + string(e[16:])
   137  	}
   138  	return hex.EncodeToString(e)
   139  }
   140  
   141  // IsEncrypted reports whether the ETag is encrypted.
   142  func (e ETag) IsEncrypted() bool {
   143  	return len(e) > 16 && !bytes.ContainsRune(e, '-')
   144  }
   145  
   146  // IsMultipart reports whether the ETag belongs to an
   147  // object that has been uploaded using the S3 multipart
   148  // API.
   149  // An S3 multipart ETag has a -<part-number> suffix.
   150  func (e ETag) IsMultipart() bool {
   151  	return len(e) > 16 && bytes.ContainsRune(e, '-')
   152  }
   153  
   154  // Parts returns the number of object parts that are
   155  // referenced by this ETag. It returns 1 if the object
   156  // has been uploaded using the S3 singlepart API.
   157  //
   158  // Parts may panic if the ETag is an invalid multipart
   159  // ETag.
   160  func (e ETag) Parts() int {
   161  	if !e.IsMultipart() {
   162  		return 1
   163  	}
   164  
   165  	n := bytes.IndexRune(e, '-')
   166  	parts, err := strconv.Atoi(string(e[n+1:]))
   167  	if err != nil {
   168  		panic(err) // malformed ETag
   169  	}
   170  	return parts
   171  }
   172  
   173  var _ Tagger = ETag{} // compiler check
   174  
   175  // ETag returns the ETag itself.
   176  //
   177  // By providing this method ETag implements
   178  // the Tagger interface.
   179  func (e ETag) ETag() ETag { return e }
   180  
   181  // FromContentMD5 decodes and returns the Content-MD5
   182  // as ETag, if set. If no Content-MD5 header is set
   183  // it returns an empty ETag and no error.
   184  func FromContentMD5(h http.Header) (ETag, error) {
   185  	v, ok := h["Content-Md5"]
   186  	if !ok {
   187  		return nil, nil
   188  	}
   189  	if v[0] == "" {
   190  		return nil, errors.New("etag: content-md5 is set but contains no value")
   191  	}
   192  	b, err := base64.StdEncoding.Strict().DecodeString(v[0])
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  	if len(b) != md5.Size {
   197  		return nil, errors.New("etag: invalid content-md5")
   198  	}
   199  	return ETag(b), nil
   200  }
   201  
   202  // Multipart computes an S3 multipart ETag given a list of
   203  // S3 singlepart ETags. It returns nil if the list of
   204  // ETags is empty.
   205  //
   206  // Any encrypted or multipart ETag will be ignored and not
   207  // used to compute the returned ETag.
   208  func Multipart(etags ...ETag) ETag {
   209  	if len(etags) == 0 {
   210  		return nil
   211  	}
   212  
   213  	var n int64
   214  	h := md5.New()
   215  	for _, etag := range etags {
   216  		if !etag.IsMultipart() && !etag.IsEncrypted() {
   217  			h.Write(etag)
   218  			n++
   219  		}
   220  	}
   221  	etag := append(h.Sum(nil), '-')
   222  	return strconv.AppendInt(etag, n, 10)
   223  }
   224  
   225  // Set adds the ETag to the HTTP headers. It overwrites any
   226  // existing ETag entry.
   227  //
   228  // Due to legacy S3 clients, that make incorrect assumptions
   229  // about HTTP headers, Set should be used instead of
   230  // http.Header.Set(...). Otherwise, some S3 clients will not
   231  // able to extract the ETag.
   232  func Set(etag ETag, h http.Header) {
   233  	// Some (broken) S3 clients expect the ETag header to
   234  	// literally "ETag" - not "Etag". Further, some clients
   235  	// expect an ETag in double quotes. Therefore, we set the
   236  	// ETag directly as map entry instead of using http.Header.Set
   237  	h["ETag"] = []string{`"` + etag.String() + `"`}
   238  }
   239  
   240  // Get extracts and parses an ETag from the given HTTP headers.
   241  // It returns an error when the HTTP headers do not contain
   242  // an ETag entry or when the ETag is malformed.
   243  //
   244  // Get only accepts AWS S3 compatible ETags - i.e. no
   245  // encrypted ETags - and therefore is stricter than Parse.
   246  func Get(h http.Header) (ETag, error) {
   247  	const strict = true
   248  	if v := h.Get("Etag"); v != "" {
   249  		return parse(v, strict)
   250  	}
   251  	v, ok := h["ETag"]
   252  	if !ok || len(v) == 0 {
   253  		return nil, errors.New("etag: HTTP header does not contain an ETag")
   254  	}
   255  	return parse(v[0], strict)
   256  }
   257  
   258  // Equal returns true if and only if the two ETags are
   259  // identical.
   260  func Equal(a, b ETag) bool { return bytes.Equal(a, b) }
   261  
   262  // Parse parses s as an S3 ETag, returning the result.
   263  // The string can be an encrypted, singlepart
   264  // or multipart S3 ETag. It returns an error if s is
   265  // not a valid textual representation of an ETag.
   266  func Parse(s string) (ETag, error) {
   267  	const strict = false
   268  	return parse(s, strict)
   269  }
   270  
   271  // parse parse s as an S3 ETag, returning the result.
   272  // It operates in one of two modes:
   273  //  - strict
   274  //  - non-strict
   275  //
   276  // In strict mode, parse only accepts ETags that
   277  // are AWS S3 compatible. In particular, an AWS
   278  // S3 ETag always consists of a 128 bit checksum
   279  // value and an optional -<part-number> suffix.
   280  // Therefore, s must have the following form in
   281  // strict mode:  <32-hex-characters>[-<integer>]
   282  //
   283  // In non-strict mode, parse also accepts ETags
   284  // that are not AWS S3 compatible - e.g. encrypted
   285  // ETags.
   286  func parse(s string, strict bool) (ETag, error) {
   287  	// An S3 ETag may be a double-quoted string.
   288  	// Therefore, we remove double quotes at the
   289  	// start and end, if any.
   290  	if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) {
   291  		s = s[1 : len(s)-1]
   292  	}
   293  
   294  	// An S3 ETag may be a multipart ETag that
   295  	// contains a '-' followed by a number.
   296  	// If the ETag does not a '-' is is either
   297  	// a singlepart or encrypted ETag.
   298  	n := strings.IndexRune(s, '-')
   299  	if n == -1 {
   300  		etag, err := hex.DecodeString(s)
   301  		if err != nil {
   302  			return nil, err
   303  		}
   304  		if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long
   305  			return nil, fmt.Errorf("etag: invalid length %d", len(etag))
   306  		}
   307  		return ETag(etag), nil
   308  	}
   309  
   310  	prefix, suffix := s[:n], s[n:]
   311  	if len(prefix) != 32 {
   312  		return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix))
   313  	}
   314  	if len(suffix) <= 1 {
   315  		return nil, errors.New("etag: suffix is not a part number")
   316  	}
   317  
   318  	etag, err := hex.DecodeString(prefix)
   319  	if err != nil {
   320  		return nil, err
   321  	}
   322  	partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1]
   323  	if err != nil {
   324  		return nil, err
   325  	}
   326  	if strict && (partNumber == 0 || partNumber > 10000) {
   327  		return nil, fmt.Errorf("etag: invalid part number %d", partNumber)
   328  	}
   329  	return ETag(append(etag, suffix...)), nil
   330  }