github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/hash/checksum.go (about)

     1  // Copyright (c) 2015-2022 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package hash
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"crypto/sha1"
    24  	"encoding/base64"
    25  	"encoding/binary"
    26  	"fmt"
    27  	"hash"
    28  	"hash/crc32"
    29  	"net/http"
    30  	"strings"
    31  
    32  	"github.com/minio/minio/internal/hash/sha256"
    33  	xhttp "github.com/minio/minio/internal/http"
    34  	"github.com/minio/minio/internal/logger"
    35  )
    36  
// MinIOMultipartChecksum is the metadata key set on multipart uploads to indicate the checksum type.
const MinIOMultipartChecksum = "x-minio-multipart-checksum"
    39  
    40  // ChecksumType contains information about the checksum type.
    41  type ChecksumType uint32
    42  
    43  const (
    44  
    45  	// ChecksumTrailing indicates the checksum will be sent in the trailing header.
    46  	// Another checksum type will be set.
    47  	ChecksumTrailing ChecksumType = 1 << iota
    48  
    49  	// ChecksumSHA256 indicates a SHA256 checksum.
    50  	ChecksumSHA256
    51  	// ChecksumSHA1 indicates a SHA-1 checksum.
    52  	ChecksumSHA1
    53  	// ChecksumCRC32 indicates a CRC32 checksum with IEEE table.
    54  	ChecksumCRC32
    55  	// ChecksumCRC32C indicates a CRC32 checksum with Castagnoli table.
    56  	ChecksumCRC32C
    57  	// ChecksumInvalid indicates an invalid checksum.
    58  	ChecksumInvalid
    59  	// ChecksumMultipart indicates the checksum is from a multipart upload.
    60  	ChecksumMultipart
    61  	// ChecksumIncludesMultipart indicates the checksum also contains part checksums.
    62  	ChecksumIncludesMultipart
    63  
    64  	// ChecksumNone indicates no checksum.
    65  	ChecksumNone ChecksumType = 0
    66  )
    67  
// Checksum is a checksum type together with its value, kept both as a
// base64 encoded string and as the raw bytes.
type Checksum struct {
	Type    ChecksumType // algorithm plus any modifier flags
	Encoded string       // base64 (standard encoding) of the checksum value
	Raw     []byte       // un-encoded checksum bytes
}
    74  
    75  // Is returns if c is all of t.
    76  func (c ChecksumType) Is(t ChecksumType) bool {
    77  	if t == ChecksumNone {
    78  		return c == ChecksumNone
    79  	}
    80  	return c&t == t
    81  }
    82  
    83  // Key returns the header key.
    84  // returns empty string if invalid or none.
    85  func (c ChecksumType) Key() string {
    86  	switch {
    87  	case c.Is(ChecksumCRC32):
    88  		return xhttp.AmzChecksumCRC32
    89  	case c.Is(ChecksumCRC32C):
    90  		return xhttp.AmzChecksumCRC32C
    91  	case c.Is(ChecksumSHA1):
    92  		return xhttp.AmzChecksumSHA1
    93  	case c.Is(ChecksumSHA256):
    94  		return xhttp.AmzChecksumSHA256
    95  	}
    96  	return ""
    97  }
    98  
    99  // RawByteLen returns the size of the un-encoded checksum.
   100  func (c ChecksumType) RawByteLen() int {
   101  	switch {
   102  	case c.Is(ChecksumCRC32):
   103  		return 4
   104  	case c.Is(ChecksumCRC32C):
   105  		return 4
   106  	case c.Is(ChecksumSHA1):
   107  		return sha1.Size
   108  	case c.Is(ChecksumSHA256):
   109  		return sha256.Size
   110  	}
   111  	return 0
   112  }
   113  
   114  // IsSet returns whether the type is valid and known.
   115  func (c ChecksumType) IsSet() bool {
   116  	return !c.Is(ChecksumInvalid) && !c.Is(ChecksumNone)
   117  }
   118  
   119  // NewChecksumType returns a checksum type based on the algorithm string.
   120  func NewChecksumType(alg string) ChecksumType {
   121  	switch strings.ToUpper(alg) {
   122  	case "CRC32":
   123  		return ChecksumCRC32
   124  	case "CRC32C":
   125  		return ChecksumCRC32C
   126  	case "SHA1":
   127  		return ChecksumSHA1
   128  	case "SHA256":
   129  		return ChecksumSHA256
   130  	case "":
   131  		return ChecksumNone
   132  	}
   133  	return ChecksumInvalid
   134  }
   135  
   136  // String returns the type as a string.
   137  func (c ChecksumType) String() string {
   138  	switch {
   139  	case c.Is(ChecksumCRC32):
   140  		return "CRC32"
   141  	case c.Is(ChecksumCRC32C):
   142  		return "CRC32C"
   143  	case c.Is(ChecksumSHA1):
   144  		return "SHA1"
   145  	case c.Is(ChecksumSHA256):
   146  		return "SHA256"
   147  	case c.Is(ChecksumNone):
   148  		return ""
   149  	}
   150  	return "invalid"
   151  }
   152  
   153  // Hasher returns a hasher corresponding to the checksum type.
   154  // Returns nil if no checksum.
   155  func (c ChecksumType) Hasher() hash.Hash {
   156  	switch {
   157  	case c.Is(ChecksumCRC32):
   158  		return crc32.NewIEEE()
   159  	case c.Is(ChecksumCRC32C):
   160  		return crc32.New(crc32.MakeTable(crc32.Castagnoli))
   161  	case c.Is(ChecksumSHA1):
   162  		return sha1.New()
   163  	case c.Is(ChecksumSHA256):
   164  		return sha256.New()
   165  	}
   166  	return nil
   167  }
   168  
   169  // Trailing return whether the checksum is trailing.
   170  func (c ChecksumType) Trailing() bool {
   171  	return c.Is(ChecksumTrailing)
   172  }
   173  
   174  // NewChecksumFromData returns a new checksum from specified algorithm and base64 encoded value.
   175  func NewChecksumFromData(t ChecksumType, data []byte) *Checksum {
   176  	if !t.IsSet() {
   177  		return nil
   178  	}
   179  	h := t.Hasher()
   180  	h.Write(data)
   181  	raw := h.Sum(nil)
   182  	c := Checksum{Type: t, Encoded: base64.StdEncoding.EncodeToString(raw), Raw: raw}
   183  	if !c.Valid() {
   184  		return nil
   185  	}
   186  	return &c
   187  }
   188  
   189  // ReadCheckSums will read checksums from b and return them.
   190  func ReadCheckSums(b []byte, part int) map[string]string {
   191  	res := make(map[string]string, 1)
   192  	for len(b) > 0 {
   193  		t, n := binary.Uvarint(b)
   194  		if n < 0 {
   195  			break
   196  		}
   197  		b = b[n:]
   198  
   199  		typ := ChecksumType(t)
   200  		length := typ.RawByteLen()
   201  		if length == 0 || len(b) < length {
   202  			break
   203  		}
   204  		cs := base64.StdEncoding.EncodeToString(b[:length])
   205  		b = b[length:]
   206  		if typ.Is(ChecksumMultipart) {
   207  			t, n := binary.Uvarint(b)
   208  			if n < 0 {
   209  				break
   210  			}
   211  			cs = fmt.Sprintf("%s-%d", cs, t)
   212  			b = b[n:]
   213  			if part > 0 {
   214  				cs = ""
   215  			}
   216  			if typ.Is(ChecksumIncludesMultipart) {
   217  				wantLen := int(t) * length
   218  				if len(b) < wantLen {
   219  					break
   220  				}
   221  				// Read part checksum
   222  				if part > 0 && uint64(part) <= t {
   223  					offset := (part - 1) * length
   224  					partCs := b[offset:]
   225  					cs = base64.StdEncoding.EncodeToString(partCs[:length])
   226  				}
   227  				b = b[wantLen:]
   228  			}
   229  		} else if part > 1 {
   230  			// For non-multipart, checksum is part 1.
   231  			cs = ""
   232  		}
   233  		if cs != "" {
   234  			res[typ.String()] = cs
   235  		}
   236  	}
   237  	if len(res) == 0 {
   238  		res = nil
   239  	}
   240  	return res
   241  }
   242  
   243  // ReadPartCheckSums will read all part checksums from b and return them.
   244  func ReadPartCheckSums(b []byte) (res []map[string]string) {
   245  	for len(b) > 0 {
   246  		t, n := binary.Uvarint(b)
   247  		if n <= 0 {
   248  			break
   249  		}
   250  		b = b[n:]
   251  
   252  		typ := ChecksumType(t)
   253  		length := typ.RawByteLen()
   254  		if length == 0 || len(b) < length {
   255  			break
   256  		}
   257  		// Skip main checksum
   258  		b = b[length:]
   259  		if !typ.Is(ChecksumIncludesMultipart) {
   260  			continue
   261  		}
   262  		parts, n := binary.Uvarint(b)
   263  		if n <= 0 {
   264  			break
   265  		}
   266  		if len(res) == 0 {
   267  			res = make([]map[string]string, parts)
   268  		}
   269  		b = b[n:]
   270  		for part := 0; part < int(parts); part++ {
   271  			if len(b) < length {
   272  				break
   273  			}
   274  			// Read part checksum
   275  			cs := base64.StdEncoding.EncodeToString(b[:length])
   276  			b = b[length:]
   277  			if res[part] == nil {
   278  				res[part] = make(map[string]string, 1)
   279  			}
   280  			res[part][typ.String()] = cs
   281  		}
   282  	}
   283  	return res
   284  }
   285  
   286  // NewChecksumWithType is similar to NewChecksumString but expects input algo of ChecksumType.
   287  func NewChecksumWithType(alg ChecksumType, value string) *Checksum {
   288  	if !alg.IsSet() {
   289  		return nil
   290  	}
   291  	bvalue, err := base64.StdEncoding.DecodeString(value)
   292  	if err != nil {
   293  		return nil
   294  	}
   295  	c := Checksum{Type: alg, Encoded: value, Raw: bvalue}
   296  	if !c.Valid() {
   297  		return nil
   298  	}
   299  	return &c
   300  }
   301  
   302  // NewChecksumString returns a new checksum from specified algorithm and base64 encoded value.
   303  func NewChecksumString(alg, value string) *Checksum {
   304  	return NewChecksumWithType(NewChecksumType(alg), value)
   305  }
   306  
   307  // AppendTo will append the checksum to b.
   308  // 'parts' is used when checksum has ChecksumMultipart set.
   309  // ReadCheckSums reads the values back.
   310  func (c *Checksum) AppendTo(b []byte, parts []byte) []byte {
   311  	if c == nil {
   312  		return nil
   313  	}
   314  	var tmp [binary.MaxVarintLen32]byte
   315  	n := binary.PutUvarint(tmp[:], uint64(c.Type))
   316  	crc := c.Raw
   317  	if len(crc) != c.Type.RawByteLen() {
   318  		return b
   319  	}
   320  	b = append(b, tmp[:n]...)
   321  	b = append(b, crc...)
   322  	if c.Type.Is(ChecksumMultipart) {
   323  		var checksums int
   324  		// Ensure we don't divide by 0:
   325  		if c.Type.RawByteLen() == 0 || len(parts)%c.Type.RawByteLen() != 0 {
   326  			logger.LogIf(context.Background(), fmt.Errorf("internal error: Unexpected checksum length: %d, each checksum %d", len(parts), c.Type.RawByteLen()))
   327  			checksums = 0
   328  			parts = nil
   329  		} else {
   330  			checksums = len(parts) / c.Type.RawByteLen()
   331  		}
   332  		if !c.Type.Is(ChecksumIncludesMultipart) {
   333  			parts = nil
   334  		}
   335  		n := binary.PutUvarint(tmp[:], uint64(checksums))
   336  		b = append(b, tmp[:n]...)
   337  		if len(parts) > 0 {
   338  			b = append(b, parts...)
   339  		}
   340  	}
   341  	return b
   342  }
   343  
   344  // Valid returns whether checksum is valid.
   345  func (c Checksum) Valid() bool {
   346  	if c.Type == ChecksumInvalid {
   347  		return false
   348  	}
   349  	if len(c.Encoded) == 0 || c.Type.Trailing() {
   350  		return c.Type.Is(ChecksumNone) || c.Type.Trailing()
   351  	}
   352  	raw := c.Raw
   353  	return c.Type.RawByteLen() == len(raw)
   354  }
   355  
   356  // Matches returns whether given content matches c.
   357  func (c Checksum) Matches(content []byte) error {
   358  	if len(c.Encoded) == 0 {
   359  		return nil
   360  	}
   361  	hasher := c.Type.Hasher()
   362  	_, err := hasher.Write(content)
   363  	if err != nil {
   364  		return err
   365  	}
   366  	sum := hasher.Sum(nil)
   367  	if !bytes.Equal(sum, c.Raw) {
   368  		return ChecksumMismatch{
   369  			Want: c.Encoded,
   370  			Got:  base64.StdEncoding.EncodeToString(sum),
   371  		}
   372  	}
   373  	return nil
   374  }
   375  
   376  // AsMap returns the
   377  func (c *Checksum) AsMap() map[string]string {
   378  	if c == nil || !c.Valid() {
   379  		return nil
   380  	}
   381  	return map[string]string{c.Type.String(): c.Encoded}
   382  }
   383  
   384  // TransferChecksumHeader will transfer any checksum value that has been checked.
   385  // If checksum was trailing, they must have been added to r.Trailer.
   386  func TransferChecksumHeader(w http.ResponseWriter, r *http.Request) {
   387  	c, err := GetContentChecksum(r.Header)
   388  	if err != nil || c == nil {
   389  		return
   390  	}
   391  	t, s := c.Type, c.Encoded
   392  	if !c.Type.IsSet() {
   393  		return
   394  	}
   395  	if c.Type.Is(ChecksumTrailing) {
   396  		val := r.Trailer.Get(t.Key())
   397  		if val != "" {
   398  			w.Header().Set(t.Key(), val)
   399  		}
   400  		return
   401  	}
   402  	w.Header().Set(t.Key(), s)
   403  }
   404  
   405  // AddChecksumHeader will transfer any checksum value that has been checked.
   406  func AddChecksumHeader(w http.ResponseWriter, c map[string]string) {
   407  	for k, v := range c {
   408  		cksum := NewChecksumString(k, v)
   409  		if cksum == nil {
   410  			continue
   411  		}
   412  		if cksum.Valid() {
   413  			w.Header().Set(cksum.Type.Key(), v)
   414  		}
   415  	}
   416  }
   417  
   418  // GetContentChecksum returns content checksum.
   419  // Returns ErrInvalidChecksum if so.
   420  // Returns nil, nil if no checksum.
   421  func GetContentChecksum(h http.Header) (*Checksum, error) {
   422  	if trailing := h.Values(xhttp.AmzTrailer); len(trailing) > 0 {
   423  		var res *Checksum
   424  		for _, header := range trailing {
   425  			var duplicates bool
   426  			switch {
   427  			case strings.EqualFold(header, ChecksumCRC32C.Key()):
   428  				duplicates = res != nil
   429  				res = NewChecksumWithType(ChecksumCRC32C|ChecksumTrailing, "")
   430  			case strings.EqualFold(header, ChecksumCRC32.Key()):
   431  				duplicates = res != nil
   432  				res = NewChecksumWithType(ChecksumCRC32|ChecksumTrailing, "")
   433  			case strings.EqualFold(header, ChecksumSHA256.Key()):
   434  				duplicates = res != nil
   435  				res = NewChecksumWithType(ChecksumSHA256|ChecksumTrailing, "")
   436  			case strings.EqualFold(header, ChecksumSHA1.Key()):
   437  				duplicates = res != nil
   438  				res = NewChecksumWithType(ChecksumSHA1|ChecksumTrailing, "")
   439  			}
   440  			if duplicates {
   441  				return nil, ErrInvalidChecksum
   442  			}
   443  		}
   444  		if res != nil {
   445  			return res, nil
   446  		}
   447  	}
   448  	t, s := getContentChecksum(h)
   449  	if t == ChecksumNone {
   450  		if s == "" {
   451  			return nil, nil
   452  		}
   453  		return nil, ErrInvalidChecksum
   454  	}
   455  	cksum := NewChecksumWithType(t, s)
   456  	if cksum == nil {
   457  		return nil, ErrInvalidChecksum
   458  	}
   459  	return cksum, nil
   460  }
   461  
   462  // getContentChecksum returns content checksum type and value.
   463  // Returns ChecksumInvalid if so.
   464  func getContentChecksum(h http.Header) (t ChecksumType, s string) {
   465  	t = ChecksumNone
   466  	alg := h.Get(xhttp.AmzChecksumAlgo)
   467  	if alg != "" {
   468  		t |= NewChecksumType(alg)
   469  		if t.IsSet() {
   470  			hdr := t.Key()
   471  			if s = h.Get(hdr); s == "" {
   472  				return ChecksumNone, ""
   473  			}
   474  		}
   475  		return t, s
   476  	}
   477  	checkType := func(c ChecksumType) {
   478  		if got := h.Get(c.Key()); got != "" {
   479  			// If already set, invalid
   480  			if t != ChecksumNone {
   481  				t = ChecksumInvalid
   482  				s = ""
   483  			} else {
   484  				t = c
   485  				s = got
   486  			}
   487  			return
   488  		}
   489  	}
   490  	checkType(ChecksumCRC32)
   491  	checkType(ChecksumCRC32C)
   492  	checkType(ChecksumSHA1)
   493  	checkType(ChecksumSHA256)
   494  	return t, s
   495  }