github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/hash/reader.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package hash
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"encoding/base64"
    24  	"encoding/hex"
    25  	"errors"
    26  	"hash"
    27  	"io"
    28  	"net/http"
    29  
    30  	"github.com/minio/minio/internal/etag"
    31  	"github.com/minio/minio/internal/hash/sha256"
    32  	"github.com/minio/minio/internal/ioutil"
    33  )
    34  
// A Reader wraps an io.Reader and computes the MD5 checksum
// of the read content as ETag. Optionally, it also computes
// the SHA256 checksum of the content.
//
// If the reference values for the ETag and content SHA256
// are not empty then it will check whether the computed
// values match the reference values.
type Reader struct {
	// src is the wrapped data source; Read delegates to it and ETag
	// queries it when it implements etag.Tagger.
	src io.Reader
	// bytesRead is the running total of bytes returned by src.
	bytesRead int64
	// expectedMin and expectedMax bound bytesRead; they are checked
	// once src reports io.EOF and ignored when <= 0.
	expectedMin int64
	expectedMax int64

	// size is the value reported by Size; -1 means unknown/unlimited.
	size int64
	// actualSize is the value reported by ActualSize.
	actualSize int64

	// checksum is the reference MD5 (ETag) decoded from the hex
	// string given at construction; empty if none was provided.
	checksum etag.ETag
	// contentSHA256 is the reference SHA256 decoded from the hex
	// string given at construction; empty if none was provided.
	contentSHA256 []byte

	// Content checksum
	// contentHash is the checksum description installed by the
	// Add*Checksum methods.
	contentHash Checksum
	// contentHasher accumulates the content checksum while reading;
	// it stays nil when the checksum value is not validated.
	contentHasher hash.Hash
	// disableMD5 makes MD5Current return the reference checksum
	// instead of consulting the underlying etag.Tagger.
	disableMD5 bool

	// trailer is the request trailer the encoded value of a trailing
	// checksum is looked up in.
	trailer http.Header

	// sha256 accumulates the SHA256 of the content read so far; it is
	// nil when the content SHA256 is not verified.
	sha256 hash.Hash
}
    63  
// Options are optional arguments to NewReaderWithOpts. Options
// simply converts the positional arguments of NewReader() into a
// more flexible way to provide optional inputs. This is currently
// used mostly by the FanOut API call to avoid repeating the expensive
// md5sum calculation under hash.Reader.
type Options struct {
	// MD5Hex is the hex-encoded reference MD5; may be empty.
	MD5Hex string
	// SHA256Hex is the hex-encoded reference SHA256; may be empty.
	SHA256Hex string
	// Size is the content size, or -1 if unknown.
	Size int64
	// ActualSize is the pre-modified size of the object, or -1 if unknown.
	ActualSize int64
	// DisableMD5 skips wrapping the source in an MD5-computing etag reader.
	DisableMD5 bool
	// ForceMD5 is handed to etag.NewReader unchanged.
	// NOTE(review): presumably a precomputed MD5 to report instead of
	// hashing the content - confirm against the etag package.
	ForceMD5 []byte
}
    77  
    78  // NewReaderWithOpts is like NewReader but takes `Options` as argument, allowing
    79  // callers to indicate if they want to disable md5sum checksum.
    80  func NewReaderWithOpts(ctx context.Context, src io.Reader, opts Options) (*Reader, error) {
    81  	// return hard limited reader
    82  	return newReader(ctx, src, opts.Size, opts.MD5Hex, opts.SHA256Hex, opts.ActualSize, opts.DisableMD5, opts.ForceMD5)
    83  }
    84  
// NewReader returns a new Reader that wraps src and computes
// the MD5 checksum of everything it reads as ETag.
//
// It also computes the SHA256 checksum of everything it reads
// if sha256Hex is not the empty string.
//
// If size or actualSize is unknown at the time of calling
// NewReader then it should be set to -1.
// When size is >= 0 it *must* match the amount of data provided by src.
//
// NewReader may try to merge the given size, MD5 and SHA256 values
// into src - if src is a Reader - to avoid computing the same
// checksums multiple times.
// NewReader enforces S3 compatibility strictly by ensuring the caller
// does not send more content than the specified size.
//
// It returns BadDigest if md5Hex is not valid hex and SHA256Mismatch
// if sha256Hex is not valid hex.
func NewReader(ctx context.Context, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) (*Reader, error) {
	// MD5 computation stays enabled and no forced MD5 is supplied.
	return newReader(ctx, src, size, md5Hex, sha256Hex, actualSize, false, nil)
}
   103  
   104  func newReader(ctx context.Context, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, disableMD5 bool, forceMD5 []byte) (*Reader, error) {
   105  	MD5, err := hex.DecodeString(md5Hex)
   106  	if err != nil {
   107  		return nil, BadDigest{ // TODO(aead): Return an error that indicates that an invalid ETag has been specified
   108  			ExpectedMD5:   md5Hex,
   109  			CalculatedMD5: "",
   110  		}
   111  	}
   112  	SHA256, err := hex.DecodeString(sha256Hex)
   113  	if err != nil {
   114  		return nil, SHA256Mismatch{ // TODO(aead): Return an error that indicates that an invalid Content-SHA256 has been specified
   115  			ExpectedSHA256:   sha256Hex,
   116  			CalculatedSHA256: "",
   117  		}
   118  	}
   119  
   120  	// Merge the size, MD5 and SHA256 values if src is a Reader.
   121  	// The size may be set to -1 by callers if unknown.
   122  	if r, ok := src.(*Reader); ok {
   123  		if r.bytesRead > 0 {
   124  			return nil, errors.New("hash: already read from hash reader")
   125  		}
   126  		if len(r.checksum) != 0 && len(MD5) != 0 && !etag.Equal(r.checksum, MD5) {
   127  			return nil, BadDigest{
   128  				ExpectedMD5:   r.checksum.String(),
   129  				CalculatedMD5: md5Hex,
   130  			}
   131  		}
   132  		if len(r.contentSHA256) != 0 && len(SHA256) != 0 && !bytes.Equal(r.contentSHA256, SHA256) {
   133  			return nil, SHA256Mismatch{
   134  				ExpectedSHA256:   hex.EncodeToString(r.contentSHA256),
   135  				CalculatedSHA256: sha256Hex,
   136  			}
   137  		}
   138  		if r.size >= 0 && size >= 0 && r.size != size {
   139  			return nil, SizeMismatch{Want: r.size, Got: size}
   140  		}
   141  
   142  		r.checksum = MD5
   143  		r.contentSHA256 = SHA256
   144  		if r.size < 0 && size >= 0 {
   145  			r.src = etag.Wrap(ioutil.HardLimitReader(r.src, size), r.src)
   146  			r.size = size
   147  		}
   148  		if r.actualSize <= 0 && actualSize >= 0 {
   149  			r.actualSize = actualSize
   150  		}
   151  		return r, nil
   152  	}
   153  
   154  	if size >= 0 {
   155  		r := ioutil.HardLimitReader(src, size)
   156  		if !disableMD5 {
   157  			if _, ok := src.(etag.Tagger); !ok {
   158  				src = etag.NewReader(ctx, r, MD5, forceMD5)
   159  			} else {
   160  				src = etag.Wrap(r, src)
   161  			}
   162  		} else {
   163  			src = r
   164  		}
   165  	} else if _, ok := src.(etag.Tagger); !ok {
   166  		if !disableMD5 {
   167  			src = etag.NewReader(ctx, src, MD5, forceMD5)
   168  		}
   169  	}
   170  	var h hash.Hash
   171  	if len(SHA256) != 0 {
   172  		h = sha256.New()
   173  	}
   174  	return &Reader{
   175  		src:           src,
   176  		size:          size,
   177  		actualSize:    actualSize,
   178  		checksum:      MD5,
   179  		contentSHA256: SHA256,
   180  		sha256:        h,
   181  		disableMD5:    disableMD5,
   182  	}, nil
   183  }
   184  
// ErrInvalidChecksum is returned when an invalid checksum is provided in
// headers, or when no hasher is available for the requested checksum type.
var ErrInvalidChecksum = errors.New("invalid checksum")
   187  
// SetExpectedMin sets the minimum number of bytes expected from the reader.
// The limit is checked when the underlying source reports io.EOF; a value
// <= 0 disables the check.
func (r *Reader) SetExpectedMin(expectedMin int64) {
	r.expectedMin = expectedMin
}
   192  
// SetExpectedMax sets the maximum number of bytes expected from the reader.
// The limit is checked when the underlying source reports io.EOF; a value
// <= 0 disables the check.
func (r *Reader) SetExpectedMax(expectedMax int64) {
	r.expectedMax = expectedMax
}
   197  
   198  // AddChecksum will add checksum checks as specified in
   199  // https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
   200  // Returns ErrInvalidChecksum if a problem with the checksum is found.
   201  func (r *Reader) AddChecksum(req *http.Request, ignoreValue bool) error {
   202  	cs, err := GetContentChecksum(req.Header)
   203  	if err != nil {
   204  		return ErrInvalidChecksum
   205  	}
   206  	if cs == nil {
   207  		return nil
   208  	}
   209  	r.contentHash = *cs
   210  	if cs.Type.Trailing() {
   211  		r.trailer = req.Trailer
   212  	}
   213  	return r.AddNonTrailingChecksum(cs, ignoreValue)
   214  }
   215  
   216  // AddChecksumNoTrailer will add checksum checks as specified in
   217  // https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
   218  // Returns ErrInvalidChecksum if a problem with the checksum is found.
   219  func (r *Reader) AddChecksumNoTrailer(headers http.Header, ignoreValue bool) error {
   220  	cs, err := GetContentChecksum(headers)
   221  	if err != nil {
   222  		return ErrInvalidChecksum
   223  	}
   224  	if cs == nil {
   225  		return nil
   226  	}
   227  	r.contentHash = *cs
   228  	return r.AddNonTrailingChecksum(cs, ignoreValue)
   229  }
   230  
   231  // AddNonTrailingChecksum will add a checksum to the reader.
   232  // The checksum cannot be trailing.
   233  func (r *Reader) AddNonTrailingChecksum(cs *Checksum, ignoreValue bool) error {
   234  	if cs == nil {
   235  		return nil
   236  	}
   237  	r.contentHash = *cs
   238  	if ignoreValue {
   239  		// Do not validate, but allow for transfer
   240  		return nil
   241  	}
   242  
   243  	r.contentHasher = cs.Type.Hasher()
   244  	if r.contentHasher == nil {
   245  		return ErrInvalidChecksum
   246  	}
   247  	return nil
   248  }
   249  
   250  func (r *Reader) Read(p []byte) (int, error) {
   251  	n, err := r.src.Read(p)
   252  	r.bytesRead += int64(n)
   253  	if r.sha256 != nil {
   254  		r.sha256.Write(p[:n])
   255  	}
   256  	if r.contentHasher != nil {
   257  		r.contentHasher.Write(p[:n])
   258  	}
   259  
   260  	if err == io.EOF { // Verify content SHA256, if set.
   261  		if r.expectedMin > 0 {
   262  			if r.bytesRead < r.expectedMin {
   263  				return 0, SizeTooSmall{Want: r.expectedMin, Got: r.bytesRead}
   264  			}
   265  		}
   266  		if r.expectedMax > 0 {
   267  			if r.bytesRead > r.expectedMax {
   268  				return 0, SizeTooLarge{Want: r.expectedMax, Got: r.bytesRead}
   269  			}
   270  		}
   271  
   272  		if r.sha256 != nil {
   273  			if sum := r.sha256.Sum(nil); !bytes.Equal(r.contentSHA256, sum) {
   274  				return n, SHA256Mismatch{
   275  					ExpectedSHA256:   hex.EncodeToString(r.contentSHA256),
   276  					CalculatedSHA256: hex.EncodeToString(sum),
   277  				}
   278  			}
   279  		}
   280  		if r.contentHasher != nil {
   281  			if r.contentHash.Type.Trailing() {
   282  				var err error
   283  				r.contentHash.Encoded = r.trailer.Get(r.contentHash.Type.Key())
   284  				r.contentHash.Raw, err = base64.StdEncoding.DecodeString(r.contentHash.Encoded)
   285  				if err != nil || len(r.contentHash.Raw) == 0 {
   286  					return 0, ChecksumMismatch{Got: r.contentHash.Encoded}
   287  				}
   288  			}
   289  			if sum := r.contentHasher.Sum(nil); !bytes.Equal(r.contentHash.Raw, sum) {
   290  				err := ChecksumMismatch{
   291  					Want: r.contentHash.Encoded,
   292  					Got:  base64.StdEncoding.EncodeToString(sum),
   293  				}
   294  				return n, err
   295  			}
   296  		}
   297  	}
   298  	if err != nil && err != io.EOF {
   299  		if v, ok := err.(etag.VerifyError); ok {
   300  			return n, BadDigest{
   301  				ExpectedMD5:   v.Expected.String(),
   302  				CalculatedMD5: v.Computed.String(),
   303  			}
   304  		}
   305  	}
   306  	return n, err
   307  }
   308  
// Size returns the absolute number of bytes the Reader
// will return during reading. It returns -1 for unlimited
// data, i.e. when the size was not known at construction.
func (r *Reader) Size() int64 { return r.size }
   313  
// ActualSize returns the pre-modified size of the object,
// e.g. the decompressed size for compressed objects.
func (r *Reader) ActualSize() int64 { return r.actualSize }
   317  
   318  // ETag returns the ETag computed by an underlying etag.Tagger.
   319  // If the underlying io.Reader does not implement etag.Tagger
   320  // it returns nil.
   321  func (r *Reader) ETag() etag.ETag {
   322  	if t, ok := r.src.(etag.Tagger); ok {
   323  		return t.ETag()
   324  	}
   325  	return nil
   326  }
   327  
   328  // MD5Current returns the MD5 checksum of the content
   329  // that has been read so far.
   330  //
   331  // Calling MD5Current again after reading more data may
   332  // result in a different checksum.
   333  func (r *Reader) MD5Current() []byte {
   334  	if r.disableMD5 {
   335  		return r.checksum
   336  	}
   337  	return r.ETag()[:]
   338  }
   339  
// SHA256 returns the SHA256 checksum set as reference value.
//
// It corresponds to the checksum that is expected and
// not the actual SHA256 checksum of the content. It is
// empty when no reference value was provided.
func (r *Reader) SHA256() []byte {
	return r.contentSHA256
}
   347  
// SHA256HexString returns the hex representation of the reference
// SHA256 checksum, i.e. of the value returned by SHA256.
func (r *Reader) SHA256HexString() string {
	return hex.EncodeToString(r.contentSHA256)
}
   352  
// ContentCRCType returns the type of the content checksum recorded
// on the reader by one of the Add*Checksum methods.
func (r *Reader) ContentCRCType() ChecksumType {
	return r.contentHash.Type
}
   357  
   358  // ContentCRC returns the content crc if set.
   359  func (r *Reader) ContentCRC() map[string]string {
   360  	if r.contentHash.Type == ChecksumNone || !r.contentHash.Valid() {
   361  		return nil
   362  	}
   363  	if r.contentHash.Type.Trailing() {
   364  		return map[string]string{r.contentHash.Type.String(): r.trailer.Get(r.contentHash.Type.Key())}
   365  	}
   366  	return map[string]string{r.contentHash.Type.String(): r.contentHash.Encoded}
   367  }
   368  
var _ io.Closer = (*Reader)(nil) // compile-time check that *Reader implements io.Closer

// Close and release resources. It currently does nothing and always
// returns nil; the wrapped source is not closed.
func (r *Reader) Close() error { return nil }