// github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/hash/reader.go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

package hash

import (
	"bytes"
	"context"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"hash"
	"io"
	"net/http"

	"github.com/minio/minio/internal/etag"
	"github.com/minio/minio/internal/hash/sha256"
	"github.com/minio/minio/internal/ioutil"
)

// A Reader wraps an io.Reader and computes the MD5 checksum
// of the read content as ETag. Optionally, it also computes
// the SHA256 checksum of the content.
//
// If the reference values for the ETag and content SHA256
// are not empty then it will check whether the computed
// values match the reference values.
42 type Reader struct { 43 src io.Reader 44 bytesRead int64 45 expectedMin int64 46 expectedMax int64 47 48 size int64 49 actualSize int64 50 51 checksum etag.ETag 52 contentSHA256 []byte 53 54 // Content checksum 55 contentHash Checksum 56 contentHasher hash.Hash 57 disableMD5 bool 58 59 trailer http.Header 60 61 sha256 hash.Hash 62 } 63 64 // Options are optional arguments to NewReaderWithOpts, Options 65 // simply converts positional arguments to NewReader() into a 66 // more flexible way to provide optional inputs. This is currently 67 // used by the FanOut API call mostly to disable expensive md5sum 68 // calculation repeatedly under hash.Reader. 69 type Options struct { 70 MD5Hex string 71 SHA256Hex string 72 Size int64 73 ActualSize int64 74 DisableMD5 bool 75 ForceMD5 []byte 76 } 77 78 // NewReaderWithOpts is like NewReader but takes `Options` as argument, allowing 79 // callers to indicate if they want to disable md5sum checksum. 80 func NewReaderWithOpts(ctx context.Context, src io.Reader, opts Options) (*Reader, error) { 81 // return hard limited reader 82 return newReader(ctx, src, opts.Size, opts.MD5Hex, opts.SHA256Hex, opts.ActualSize, opts.DisableMD5, opts.ForceMD5) 83 } 84 85 // NewReader returns a new Reader that wraps src and computes 86 // MD5 checksum of everything it reads as ETag. 87 // 88 // It also computes the SHA256 checksum of everything it reads 89 // if sha256Hex is not the empty string. 90 // 91 // If size resp. actualSize is unknown at the time of calling 92 // NewReader then it should be set to -1. 93 // When size is >=0 it *must* match the amount of data provided by r. 94 // 95 // NewReader may try merge the given size, MD5 and SHA256 values 96 // into src - if src is a Reader - to avoid computing the same 97 // checksums multiple times. 98 // NewReader enforces S3 compatibility strictly by ensuring caller 99 // does not send more content than specified size. 
100 func NewReader(ctx context.Context, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) (*Reader, error) { 101 return newReader(ctx, src, size, md5Hex, sha256Hex, actualSize, false, nil) 102 } 103 104 func newReader(ctx context.Context, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, disableMD5 bool, forceMD5 []byte) (*Reader, error) { 105 MD5, err := hex.DecodeString(md5Hex) 106 if err != nil { 107 return nil, BadDigest{ // TODO(aead): Return an error that indicates that an invalid ETag has been specified 108 ExpectedMD5: md5Hex, 109 CalculatedMD5: "", 110 } 111 } 112 SHA256, err := hex.DecodeString(sha256Hex) 113 if err != nil { 114 return nil, SHA256Mismatch{ // TODO(aead): Return an error that indicates that an invalid Content-SHA256 has been specified 115 ExpectedSHA256: sha256Hex, 116 CalculatedSHA256: "", 117 } 118 } 119 120 // Merge the size, MD5 and SHA256 values if src is a Reader. 121 // The size may be set to -1 by callers if unknown. 
122 if r, ok := src.(*Reader); ok { 123 if r.bytesRead > 0 { 124 return nil, errors.New("hash: already read from hash reader") 125 } 126 if len(r.checksum) != 0 && len(MD5) != 0 && !etag.Equal(r.checksum, MD5) { 127 return nil, BadDigest{ 128 ExpectedMD5: r.checksum.String(), 129 CalculatedMD5: md5Hex, 130 } 131 } 132 if len(r.contentSHA256) != 0 && len(SHA256) != 0 && !bytes.Equal(r.contentSHA256, SHA256) { 133 return nil, SHA256Mismatch{ 134 ExpectedSHA256: hex.EncodeToString(r.contentSHA256), 135 CalculatedSHA256: sha256Hex, 136 } 137 } 138 if r.size >= 0 && size >= 0 && r.size != size { 139 return nil, SizeMismatch{Want: r.size, Got: size} 140 } 141 142 r.checksum = MD5 143 r.contentSHA256 = SHA256 144 if r.size < 0 && size >= 0 { 145 r.src = etag.Wrap(ioutil.HardLimitReader(r.src, size), r.src) 146 r.size = size 147 } 148 if r.actualSize <= 0 && actualSize >= 0 { 149 r.actualSize = actualSize 150 } 151 return r, nil 152 } 153 154 if size >= 0 { 155 r := ioutil.HardLimitReader(src, size) 156 if !disableMD5 { 157 if _, ok := src.(etag.Tagger); !ok { 158 src = etag.NewReader(ctx, r, MD5, forceMD5) 159 } else { 160 src = etag.Wrap(r, src) 161 } 162 } else { 163 src = r 164 } 165 } else if _, ok := src.(etag.Tagger); !ok { 166 if !disableMD5 { 167 src = etag.NewReader(ctx, src, MD5, forceMD5) 168 } 169 } 170 var h hash.Hash 171 if len(SHA256) != 0 { 172 h = sha256.New() 173 } 174 return &Reader{ 175 src: src, 176 size: size, 177 actualSize: actualSize, 178 checksum: MD5, 179 contentSHA256: SHA256, 180 sha256: h, 181 disableMD5: disableMD5, 182 }, nil 183 } 184 185 // ErrInvalidChecksum is returned when an invalid checksum is provided in headers. 
186 var ErrInvalidChecksum = errors.New("invalid checksum") 187 188 // SetExpectedMin set expected minimum data expected from reader 189 func (r *Reader) SetExpectedMin(expectedMin int64) { 190 r.expectedMin = expectedMin 191 } 192 193 // SetExpectedMax set expected max data expected from reader 194 func (r *Reader) SetExpectedMax(expectedMax int64) { 195 r.expectedMax = expectedMax 196 } 197 198 // AddChecksum will add checksum checks as specified in 199 // https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html 200 // Returns ErrInvalidChecksum if a problem with the checksum is found. 201 func (r *Reader) AddChecksum(req *http.Request, ignoreValue bool) error { 202 cs, err := GetContentChecksum(req.Header) 203 if err != nil { 204 return ErrInvalidChecksum 205 } 206 if cs == nil { 207 return nil 208 } 209 r.contentHash = *cs 210 if cs.Type.Trailing() { 211 r.trailer = req.Trailer 212 } 213 return r.AddNonTrailingChecksum(cs, ignoreValue) 214 } 215 216 // AddChecksumNoTrailer will add checksum checks as specified in 217 // https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html 218 // Returns ErrInvalidChecksum if a problem with the checksum is found. 219 func (r *Reader) AddChecksumNoTrailer(headers http.Header, ignoreValue bool) error { 220 cs, err := GetContentChecksum(headers) 221 if err != nil { 222 return ErrInvalidChecksum 223 } 224 if cs == nil { 225 return nil 226 } 227 r.contentHash = *cs 228 return r.AddNonTrailingChecksum(cs, ignoreValue) 229 } 230 231 // AddNonTrailingChecksum will add a checksum to the reader. 232 // The checksum cannot be trailing. 
233 func (r *Reader) AddNonTrailingChecksum(cs *Checksum, ignoreValue bool) error { 234 if cs == nil { 235 return nil 236 } 237 r.contentHash = *cs 238 if ignoreValue { 239 // Do not validate, but allow for transfer 240 return nil 241 } 242 243 r.contentHasher = cs.Type.Hasher() 244 if r.contentHasher == nil { 245 return ErrInvalidChecksum 246 } 247 return nil 248 } 249 250 func (r *Reader) Read(p []byte) (int, error) { 251 n, err := r.src.Read(p) 252 r.bytesRead += int64(n) 253 if r.sha256 != nil { 254 r.sha256.Write(p[:n]) 255 } 256 if r.contentHasher != nil { 257 r.contentHasher.Write(p[:n]) 258 } 259 260 if err == io.EOF { // Verify content SHA256, if set. 261 if r.expectedMin > 0 { 262 if r.bytesRead < r.expectedMin { 263 return 0, SizeTooSmall{Want: r.expectedMin, Got: r.bytesRead} 264 } 265 } 266 if r.expectedMax > 0 { 267 if r.bytesRead > r.expectedMax { 268 return 0, SizeTooLarge{Want: r.expectedMax, Got: r.bytesRead} 269 } 270 } 271 272 if r.sha256 != nil { 273 if sum := r.sha256.Sum(nil); !bytes.Equal(r.contentSHA256, sum) { 274 return n, SHA256Mismatch{ 275 ExpectedSHA256: hex.EncodeToString(r.contentSHA256), 276 CalculatedSHA256: hex.EncodeToString(sum), 277 } 278 } 279 } 280 if r.contentHasher != nil { 281 if r.contentHash.Type.Trailing() { 282 var err error 283 r.contentHash.Encoded = r.trailer.Get(r.contentHash.Type.Key()) 284 r.contentHash.Raw, err = base64.StdEncoding.DecodeString(r.contentHash.Encoded) 285 if err != nil || len(r.contentHash.Raw) == 0 { 286 return 0, ChecksumMismatch{Got: r.contentHash.Encoded} 287 } 288 } 289 if sum := r.contentHasher.Sum(nil); !bytes.Equal(r.contentHash.Raw, sum) { 290 err := ChecksumMismatch{ 291 Want: r.contentHash.Encoded, 292 Got: base64.StdEncoding.EncodeToString(sum), 293 } 294 return n, err 295 } 296 } 297 } 298 if err != nil && err != io.EOF { 299 if v, ok := err.(etag.VerifyError); ok { 300 return n, BadDigest{ 301 ExpectedMD5: v.Expected.String(), 302 CalculatedMD5: v.Computed.String(), 303 } 304 } 305 
} 306 return n, err 307 } 308 309 // Size returns the absolute number of bytes the Reader 310 // will return during reading. It returns -1 for unlimited 311 // data. 312 func (r *Reader) Size() int64 { return r.size } 313 314 // ActualSize returns the pre-modified size of the object. 315 // DecompressedSize - For compressed objects. 316 func (r *Reader) ActualSize() int64 { return r.actualSize } 317 318 // ETag returns the ETag computed by an underlying etag.Tagger. 319 // If the underlying io.Reader does not implement etag.Tagger 320 // it returns nil. 321 func (r *Reader) ETag() etag.ETag { 322 if t, ok := r.src.(etag.Tagger); ok { 323 return t.ETag() 324 } 325 return nil 326 } 327 328 // MD5Current returns the MD5 checksum of the content 329 // that has been read so far. 330 // 331 // Calling MD5Current again after reading more data may 332 // result in a different checksum. 333 func (r *Reader) MD5Current() []byte { 334 if r.disableMD5 { 335 return r.checksum 336 } 337 return r.ETag()[:] 338 } 339 340 // SHA256 returns the SHA256 checksum set as reference value. 341 // 342 // It corresponds to the checksum that is expected and 343 // not the actual SHA256 checksum of the content. 344 func (r *Reader) SHA256() []byte { 345 return r.contentSHA256 346 } 347 348 // SHA256HexString returns a hex representation of the SHA256. 349 func (r *Reader) SHA256HexString() string { 350 return hex.EncodeToString(r.contentSHA256) 351 } 352 353 // ContentCRCType returns the content checksum type. 354 func (r *Reader) ContentCRCType() ChecksumType { 355 return r.contentHash.Type 356 } 357 358 // ContentCRC returns the content crc if set. 
359 func (r *Reader) ContentCRC() map[string]string { 360 if r.contentHash.Type == ChecksumNone || !r.contentHash.Valid() { 361 return nil 362 } 363 if r.contentHash.Type.Trailing() { 364 return map[string]string{r.contentHash.Type.String(): r.trailer.Get(r.contentHash.Type.Key())} 365 } 366 return map[string]string{r.contentHash.Type.String(): r.contentHash.Encoded} 367 } 368 369 var _ io.Closer = (*Reader)(nil) // compiler check 370 371 // Close and release resources. 372 func (r *Reader) Close() error { return nil }