storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/disk-cache-backend.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2019-2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cmd 18 19 import ( 20 "bytes" 21 "context" 22 "crypto/md5" 23 "crypto/rand" 24 "encoding/base64" 25 "encoding/hex" 26 "fmt" 27 "io" 28 "io/ioutil" 29 "net/http" 30 "os" 31 "strings" 32 "sync" 33 "sync/atomic" 34 "time" 35 36 "github.com/djherbis/atime" 37 "github.com/minio/sio" 38 39 "storj.io/minio/cmd/config/cache" 40 "storj.io/minio/cmd/crypto" 41 xhttp "storj.io/minio/cmd/http" 42 "storj.io/minio/cmd/logger" 43 "storj.io/minio/pkg/disk" 44 "storj.io/minio/pkg/fips" 45 "storj.io/minio/pkg/kms" 46 ) 47 48 const ( 49 // cache.json object metadata for cached objects. 50 cacheMetaJSONFile = "cache.json" 51 cacheDataFile = "part.1" 52 cacheMetaVersion = "1.0.0" 53 cacheExpiryDays = 90 * time.Hour * 24 // defaults to 90 days 54 // SSECacheEncrypted is the metadata key indicating that the object 55 // is a cache entry encrypted with cache KMS master key in globalCacheKMS. 56 SSECacheEncrypted = "X-Minio-Internal-Encrypted-Cache" 57 ) 58 59 // CacheChecksumInfoV1 - carries checksums of individual blocks on disk. 
type CacheChecksumInfoV1 struct {
	Algorithm string `json:"algorithm"`
	Blocksize int64  `json:"blocksize"`
}

// Represents the cache metadata struct
type cacheMeta struct {
	Version string   `json:"version"`
	Stat    StatInfo `json:"stat"` // Stat of the current object `cache.json`.

	// checksums of blocks on disk.
	Checksum CacheChecksumInfoV1 `json:"checksum,omitempty"`
	// Metadata map for current object.
	Meta map[string]string `json:"meta,omitempty"`
	// Ranges maps cached range to associated filename.
	Ranges map[string]string `json:"ranges,omitempty"`
	// Hits is a counter on the number of times this object has been accessed so far.
	Hits   int    `json:"hits,omitempty"`
	Bucket string `json:"bucket,omitempty"`
	Object string `json:"object,omitempty"`
}

// RangeInfo has the range, file and range length information for a cached range.
type RangeInfo struct {
	Range string
	File  string
	Size  int64
}

// Empty returns true if this is an empty struct
func (r *RangeInfo) Empty() bool {
	return r.Range == "" && r.File == "" && r.Size == 0
}

// ToObjectInfo converts the cached metadata into an ObjectInfo for the given
// bucket/object pair. Cache status fields are always marked as CacheHit;
// callers overwrite Bucket/Name when they pass empty strings here.
func (m *cacheMeta) ToObjectInfo(bucket, object string) (o ObjectInfo) {
	// An empty Meta map means the entry was never fully populated; use the
	// sentinel mod-time so downstream consumers can recognize it.
	if len(m.Meta) == 0 {
		m.Meta = make(map[string]string)
		m.Stat.ModTime = timeSentinel
	}

	o = ObjectInfo{
		Bucket:            bucket,
		Name:              object,
		CacheStatus:       CacheHit,
		CacheLookupStatus: CacheHit,
	}

	// We set file info only if its valid.
	o.Size = m.Stat.Size
	o.ETag = extractETag(m.Meta)
	o.ContentType = m.Meta["content-type"]
	o.ContentEncoding = m.Meta["content-encoding"]
	if storageClass, ok := m.Meta[xhttp.AmzStorageClass]; ok {
		o.StorageClass = storageClass
	} else {
		o.StorageClass = globalMinioDefaultStorageClass
	}
	var (
		t time.Time
		e error
	)
	// "expires" and "last-modified" are stored in HTTP time format; parse
	// errors are deliberately ignored and leave the zero value in place.
	if exp, ok := m.Meta["expires"]; ok {
		if t, e = time.Parse(http.TimeFormat, exp); e == nil {
			o.Expires = t.UTC()
		}
	}
	if mtime, ok := m.Meta["last-modified"]; ok {
		if t, e = time.Parse(http.TimeFormat, mtime); e == nil {
			o.ModTime = t.UTC()
		}
	}

	// etag/md5Sum has already been extracted. We need to
	// remove to avoid it from appearing as part of user-defined metadata
	o.UserDefined = cleanMetadata(m.Meta)
	return o
}

// represents disk cache struct
type diskCache struct {
	// is set to 0 if drive is offline
	online       uint32 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
	purgeRunning int32  // 1 while purge() runs; guards against concurrent GC

	triggerGC        chan struct{}  // buffered(1): non-blocking GC trigger, see queueGC
	dir              string         // caching directory
	stats            CacheDiskStats // disk cache stats for prometheus
	quotaPct         int            // max usage in %
	pool             sync.Pool      // pool of aligned read/write buffers (cacheBlkSize)
	after            int            // minimum accesses before an object is cached.
	lowWatermark     int
	highWatermark    int
	enableRange      bool
	commitWriteback  bool
	retryWritebackCh chan ObjectInfo // queue of failed writeback uploads to retry
	// nsMutex namespace lock
	nsMutex *nsLockMap
	// Object functions pointing to the corresponding functions of backend implementation.
	NewNSLockFn func(cachePath string) RWLocker
}

// Inits the disk cache dir if it is not initialized already.
func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCache, error) {
	// MaxUse is the legacy setting; fall back to Quota when it is unset.
	quotaPct := config.MaxUse
	if quotaPct == 0 {
		quotaPct = config.Quota
	}

	if err := os.MkdirAll(dir, 0777); err != nil {
		return nil, fmt.Errorf("Unable to initialize '%s' dir, %w", dir, err)
	}
	cache := diskCache{
		dir:              dir,
		triggerGC:        make(chan struct{}, 1),
		stats:            CacheDiskStats{Dir: dir},
		quotaPct:         quotaPct,
		after:            config.After,
		lowWatermark:     config.WatermarkLow,
		highWatermark:    config.WatermarkHigh,
		enableRange:      config.Range,
		commitWriteback:  config.CommitWriteback,
		retryWritebackCh: make(chan ObjectInfo, 10000),
		online:           1, // assume drive is online until an I/O error proves otherwise
		pool: sync.Pool{
			New: func() interface{} {
				// aligned buffers allow O_DIRECT-style reads/writes
				b := disk.AlignedBlock(int(cacheBlkSize))
				return &b
			},
		},
		nsMutex: newNSLock(false),
	}
	// background GC loop; runs for the lifetime of ctx.
	go cache.purgeWait(ctx)
	if cache.commitWriteback {
		// re-queue writeback uploads that failed before the last restart.
		go cache.scanCacheWritebackFailures(ctx)
	}
	cache.diskSpaceAvailable(0) // update if cache usage is already high.
	cache.NewNSLockFn = func(cachePath string) RWLocker {
		return cache.nsMutex.NewNSLock(nil, cachePath, "")
	}
	return &cache, nil
}
for a 100GB disk, if quota is configured as 70% and watermark_low = 80% and 204 // watermark_high = 90% then garbage collection starts when 63% of disk is used and 205 // stops when disk usage drops to 56% 206 func (c *diskCache) diskUsageLow() bool { 207 gcStopPct := c.quotaPct * c.lowWatermark / 100 208 di, err := disk.GetInfo(c.dir) 209 if err != nil { 210 reqInfo := (&logger.ReqInfo{}).AppendTags("cachePath", c.dir) 211 ctx := logger.SetReqInfo(GlobalContext, reqInfo) 212 logger.LogIf(ctx, err) 213 return false 214 } 215 usedPercent := float64(di.Used) * 100 / float64(di.Total) 216 low := int(usedPercent) < gcStopPct 217 atomic.StoreUint64(&c.stats.UsagePercent, uint64(usedPercent)) 218 if low { 219 atomic.StoreInt32(&c.stats.UsageState, 0) 220 } 221 return low 222 } 223 224 // Returns if the disk usage reaches or exceeds configured cache quota when size is added. 225 // If current usage without size exceeds high watermark a GC is automatically queued. 226 func (c *diskCache) diskSpaceAvailable(size int64) bool { 227 gcTriggerPct := c.quotaPct * c.highWatermark / 100 228 di, err := disk.GetInfo(c.dir) 229 if err != nil { 230 reqInfo := (&logger.ReqInfo{}).AppendTags("cachePath", c.dir) 231 ctx := logger.SetReqInfo(GlobalContext, reqInfo) 232 logger.LogIf(ctx, err) 233 return false 234 } 235 if di.Total == 0 { 236 logger.Info("diskCache: Received 0 total disk size") 237 return false 238 } 239 usedPercent := float64(di.Used) * 100 / float64(di.Total) 240 if usedPercent >= float64(gcTriggerPct) { 241 atomic.StoreInt32(&c.stats.UsageState, 1) 242 c.queueGC() 243 } 244 atomic.StoreUint64(&c.stats.UsagePercent, uint64(usedPercent)) 245 246 // Recalculate percentage with provided size added. 247 usedPercent = float64(di.Used+uint64(size)) * 100 / float64(di.Total) 248 249 return usedPercent < float64(c.quotaPct) 250 } 251 252 // queueGC will queue a GC. 253 // Calling this function is always non-blocking. 
254 func (c *diskCache) queueGC() { 255 select { 256 case c.triggerGC <- struct{}{}: 257 default: 258 } 259 } 260 261 // toClear returns how many bytes should be cleared to reach the low watermark quota. 262 // returns 0 if below quota. 263 func (c *diskCache) toClear() uint64 { 264 di, err := disk.GetInfo(c.dir) 265 if err != nil { 266 reqInfo := (&logger.ReqInfo{}).AppendTags("cachePath", c.dir) 267 ctx := logger.SetReqInfo(GlobalContext, reqInfo) 268 logger.LogIf(ctx, err) 269 return 0 270 } 271 return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark), uint64(c.highWatermark)) 272 } 273 274 func (c *diskCache) purgeWait(ctx context.Context) { 275 for { 276 select { 277 case <-ctx.Done(): 278 case <-c.triggerGC: // wait here until someone triggers. 279 c.purge(ctx) 280 } 281 } 282 } 283 284 // Purge cache entries that were not accessed. 285 func (c *diskCache) purge(ctx context.Context) { 286 if atomic.LoadInt32(&c.purgeRunning) == 1 || c.diskUsageLow() { 287 return 288 } 289 290 toFree := c.toClear() 291 if toFree == 0 { 292 return 293 } 294 295 atomic.StoreInt32(&c.purgeRunning, 1) // do not run concurrent purge() 296 defer atomic.StoreInt32(&c.purgeRunning, 0) 297 298 // expiry for cleaning up old cache.json files that 299 // need to be cleaned up. 300 expiry := UTCNow().Add(-cacheExpiryDays) 301 // defaulting max hits count to 100 302 // ignore error we know what value we are passing. 303 scorer, _ := newFileScorer(toFree, time.Now().Unix(), 100) 304 305 // this function returns FileInfo for cached range files and cache data file. 
306 fiStatFn := func(ranges map[string]string, dataFile, pathPrefix string) map[string]os.FileInfo { 307 fm := make(map[string]os.FileInfo) 308 fname := pathJoin(pathPrefix, dataFile) 309 if fi, err := os.Stat(fname); err == nil { 310 fm[fname] = fi 311 } 312 313 for _, rngFile := range ranges { 314 fname = pathJoin(pathPrefix, rngFile) 315 if fi, err := os.Stat(fname); err == nil { 316 fm[fname] = fi 317 } 318 } 319 return fm 320 } 321 322 filterFn := func(name string, typ os.FileMode) error { 323 if name == minioMetaBucket { 324 // Proceed to next file. 325 return nil 326 } 327 328 cacheDir := pathJoin(c.dir, name) 329 meta, _, numHits, err := c.statCachedMeta(ctx, cacheDir) 330 if err != nil { 331 // delete any partially filled cache entry left behind. 332 removeAll(cacheDir) 333 // Proceed to next file. 334 return nil 335 } 336 337 // stat all cached file ranges and cacheDataFile. 338 cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, name)) 339 objInfo := meta.ToObjectInfo("", "") 340 // prevent gc from clearing un-synced commits. This metadata is present when 341 // cache writeback commit setting is enabled. 342 status, ok := objInfo.UserDefined[writeBackStatusHeader] 343 if ok && status != CommitComplete.String() { 344 return nil 345 } 346 cc := cacheControlOpts(objInfo) 347 for fname, fi := range cachedFiles { 348 if cc != nil { 349 if cc.isStale(objInfo.ModTime) { 350 if err = removeAll(fname); err != nil { 351 logger.LogIf(ctx, err) 352 } 353 scorer.adjustSaveBytes(-fi.Size()) 354 355 // break early if sufficient disk space reclaimed. 356 if c.diskUsageLow() { 357 // if we found disk usage is already low, we return nil filtering is complete. 
358 return errDoneForNow 359 } 360 } 361 continue 362 } 363 scorer.addFile(fname, atime.Get(fi), fi.Size(), numHits) 364 } 365 // clean up stale cache.json files for objects that never got cached but access count was maintained in cache.json 366 fi, err := os.Stat(pathJoin(cacheDir, cacheMetaJSONFile)) 367 if err != nil || (fi.ModTime().Before(expiry) && len(cachedFiles) == 0) { 368 removeAll(cacheDir) 369 scorer.adjustSaveBytes(-fi.Size()) 370 // Proceed to next file. 371 return nil 372 } 373 374 // if we found disk usage is already low, we return nil filtering is complete. 375 if c.diskUsageLow() { 376 return errDoneForNow 377 } 378 379 // Proceed to next file. 380 return nil 381 } 382 383 if err := readDirFn(c.dir, filterFn); err != nil { 384 logger.LogIf(ctx, err) 385 return 386 } 387 388 scorer.purgeFunc(func(qfile queuedFile) { 389 fileName := qfile.name 390 removeAll(fileName) 391 slashIdx := strings.LastIndex(fileName, SlashSeparator) 392 if slashIdx >= 0 { 393 fileNamePrefix := fileName[0:slashIdx] 394 fname := fileName[slashIdx+1:] 395 if fname == cacheDataFile { 396 removeAll(fileNamePrefix) 397 } 398 } 399 }) 400 401 scorer.reset() 402 } 403 404 // sets cache drive status 405 func (c *diskCache) setOffline() { 406 atomic.StoreUint32(&c.online, 0) 407 } 408 409 // returns true if cache drive is online 410 func (c *diskCache) IsOnline() bool { 411 return atomic.LoadUint32(&c.online) != 0 412 } 413 414 // Stat returns ObjectInfo from disk cache 415 func (c *diskCache) Stat(ctx context.Context, bucket, object string) (oi ObjectInfo, numHits int, err error) { 416 var partial bool 417 var meta *cacheMeta 418 419 cacheObjPath := getCacheSHADir(c.dir, bucket, object) 420 // Stat the file to get file size. 
// statCachedMeta returns metadata from cache - including ranges cached, partial to indicate
// if partial object is cached. Takes a shared (read) namespace lock on the
// cache entry for the duration of the stat.
func (c *diskCache) statCachedMeta(ctx context.Context, cacheObjPath string) (meta *cacheMeta, partial bool, numHits int, err error) {

	cLock := c.NewNSLockFn(cacheObjPath)
	if ctx, err = cLock.GetRLock(ctx, globalOperationTimeout); err != nil {
		return
	}

	defer cLock.RUnlock()
	return c.statCache(ctx, cacheObjPath)
}

// statRange returns ObjectInfo and RangeInfo from disk cache
func (c *diskCache) statRange(ctx context.Context, bucket, object string, rs *HTTPRangeSpec) (oi ObjectInfo, rngInfo RangeInfo, numHits int, err error) {
	// Stat the file to get file size.
	cacheObjPath := getCacheSHADir(c.dir, bucket, object)
	var meta *cacheMeta
	var partial bool

	meta, partial, numHits, err = c.statCachedMeta(ctx, cacheObjPath)
	if err != nil {
		return
	}

	oi = meta.ToObjectInfo("", "")
	oi.Bucket = bucket
	oi.Name = object
	// full object cached: no range bookkeeping needed, serve as-is.
	if !partial {
		err = decryptCacheObjectETag(&oi)
		return
	}

	actualSize := uint64(meta.Stat.Size)
	var length int64
	_, length, err = rs.GetOffsetLength(int64(actualSize))
	if err != nil {
		return
	}

	// the on-disk range file is larger than the logical range when the
	// cache is encrypted (DARE adds per-package overhead).
	actualRngSize := uint64(length)
	if globalCacheKMS != nil {
		actualRngSize, _ = sio.EncryptedSize(uint64(length))
	}

	rng := rs.String(int64(actualSize))
	rngFile, ok := meta.Ranges[rng]
	if !ok {
		return oi, rngInfo, numHits, ObjectNotFound{Bucket: bucket, Object: object}
	}
	// metadata may reference a range file that was purged; verify it exists.
	if _, err = os.Stat(pathJoin(cacheObjPath, rngFile)); err != nil {
		return oi, rngInfo, numHits, ObjectNotFound{Bucket: bucket, Object: object}
	}
	rngInfo = RangeInfo{Range: rng, File: rngFile, Size: int64(actualRngSize)}

	err = decryptCacheObjectETag(&oi)
	return
}

// statCache is a convenience function for purge() to get ObjectInfo for cached object
// Caller must hold the namespace lock (see statCachedMeta).
func (c *diskCache) statCache(ctx context.Context, cacheObjPath string) (meta *cacheMeta, partial bool, numHits int, err error) {
	// Stat the file to get file size.
	metaPath := pathJoin(cacheObjPath, cacheMetaJSONFile)
	f, err := os.Open(metaPath)
	if err != nil {
		return meta, partial, 0, err
	}
	defer f.Close()
	meta = &cacheMeta{Version: cacheMetaVersion}
	if err := jsonLoad(f, meta); err != nil {
		return meta, partial, 0, err
	}
	// get metadata of part.1 if full file has been cached.
	// partial == true means only ranges (if any) are on disk.
	partial = true
	if _, err := os.Stat(pathJoin(cacheObjPath, cacheDataFile)); err == nil {
		partial = false
	}
	return meta, partial, meta.Hits, nil
}

// saves object metadata to disk cache
// incHitsOnly is true if metadata update is incrementing only the hit counter
// Exported variant: takes the exclusive namespace lock, then delegates to
// saveMetadata.
func (c *diskCache) SaveMetadata(ctx context.Context, bucket, object string, meta map[string]string, actualSize int64, rs *HTTPRangeSpec, rsFileName string, incHitsOnly bool) error {
	var err error
	cachedPath := getCacheSHADir(c.dir, bucket, object)
	cLock := c.NewNSLockFn(cachedPath)
	ctx, err = cLock.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return err
	}
	defer cLock.Unlock()
	return c.saveMetadata(ctx, bucket, object, meta, actualSize, rs, rsFileName, incHitsOnly)
}

// saves object metadata to disk cache
// incHitsOnly is true if metadata update is incrementing only the hit counter
// Caller must hold the exclusive namespace lock. The existing cache.json is
// loaded, merged with the new state and written back through the same handle.
func (c *diskCache) saveMetadata(ctx context.Context, bucket, object string, meta map[string]string, actualSize int64, rs *HTTPRangeSpec, rsFileName string, incHitsOnly bool) error {
	cachedPath := getCacheSHADir(c.dir, bucket, object)
	metaPath := pathJoin(cachedPath, cacheMetaJSONFile)
	// Create cache directory if needed
	if err := os.MkdirAll(cachedPath, 0777); err != nil {
		return err
	}
	f, err := os.OpenFile(metaPath, os.O_RDWR|os.O_CREATE, 0666)
	if err != nil {
		return err
	}
	defer f.Close()

	m := &cacheMeta{
		Version: cacheMetaVersion,
		Bucket:  bucket,
		Object:  object,
	}
	// io.EOF simply means a freshly created (empty) cache.json.
	if err := jsonLoad(f, m); err != nil && err != io.EOF {
		return err
	}
	// increment hits
	if rs != nil {
		// rsFileName gets set by putRange. Check for blank values here
		// coming from other code paths that set rs only (eg initial creation or hit increment).
		if rsFileName != "" {
			if m.Ranges == nil {
				m.Ranges = make(map[string]string)
			}
			m.Ranges[rs.String(actualSize)] = rsFileName
		}
	}
	if rs == nil && !incHitsOnly {
		// this is necessary cleanup of range files if entire object is cached.
		if _, err := os.Stat(pathJoin(cachedPath, cacheDataFile)); err == nil {
			for _, f := range m.Ranges {
				removeAll(pathJoin(cachedPath, f))
			}
			m.Ranges = nil
		}
	}
	m.Stat.Size = actualSize
	if !incHitsOnly {
		// reset meta
		m.Meta = meta
	} else {
		if m.Meta == nil {
			m.Meta = make(map[string]string)
		}
		// hit-only updates still refresh the etag if the caller supplied one.
		if etag, ok := meta["etag"]; ok {
			m.Meta["etag"] = etag
		}
	}
	m.Hits++

	m.Checksum = CacheChecksumInfoV1{Algorithm: HighwayHash256S.String(), Blocksize: cacheBlkSize}
	return jsonSave(f, m)
}
// getCacheSHADir returns the per-object cache directory: dir joined with the
// SHA-256 of "bucket/object".
func getCacheSHADir(dir, bucket, object string) string {
	return pathJoin(dir, getSHA256Hash([]byte(pathJoin(bucket, object))))
}

// Cache data to disk with bitrot checksum added for each block of 1MB
// On-disk layout per block: HighwayHash256S checksum bytes immediately
// followed by the block data. Returns the number of data bytes written
// (checksums excluded) and, when writeback commit is enabled, the base64
// MD5 of the original stream.
func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Reader, size uint64) (int64, string, error) {
	if err := os.MkdirAll(cachePath, 0777); err != nil {
		return 0, "", err
	}
	filePath := pathJoin(cachePath, fileName)

	if filePath == "" || reader == nil {
		return 0, "", errInvalidArgument
	}

	if err := checkPathLength(filePath); err != nil {
		return 0, "", err
	}
	f, err := os.Create(filePath)
	if err != nil {
		return 0, "", osErrToFileErr(err)
	}
	defer f.Close()

	var bytesWritten int64

	h := HighwayHash256S.New()

	// pooled aligned buffer of cacheBlkSize bytes.
	bufp := c.pool.Get().(*[]byte)
	defer c.pool.Put(bufp)
	md5Hash := md5.New()
	var n, n2 int
	for {
		n, err = io.ReadFull(reader, *bufp)
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			return 0, "", err
		}
		eof := err == io.EOF || err == io.ErrUnexpectedEOF
		if n == 0 && size != 0 {
			// Reached EOF, nothing more to be done.
			break
		}
		h.Reset()
		if _, err = h.Write((*bufp)[:n]); err != nil {
			return 0, "", err
		}
		hashBytes := h.Sum(nil)
		// compute md5Hash of original data stream if writeback commit to cache
		if c.commitWriteback {
			if _, err = md5Hash.Write((*bufp)[:n]); err != nil {
				return 0, "", err
			}
		}
		// checksum first, then the data — bitrotReadFromCache expects
		// exactly this interleaving.
		if _, err = f.Write(hashBytes); err != nil {
			return 0, "", err
		}
		if n2, err = f.Write((*bufp)[:n]); err != nil {
			return 0, "", err
		}
		bytesWritten += int64(n2)
		if eof {
			break
		}
	}

	return bytesWritten, base64.StdEncoding.EncodeToString(md5Hash.Sum(nil)), nil
}

// newCacheEncryptReader wraps content in a DARE-encrypting reader keyed by a
// freshly sealed per-object key (see newCacheEncryptMetadata).
func newCacheEncryptReader(content io.Reader, bucket, object string, metadata map[string]string) (r io.Reader, err error) {
	objectEncryptionKey, err := newCacheEncryptMetadata(bucket, object, metadata)
	if err != nil {
		return nil, err
	}

	reader, err := sio.EncryptReader(content, sio.Config{Key: objectEncryptionKey[:], MinVersion: sio.Version20, CipherSuites: fips.CipherSuitesDARE()})
	if err != nil {
		return nil, crypto.ErrInvalidCustomerKey
	}
	return reader, nil
}

// newCacheEncryptMetadata generates a fresh object encryption key from the
// cache KMS, seals it into metadata (alongside a sealed etag and the
// SSECacheEncrypted marker) and returns the plaintext object key.
func newCacheEncryptMetadata(bucket, object string, metadata map[string]string) ([]byte, error) {
	var sealedKey crypto.SealedKey
	if globalCacheKMS == nil {
		return nil, errKMSNotConfigured
	}
	key, err := globalCacheKMS.GenerateKey("", kms.Context{bucket: pathJoin(bucket, object)})
	if err != nil {
		return nil, err
	}

	objectKey := crypto.GenerateKey(key.Plaintext, rand.Reader)
	sealedKey = objectKey.Seal(key.Plaintext, crypto.GenerateIV(rand.Reader), crypto.S3.String(), bucket, object)
	crypto.S3.CreateMetadata(metadata, key.KeyID, key.Ciphertext, sealedKey)

	if etag, ok := metadata["etag"]; ok {
		metadata["etag"] = hex.EncodeToString(objectKey.SealETag([]byte(etag)))
	}
	// presence of the key (value unused) marks the entry as encrypted.
	metadata[SSECacheEncrypted] = ""
	return objectKey[:], nil
}
// Caches the object to disk
// Admission control: an object is only written to disk after it has been
// seen at least c.after times (hit counting happens via saveMetadata).
func (c *diskCache) Put(ctx context.Context, bucket, object string, data io.Reader, size int64, rs *HTTPRangeSpec, opts ObjectOptions, incHitsOnly bool) (oi ObjectInfo, err error) {
	if !c.diskSpaceAvailable(size) {
		// drain the reader so the caller's stream is fully consumed.
		io.Copy(ioutil.Discard, data)
		return oi, errDiskFull
	}
	cachePath := getCacheSHADir(c.dir, bucket, object)
	cLock := c.NewNSLockFn(cachePath)
	ctx, err = cLock.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return oi, err
	}
	defer cLock.Unlock()

	meta, _, numHits, err := c.statCache(ctx, cachePath)
	// Case where object not yet cached
	if osIsNotExist(err) && c.after >= 1 {
		return oi, c.saveMetadata(ctx, bucket, object, opts.UserDefined, size, nil, "", false)
	}
	// Case where object already has a cache metadata entry but not yet cached
	if err == nil && numHits < c.after {
		cETag := extractETag(meta.Meta)
		bETag := extractETag(opts.UserDefined)
		if cETag == bETag {
			return oi, c.saveMetadata(ctx, bucket, object, opts.UserDefined, size, nil, "", false)
		}
		// etag changed upstream: count the access but replace metadata later.
		incHitsOnly = true
	}

	if rs != nil {
		return oi, c.putRange(ctx, bucket, object, data, size, rs, opts)
	}
	if !c.diskSpaceAvailable(size) {
		return oi, errDiskFull
	}
	if err := os.MkdirAll(cachePath, 0777); err != nil {
		return oi, err
	}
	var metadata = cloneMSS(opts.UserDefined)
	var reader = data
	var actualSize = uint64(size)
	if globalCacheKMS != nil {
		reader, err = newCacheEncryptReader(data, bucket, object, metadata)
		if err != nil {
			return oi, err
		}
		// bytes on disk include DARE encryption overhead.
		actualSize, _ = sio.EncryptedSize(uint64(size))
	}
	n, md5sum, err := c.bitrotWriteToCache(cachePath, cacheDataFile, reader, actualSize)
	if IsErr(err, baseErrs...) {
		// take the cache drive offline
		c.setOffline()
	}
	if err != nil {
		removeAll(cachePath)
		return oi, err
	}

	// short write means a truncated stream; discard the partial entry.
	if actualSize != uint64(n) {
		removeAll(cachePath)
		return oi, IncompleteBody{Bucket: bucket, Object: object}
	}
	if c.commitWriteback {
		metadata["content-md5"] = md5sum
		if md5bytes, err := base64.StdEncoding.DecodeString(md5sum); err == nil {
			metadata["etag"] = hex.EncodeToString(md5bytes)
		}
		metadata[writeBackStatusHeader] = CommitPending.String()
	}
	return ObjectInfo{
			Bucket:      bucket,
			Name:        object,
			ETag:        metadata["etag"],
			Size:        n,
			UserDefined: metadata,
		},
		c.saveMetadata(ctx, bucket, object, metadata, n, nil, "", incHitsOnly)
}

// Caches the range to disk
// The range is stored under a random (UUID) filename which is recorded in
// cache.json's Ranges map by saveMetadata.
func (c *diskCache) putRange(ctx context.Context, bucket, object string, data io.Reader, size int64, rs *HTTPRangeSpec, opts ObjectOptions) error {
	rlen, err := rs.GetLength(size)
	if err != nil {
		return err
	}
	if !c.diskSpaceAvailable(rlen) {
		return errDiskFull
	}
	cachePath := getCacheSHADir(c.dir, bucket, object)
	if err := os.MkdirAll(cachePath, 0777); err != nil {
		return err
	}
	var metadata = cloneMSS(opts.UserDefined)
	var reader = data
	var actualSize = uint64(rlen)
	// objSize is the actual size of object (with encryption overhead if any)
	var objSize = uint64(size)
	if globalCacheKMS != nil {
		reader, err = newCacheEncryptReader(data, bucket, object, metadata)
		if err != nil {
			return err
		}
		actualSize, _ = sio.EncryptedSize(uint64(rlen))
		objSize, _ = sio.EncryptedSize(uint64(size))

	}
	cacheFile := MustGetUUID()
	n, _, err := c.bitrotWriteToCache(cachePath, cacheFile, reader, actualSize)
	if IsErr(err, baseErrs...) {
		// take the cache drive offline
		c.setOffline()
	}
	if err != nil {
		removeAll(cachePath)
		return err
	}
	if actualSize != uint64(n) {
		removeAll(cachePath)
		return IncompleteBody{Bucket: bucket, Object: object}
	}
	return c.saveMetadata(ctx, bucket, object, metadata, int64(objSize), rs, cacheFile, false)
}
// checks streaming bitrot checksum of cached object before returning data
// The cached file interleaves a HighwayHash256S checksum before every
// cacheBlkSize data block (see bitrotWriteToCache); offset/length address
// the logical data stream, not the on-disk layout.
func (c *diskCache) bitrotReadFromCache(ctx context.Context, filePath string, offset, length int64, writer io.Writer) error {
	h := HighwayHash256S.New()

	checksumHash := make([]byte, h.Size())

	startBlock := offset / cacheBlkSize
	endBlock := (offset + length) / cacheBlkSize

	// get block start offset
	// on-disk offset of the first needed block, accounting for the
	// per-block checksum prefix.
	var blockStartOffset int64
	if startBlock > 0 {
		blockStartOffset = (cacheBlkSize + int64(h.Size())) * startBlock
	}

	tillLength := (cacheBlkSize + int64(h.Size())) * (endBlock - startBlock + 1)

	// Start offset cannot be negative.
	if offset < 0 {
		logger.LogIf(ctx, errUnexpected)
		return errUnexpected
	}

	// Writer cannot be nil.
	if writer == nil {
		logger.LogIf(ctx, errUnexpected)
		return errUnexpected
	}
	var blockOffset, blockLength int64
	rc, err := readCacheFileStream(filePath, blockStartOffset, tillLength)
	if err != nil {
		return err
	}
	bufp := c.pool.Get().(*[]byte)
	defer c.pool.Put(bufp)

	for block := startBlock; block <= endBlock; block++ {
		// compute the slice of this block that falls inside [offset, offset+length).
		switch {
		case startBlock == endBlock:
			blockOffset = offset % cacheBlkSize
			blockLength = length
		case block == startBlock:
			blockOffset = offset % cacheBlkSize
			blockLength = cacheBlkSize - blockOffset
		case block == endBlock:
			blockOffset = 0
			blockLength = (offset + length) % cacheBlkSize
		default:
			blockOffset = 0
			blockLength = cacheBlkSize
		}
		if blockLength == 0 {
			break
		}
		// checksum precedes each data block on disk.
		if _, err := io.ReadFull(rc, checksumHash); err != nil {
			return err
		}
		h.Reset()
		n, err := io.ReadFull(rc, *bufp)
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			logger.LogIf(ctx, err)
			return err
		}
		eof := err == io.EOF || err == io.ErrUnexpectedEOF
		if n == 0 && length != 0 {
			// Reached EOF, nothing more to be done.
			break
		}

		if _, e := h.Write((*bufp)[:n]); e != nil {
			return e
		}
		hashBytes := h.Sum(nil)

		if !bytes.Equal(hashBytes, checksumHash) {
			err = fmt.Errorf("hashes do not match expected %s, got %s",
				hex.EncodeToString(checksumHash), hex.EncodeToString(hashBytes))
			logger.LogIf(GlobalContext, err)
			return err
		}

		if _, err := io.Copy(writer, bytes.NewReader((*bufp)[blockOffset:blockOffset+blockLength])); err != nil {
			if err != io.ErrClosedPipe {
				logger.LogIf(ctx, err)
				return err
			}
			// reader side of the pipe closed early: stop quietly.
			eof = true
		}
		if eof {
			break
		}
	}

	return nil
}

// Get returns ObjectInfo and reader for object from disk cache
func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, opts ObjectOptions) (gr *GetObjectReader, numHits int, err error) {
	cacheObjPath := getCacheSHADir(c.dir, bucket, object)
	cLock := c.NewNSLockFn(cacheObjPath)
	ctx, err = cLock.GetRLock(ctx, globalOperationTimeout)
	if err != nil {
		return nil, numHits, err
	}

	defer cLock.RUnlock()
	var objInfo ObjectInfo
	var rngInfo RangeInfo
	if objInfo, rngInfo, numHits, err = c.statRange(ctx, bucket, object, rs); err != nil {
		return nil, numHits, toObjectErr(err, bucket, object)
	}
	cacheFile := cacheDataFile
	objSize := objInfo.Size
	if !rngInfo.Empty() {
		// for cached ranges, need to pass actual range file size to GetObjectReader
		// and clear out range spec
		cacheFile = rngInfo.File
		objInfo.Size = rngInfo.Size
		rs = nil
	}
	var nsUnlocker = func() {}
	// For a directory, we need to send an reader that returns no bytes.
	if HasSuffix(object, SlashSeparator) {
		// The lock taken above is released when
		// objReader.Close() is called by the caller.
		gr, gerr := NewGetObjectReaderFromReader(bytes.NewBuffer(nil), objInfo, opts, nsUnlocker)
		return gr, numHits, gerr
	}

	fn, off, length, nErr := NewGetObjectReader(rs, objInfo, opts, nsUnlocker)
	if nErr != nil {
		return nil, numHits, nErr
	}
	filePath := pathJoin(cacheObjPath, cacheFile)
	// stream the verified bytes through a pipe; a bitrot failure drops the
	// whole cache entry so it gets re-fetched from the backend.
	pr, pw := io.Pipe()
	go func() {
		err := c.bitrotReadFromCache(ctx, filePath, off, length, pw)
		if err != nil {
			removeAll(cacheObjPath)
		}
		pw.CloseWithError(err)
	}()
	// Cleanup function to cause the go routine above to exit, in
	// case of incomplete read.
	pipeCloser := func() { pr.Close() }

	gr, gerr := fn(pr, h, opts.CheckPrecondFn, pipeCloser)
	if gerr != nil {
		return gr, numHits, gerr
	}
	if globalCacheKMS != nil {
		// clean up internal SSE cache metadata
		delete(gr.ObjInfo.UserDefined, xhttp.AmzServerSideEncryption)
	}
	if !rngInfo.Empty() {
		// overlay Size with actual object size and not the range size
		gr.ObjInfo.Size = objSize
	}
	return gr, numHits, nil

}
942 gr, gerr := NewGetObjectReaderFromReader(bytes.NewBuffer(nil), objInfo, opts, nsUnlocker) 943 return gr, numHits, gerr 944 } 945 946 fn, off, length, nErr := NewGetObjectReader(rs, objInfo, opts, nsUnlocker) 947 if nErr != nil { 948 return nil, numHits, nErr 949 } 950 filePath := pathJoin(cacheObjPath, cacheFile) 951 pr, pw := io.Pipe() 952 go func() { 953 err := c.bitrotReadFromCache(ctx, filePath, off, length, pw) 954 if err != nil { 955 removeAll(cacheObjPath) 956 } 957 pw.CloseWithError(err) 958 }() 959 // Cleanup function to cause the go routine above to exit, in 960 // case of incomplete read. 961 pipeCloser := func() { pr.Close() } 962 963 gr, gerr := fn(pr, h, opts.CheckPrecondFn, pipeCloser) 964 if gerr != nil { 965 return gr, numHits, gerr 966 } 967 if globalCacheKMS != nil { 968 // clean up internal SSE cache metadata 969 delete(gr.ObjInfo.UserDefined, xhttp.AmzServerSideEncryption) 970 } 971 if !rngInfo.Empty() { 972 // overlay Size with actual object size and not the range size 973 gr.ObjInfo.Size = objSize 974 } 975 return gr, numHits, nil 976 977 } 978 979 // Deletes the cached object 980 func (c *diskCache) delete(ctx context.Context, cacheObjPath string) (err error) { 981 cLock := c.NewNSLockFn(cacheObjPath) 982 _, err = cLock.GetLock(ctx, globalOperationTimeout) 983 if err != nil { 984 return err 985 } 986 defer cLock.Unlock() 987 return removeAll(cacheObjPath) 988 } 989 990 // Deletes the cached object 991 func (c *diskCache) Delete(ctx context.Context, bucket, object string) (err error) { 992 cacheObjPath := getCacheSHADir(c.dir, bucket, object) 993 return c.delete(ctx, cacheObjPath) 994 } 995 996 // convenience function to check if object is cached on this diskCache 997 func (c *diskCache) Exists(ctx context.Context, bucket, object string) bool { 998 if _, err := os.Stat(getCacheSHADir(c.dir, bucket, object)); err != nil { 999 return false 1000 } 1001 return true 1002 } 1003 1004 // queues writeback upload failures on server startup 1005 
func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) { 1006 defer close(c.retryWritebackCh) 1007 filterFn := func(name string, typ os.FileMode) error { 1008 if name == minioMetaBucket { 1009 // Proceed to next file. 1010 return nil 1011 } 1012 cacheDir := pathJoin(c.dir, name) 1013 meta, _, _, err := c.statCachedMeta(ctx, cacheDir) 1014 if err != nil { 1015 return nil 1016 } 1017 1018 objInfo := meta.ToObjectInfo("", "") 1019 status, ok := objInfo.UserDefined[writeBackStatusHeader] 1020 if !ok || status == CommitComplete.String() { 1021 return nil 1022 } 1023 select { 1024 case c.retryWritebackCh <- objInfo: 1025 default: 1026 } 1027 1028 return nil 1029 } 1030 1031 if err := readDirFn(c.dir, filterFn); err != nil { 1032 logger.LogIf(ctx, err) 1033 return 1034 } 1035 }