storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/disk-cache-backend.go

/*
 * MinIO Cloud Storage, (C) 2019-2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bytes"
	"context"
	"crypto/md5"
	"crypto/rand"
	"encoding/base64"
	"encoding/hex"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/djherbis/atime"
	"github.com/minio/sio"

	"storj.io/minio/cmd/config/cache"
	"storj.io/minio/cmd/crypto"
	xhttp "storj.io/minio/cmd/http"
	"storj.io/minio/cmd/logger"
	"storj.io/minio/pkg/disk"
	"storj.io/minio/pkg/fips"
	"storj.io/minio/pkg/kms"
)

const (
	// cache.json object metadata for cached objects.
	cacheMetaJSONFile = "cache.json"
	cacheDataFile     = "part.1"
	cacheMetaVersion  = "1.0.0"
	cacheExpiryDays   = 90 * time.Hour * 24 // defaults to 90 days
	// SSECacheEncrypted is the metadata key indicating that the object
	// is a cache entry encrypted with cache KMS master key in globalCacheKMS.
	SSECacheEncrypted = "X-Minio-Internal-Encrypted-Cache"
)
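
// On disk, every cached object gets its own directory (see getCacheSHADir
// below), holding cacheMetaJSONFile with the object metadata plus either
// cacheDataFile for a fully cached object or one UUID-named file per cached
// range. A sketch of the layout:
//
//	<cache-dir>/<sha256(bucket/object)>/cache.json
//	<cache-dir>/<sha256(bucket/object)>/part.1   (full object, if cached)
//	<cache-dir>/<sha256(bucket/object)>/<uuid>   (one per cached range)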

// CacheChecksumInfoV1 - carries checksums of individual blocks on disk.
type CacheChecksumInfoV1 struct {
	Algorithm string `json:"algorithm"`
	Blocksize int64  `json:"blocksize"`
}

// cacheMeta represents the cache metadata for a cached object.
type cacheMeta struct {
	Version string   `json:"version"`
	Stat    StatInfo `json:"stat"` // Stat of the current object `cache.json`.

	// checksums of blocks on disk.
	Checksum CacheChecksumInfoV1 `json:"checksum,omitempty"`
	// Metadata map for current object.
	Meta map[string]string `json:"meta,omitempty"`
	// Ranges maps cached range to associated filename.
	Ranges map[string]string `json:"ranges,omitempty"`
	// Hits is a counter on the number of times this object has been accessed so far.
	Hits   int    `json:"hits,omitempty"`
	Bucket string `json:"bucket,omitempty"`
	Object string `json:"object,omitempty"`
}
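
// An illustrative cache.json snapshot (field values are hypothetical, not
// taken from the original source):
//
//	{
//	  "version": "1.0.0",
//	  "stat": {...},
//	  "checksum": {"algorithm": "...", "blocksize": ...},
//	  "meta": {"content-type": "application/octet-stream", "etag": "..."},
//	  "ranges": {"<range-spec>": "<uuid>"},
//	  "hits": 3,
//	  "bucket": "mybucket",
//	  "object": "myobject"
//	}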

// RangeInfo has the range, file and range length information for a cached range.
type RangeInfo struct {
	Range string
	File  string
	Size  int64
}

// Empty returns true if this is an empty struct.
func (r *RangeInfo) Empty() bool {
	return r.Range == "" && r.File == "" && r.Size == 0
}

func (m *cacheMeta) ToObjectInfo(bucket, object string) (o ObjectInfo) {
	if len(m.Meta) == 0 {
		m.Meta = make(map[string]string)
		m.Stat.ModTime = timeSentinel
	}

	o = ObjectInfo{
		Bucket:            bucket,
		Name:              object,
		CacheStatus:       CacheHit,
		CacheLookupStatus: CacheHit,
	}

	// We set file info only if it's valid.
	o.Size = m.Stat.Size
	o.ETag = extractETag(m.Meta)
	o.ContentType = m.Meta["content-type"]
	o.ContentEncoding = m.Meta["content-encoding"]
	if storageClass, ok := m.Meta[xhttp.AmzStorageClass]; ok {
		o.StorageClass = storageClass
	} else {
		o.StorageClass = globalMinioDefaultStorageClass
	}
	var (
		t time.Time
		e error
	)
	if exp, ok := m.Meta["expires"]; ok {
		if t, e = time.Parse(http.TimeFormat, exp); e == nil {
			o.Expires = t.UTC()
		}
	}
	if mtime, ok := m.Meta["last-modified"]; ok {
		if t, e = time.Parse(http.TimeFormat, mtime); e == nil {
			o.ModTime = t.UTC()
		}
	}

	// etag/md5Sum has already been extracted above; remove it here so it does
	// not reappear as part of the user-defined metadata.
	o.UserDefined = cleanMetadata(m.Meta)
	return o
}

// diskCache represents a disk-backed cache drive.
type diskCache struct {
	// is set to 0 if drive is offline
	online       uint32 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
	purgeRunning int32

	triggerGC        chan struct{}
	dir              string         // caching directory
	stats            CacheDiskStats // disk cache stats for prometheus
	quotaPct         int            // max usage in %
	pool             sync.Pool
	after            int // minimum accesses before an object is cached.
	lowWatermark     int
	highWatermark    int
	enableRange      bool
	commitWriteback  bool
	retryWritebackCh chan ObjectInfo
	// nsMutex namespace lock
	nsMutex *nsLockMap
	// NewNSLockFn returns a namespace lock for the given cache path.
	NewNSLockFn func(cachePath string) RWLocker
}

// newDiskCache initializes the disk cache dir if it is not initialized already.
func newDiskCache(ctx context.Context, dir string, config cache.Config) (*diskCache, error) {
	quotaPct := config.MaxUse
	if quotaPct == 0 {
		quotaPct = config.Quota
	}

	if err := os.MkdirAll(dir, 0777); err != nil {
		return nil, fmt.Errorf("Unable to initialize '%s' dir, %w", dir, err)
	}
	cache := diskCache{
		dir:              dir,
		triggerGC:        make(chan struct{}, 1),
		stats:            CacheDiskStats{Dir: dir},
		quotaPct:         quotaPct,
		after:            config.After,
		lowWatermark:     config.WatermarkLow,
		highWatermark:    config.WatermarkHigh,
		enableRange:      config.Range,
		commitWriteback:  config.CommitWriteback,
		retryWritebackCh: make(chan ObjectInfo, 10000),
		online:           1,
		pool: sync.Pool{
			New: func() interface{} {
				b := disk.AlignedBlock(int(cacheBlkSize))
				return &b
			},
		},
		nsMutex: newNSLock(false),
	}
	go cache.purgeWait(ctx)
	if cache.commitWriteback {
		go cache.scanCacheWritebackFailures(ctx)
	}
	cache.diskSpaceAvailable(0) // update if cache usage is already high.
	cache.NewNSLockFn = func(cachePath string) RWLocker {
		return cache.nsMutex.NewNSLock(nil, cachePath, "")
	}
	return &cache, nil
}
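
// A minimal usage sketch (hypothetical mount path and values; all other
// cache.Config fields keep their zero values):
//
//	cfg := cache.Config{Quota: 70, WatermarkLow: 80, WatermarkHigh: 90, After: 3}
//	dc, err := newDiskCache(GlobalContext, "/mnt/cache1", cfg)
//	if err != nil {
//		// cache drive could not be initialized
//	}
//	_ = dc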

// diskUsageLow() returns true if disk usage falls below the low watermark w.r.t configured cache quota.
// Ex. for a 100GB disk, if quota is configured as 70% and watermark_low = 80% and
// watermark_high = 90% then garbage collection starts when 63% of disk is used and
// stops when disk usage drops to 56%.
func (c *diskCache) diskUsageLow() bool {
	gcStopPct := c.quotaPct * c.lowWatermark / 100
	di, err := disk.GetInfo(c.dir)
	if err != nil {
		reqInfo := (&logger.ReqInfo{}).AppendTags("cachePath", c.dir)
		ctx := logger.SetReqInfo(GlobalContext, reqInfo)
		logger.LogIf(ctx, err)
		return false
	}
	usedPercent := float64(di.Used) * 100 / float64(di.Total)
	low := int(usedPercent) < gcStopPct
	atomic.StoreUint64(&c.stats.UsagePercent, uint64(usedPercent))
	if low {
		atomic.StoreInt32(&c.stats.UsageState, 0)
	}
	return low
}

// diskSpaceAvailable returns true if disk usage, with size added, stays below the
// configured cache quota. If current usage (without size) has already reached the
// high watermark, a GC is automatically queued.
func (c *diskCache) diskSpaceAvailable(size int64) bool {
	gcTriggerPct := c.quotaPct * c.highWatermark / 100
	di, err := disk.GetInfo(c.dir)
	if err != nil {
		reqInfo := (&logger.ReqInfo{}).AppendTags("cachePath", c.dir)
		ctx := logger.SetReqInfo(GlobalContext, reqInfo)
		logger.LogIf(ctx, err)
		return false
	}
	if di.Total == 0 {
		logger.Info("diskCache: Received 0 total disk size")
		return false
	}
	usedPercent := float64(di.Used) * 100 / float64(di.Total)
	if usedPercent >= float64(gcTriggerPct) {
		atomic.StoreInt32(&c.stats.UsageState, 1)
		c.queueGC()
	}
	atomic.StoreUint64(&c.stats.UsagePercent, uint64(usedPercent))

	// Recalculate percentage with provided size added.
	usedPercent = float64(di.Used+uint64(size)) * 100 / float64(di.Total)

	return usedPercent < float64(c.quotaPct)
}

// queueGC will queue a GC.
// Calling this function is always non-blocking.
func (c *diskCache) queueGC() {
	select {
	case c.triggerGC <- struct{}{}:
	default:
	}
}

// toClear returns how many bytes should be cleared to reach the low watermark quota.
// returns 0 if below quota.
func (c *diskCache) toClear() uint64 {
	di, err := disk.GetInfo(c.dir)
	if err != nil {
		reqInfo := (&logger.ReqInfo{}).AppendTags("cachePath", c.dir)
		ctx := logger.SetReqInfo(GlobalContext, reqInfo)
		logger.LogIf(ctx, err)
		return 0
	}
	return bytesToClear(int64(di.Total), int64(di.Free), uint64(c.quotaPct), uint64(c.lowWatermark), uint64(c.highWatermark))
}

func (c *diskCache) purgeWait(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			// stop waiting for GC triggers once the context is canceled;
			// without this return the loop would spin on the closed channel.
			return
		case <-c.triggerGC: // wait here until someone triggers.
			c.purge(ctx)
		}
	}
}

// Purge cache entries that were not accessed.
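// It walks the cache directory once: partially written or stale
// (cache-control expired) entries are removed outright, surviving files are
// offered to a file scorer sized by toClear(), and the lowest-scoring files
// are then deleted until disk usage falls back below the low watermark.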
func (c *diskCache) purge(ctx context.Context) {
	if atomic.LoadInt32(&c.purgeRunning) == 1 || c.diskUsageLow() {
		return
	}

	toFree := c.toClear()
	if toFree == 0 {
		return
	}

	atomic.StoreInt32(&c.purgeRunning, 1) // do not run concurrent purge()
	defer atomic.StoreInt32(&c.purgeRunning, 0)

	// expiry threshold for cleaning up old cache.json files.
	expiry := UTCNow().Add(-cacheExpiryDays)
	// defaulting max hits count to 100;
	// ignore the error since we control the values being passed.
	scorer, _ := newFileScorer(toFree, time.Now().Unix(), 100)

	// this function returns FileInfo for the cache data file and any cached range files.
	fiStatFn := func(ranges map[string]string, dataFile, pathPrefix string) map[string]os.FileInfo {
		fm := make(map[string]os.FileInfo)
		fname := pathJoin(pathPrefix, dataFile)
		if fi, err := os.Stat(fname); err == nil {
			fm[fname] = fi
		}

		for _, rngFile := range ranges {
			fname = pathJoin(pathPrefix, rngFile)
			if fi, err := os.Stat(fname); err == nil {
				fm[fname] = fi
			}
		}
		return fm
	}

	filterFn := func(name string, typ os.FileMode) error {
		if name == minioMetaBucket {
			// Proceed to next file.
			return nil
		}

		cacheDir := pathJoin(c.dir, name)
		meta, _, numHits, err := c.statCachedMeta(ctx, cacheDir)
		if err != nil {
			// delete any partially filled cache entry left behind.
			removeAll(cacheDir)
			// Proceed to next file.
			return nil
		}

		// stat all cached file ranges and cacheDataFile.
		cachedFiles := fiStatFn(meta.Ranges, cacheDataFile, pathJoin(c.dir, name))
		objInfo := meta.ToObjectInfo("", "")
		// prevent gc from clearing un-synced commits. This metadata is present when
		// cache writeback commit setting is enabled.
		status, ok := objInfo.UserDefined[writeBackStatusHeader]
		if ok && status != CommitComplete.String() {
			return nil
		}
		cc := cacheControlOpts(objInfo)
		for fname, fi := range cachedFiles {
			if cc != nil {
				if cc.isStale(objInfo.ModTime) {
					if err = removeAll(fname); err != nil {
						logger.LogIf(ctx, err)
					}
					scorer.adjustSaveBytes(-fi.Size())

					// break early if sufficient disk space reclaimed.
					if c.diskUsageLow() {
						// disk usage is already low; return errDoneForNow to stop the walk.
						return errDoneForNow
					}
				}
				continue
			}
			scorer.addFile(fname, atime.Get(fi), fi.Size(), numHits)
		}
		// clean up stale cache.json files for objects that never got cached but access count was maintained in cache.json
		fi, err := os.Stat(pathJoin(cacheDir, cacheMetaJSONFile))
		if err != nil || (fi.ModTime().Before(expiry) && len(cachedFiles) == 0) {
			removeAll(cacheDir)
			if fi != nil {
				// guard against a nil FileInfo when the Stat above failed.
				scorer.adjustSaveBytes(-fi.Size())
			}
			// Proceed to next file.
			return nil
		}

		// disk usage is already low; return errDoneForNow to stop the walk.
		if c.diskUsageLow() {
			return errDoneForNow
		}

		// Proceed to next file.
		return nil
	}

	if err := readDirFn(c.dir, filterFn); err != nil {
		logger.LogIf(ctx, err)
		return
	}

	scorer.purgeFunc(func(qfile queuedFile) {
		fileName := qfile.name
		removeAll(fileName)
		slashIdx := strings.LastIndex(fileName, SlashSeparator)
		if slashIdx >= 0 {
			fileNamePrefix := fileName[0:slashIdx]
			fname := fileName[slashIdx+1:]
			if fname == cacheDataFile {
				removeAll(fileNamePrefix)
			}
		}
	})

	scorer.reset()
}

// sets cache drive status
func (c *diskCache) setOffline() {
	atomic.StoreUint32(&c.online, 0)
}

// returns true if cache drive is online
func (c *diskCache) IsOnline() bool {
	return atomic.LoadUint32(&c.online) != 0
}

// Stat returns ObjectInfo from disk cache
func (c *diskCache) Stat(ctx context.Context, bucket, object string) (oi ObjectInfo, numHits int, err error) {
	var partial bool
	var meta *cacheMeta

	cacheObjPath := getCacheSHADir(c.dir, bucket, object)
	// Stat the file to get file size.
	meta, partial, numHits, err = c.statCachedMeta(ctx, cacheObjPath)
	if err != nil {
		return
	}
	if partial {
		return oi, numHits, errFileNotFound
	}
	oi = meta.ToObjectInfo("", "")
	oi.Bucket = bucket
	oi.Name = object

	if err = decryptCacheObjectETag(&oi); err != nil {
		return
	}
	return
}

// statCachedMeta returns metadata from the cache, including the cached ranges;
// partial indicates whether only part of the object is cached.
func (c *diskCache) statCachedMeta(ctx context.Context, cacheObjPath string) (meta *cacheMeta, partial bool, numHits int, err error) {
	cLock := c.NewNSLockFn(cacheObjPath)
	if ctx, err = cLock.GetRLock(ctx, globalOperationTimeout); err != nil {
		return
	}

	defer cLock.RUnlock()
	return c.statCache(ctx, cacheObjPath)
}

// statRange returns ObjectInfo and RangeInfo from disk cache
func (c *diskCache) statRange(ctx context.Context, bucket, object string, rs *HTTPRangeSpec) (oi ObjectInfo, rngInfo RangeInfo, numHits int, err error) {
	// Stat the file to get file size.
	cacheObjPath := getCacheSHADir(c.dir, bucket, object)
	var meta *cacheMeta
	var partial bool

	meta, partial, numHits, err = c.statCachedMeta(ctx, cacheObjPath)
	if err != nil {
		return
	}

	oi = meta.ToObjectInfo("", "")
	oi.Bucket = bucket
	oi.Name = object
	if !partial {
		err = decryptCacheObjectETag(&oi)
		return
	}

	actualSize := uint64(meta.Stat.Size)
	var length int64
	_, length, err = rs.GetOffsetLength(int64(actualSize))
	if err != nil {
		return
	}

	actualRngSize := uint64(length)
	if globalCacheKMS != nil {
		actualRngSize, _ = sio.EncryptedSize(uint64(length))
	}

	rng := rs.String(int64(actualSize))
	rngFile, ok := meta.Ranges[rng]
	if !ok {
		return oi, rngInfo, numHits, ObjectNotFound{Bucket: bucket, Object: object}
	}
	if _, err = os.Stat(pathJoin(cacheObjPath, rngFile)); err != nil {
		return oi, rngInfo, numHits, ObjectNotFound{Bucket: bucket, Object: object}
	}
	rngInfo = RangeInfo{Range: rng, File: rngFile, Size: int64(actualRngSize)}

	err = decryptCacheObjectETag(&oi)
	return
}

// statCache is a convenience function for purge() to get the cacheMeta of a cached object
func (c *diskCache) statCache(ctx context.Context, cacheObjPath string) (meta *cacheMeta, partial bool, numHits int, err error) {
	// Stat the file to get file size.
	metaPath := pathJoin(cacheObjPath, cacheMetaJSONFile)
	f, err := os.Open(metaPath)
	if err != nil {
		return meta, partial, 0, err
	}
	defer f.Close()
	meta = &cacheMeta{Version: cacheMetaVersion}
	if err := jsonLoad(f, meta); err != nil {
		return meta, partial, 0, err
	}
	// get metadata of part.1 if full file has been cached.
	partial = true
	if _, err := os.Stat(pathJoin(cacheObjPath, cacheDataFile)); err == nil {
		partial = false
	}
	return meta, partial, meta.Hits, nil
}

// SaveMetadata saves object metadata to disk cache under the namespace lock
// incHitsOnly is true if metadata update is incrementing only the hit counter
func (c *diskCache) SaveMetadata(ctx context.Context, bucket, object string, meta map[string]string, actualSize int64, rs *HTTPRangeSpec, rsFileName string, incHitsOnly bool) error {
	var err error
	cachedPath := getCacheSHADir(c.dir, bucket, object)
	cLock := c.NewNSLockFn(cachedPath)
	ctx, err = cLock.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return err
	}
	defer cLock.Unlock()
	return c.saveMetadata(ctx, bucket, object, meta, actualSize, rs, rsFileName, incHitsOnly)
}

// saveMetadata saves object metadata to disk cache; the caller is expected to hold the lock
// incHitsOnly is true if metadata update is incrementing only the hit counter
func (c *diskCache) saveMetadata(ctx context.Context, bucket, object string, meta map[string]string, actualSize int64, rs *HTTPRangeSpec, rsFileName string, incHitsOnly bool) error {
	cachedPath := getCacheSHADir(c.dir, bucket, object)
	metaPath := pathJoin(cachedPath, cacheMetaJSONFile)
	// Create cache directory if needed
	if err := os.MkdirAll(cachedPath, 0777); err != nil {
		return err
	}
	f, err := os.OpenFile(metaPath, os.O_RDWR|os.O_CREATE, 0666)
	if err != nil {
		return err
	}
	defer f.Close()

	m := &cacheMeta{
		Version: cacheMetaVersion,
		Bucket:  bucket,
		Object:  object,
	}
	if err := jsonLoad(f, m); err != nil && err != io.EOF {
		return err
	}
	if rs != nil {
		// rsFileName gets set by putRange. Check for blank values here
		// coming from other code paths that set rs only (e.g. initial creation or hit increment).
		if rsFileName != "" {
			if m.Ranges == nil {
				m.Ranges = make(map[string]string)
			}
			m.Ranges[rs.String(actualSize)] = rsFileName
		}
	}
	if rs == nil && !incHitsOnly {
		// necessary cleanup: drop the range files once the entire object is cached.
		if _, err := os.Stat(pathJoin(cachedPath, cacheDataFile)); err == nil {
			for _, f := range m.Ranges {
				removeAll(pathJoin(cachedPath, f))
			}
			m.Ranges = nil
		}
	}
	m.Stat.Size = actualSize
	if !incHitsOnly {
		// reset meta
		m.Meta = meta
	} else {
		if m.Meta == nil {
			m.Meta = make(map[string]string)
		}
		if etag, ok := meta["etag"]; ok {
			m.Meta["etag"] = etag
		}
	}
	m.Hits++ // increment hits

	m.Checksum = CacheChecksumInfoV1{Algorithm: HighwayHash256S.String(), Blocksize: cacheBlkSize}
	return jsonSave(f, m)
}

func getCacheSHADir(dir, bucket, object string) string {
	return pathJoin(dir, getSHA256Hash([]byte(pathJoin(bucket, object))))
}
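
// For example (illustrative):
//
//	getCacheSHADir("/mnt/cache1", "photos", "2021/a.jpg")
//	// == "/mnt/cache1/" + getSHA256Hash([]byte("photos/2021/a.jpg"))
//
// so every bucket/object pair maps to a single flat directory under the
// cache drive.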

// Cache data to disk with bitrot checksum added for each block of 1MB
func (c *diskCache) bitrotWriteToCache(cachePath, fileName string, reader io.Reader, size uint64) (int64, string, error) {
	if err := os.MkdirAll(cachePath, 0777); err != nil {
		return 0, "", err
	}
	filePath := pathJoin(cachePath, fileName)

	if filePath == "" || reader == nil {
		return 0, "", errInvalidArgument
	}

	if err := checkPathLength(filePath); err != nil {
		return 0, "", err
	}
	f, err := os.Create(filePath)
	if err != nil {
		return 0, "", osErrToFileErr(err)
	}
	defer f.Close()

	var bytesWritten int64

	h := HighwayHash256S.New()

	bufp := c.pool.Get().(*[]byte)
	defer c.pool.Put(bufp)
	md5Hash := md5.New()
	var n, n2 int
	for {
		n, err = io.ReadFull(reader, *bufp)
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			return 0, "", err
		}
		eof := err == io.EOF || err == io.ErrUnexpectedEOF
		if n == 0 && size != 0 {
			// Reached EOF, nothing more to be done.
			break
		}
		h.Reset()
		if _, err = h.Write((*bufp)[:n]); err != nil {
			return 0, "", err
		}
		hashBytes := h.Sum(nil)
		// compute md5Hash of original data stream if writeback commit to cache
		if c.commitWriteback {
			if _, err = md5Hash.Write((*bufp)[:n]); err != nil {
				return 0, "", err
			}
		}
		if _, err = f.Write(hashBytes); err != nil {
			return 0, "", err
		}
		if n2, err = f.Write((*bufp)[:n]); err != nil {
			return 0, "", err
		}
		bytesWritten += int64(n2)
		if eof {
			break
		}
	}

	return bytesWritten, base64.StdEncoding.EncodeToString(md5Hash.Sum(nil)), nil
}
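
// On disk the cache file produced above interleaves checksums with data:
// every block is stored as a 32-byte HighwayHash256S digest followed by up
// to cacheBlkSize bytes of payload, repeated until EOF:
//
//	[hash(block 0)][block 0][hash(block 1)][block 1]...
//
// bitrotReadFromCache below depends on exactly this layout to seek to a
// block boundary and verify each block before serving it.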

func newCacheEncryptReader(content io.Reader, bucket, object string, metadata map[string]string) (r io.Reader, err error) {
	objectEncryptionKey, err := newCacheEncryptMetadata(bucket, object, metadata)
	if err != nil {
		return nil, err
	}

	reader, err := sio.EncryptReader(content, sio.Config{Key: objectEncryptionKey[:], MinVersion: sio.Version20, CipherSuites: fips.CipherSuitesDARE()})
	if err != nil {
		return nil, crypto.ErrInvalidCustomerKey
	}
	return reader, nil
}

func newCacheEncryptMetadata(bucket, object string, metadata map[string]string) ([]byte, error) {
	var sealedKey crypto.SealedKey
	if globalCacheKMS == nil {
		return nil, errKMSNotConfigured
	}
	key, err := globalCacheKMS.GenerateKey("", kms.Context{bucket: pathJoin(bucket, object)})
	if err != nil {
		return nil, err
	}

	objectKey := crypto.GenerateKey(key.Plaintext, rand.Reader)
	sealedKey = objectKey.Seal(key.Plaintext, crypto.GenerateIV(rand.Reader), crypto.S3.String(), bucket, object)
	crypto.S3.CreateMetadata(metadata, key.KeyID, key.Ciphertext, sealedKey)

	if etag, ok := metadata["etag"]; ok {
		metadata["etag"] = hex.EncodeToString(objectKey.SealETag([]byte(etag)))
	}
	metadata[SSECacheEncrypted] = ""
	return objectKey[:], nil
}
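
// Key hierarchy used above: globalCacheKMS generates a data key for the
// bucket/object context, a random object encryption key is derived and
// sealed under that data key, and the sealed key together with the KMS key
// ID and ciphertext is persisted into the entry's metadata via
// crypto.S3.CreateMetadata. Only the plaintext object key is returned, to
// be handed to sio for DARE encryption of the cached bytes.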

// Caches the object to disk
func (c *diskCache) Put(ctx context.Context, bucket, object string, data io.Reader, size int64, rs *HTTPRangeSpec, opts ObjectOptions, incHitsOnly bool) (oi ObjectInfo, err error) {
	if !c.diskSpaceAvailable(size) {
		io.Copy(ioutil.Discard, data)
		return oi, errDiskFull
	}
	cachePath := getCacheSHADir(c.dir, bucket, object)
	cLock := c.NewNSLockFn(cachePath)
	ctx, err = cLock.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return oi, err
	}
	defer cLock.Unlock()

	meta, _, numHits, err := c.statCache(ctx, cachePath)
	// Case where the object is not yet cached
	if osIsNotExist(err) && c.after >= 1 {
		return oi, c.saveMetadata(ctx, bucket, object, opts.UserDefined, size, nil, "", false)
	}
	// Case where the object already has a cache metadata entry but is not yet cached
	if err == nil && numHits < c.after {
		cETag := extractETag(meta.Meta)
		bETag := extractETag(opts.UserDefined)
		if cETag == bETag {
			return oi, c.saveMetadata(ctx, bucket, object, opts.UserDefined, size, nil, "", false)
		}
		incHitsOnly = true
	}

	if rs != nil {
		return oi, c.putRange(ctx, bucket, object, data, size, rs, opts)
	}
	if !c.diskSpaceAvailable(size) {
		return oi, errDiskFull
	}
	if err := os.MkdirAll(cachePath, 0777); err != nil {
		return oi, err
	}
	var metadata = cloneMSS(opts.UserDefined)
	var reader = data
	var actualSize = uint64(size)
	if globalCacheKMS != nil {
		reader, err = newCacheEncryptReader(data, bucket, object, metadata)
		if err != nil {
			return oi, err
		}
		actualSize, _ = sio.EncryptedSize(uint64(size))
	}
	n, md5sum, err := c.bitrotWriteToCache(cachePath, cacheDataFile, reader, actualSize)
	if IsErr(err, baseErrs...) {
		// take the cache drive offline
		c.setOffline()
	}
	if err != nil {
		removeAll(cachePath)
		return oi, err
	}

	if actualSize != uint64(n) {
		removeAll(cachePath)
		return oi, IncompleteBody{Bucket: bucket, Object: object}
	}
	if c.commitWriteback {
		metadata["content-md5"] = md5sum
		if md5bytes, err := base64.StdEncoding.DecodeString(md5sum); err == nil {
			metadata["etag"] = hex.EncodeToString(md5bytes)
		}
		metadata[writeBackStatusHeader] = CommitPending.String()
	}
	return ObjectInfo{
			Bucket:      bucket,
			Name:        object,
			ETag:        metadata["etag"],
			Size:        n,
			UserDefined: metadata,
		},
		c.saveMetadata(ctx, bucket, object, metadata, n, nil, "", incHitsOnly)
}

// Caches the range to disk
func (c *diskCache) putRange(ctx context.Context, bucket, object string, data io.Reader, size int64, rs *HTTPRangeSpec, opts ObjectOptions) error {
	rlen, err := rs.GetLength(size)
	if err != nil {
		return err
	}
	if !c.diskSpaceAvailable(rlen) {
		return errDiskFull
	}
	cachePath := getCacheSHADir(c.dir, bucket, object)
	if err := os.MkdirAll(cachePath, 0777); err != nil {
		return err
	}
	var metadata = cloneMSS(opts.UserDefined)
	var reader = data
	var actualSize = uint64(rlen)
	// objSize is the actual size of object (with encryption overhead if any)
	var objSize = uint64(size)
	if globalCacheKMS != nil {
		reader, err = newCacheEncryptReader(data, bucket, object, metadata)
		if err != nil {
			return err
		}
		actualSize, _ = sio.EncryptedSize(uint64(rlen))
		objSize, _ = sio.EncryptedSize(uint64(size))
	}
	cacheFile := MustGetUUID()
	n, _, err := c.bitrotWriteToCache(cachePath, cacheFile, reader, actualSize)
	if IsErr(err, baseErrs...) {
		// take the cache drive offline
		c.setOffline()
	}
	if err != nil {
		removeAll(cachePath)
		return err
	}
	if actualSize != uint64(n) {
		removeAll(cachePath)
		return IncompleteBody{Bucket: bucket, Object: object}
	}
	return c.saveMetadata(ctx, bucket, object, metadata, int64(objSize), rs, cacheFile, false)
}
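
// Each cached range lives in its own UUID-named file next to cache.json;
// saveMetadata records it in cacheMeta.Ranges keyed by the canonical range
// string (rs.String(objSize)), which is how statRange locates it later.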

// checks streaming bitrot checksum of cached object before returning data
func (c *diskCache) bitrotReadFromCache(ctx context.Context, filePath string, offset, length int64, writer io.Writer) error {
	h := HighwayHash256S.New()

	checksumHash := make([]byte, h.Size())

	startBlock := offset / cacheBlkSize
	endBlock := (offset + length) / cacheBlkSize

	// get block start offset
	var blockStartOffset int64
	if startBlock > 0 {
		blockStartOffset = (cacheBlkSize + int64(h.Size())) * startBlock
	}

	tillLength := (cacheBlkSize + int64(h.Size())) * (endBlock - startBlock + 1)

	// Start offset cannot be negative.
	if offset < 0 {
		logger.LogIf(ctx, errUnexpected)
		return errUnexpected
	}

	// Writer cannot be nil.
	if writer == nil {
		logger.LogIf(ctx, errUnexpected)
		return errUnexpected
	}
	var blockOffset, blockLength int64
	rc, err := readCacheFileStream(filePath, blockStartOffset, tillLength)
	if err != nil {
		return err
	}
	defer rc.Close() // close the stream so the file descriptor is not leaked.
	bufp := c.pool.Get().(*[]byte)
	defer c.pool.Put(bufp)

	for block := startBlock; block <= endBlock; block++ {
		switch {
		case startBlock == endBlock:
			blockOffset = offset % cacheBlkSize
			blockLength = length
		case block == startBlock:
			blockOffset = offset % cacheBlkSize
			blockLength = cacheBlkSize - blockOffset
		case block == endBlock:
			blockOffset = 0
			blockLength = (offset + length) % cacheBlkSize
		default:
			blockOffset = 0
			blockLength = cacheBlkSize
		}
		if blockLength == 0 {
			break
		}
		if _, err := io.ReadFull(rc, checksumHash); err != nil {
			return err
		}
		h.Reset()
		n, err := io.ReadFull(rc, *bufp)
		if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
			logger.LogIf(ctx, err)
			return err
		}
		eof := err == io.EOF || err == io.ErrUnexpectedEOF
		if n == 0 && length != 0 {
			// Reached EOF, nothing more to be done.
			break
		}

		if _, e := h.Write((*bufp)[:n]); e != nil {
			return e
		}
		hashBytes := h.Sum(nil)

		if !bytes.Equal(hashBytes, checksumHash) {
			err = fmt.Errorf("hashes do not match expected %s, got %s",
				hex.EncodeToString(checksumHash), hex.EncodeToString(hashBytes))
			logger.LogIf(GlobalContext, err)
			return err
		}

		if _, err := io.Copy(writer, bytes.NewReader((*bufp)[blockOffset:blockOffset+blockLength])); err != nil {
			if err != io.ErrClosedPipe {
				logger.LogIf(ctx, err)
				return err
			}
			eof = true
		}
		if eof {
			break
		}
	}

	return nil
}
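
// Worked example of the block arithmetic above (illustrative, assuming
// cacheBlkSize = 1 MiB and the 32-byte HighwayHash256S digest): a read at
// offset = 1.5 MiB with length = 1 MiB spans blocks 1 and 2, so the stream
// opens at blockStartOffset = (1 MiB + 32) * 1 and reads tillLength =
// (1 MiB + 32) * 2 bytes; block 1 is copied from blockOffset = 0.5 MiB for
// 0.5 MiB, and block 2 contributes the remaining 0.5 MiB.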

// Get returns ObjectInfo and reader for object from disk cache
func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, opts ObjectOptions) (gr *GetObjectReader, numHits int, err error) {
	cacheObjPath := getCacheSHADir(c.dir, bucket, object)
	cLock := c.NewNSLockFn(cacheObjPath)
	ctx, err = cLock.GetRLock(ctx, globalOperationTimeout)
	if err != nil {
		return nil, numHits, err
	}

	defer cLock.RUnlock()
	var objInfo ObjectInfo
	var rngInfo RangeInfo
	if objInfo, rngInfo, numHits, err = c.statRange(ctx, bucket, object, rs); err != nil {
		return nil, numHits, toObjectErr(err, bucket, object)
	}
	cacheFile := cacheDataFile
	objSize := objInfo.Size
	if !rngInfo.Empty() {
		// for cached ranges, need to pass actual range file size to GetObjectReader
		// and clear out range spec
		cacheFile = rngInfo.File
		objInfo.Size = rngInfo.Size
		rs = nil
	}
	var nsUnlocker = func() {}
	// For a directory, we need to send a reader that returns no bytes.
	if HasSuffix(object, SlashSeparator) {
		// The lock taken above is released when
		// objReader.Close() is called by the caller.
		gr, gerr := NewGetObjectReaderFromReader(bytes.NewBuffer(nil), objInfo, opts, nsUnlocker)
		return gr, numHits, gerr
	}

	fn, off, length, nErr := NewGetObjectReader(rs, objInfo, opts, nsUnlocker)
	if nErr != nil {
		return nil, numHits, nErr
	}
	filePath := pathJoin(cacheObjPath, cacheFile)
	pr, pw := io.Pipe()
	go func() {
		err := c.bitrotReadFromCache(ctx, filePath, off, length, pw)
		if err != nil {
			removeAll(cacheObjPath)
		}
		pw.CloseWithError(err)
	}()
	// Cleanup function to cause the goroutine above to exit, in
	// case of incomplete read.
	pipeCloser := func() { pr.Close() }

	gr, gerr := fn(pr, h, opts.CheckPrecondFn, pipeCloser)
	if gerr != nil {
		return gr, numHits, gerr
	}
	if globalCacheKMS != nil {
		// clean up internal SSE cache metadata
		delete(gr.ObjInfo.UserDefined, xhttp.AmzServerSideEncryption)
	}
	if !rngInfo.Empty() {
		// overlay Size with actual object size and not the range size
		gr.ObjInfo.Size = objSize
	}
	return gr, numHits, nil
}

// Deletes the cached object at the given cache path
func (c *diskCache) delete(ctx context.Context, cacheObjPath string) (err error) {
	cLock := c.NewNSLockFn(cacheObjPath)
	_, err = cLock.GetLock(ctx, globalOperationTimeout)
	if err != nil {
		return err
	}
	defer cLock.Unlock()
	return removeAll(cacheObjPath)
}

// Deletes the cached object
func (c *diskCache) Delete(ctx context.Context, bucket, object string) (err error) {
	cacheObjPath := getCacheSHADir(c.dir, bucket, object)
	return c.delete(ctx, cacheObjPath)
}

// convenience function to check if object is cached on this diskCache
func (c *diskCache) Exists(ctx context.Context, bucket, object string) bool {
	if _, err := os.Stat(getCacheSHADir(c.dir, bucket, object)); err != nil {
		return false
	}
	return true
}

// queues writeback upload failures on server startup
func (c *diskCache) scanCacheWritebackFailures(ctx context.Context) {
	defer close(c.retryWritebackCh)
	filterFn := func(name string, typ os.FileMode) error {
		if name == minioMetaBucket {
			// Proceed to next file.
			return nil
		}
		cacheDir := pathJoin(c.dir, name)
		meta, _, _, err := c.statCachedMeta(ctx, cacheDir)
		if err != nil {
			return nil
		}

		objInfo := meta.ToObjectInfo("", "")
		status, ok := objInfo.UserDefined[writeBackStatusHeader]
		if !ok || status == CommitComplete.String() {
			return nil
		}
		select {
		case c.retryWritebackCh <- objInfo:
		default:
		}

		return nil
	}

	if err := readDirFn(c.dir, filterFn); err != nil {
		logger.LogIf(ctx, err)
		return
	}
}