github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/xl-storage.go (about)

     1  // Copyright (c) 2015-2023 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"bytes"
    22  	"context"
    23  	"crypto/rand"
    24  	"encoding/binary"
    25  	"errors"
    26  	"fmt"
    27  	"io"
    28  	"net/url"
    29  	"os"
    30  	pathutil "path"
    31  	"path/filepath"
    32  	"runtime"
    33  	"strconv"
    34  	"strings"
    35  	"sync"
    36  	"sync/atomic"
    37  	"syscall"
    38  	"time"
    39  
    40  	"github.com/dustin/go-humanize"
    41  	"github.com/google/uuid"
    42  	jsoniter "github.com/json-iterator/go"
    43  	"github.com/klauspost/filepathx"
    44  	"github.com/minio/madmin-go/v3"
    45  	"github.com/minio/minio/internal/bucket/lifecycle"
    46  	"github.com/minio/minio/internal/cachevalue"
    47  	"github.com/minio/minio/internal/config/storageclass"
    48  	"github.com/minio/minio/internal/disk"
    49  	xioutil "github.com/minio/minio/internal/ioutil"
    50  	"github.com/minio/minio/internal/logger"
    51  	"github.com/pkg/xattr"
    52  	"github.com/zeebo/xxh3"
    53  )
    54  
const (
	// nullVersionID is the literal "null" version ID string.
	nullVersionID = "null"

	// Small file threshold below which data accompanies metadata from storage layer.
	smallFileThreshold = 128 * humanize.KiByte // Optimized for NVMe/SSDs

	// For hard drives it is possible to set this to a lower value to avoid any
	// spike in latency. But currently we are simply keeping it optimal for SSDs.

	// bigFileThreshold is the point where we add readahead to put operations.
	bigFileThreshold = 128 * humanize.MiByte

	// xlStorageFormatFile is the name of the XL metadata file that carries
	// per object metadata.
	xlStorageFormatFile = "xl.meta"
)
    70  
// alignedBuf is a block of xioutil.DirectioAlignSize bytes obtained from
// disk.AlignedBlock. checkODirectDiskSupport writes it to a temporary file
// when probing a drive for O_DIRECT support.
var alignedBuf []byte

// init allocates alignedBuf and fills it with random bytes.
func init() {
	alignedBuf = disk.AlignedBlock(xioutil.DirectioAlignSize)
	_, _ = rand.Read(alignedBuf) // result and error are deliberately discarded
}
    77  
    78  // isValidVolname verifies a volname name in accordance with object
    79  // layer requirements.
    80  func isValidVolname(volname string) bool {
    81  	if len(volname) < 3 {
    82  		return false
    83  	}
    84  
    85  	if runtime.GOOS == "windows" {
    86  		// Volname shouldn't have reserved characters in Windows.
    87  		return !strings.ContainsAny(volname, `\:*?\"<>|`)
    88  	}
    89  
    90  	return true
    91  }
    92  
// xlStorage - implements StorageAPI interface.
type xlStorage struct {
	// Counter of NSScanner calls currently in progress on this disk;
	// incremented and decremented atomically by NSScanner.
	scanning int32

	// drivePath is the drive path; newXLStorage sets it from getValidPath
	// (or leaves the raw endpoint path when validation fails).
	drivePath string
	// endpoint is the endpoint this storage was created for.
	endpoint Endpoint

	// globalSync is initialized from globalFSOSync.
	globalSync bool
	oDirect    bool // indicates if this disk supports ODirect

	// diskID is the disk ID read from format.json (format.Erasure.This).
	diskID string

	// Indexes, will be -1 until assigned a set.
	poolIndex, setIndex, diskIndex int

	// Cached state about format.json, refreshed by GetDiskID.
	formatFileInfo  os.FileInfo // file info of format.json as of the last read
	formatFile      string      // full path of format.json on this drive
	formatLegacy    bool        // true when the distribution algo is formatErasureVersionV2DistributionAlgoV1
	formatLastCheck time.Time   // last time format.json was verified

	// diskInfoCache caches the DiskInfo computed by the loader installed in newXLStorage.
	diskInfoCache *cachevalue.Cache[DiskInfo]
	// Embedded lock; taken by GetDiskID and SetFormatData around the cached format fields.
	sync.RWMutex
	// formatData holds the raw contents of format.json.
	formatData []byte

	// nrRequests, major and minor are copied from disk.GetInfo in newXLStorage.
	nrRequests   uint64
	major, minor uint32

	// immediatePurge is a buffered channel of strings created in newXLStorage.
	// NOTE(review): presumably paths queued for immediate trash removal and
	// drained by cleanupTrashImmediateCallers — confirm against that function.
	immediatePurge chan string

	// rotational is true when the drive reports itself as rotational or
	// does not report the property at all.
	rotational bool
	// mutex to prevent concurrent read operations overloading walks.
	// Both mutexes are only allocated for rotational drives.
	walkMu     *sync.Mutex
	walkReadMu *sync.Mutex
}
   128  
   129  // checkPathLength - returns error if given path name length more than 255
   130  func checkPathLength(pathName string) error {
   131  	// Apple OS X path length is limited to 1016
   132  	if runtime.GOOS == "darwin" && len(pathName) > 1016 {
   133  		return errFileNameTooLong
   134  	}
   135  
   136  	// Disallow more than 1024 characters on windows, there
   137  	// are no known name_max limits on Windows.
   138  	if runtime.GOOS == "windows" && len(pathName) > 1024 {
   139  		return errFileNameTooLong
   140  	}
   141  
   142  	// On Unix we reject paths if they are just '.', '..' or '/'
   143  	if pathName == "." || pathName == ".." || pathName == slashSeparator {
   144  		return errFileAccessDenied
   145  	}
   146  
   147  	// Check each path segment length is > 255 on all Unix
   148  	// platforms, look for this value as NAME_MAX in
   149  	// /usr/include/linux/limits.h
   150  	var count int64
   151  	for _, p := range pathName {
   152  		switch p {
   153  		case '/':
   154  			count = 0 // Reset
   155  		case '\\':
   156  			if runtime.GOOS == globalWindowsOSName {
   157  				count = 0
   158  			}
   159  		default:
   160  			count++
   161  			if count > 255 {
   162  				return errFileNameTooLong
   163  			}
   164  		}
   165  	} // Success.
   166  	return nil
   167  }
   168  
   169  func getValidPath(path string) (string, error) {
   170  	if path == "" {
   171  		return path, errInvalidArgument
   172  	}
   173  
   174  	var err error
   175  	// Disallow relative paths, figure out absolute paths.
   176  	path, err = filepath.Abs(path)
   177  	if err != nil {
   178  		return path, err
   179  	}
   180  
   181  	fi, err := Lstat(path)
   182  	if err != nil && !osIsNotExist(err) {
   183  		return path, err
   184  	}
   185  	if osIsNotExist(err) {
   186  		// Disk not found create it.
   187  		if err = mkdirAll(path, 0o777, ""); err != nil {
   188  			return path, err
   189  		}
   190  	}
   191  	if fi != nil && !fi.IsDir() {
   192  		return path, errDiskNotDir
   193  	}
   194  
   195  	return path, nil
   196  }
   197  
   198  // Initialize a new storage disk.
   199  func newLocalXLStorage(path string) (*xlStorage, error) {
   200  	u := url.URL{Path: path}
   201  	return newXLStorage(Endpoint{
   202  		URL:     &u,
   203  		IsLocal: true,
   204  	}, true)
   205  }
   206  
   207  // Make Erasure backend meta volumes.
   208  func makeFormatErasureMetaVolumes(disk StorageAPI) error {
   209  	if disk == nil {
   210  		return errDiskNotFound
   211  	}
   212  	volumes := []string{
   213  		minioMetaTmpDeletedBucket, // creates .minio.sys/tmp as well as .minio.sys/tmp/.trash
   214  		minioMetaMultipartBucket,  // creates .minio.sys/multipart
   215  		dataUsageBucket,           // creates .minio.sys/buckets
   216  		minioConfigBucket,         // creates .minio.sys/config
   217  	}
   218  	// Attempt to create MinIO internal buckets.
   219  	return disk.MakeVolBulk(context.TODO(), volumes...)
   220  }
   221  
// newXLStorage initializes a new storage disk for the endpoint ep.
//
// It validates the drive path (creating it when missing), reads the device
// information, rejects root drives unless running in CI/CD or single drive
// erasure mode, loads format.json through formatErasureMigrate, creates the
// internal meta volumes, probes O_DIRECT support and installs the loader of
// the DiskInfo cache. When cleanUp is true bgFormatErasureCleanupTmp is
// invoked on the drive path beforehand.
//
// On most failures the partially initialized *xlStorage is returned along
// with the error. On success a goroutine running
// cleanupTrashImmediateCallers is started.
func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
	// Queue size of the immediate purge channel; shrunk to 1 under tests and CI/CD.
	immediatePurgeQueue := 100000
	if globalIsTesting || globalIsCICD {
		immediatePurgeQueue = 1
	}
	s = &xlStorage{
		drivePath:      ep.Path,
		endpoint:       ep,
		globalSync:     globalFSOSync,
		diskInfoCache:  cachevalue.New[DiskInfo](),
		poolIndex:      -1,
		setIndex:       -1,
		diskIndex:      -1,
		immediatePurge: make(chan string, immediatePurgeQueue),
	}

	// Start the trash cleanup goroutine only when initialization succeeded;
	// the named result err is inspected at return time.
	defer func() {
		if err == nil {
			go s.cleanupTrashImmediateCallers(GlobalContext)
		}
	}()

	s.drivePath, err = getValidPath(ep.Path)
	if err != nil {
		// Keep the raw endpoint path so the returned value still identifies the drive.
		s.drivePath = ep.Path
		return s, err
	}

	info, err := disk.GetInfo(s.drivePath, true)
	if err != nil {
		return s, err
	}
	s.major = info.Major
	s.minor = info.Minor

	if !globalIsCICD && !globalIsErasureSD {
		var rootDrive bool
		if globalRootDiskThreshold > 0 {
			// Use MINIO_ROOTDISK_THRESHOLD_SIZE to figure out if
			// this disk is a root disk. treat those disks with
			// size less than or equal to the threshold as rootDrives.
			rootDrive = info.Total <= globalRootDiskThreshold
		} else {
			rootDrive, err = disk.IsRootDisk(s.drivePath, SlashSeparator)
			if err != nil {
				// NOTE(review): this is the only error path returning nil
				// instead of s — confirm callers do not rely on s here.
				return nil, err
			}
		}
		if rootDrive {
			return s, errDriveIsRoot
		}
	}

	// Sanitize before setting it
	if info.NRRequests > 0 {
		s.nrRequests = info.NRRequests
	}

	// We stagger listings only on HDDs.
	// A drive that does not report the property (nil) is treated as rotational.
	if info.Rotational == nil || *info.Rotational {
		s.rotational = true
		s.walkMu = &sync.Mutex{}
		s.walkReadMu = &sync.Mutex{}
	}

	if cleanUp {
		bgFormatErasureCleanupTmp(s.drivePath) // cleanup any old data.
	}

	// A missing format.json is not an error here: the drive is simply unformatted.
	formatData, formatFi, err := formatErasureMigrate(s.drivePath)
	if err != nil && !errors.Is(err, os.ErrNotExist) {
		if os.IsPermission(err) {
			return s, errDiskAccessDenied
		} else if isSysErrIO(err) {
			return s, errFaultyDisk
		}
		return s, err
	}
	s.formatData = formatData
	s.formatFileInfo = formatFi
	s.formatFile = pathJoin(s.drivePath, minioMetaBucket, formatConfigFile)

	// Create all necessary bucket folders if possible.
	if err = makeFormatErasureMetaVolumes(s); err != nil {
		return s, err
	}

	// Populate the cached disk ID and legacy flag when format.json was found.
	if len(s.formatData) > 0 {
		format := &formatErasureV3{}
		json := jsoniter.ConfigCompatibleWithStandardLibrary
		if err = json.Unmarshal(s.formatData, &format); err != nil {
			return s, errCorruptedFormat
		}
		s.diskID = format.Erasure.This
		s.formatLastCheck = time.Now()
		s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
	}

	// Return an error if ODirect is not supported. Single disk will have
	// oDirect off.
	if globalIsErasureSD || !disk.ODirectPlatform {
		s.oDirect = false
	} else if err := s.checkODirectDiskSupport(info.FSType); err == nil {
		s.oDirect = true
	} else {
		return s, err
	}

	// Initialize DiskInfo cache
	s.diskInfoCache.InitOnce(time.Second, cachevalue.Opts{},
		func() (DiskInfo, error) {
			dcinfo := DiskInfo{}
			di, err := getDiskInfo(s.drivePath)
			if err != nil {
				return dcinfo, err
			}
			dcinfo.Major = di.Major
			dcinfo.Minor = di.Minor
			dcinfo.Total = di.Total
			dcinfo.Free = di.Free
			dcinfo.Used = di.Used
			dcinfo.UsedInodes = di.Files - di.Ffree
			dcinfo.FreeInodes = di.Ffree
			dcinfo.FSType = di.FSType
			diskID, err := s.GetDiskID()
			// Healing is 'true' when
			// - if we found an unformatted disk (no 'format.json')
			// - if we found healing tracker 'healing.bin'
			dcinfo.Healing = errors.Is(err, errUnformattedDisk) || (s.Healing() != nil)
			dcinfo.ID = diskID
			// The GetDiskID error, if any, is returned together with the populated info.
			return dcinfo, err
		},
	)

	// Success.
	return s, nil
}
   360  
   361  // getDiskInfo returns given disk information.
   362  func getDiskInfo(drivePath string) (di disk.Info, err error) {
   363  	if err = checkPathLength(drivePath); err == nil {
   364  		di, err = disk.GetInfo(drivePath, false)
   365  	}
   366  	switch {
   367  	case osIsNotExist(err):
   368  		err = errDiskNotFound
   369  	case isSysErrTooLong(err):
   370  		err = errFileNameTooLong
   371  	case isSysErrIO(err):
   372  		err = errFaultyDisk
   373  	}
   374  
   375  	return di, err
   376  }
   377  
// String implements a stringer compatible interface; it returns the drive path.
func (s *xlStorage) String() string {
	return s.drivePath
}

// Hostname returns the host of the endpoint this storage was created for.
func (s *xlStorage) Hostname() string {
	return s.endpoint.Host
}

// Endpoint returns the endpoint this storage was created for.
func (s *xlStorage) Endpoint() Endpoint {
	return s.endpoint
}

// Close is a no-op for xlStorage and always returns nil.
func (*xlStorage) Close() error {
	return nil
}

// IsOnline always returns true for xlStorage.
func (s *xlStorage) IsOnline() bool {
	return true
}

// LastConn always returns the zero time for xlStorage.
func (s *xlStorage) LastConn() time.Time {
	return time.Time{}
}

// IsLocal always returns true for xlStorage.
func (s *xlStorage) IsLocal() bool {
	return true
}
   406  
   407  // Retrieve location indexes.
   408  func (s *xlStorage) GetDiskLoc() (poolIdx, setIdx, diskIdx int) {
   409  	// If unset, see if we can locate it.
   410  	if s.poolIndex < 0 || s.setIndex < 0 || s.diskIndex < 0 {
   411  		return getXLDiskLoc(s.diskID)
   412  	}
   413  	return s.poolIndex, s.setIndex, s.diskIndex
   414  }
   415  
   416  func (s *xlStorage) SetFormatData(b []byte) {
   417  	s.Lock()
   418  	defer s.Unlock()
   419  	s.formatData = b
   420  }
   421  
   422  // Set location indexes.
   423  func (s *xlStorage) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
   424  	s.poolIndex = poolIdx
   425  	s.setIndex = setIdx
   426  	s.diskIndex = diskIdx
   427  }
   428  
   429  func (s *xlStorage) Healing() *healingTracker {
   430  	healingFile := pathJoin(s.drivePath, minioMetaBucket,
   431  		bucketMetaPrefix, healingTrackerFilename)
   432  	b, err := os.ReadFile(healingFile)
   433  	if err != nil {
   434  		return nil
   435  	}
   436  	h := newHealingTracker()
   437  	_, err = h.UnmarshalMsg(b)
   438  	logger.LogIf(GlobalContext, err)
   439  	return h
   440  }
   441  
   442  // checkODirectDiskSupport asks the disk to write some data
   443  // with O_DIRECT support, return an error if any and return
   444  // errUnsupportedDisk if there is no O_DIRECT support
   445  func (s *xlStorage) checkODirectDiskSupport(fsType string) error {
   446  	if !disk.ODirectPlatform {
   447  		return errUnsupportedDisk
   448  	}
   449  
   450  	// We know XFS already supports O_DIRECT no need to check.
   451  	if fsType == "XFS" {
   452  		return nil
   453  	}
   454  
   455  	// For all other FS pay the price of not using our recommended filesystem.
   456  
   457  	// Check if backend is writable and supports O_DIRECT
   458  	uuid := mustGetUUID()
   459  	filePath := pathJoin(s.drivePath, minioMetaTmpDeletedBucket, ".writable-check-"+uuid+".tmp")
   460  
   461  	// Create top level directories if they don't exist.
   462  	// with mode 0o777 mkdir honors system umask.
   463  	mkdirAll(pathutil.Dir(filePath), 0o777, s.drivePath) // don't need to fail here
   464  
   465  	w, err := s.openFileDirect(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
   466  	if err != nil {
   467  		return err
   468  	}
   469  	_, err = w.Write(alignedBuf)
   470  	w.Close()
   471  	if err != nil {
   472  		if isSysErrInvalidArg(err) {
   473  			err = errUnsupportedDisk
   474  		}
   475  	}
   476  	return err
   477  }
   478  
   479  // readsMetadata and returns disk mTime information for xl.meta
   480  func (s *xlStorage) readMetadataWithDMTime(ctx context.Context, itemPath string) ([]byte, time.Time, error) {
   481  	if contextCanceled(ctx) {
   482  		return nil, time.Time{}, ctx.Err()
   483  	}
   484  
   485  	if err := checkPathLength(itemPath); err != nil {
   486  		return nil, time.Time{}, err
   487  	}
   488  
   489  	f, err := OpenFile(itemPath, readMode, 0o666)
   490  	if err != nil {
   491  		return nil, time.Time{}, err
   492  	}
   493  	defer f.Close()
   494  	stat, err := f.Stat()
   495  	if err != nil {
   496  		return nil, time.Time{}, err
   497  	}
   498  	if stat.IsDir() {
   499  		return nil, time.Time{}, &os.PathError{
   500  			Op:   "open",
   501  			Path: itemPath,
   502  			Err:  syscall.EISDIR,
   503  		}
   504  	}
   505  	buf, err := readXLMetaNoData(f, stat.Size())
   506  	if err != nil {
   507  		return nil, stat.ModTime().UTC(), fmt.Errorf("%w -> %s", err, itemPath)
   508  	}
   509  	return buf, stat.ModTime().UTC(), err
   510  }
   511  
   512  func (s *xlStorage) readMetadata(ctx context.Context, itemPath string) ([]byte, error) {
   513  	return xioutil.WithDeadline[[]byte](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) ([]byte, error) {
   514  		buf, _, err := s.readMetadataWithDMTime(ctx, itemPath)
   515  		return buf, err
   516  	})
   517  }
   518  
// NSScanner scans this drive for the bucket named by cache.Info.Name and
// returns the resulting data usage cache.
//
// It loads the bucket's lifecycle, replication and versioning configuration,
// then hands scanDataFolder a per-item callback which reads the object's
// metadata file, applies version and lifecycle actions and accumulates a
// sizeSummary for the object. s.scanning is incremented for the duration of
// the call and updates is closed before returning.
func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode, weSleep func() bool) (dataUsageCache, error) {
	atomic.AddInt32(&s.scanning, 1)
	defer atomic.AddInt32(&s.scanning, -1)

	// err is shared with the deferred metrics callback below, which reports
	// whatever value it holds when the function returns.
	var err error
	stopFn := globalScannerMetrics.log(scannerMetricScanBucketDrive, s.drivePath, cache.Info.Name)
	defer func() {
		res := make(map[string]string)
		if err != nil {
			res["err"] = err.Error()
		}
		stopFn(res)
	}()

	// Updates must be closed before we return.
	defer xioutil.SafeClose(updates)
	var lc *lifecycle.Lifecycle

	// Check if the current bucket has a configured lifecycle policy
	if globalLifecycleSys != nil {
		lc, err = globalLifecycleSys.Get(cache.Info.Name)
		if err == nil && lc.HasActiveRules("") {
			cache.Info.lifeCycle = lc
		}
	}

	// Check if the current bucket has replication configuration
	if rcfg, _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, cache.Info.Name); err == nil {
		if rcfg.HasActiveRules("", true) {
			tgts, err := globalBucketTargetSys.ListBucketTargets(ctx, cache.Info.Name)
			if err == nil {
				cache.Info.replication = replicationConfig{
					Config:  rcfg,
					remotes: tgts,
				}
			}
		}
	}

	// A failure to load the versioning config is ignored; vcfg is nil-checked before use.
	vcfg, _ := globalBucketVersioningSys.Get(cache.Info.Name)

	// return initialized object layer
	objAPI := newObjectLayerFn()
	// object layer not initialized, return.
	if objAPI == nil {
		return cache, errServerNotInitialized
	}

	poolIdx, setIdx, _ := s.GetDiskLoc()

	disks, err := objAPI.GetDisks(poolIdx, setIdx)
	if err != nil {
		return cache, err
	}

	cache.Info.updates = updates

	dataUsageInfo, err := scanDataFolder(ctx, disks, s.drivePath, cache, func(item scannerItem) (sizeSummary, error) {
		// Look for `xl.meta/xl.json' at the leaf.
		if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
			!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
			// if no xl.meta/xl.json found, skip the file.
			return sizeSummary{}, errSkipFile
		}
		// Per-object metrics; res is filled in below and handed to stopFn on return.
		stopFn := globalScannerMetrics.log(scannerMetricScanObject, s.drivePath, pathJoin(item.bucket, item.objectPath()))
		res := make(map[string]string, 8)
		defer func() {
			stopFn(res)
		}()

		doneSz := globalScannerMetrics.timeSize(scannerMetricReadMetadata)
		buf, err := s.readMetadata(ctx, item.Path)
		doneSz(len(buf))
		res["metasize"] = strconv.Itoa(len(buf))
		if err != nil {
			res["err"] = err.Error()
			return sizeSummary{}, errSkipFile
		}

		// Remove filename which is the meta file.
		item.transformMetaDir()

		fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath(), false)
		// buf is handed back to the metadata pool as soon as it has been parsed.
		metaDataPoolPut(buf)
		if err != nil {
			res["err"] = err.Error()
			return sizeSummary{}, errSkipFile
		}

		// Pre-create a stats entry per configured tier; STANDARD and RRS are
		// only added when at least one tier is configured.
		sizeS := sizeSummary{}
		for _, tier := range globalTierConfigMgr.ListTiers() {
			if sizeS.tiers == nil {
				sizeS.tiers = make(map[string]tierStats)
			}
			sizeS.tiers[tier.Name] = tierStats{}
		}
		if sizeS.tiers != nil {
			sizeS.tiers[storageclass.STANDARD] = tierStats{}
			sizeS.tiers[storageclass.RRS] = tierStats{}
		}

		done := globalScannerMetrics.time(scannerMetricApplyAll)
		objInfos, err := item.applyVersionActions(ctx, objAPI, fivs.Versions, globalExpiryState)
		done()

		if err != nil {
			res["err"] = err.Error()
			return sizeSummary{}, errSkipFile
		}

		versioned := vcfg != nil && vcfg.Versioned(item.objectPath())

		var objDeleted bool
		for _, oi := range objInfos {
			done = globalScannerMetrics.time(scannerMetricApplyVersion)
			var sz int64
			objDeleted, sz = item.applyActions(ctx, objAPI, oi, &sizeS)
			done()

			// DeleteAllVersionsAction: The object and all its
			// versions are expired and
			// doesn't contribute toward data usage.
			if objDeleted {
				break
			}
			// Versions whose actual size cannot be determined are left out of the accounting.
			actualSz, err := oi.GetActualSize()
			if err != nil {
				continue
			}

			if oi.DeleteMarker {
				sizeS.deleteMarkers++
			}
			if oi.VersionID != "" && sz == actualSz {
				sizeS.versions++
			}
			sizeS.totalSize += sz

			// Skip tier accounting if object version is a delete-marker or a free-version
			// tracking deleted transitioned objects
			switch {
			case oi.DeleteMarker, oi.TransitionedObject.FreeVersion:
				continue
			}
			tier := oi.StorageClass
			if tier == "" {
				tier = storageclass.STANDARD // no SC means "STANDARD"
			}
			if oi.TransitionedObject.Status == lifecycle.TransitionComplete {
				tier = oi.TransitionedObject.Tier
			}
			// Only tiers that were pre-created above are accounted for.
			if sizeS.tiers != nil {
				if st, ok := sizeS.tiers[tier]; ok {
					sizeS.tiers[tier] = st.add(oi.tierStats())
				}
			}
		}

		// apply tier sweep action on free versions
		for _, freeVersion := range fivs.FreeVersions {
			oi := freeVersion.ToObjectInfo(item.bucket, item.objectPath(), versioned)
			done = globalScannerMetrics.time(scannerMetricTierObjSweep)
			globalExpiryState.enqueueFreeVersion(oi)
			done()
		}

		// These are rather expensive. Skip if nobody listens.
		if globalTrace.NumSubscribers(madmin.TraceScanner) > 0 {
			if len(fivs.FreeVersions) > 0 {
				res["free-versions"] = strconv.Itoa(len(fivs.FreeVersions))
			}

			if sizeS.versions > 0 {
				res["versions"] = strconv.FormatUint(sizeS.versions, 10)
			}
			res["size"] = strconv.FormatInt(sizeS.totalSize, 10)
			for name, tier := range sizeS.tiers {
				res["tier-size-"+name] = strconv.FormatUint(tier.TotalSize, 10)
				res["tier-versions-"+name] = strconv.Itoa(tier.NumVersions)
			}
			if sizeS.failedCount > 0 {
				res["repl-failed"] = fmt.Sprintf("%d versions, %d bytes", sizeS.failedCount, sizeS.failedSize)
			}
			if sizeS.pendingCount > 0 {
				res["repl-pending"] = fmt.Sprintf("%d versions, %d bytes", sizeS.pendingCount, sizeS.pendingSize)
			}
			for tgt, st := range sizeS.replTargetStats {
				res["repl-size-"+tgt] = strconv.FormatInt(st.replicatedSize, 10)
				res["repl-count-"+tgt] = strconv.FormatInt(st.replicatedCount, 10)
				if st.failedCount > 0 {
					res["repl-failed-"+tgt] = fmt.Sprintf("%d versions, %d bytes", st.failedCount, st.failedSize)
				}
				if st.pendingCount > 0 {
					res["repl-pending-"+tgt] = fmt.Sprintf("%d versions, %d bytes", st.pendingCount, st.pendingSize)
				}
			}
		}
		if objDeleted {
			// we return errIgnoreFileContrib to signal this function's
			// callers to skip this object's contribution towards
			// usage.
			return sizeSummary{}, errIgnoreFileContrib
		}
		return sizeS, nil
	}, scanMode, weSleep)
	if err != nil {
		return dataUsageInfo, err
	}

	// Stamp the completed scan with the current time.
	dataUsageInfo.Info.LastUpdate = time.Now()
	return dataUsageInfo, nil
}
   731  
   732  func (s *xlStorage) getDeleteAttribute() uint64 {
   733  	attr := "user.total_deletes"
   734  	buf, err := xattr.LGet(s.formatFile, attr)
   735  	if err != nil {
   736  		// We start off with '0' if we can read the attributes
   737  		return 0
   738  	}
   739  	return binary.LittleEndian.Uint64(buf[:8])
   740  }
   741  
   742  func (s *xlStorage) getWriteAttribute() uint64 {
   743  	attr := "user.total_writes"
   744  	buf, err := xattr.LGet(s.formatFile, attr)
   745  	if err != nil {
   746  		// We start off with '0' if we can read the attributes
   747  		return 0
   748  	}
   749  
   750  	return binary.LittleEndian.Uint64(buf[:8])
   751  }
   752  
   753  func (s *xlStorage) setDeleteAttribute(deleteCount uint64) error {
   754  	attr := "user.total_deletes"
   755  
   756  	data := make([]byte, 8)
   757  	binary.LittleEndian.PutUint64(data, deleteCount)
   758  	return xattr.LSet(s.formatFile, attr, data)
   759  }
   760  
   761  func (s *xlStorage) setWriteAttribute(writeCount uint64) error {
   762  	attr := "user.total_writes"
   763  
   764  	data := make([]byte, 8)
   765  	binary.LittleEndian.PutUint64(data, writeCount)
   766  	return xattr.LSet(s.formatFile, attr, data)
   767  }
   768  
   769  // DiskInfo provides current information about disk space usage,
   770  // total free inodes and underlying filesystem.
   771  func (s *xlStorage) DiskInfo(_ context.Context, _ DiskInfoOptions) (info DiskInfo, err error) {
   772  	info, err = s.diskInfoCache.Get()
   773  	info.NRRequests = s.nrRequests
   774  	info.Rotational = s.rotational
   775  	info.MountPath = s.drivePath
   776  	info.Endpoint = s.endpoint.String()
   777  	info.Scanning = atomic.LoadInt32(&s.scanning) == 1
   778  	return info, err
   779  }
   780  
   781  // getVolDir - will convert incoming volume names to
   782  // corresponding valid volume names on the backend in a platform
   783  // compatible way for all operating systems. If volume is not found
   784  // an error is generated.
   785  func (s *xlStorage) getVolDir(volume string) (string, error) {
   786  	if volume == "" || volume == "." || volume == ".." {
   787  		return "", errVolumeNotFound
   788  	}
   789  	volumeDir := pathJoin(s.drivePath, volume)
   790  	return volumeDir, nil
   791  }
   792  
   793  func (s *xlStorage) checkFormatJSON() (os.FileInfo, error) {
   794  	fi, err := Lstat(s.formatFile)
   795  	if err != nil {
   796  		// If the disk is still not initialized.
   797  		if osIsNotExist(err) {
   798  			if err = Access(s.drivePath); err == nil {
   799  				// Disk is present but missing `format.json`
   800  				return nil, errUnformattedDisk
   801  			}
   802  			if osIsNotExist(err) {
   803  				return nil, errDiskNotFound
   804  			} else if osIsPermission(err) {
   805  				return nil, errDiskAccessDenied
   806  			}
   807  			logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors
   808  			return nil, errCorruptedBackend
   809  		} else if osIsPermission(err) {
   810  			return nil, errDiskAccessDenied
   811  		}
   812  		logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors
   813  		return nil, errCorruptedBackend
   814  	}
   815  	return fi, nil
   816  }
   817  
// GetDiskID - returns the cached disk uuid
//
// The cached ID is returned directly while it is at most one second old.
// Otherwise format.json is stat'ed: if it is still the same file the cached
// ID is kept and only the check time is refreshed, else the file is re-read
// and the cached format fields are replaced.
//
// Errors: errUnformattedDisk, errDiskNotFound, errDiskAccessDenied and
// errCorruptedBackend for stat/read failures, errCorruptedFormat when
// format.json cannot be decoded.
func (s *xlStorage) GetDiskID() (string, error) {
	// Snapshot the cached fields under the read lock.
	s.RLock()
	diskID := s.diskID
	fileInfo := s.formatFileInfo
	lastCheck := s.formatLastCheck

	// check if we have a valid disk ID that is less than 1 seconds old.
	if fileInfo != nil && diskID != "" && time.Since(lastCheck) <= 1*time.Second {
		s.RUnlock()
		return diskID, nil
	}
	s.RUnlock()

	// No lock is held while touching the drive.
	fi, err := s.checkFormatJSON()
	if err != nil {
		return "", err
	}

	if xioutil.SameFile(fi, fileInfo) && diskID != "" {
		s.Lock()
		// If the file has not changed, just return the cached diskID information.
		s.formatLastCheck = time.Now()
		s.Unlock()
		return diskID, nil
	}

	b, err := os.ReadFile(s.formatFile)
	if err != nil {
		// If the disk is still not initialized.
		if osIsNotExist(err) {
			if err = Access(s.drivePath); err == nil {
				// Disk is present but missing `format.json`
				return "", errUnformattedDisk
			}
			if osIsNotExist(err) {
				return "", errDiskNotFound
			} else if osIsPermission(err) {
				return "", errDiskAccessDenied
			}
			logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors
			return "", errCorruptedBackend
		} else if osIsPermission(err) {
			return "", errDiskAccessDenied
		}
		logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors
		return "", errCorruptedBackend
	}

	format := &formatErasureV3{}
	json := jsoniter.ConfigCompatibleWithStandardLibrary
	if err = json.Unmarshal(b, &format); err != nil {
		logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors
		return "", errCorruptedFormat
	}

	// Publish the freshly read format under the write lock.
	s.Lock()
	defer s.Unlock()
	s.formatData = b
	s.diskID = format.Erasure.This
	s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
	s.formatFileInfo = fi
	s.formatLastCheck = time.Now()
	return s.diskID, nil
}
   883  
// SetDiskID is a no-op for xlStorage; the id argument is ignored.
func (s *xlStorage) SetDiskID(id string) {
	// NO-OP for xlStorage as it is handled either by xlStorageDiskIDCheck{} for local disks or
	// storage rest server for remote disks.
}
   889  
   890  func (s *xlStorage) MakeVolBulk(ctx context.Context, volumes ...string) error {
   891  	for _, volume := range volumes {
   892  		err := s.MakeVol(ctx, volume)
   893  		if err != nil && !errors.Is(err, errVolumeExists) {
   894  			return err
   895  		}
   896  		diskHealthCheckOK(ctx, err)
   897  	}
   898  	return nil
   899  }
   900  
   901  // Make a volume entry.
   902  func (s *xlStorage) MakeVol(ctx context.Context, volume string) error {
   903  	if !isValidVolname(volume) {
   904  		return errInvalidArgument
   905  	}
   906  
   907  	volumeDir, err := s.getVolDir(volume)
   908  	if err != nil {
   909  		return err
   910  	}
   911  
   912  	if err = Access(volumeDir); err != nil {
   913  		// Volume does not exist we proceed to create.
   914  		if osIsNotExist(err) {
   915  			// Make a volume entry, with mode 0777 mkdir honors system umask.
   916  			err = mkdirAll(volumeDir, 0o777, s.drivePath)
   917  		}
   918  		if osIsPermission(err) {
   919  			return errDiskAccessDenied
   920  		} else if isSysErrIO(err) {
   921  			return errFaultyDisk
   922  		}
   923  		return err
   924  	}
   925  
   926  	// Stat succeeds we return errVolumeExists.
   927  	return errVolumeExists
   928  }
   929  
   930  // ListVols - list volumes.
   931  func (s *xlStorage) ListVols(ctx context.Context) (volsInfo []VolInfo, err error) {
   932  	return listVols(ctx, s.drivePath)
   933  }
   934  
   935  // List all the volumes from drivePath.
   936  func listVols(ctx context.Context, dirPath string) ([]VolInfo, error) {
   937  	if err := checkPathLength(dirPath); err != nil {
   938  		return nil, err
   939  	}
   940  	entries, err := readDir(dirPath)
   941  	if err != nil {
   942  		if errors.Is(err, errFileAccessDenied) {
   943  			return nil, errDiskAccessDenied
   944  		} else if errors.Is(err, errFileNotFound) {
   945  			return nil, errDiskNotFound
   946  		}
   947  		return nil, err
   948  	}
   949  	volsInfo := make([]VolInfo, 0, len(entries))
   950  	for _, entry := range entries {
   951  		if !HasSuffix(entry, SlashSeparator) || !isValidVolname(pathutil.Clean(entry)) {
   952  			// Skip if entry is neither a directory not a valid volume name.
   953  			continue
   954  		}
   955  		volsInfo = append(volsInfo, VolInfo{
   956  			Name: pathutil.Clean(entry),
   957  		})
   958  	}
   959  	return volsInfo, nil
   960  }
   961  
   962  // StatVol - get volume info.
   963  func (s *xlStorage) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) {
   964  	// Verify if volume is valid and it exists.
   965  	volumeDir, err := s.getVolDir(volume)
   966  	if err != nil {
   967  		return VolInfo{}, err
   968  	}
   969  
   970  	// Stat a volume entry.
   971  	var st os.FileInfo
   972  	st, err = Lstat(volumeDir)
   973  	if err != nil {
   974  		switch {
   975  		case osIsNotExist(err):
   976  			return VolInfo{}, errVolumeNotFound
   977  		case osIsPermission(err):
   978  			return VolInfo{}, errDiskAccessDenied
   979  		case isSysErrIO(err):
   980  			return VolInfo{}, errFaultyDisk
   981  		default:
   982  			return VolInfo{}, err
   983  		}
   984  	}
   985  	// As os.Lstat() doesn't carry other than ModTime(), use ModTime()
   986  	// as CreatedTime.
   987  	createdTime := st.ModTime()
   988  	return VolInfo{
   989  		Name:    volume,
   990  		Created: createdTime,
   991  	}, nil
   992  }
   993  
   994  // DeleteVol - delete a volume.
   995  func (s *xlStorage) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) {
   996  	// Verify if volume is valid and it exists.
   997  	volumeDir, err := s.getVolDir(volume)
   998  	if err != nil {
   999  		return err
  1000  	}
  1001  
  1002  	if forceDelete {
  1003  		err = s.moveToTrash(volumeDir, true, true)
  1004  	} else {
  1005  		err = Remove(volumeDir)
  1006  	}
  1007  
  1008  	if err != nil {
  1009  		switch {
  1010  		case errors.Is(err, errFileNotFound):
  1011  			return errVolumeNotFound
  1012  		case osIsNotExist(err):
  1013  			return errVolumeNotFound
  1014  		case isSysErrNotEmpty(err):
  1015  			return errVolumeNotEmpty
  1016  		case osIsPermission(err):
  1017  			return errDiskAccessDenied
  1018  		case isSysErrIO(err):
  1019  			return errFaultyDisk
  1020  		default:
  1021  			return err
  1022  		}
  1023  	}
  1024  	return nil
  1025  }
  1026  
  1027  // ListDir - return all the entries at the given directory path.
  1028  // If an entry is a directory it will be returned with a trailing SlashSeparator.
  1029  func (s *xlStorage) ListDir(ctx context.Context, origvolume, volume, dirPath string, count int) (entries []string, err error) {
  1030  	if contextCanceled(ctx) {
  1031  		return nil, ctx.Err()
  1032  	}
  1033  
  1034  	if origvolume != "" {
  1035  		if !skipAccessChecks(origvolume) {
  1036  			origvolumeDir, err := s.getVolDir(origvolume)
  1037  			if err != nil {
  1038  				return nil, err
  1039  			}
  1040  			if err = Access(origvolumeDir); err != nil {
  1041  				return nil, convertAccessError(err, errVolumeAccessDenied)
  1042  			}
  1043  		}
  1044  	}
  1045  
  1046  	// Verify if volume is valid and it exists.
  1047  	volumeDir, err := s.getVolDir(volume)
  1048  	if err != nil {
  1049  		return nil, err
  1050  	}
  1051  
  1052  	dirPathAbs := pathJoin(volumeDir, dirPath)
  1053  	if count > 0 {
  1054  		entries, err = readDirN(dirPathAbs, count)
  1055  	} else {
  1056  		entries, err = readDir(dirPathAbs)
  1057  	}
  1058  	if err != nil {
  1059  		if errors.Is(err, errFileNotFound) && !skipAccessChecks(volume) {
  1060  			if ierr := Access(volumeDir); ierr != nil {
  1061  				return nil, convertAccessError(ierr, errVolumeAccessDenied)
  1062  			}
  1063  		}
  1064  		return nil, err
  1065  	}
  1066  
  1067  	return entries, nil
  1068  }
  1069  
// deleteVersions removes the versions described by fis from the object
// at volume/path.
//
// It reads the object's xl.meta (falling back to the legacy xl.json
// when the drive format is legacy), removes each requested version
// from the metadata and moves any associated data directory to trash.
// When versions remain, the updated metadata is written back; when
// none remain, xl.meta itself is deleted.
//
// It returns errVolumeNotFound or errFileNotFound when no metadata
// could be read, and otherwise the first error encountered.
func (s *xlStorage) deleteVersions(ctx context.Context, volume, path string, fis ...FileInfo) error {
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	// Passed through to readAllData for every metadata read below.
	discard := true

	var legacyJSON bool
	buf, _, err := s.readAllData(ctx, volume, volumeDir, pathJoin(volumeDir, path, xlStorageFormatFile), discard)
	if err != nil {
		if !errors.Is(err, errFileNotFound) {
			return err
		}

		// xl.meta is missing; on a legacy-format drive try xl.json.
		s.RLock()
		legacy := s.formatLegacy
		s.RUnlock()
		if legacy {
			buf, _, err = s.readAllData(ctx, volume, volumeDir, pathJoin(volumeDir, path, xlStorageFormatFileV1), discard)
			if err != nil {
				return err
			}
			legacyJSON = true
		}
	}

	if len(buf) == 0 {
		// Nothing was read. Distinguish a missing volume from a
		// missing object, unless access checks are skipped.
		if errors.Is(err, errFileNotFound) && !skipAccessChecks(volume) {
			if aerr := Access(volumeDir); aerr != nil && osIsNotExist(aerr) {
				return errVolumeNotFound
			}
		}
		return errFileNotFound
	}

	if legacyJSON {
		// Delete the meta file, if there are no more versions the
		// top level parent is automatically removed.
		return s.deleteFile(volumeDir, pathJoin(volumeDir, path), true, false)
	}

	var xlMeta xlMetaV2
	if err = xlMeta.LoadOrConvert(buf); err != nil {
		return err
	}

	for _, fi := range fis {
		// Note: err is shadowed inside this loop body.
		dataDir, err := xlMeta.DeleteVersion(fi)
		if err != nil {
			if !fi.Deleted && (err == errFileNotFound || err == errFileVersionNotFound) {
				// Ignore these since they do not exist
				continue
			}
			return err
		}
		if dataDir != "" {
			versionID := fi.VersionID
			if versionID == "" {
				versionID = nullVersionID
			}

			// PR #11758 used DataDir, preserve it
			// for users who might have used master
			// branch
			xlMeta.data.remove(versionID, dataDir)

			// We need to attempt delete "dataDir" on the disk
			// due to a CopyObject() bug where it might have
			// inlined the data incorrectly, to avoid a situation
			// where we potentially leave "DataDir"
			filePath := pathJoin(volumeDir, path, dataDir)
			if err = checkPathLength(filePath); err != nil {
				return err
			}
			// A data directory that is already gone is not an error.
			if err = s.moveToTrash(filePath, true, false); err != nil {
				if err != errFileNotFound {
					return err
				}
			}
		}
	}

	lastVersion := len(xlMeta.versions) == 0
	if !lastVersion {
		// Other versions remain: serialize the updated metadata and
		// write it back in place of the old xl.meta.
		buf, err = xlMeta.AppendTo(metaDataPoolGet())
		defer metaDataPoolPut(buf)
		if err != nil {
			return err
		}

		return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
	}

	// No versions are left, remove xl.meta itself.
	return s.deleteFile(volumeDir, pathJoin(volumeDir, path, xlStorageFormatFile), true, false)
}
  1166  
  1167  // DeleteVersions deletes slice of versions, it can be same object
  1168  // or multiple objects.
  1169  func (s *xlStorage) DeleteVersions(ctx context.Context, volume string, versions []FileInfoVersions, opts DeleteOptions) []error {
  1170  	errs := make([]error, len(versions))
  1171  
  1172  	for i, fiv := range versions {
  1173  		if contextCanceled(ctx) {
  1174  			errs[i] = ctx.Err()
  1175  			continue
  1176  		}
  1177  		w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
  1178  		if err := w.Run(func() error { return s.deleteVersions(ctx, volume, fiv.Name, fiv.Versions...) }); err != nil {
  1179  			errs[i] = err
  1180  		}
  1181  		diskHealthCheckOK(ctx, errs[i])
  1182  	}
  1183  
  1184  	return errs
  1185  }
  1186  
  1187  func (s *xlStorage) cleanupTrashImmediateCallers(ctx context.Context) {
  1188  	for {
  1189  		select {
  1190  		case <-ctx.Done():
  1191  			return
  1192  		case entry := <-s.immediatePurge:
  1193  			removeAll(entry)
  1194  		}
  1195  	}
  1196  }
  1197  
  1198  const almostFilledPercent = 0.05
  1199  
  1200  func (s *xlStorage) diskAlmostFilled() bool {
  1201  	info, err := s.diskInfoCache.Get()
  1202  	if err != nil {
  1203  		return false
  1204  	}
  1205  	if info.Used == 0 || info.UsedInodes == 0 {
  1206  		return false
  1207  	}
  1208  	return (float64(info.Free)/float64(info.Used)) < almostFilledPercent || (float64(info.FreeInodes)/float64(info.UsedInodes)) < almostFilledPercent
  1209  }
  1210  
  1211  func (s *xlStorage) moveToTrash(filePath string, recursive, immediatePurge bool) (err error) {
  1212  	pathUUID := mustGetUUID()
  1213  	targetPath := pathutil.Join(s.drivePath, minioMetaTmpDeletedBucket, pathUUID)
  1214  
  1215  	if recursive {
  1216  		err = renameAll(filePath, targetPath, pathutil.Join(s.drivePath, minioMetaBucket))
  1217  	} else {
  1218  		err = Rename(filePath, targetPath)
  1219  	}
  1220  
  1221  	var targetPath2 string
  1222  	if immediatePurge && HasSuffix(filePath, SlashSeparator) {
  1223  		// With immediate purge also attempt deleting for `__XL_DIR__` folder/directory objects.
  1224  		targetPath2 = pathutil.Join(s.drivePath, minioMetaTmpDeletedBucket, mustGetUUID())
  1225  		renameAll(encodeDirObject(filePath), targetPath2, pathutil.Join(s.drivePath, minioMetaBucket))
  1226  	}
  1227  
  1228  	// ENOSPC is a valid error from rename(); remove instead of rename in that case
  1229  	if errors.Is(err, errDiskFull) || isSysErrNoSpace(err) {
  1230  		if recursive {
  1231  			err = removeAll(filePath)
  1232  		} else {
  1233  			err = Remove(filePath)
  1234  		}
  1235  		return err // Avoid the immediate purge since not needed
  1236  	}
  1237  
  1238  	if err != nil {
  1239  		return err
  1240  	}
  1241  
  1242  	if !immediatePurge && s.diskAlmostFilled() {
  1243  		immediatePurge = true
  1244  	}
  1245  
  1246  	// immediately purge the target
  1247  	if immediatePurge {
  1248  		for _, target := range []string{
  1249  			targetPath,
  1250  			targetPath2,
  1251  		} {
  1252  			if target == "" {
  1253  				continue
  1254  			}
  1255  			select {
  1256  			case s.immediatePurge <- target:
  1257  			default:
  1258  				// Too much back pressure, we will perform the delete
  1259  				// blocking at this point we need to serialize operations.
  1260  				removeAll(target)
  1261  			}
  1262  		}
  1263  	}
  1264  
  1265  	return nil
  1266  }
  1267  
// DeleteVersion - deletes FileInfo metadata for path at `xl.meta`. forceDelMarker
// will force creating a new `xl.meta` to create a new delete marker
//
// A path ending in SlashSeparator is handed to Delete (non-recursive,
// non-immediate). Otherwise xl.meta is read, the version described by
// fi is removed from it and its data directory, if any, is moved to
// trash. If versions remain the metadata is rewritten, else xl.meta is
// deleted. When no metadata can be read the result is
// errFileVersionNotFound if fi.VersionID is set, else errFileNotFound.
func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool, opts DeleteOptions) (err error) {
	if HasSuffix(path, SlashSeparator) {
		return s.Delete(ctx, volume, path, DeleteOptions{
			Recursive: false,
			Immediate: false,
		})
	}

	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	// Validate file path length, before reading.
	filePath := pathJoin(volumeDir, path)
	if err = checkPathLength(filePath); err != nil {
		return err
	}

	var legacyJSON bool
	buf, _, err := s.readAllData(ctx, volume, volumeDir, pathJoin(filePath, xlStorageFormatFile), true)
	if err != nil {
		if !errors.Is(err, errFileNotFound) {
			return err
		}
		metaDataPoolPut(buf) // Never used, return it
		if fi.Deleted && forceDelMarker {
			// Create a new xl.meta with a delete marker in it
			return s.WriteMetadata(ctx, "", volume, path, fi)
		}

		// xl.meta is missing; on a legacy-format drive try xl.json.
		s.RLock()
		legacy := s.formatLegacy
		s.RUnlock()
		if legacy {
			buf, err = s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFileV1))
			if err != nil {
				if errors.Is(err, errFileNotFound) && fi.VersionID != "" {
					return errFileVersionNotFound
				}
				return err
			}
			legacyJSON = true
		}
	}

	if len(buf) == 0 {
		// No metadata was read at all.
		if fi.VersionID != "" {
			return errFileVersionNotFound
		}
		return errFileNotFound
	}

	if legacyJSON {
		// Delete the meta file, if there are no more versions the
		// top level parent is automatically removed.
		return s.deleteFile(volumeDir, pathJoin(volumeDir, path), true, false)
	}

	var xlMeta xlMetaV2
	if err = xlMeta.LoadOrConvert(buf); err != nil {
		return err
	}

	dataDir, err := xlMeta.DeleteVersion(fi)
	if err != nil {
		return err
	}
	if dataDir != "" {
		versionID := fi.VersionID
		if versionID == "" {
			versionID = nullVersionID
		}
		// PR #11758 used DataDir, preserve it
		// for users who might have used master
		// branch
		xlMeta.data.remove(versionID, dataDir)

		// We need to attempt delete "dataDir" on the disk
		// due to a CopyObject() bug where it might have
		// inlined the data incorrectly, to avoid a situation
		// where we potentially leave "DataDir"
		filePath := pathJoin(volumeDir, path, dataDir)
		if err = checkPathLength(filePath); err != nil {
			return err
		}
		// A data directory that is already gone is not an error.
		if err = s.moveToTrash(filePath, true, false); err != nil {
			if err != errFileNotFound {
				return err
			}
		}
	}

	if len(xlMeta.versions) != 0 {
		// xl.meta must still exist for other versions, dataDir is purged.
		buf, err = xlMeta.AppendTo(metaDataPoolGet())
		defer metaDataPoolPut(buf)
		if err != nil {
			return err
		}

		return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
	}

	// No versions are left, remove xl.meta itself.
	return s.deleteFile(volumeDir, pathJoin(volumeDir, path, xlStorageFormatFile), true, false)
}
  1376  
  1377  // Updates only metadata for a given version.
  1378  func (s *xlStorage) UpdateMetadata(ctx context.Context, volume, path string, fi FileInfo, opts UpdateMetadataOpts) (err error) {
  1379  	if len(fi.Metadata) == 0 {
  1380  		return errInvalidArgument
  1381  	}
  1382  
  1383  	buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
  1384  	if err != nil {
  1385  		if err == errFileNotFound {
  1386  			if fi.VersionID != "" {
  1387  				return errFileVersionNotFound
  1388  			}
  1389  		}
  1390  		return err
  1391  	}
  1392  	defer metaDataPoolPut(buf)
  1393  
  1394  	if !isXL2V1Format(buf) {
  1395  		return errFileVersionNotFound
  1396  	}
  1397  
  1398  	var xlMeta xlMetaV2
  1399  	if err = xlMeta.Load(buf); err != nil {
  1400  		return err
  1401  	}
  1402  
  1403  	if err = xlMeta.UpdateObjectVersion(fi); err != nil {
  1404  		return err
  1405  	}
  1406  
  1407  	wbuf, err := xlMeta.AppendTo(metaDataPoolGet())
  1408  	if err != nil {
  1409  		return err
  1410  	}
  1411  	defer metaDataPoolPut(wbuf)
  1412  
  1413  	return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), wbuf, !opts.NoPersistence)
  1414  }
  1415  
// WriteMetadata - writes FileInfo metadata for path at `xl.meta`
//
// For a fresh object (fi.Fresh) a new xl.meta holding only fi is
// written without syncing, after checking that origvolume (when given
// and not exempt from access checks) is accessible. Otherwise the
// existing xl.meta is read, fi is added as a version and the result is
// written back with WriteAll. Existing content that is missing, not in
// XL2V1 format or fails to load is replaced by metadata holding only fi.
func (s *xlStorage) WriteMetadata(ctx context.Context, origvolume, volume, path string, fi FileInfo) (err error) {
	if fi.Fresh {
		if origvolume != "" {
			origvolumeDir, err := s.getVolDir(origvolume)
			if err != nil {
				return err
			}

			if !skipAccessChecks(origvolume) {
				// Stat a volume entry.
				if err = Access(origvolumeDir); err != nil {
					return convertAccessError(err, errVolumeAccessDenied)
				}
			}
		}

		var xlMeta xlMetaV2
		if err := xlMeta.AddVersion(fi); err != nil {
			return err
		}
		buf, err := xlMeta.AppendTo(metaDataPoolGet())
		defer metaDataPoolPut(buf)
		if err != nil {
			return err
		}
		// First writes for special situations do not write to stable storage.
		// this is currently used by
		// - ephemeral objects such as objects created during listObjects() calls
		// - newMultipartUpload() call..
		return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf, false)
	}

	// A missing xl.meta is tolerated here; buf is then handled by the
	// non-XL2V1 branch below.
	buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
	if err != nil && err != errFileNotFound {
		return err
	}
	// This defer captures the buffer that was read; the defers below
	// capture the newly serialized buffer.
	defer metaDataPoolPut(buf)

	var xlMeta xlMetaV2
	if !isXL2V1Format(buf) {
		// This is both legacy and without proper version.
		if err = xlMeta.AddVersion(fi); err != nil {
			return err
		}

		buf, err = xlMeta.AppendTo(metaDataPoolGet())
		defer metaDataPoolPut(buf)
		if err != nil {
			return err
		}
	} else {
		if err = xlMeta.Load(buf); err != nil {
			// Corrupted data, reset and write.
			xlMeta = xlMetaV2{}
		}

		if err = xlMeta.AddVersion(fi); err != nil {
			return err
		}

		buf, err = xlMeta.AppendTo(metaDataPoolGet())
		defer metaDataPoolPut(buf)
		if err != nil {
			return err
		}
	}

	return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
}
  1486  
  1487  func (s *xlStorage) renameLegacyMetadata(volumeDir, path string) (err error) {
  1488  	s.RLock()
  1489  	legacy := s.formatLegacy
  1490  	s.RUnlock()
  1491  	if !legacy {
  1492  		// if its not a legacy backend then this function is
  1493  		// a no-op always returns errFileNotFound
  1494  		return errFileNotFound
  1495  	}
  1496  
  1497  	// Validate file path length, before reading.
  1498  	filePath := pathJoin(volumeDir, path)
  1499  	if err = checkPathLength(filePath); err != nil {
  1500  		return err
  1501  	}
  1502  
  1503  	srcFilePath := pathJoin(filePath, xlStorageFormatFileV1)
  1504  	dstFilePath := pathJoin(filePath, xlStorageFormatFile)
  1505  
  1506  	// Renaming xl.json to xl.meta should be fully synced to disk.
  1507  	defer func() {
  1508  		if err == nil && s.globalSync {
  1509  			// Sync to disk only upon success.
  1510  			globalSync()
  1511  		}
  1512  	}()
  1513  
  1514  	if err = Rename(srcFilePath, dstFilePath); err != nil {
  1515  		switch {
  1516  		case isSysErrNotDir(err):
  1517  			return errFileNotFound
  1518  		case isSysErrPathNotFound(err):
  1519  			return errFileNotFound
  1520  		case isSysErrCrossDevice(err):
  1521  			return fmt.Errorf("%w (%s)->(%s)", errCrossDeviceLink, srcFilePath, dstFilePath)
  1522  		case osIsNotExist(err):
  1523  			return errFileNotFound
  1524  		case osIsExist(err):
  1525  			// This is returned only when destination is a directory and we
  1526  			// are attempting a rename from file to directory.
  1527  			return errIsNotRegular
  1528  		default:
  1529  			return err
  1530  		}
  1531  	}
  1532  	return nil
  1533  }
  1534  
  1535  func (s *xlStorage) readRaw(ctx context.Context, volume, volumeDir, filePath string, readData bool) (buf []byte, dmTime time.Time, err error) {
  1536  	if filePath == "" {
  1537  		return nil, dmTime, errFileNotFound
  1538  	}
  1539  
  1540  	xlPath := pathJoin(filePath, xlStorageFormatFile)
  1541  	if readData {
  1542  		buf, dmTime, err = s.readAllData(ctx, volume, volumeDir, xlPath, false)
  1543  	} else {
  1544  		buf, dmTime, err = s.readMetadataWithDMTime(ctx, xlPath)
  1545  		if err != nil {
  1546  			if osIsNotExist(err) {
  1547  				if !skipAccessChecks(volume) {
  1548  					if aerr := Access(volumeDir); aerr != nil && osIsNotExist(aerr) {
  1549  						return nil, time.Time{}, errVolumeNotFound
  1550  					}
  1551  				}
  1552  			}
  1553  			err = osErrToFileErr(err)
  1554  		}
  1555  	}
  1556  
  1557  	s.RLock()
  1558  	legacy := s.formatLegacy
  1559  	s.RUnlock()
  1560  
  1561  	if err != nil && errors.Is(err, errFileNotFound) && legacy {
  1562  		buf, dmTime, err = s.readAllData(ctx, volume, volumeDir, pathJoin(filePath, xlStorageFormatFileV1), false)
  1563  		if err != nil {
  1564  			return nil, time.Time{}, err
  1565  		}
  1566  	}
  1567  
  1568  	if len(buf) == 0 {
  1569  		if err != nil {
  1570  			return nil, time.Time{}, err
  1571  		}
  1572  		return nil, time.Time{}, errFileNotFound
  1573  	}
  1574  
  1575  	return buf, dmTime, nil
  1576  }
  1577  
  1578  // ReadXL reads from path/xl.meta, does not interpret the data it read. This
  1579  // is a raw call equivalent of ReadVersion().
  1580  func (s *xlStorage) ReadXL(ctx context.Context, volume, path string, readData bool) (RawFileInfo, error) {
  1581  	volumeDir, err := s.getVolDir(volume)
  1582  	if err != nil {
  1583  		return RawFileInfo{}, err
  1584  	}
  1585  
  1586  	// Validate file path length, before reading.
  1587  	filePath := pathJoin(volumeDir, path)
  1588  	if err = checkPathLength(filePath); err != nil {
  1589  		return RawFileInfo{}, err
  1590  	}
  1591  
  1592  	buf, _, err := s.readRaw(ctx, volume, volumeDir, filePath, readData)
  1593  	return RawFileInfo{
  1594  		Buf: buf,
  1595  	}, err
  1596  }
  1597  
// ReadOptions optional inputs for ReadVersion
type ReadOptions struct {
	// ReadData asks ReadVersion to read the object's data as well as
	// its metadata (see ReadVersion for when data is returned).
	ReadData bool
	// Healing, when set, makes ReadVersion skip its check that the
	// version's data directory is present on disk.
	Healing  bool
}
  1603  
// ReadVersion - reads metadata and returns FileInfo at path `xl.meta`
// for small objects this call returns data as well along with
// metadata when opts.ReadData is set.
//
// When origvolume is non-empty and not exempt from access checks, it
// must be accessible. A missing metadata file is reported as
// errFileVersionNotFound when versionID is set and errFileNotFound
// otherwise. Unless opts.Healing is set, a version that has a data
// directory recorded but missing on disk yields errFileCorrupt.
func (s *xlStorage) ReadVersion(ctx context.Context, origvolume, volume, path, versionID string, opts ReadOptions) (fi FileInfo, err error) {
	if origvolume != "" {
		origvolumeDir, err := s.getVolDir(origvolume)
		if err != nil {
			return fi, err
		}

		if !skipAccessChecks(origvolume) {
			// Stat a volume entry.
			if err = Access(origvolumeDir); err != nil {
				return fi, convertAccessError(err, errVolumeAccessDenied)
			}
		}
	}

	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return fi, err
	}

	// Validate file path length, before reading.
	filePath := pathJoin(volumeDir, path)
	if err = checkPathLength(filePath); err != nil {
		return fi, err
	}

	readData := opts.ReadData

	buf, _, err := s.readRaw(ctx, volume, volumeDir, filePath, readData)
	if err != nil {
		if err == errFileNotFound {
			if versionID != "" {
				return fi, errFileVersionNotFound
			}
		}
		return fi, err
	}

	fi, err = getFileInfo(buf, volume, path, versionID, readData, true)
	if err != nil {
		return fi, err
	}

	if len(fi.Data) == 0 {
		// We did not read inline data, so we have no references.
		defer metaDataPoolPut(buf)
	}

	if readData {
		if len(fi.Data) > 0 || fi.Size == 0 {
			if fi.InlineData() {
				// If written with header we are fine.
				return fi, nil
			}
			if fi.Size == 0 || !(fi.VersionID != "" && fi.VersionID != nullVersionID) {
				// Empty objects, and objects without a (non-null)
				// version ID, have no conflicts.
				fi.SetInlineData()
				return fi, nil
			}

			// For overwritten objects without header we might have a
			// conflict with data written later. Check the data path
			// if there is a part with data.
			partPath := fmt.Sprintf("part.%d", fi.Parts[0].Number)
			dataPath := pathJoin(path, fi.DataDir, partPath)
			_, lerr := Lstat(pathJoin(volumeDir, dataPath))
			if lerr != nil {
				// Set the inline header, our inlined data is fine.
				fi.SetInlineData()
				return fi, nil
			}
			// Data exists on disk, remove the version from metadata.
			fi.Data = nil
		}

		// Reading data for small objects when
		// - object has not yet transitioned
		// - object size is at most smallFileThreshold
		// - object has exactly 1 part
		if fi.TransitionStatus == "" &&
			fi.DataDir != "" && fi.Size <= smallFileThreshold &&
			len(fi.Parts) == 1 {
			partPath := fmt.Sprintf("part.%d", fi.Parts[0].Number)
			dataPath := pathJoin(volumeDir, path, fi.DataDir, partPath)
			fi.Data, _, err = s.readAllData(ctx, volume, volumeDir, dataPath, false)
			if err != nil {
				return FileInfo{}, err
			}
		}
	}

	if !skipAccessChecks(volume) && !opts.Healing && fi.TransitionStatus == "" && !fi.InlineData() && len(fi.Data) == 0 && fi.DataDir != "" && fi.DataDir != emptyUUID && fi.VersionPurgeStatus().Empty() {
		// Verify if the dataDir is present or not when the data
		// is not inlined to make sure we return correct errors
		// during HeadObject().

		// Healing must not come here and return error, since healing
		// deals with dataDirs directly, let healing fix things automatically.
		if lerr := Access(pathJoin(volumeDir, path, fi.DataDir)); lerr != nil {
			if os.IsNotExist(lerr) {
				// Data dir is missing we must return errFileCorrupted
				return FileInfo{}, errFileCorrupt
			}
			return FileInfo{}, osErrToFileErr(lerr)
		}
	}

	return fi, nil
}
  1716  
// readAllData reads the whole file at filePath and returns its content
// along with the file's modification time in UTC.
//
// Open failures are mapped to storage errors: a missing file becomes
// errVolumeNotFound when the volume directory itself is missing
// (unless access checks are skipped for volume) and errFileNotFound
// otherwise; directories are reported as errFileNotFound. Files no
// larger than metaDataReadDefault are read into a pooled buffer,
// larger ones into a freshly allocated one.
//
// On the error paths, and when Stat fails, the returned dmTime is the
// zero time.
func (s *xlStorage) readAllData(ctx context.Context, volume, volumeDir string, filePath string, discard bool) (buf []byte, dmTime time.Time, err error) {
	if filePath == "" {
		return nil, dmTime, errFileNotFound
	}

	if contextCanceled(ctx) {
		return nil, time.Time{}, ctx.Err()
	}

	f, err := OpenFile(filePath, readMode, 0o666)
	if err != nil {
		switch {
		case osIsNotExist(err):
			// Check if the object doesn't exist because its bucket
			// is missing in order to return the correct error.
			if !skipAccessChecks(volume) {
				if err = Access(volumeDir); err != nil && osIsNotExist(err) {
					return nil, dmTime, errVolumeNotFound
				}
			}
			return nil, dmTime, errFileNotFound
		case osIsPermission(err):
			return nil, dmTime, errFileAccessDenied
		case isSysErrNotDir(err) || isSysErrIsDir(err):
			return nil, dmTime, errFileNotFound
		case isSysErrHandleInvalid(err):
			// This case is special and needs to be handled for windows.
			return nil, dmTime, errFileNotFound
		case isSysErrIO(err):
			return nil, dmTime, errFaultyDisk
		case isSysErrTooManyFiles(err):
			return nil, dmTime, errTooManyOpenFiles
		case isSysErrInvalidArg(err):
			st, _ := Lstat(filePath)
			if st != nil && st.IsDir() {
				// Linux returns InvalidArg for directory O_DIRECT
				// we need to keep this fallback code to return correct
				// errors upwards.
				return nil, dmTime, errFileNotFound
			}
			return nil, dmTime, errUnsupportedDisk
		}
		return nil, dmTime, err
	}

	if discard {
		// This discard is mostly true for DELETEEs
		// so we need to make sure we do not keep
		// page-cache references after.
		//
		// NOTE(review): deferred calls run last-in-first-out, so this
		// Fdatasync runs after the f.Close() deferred below, i.e. on an
		// already closed file — confirm whether that is intended.
		defer disk.Fdatasync(f)
	}

	defer f.Close()

	// Get size for precise allocation.
	stat, err := f.Stat()
	if err != nil {
		// Size is unknown, fall back to reading until EOF.
		buf, err = io.ReadAll(f)
		return buf, dmTime, osErrToFileErr(err)
	}
	if stat.IsDir() {
		return nil, dmTime, errFileNotFound
	}

	// Read into appropriate buffer.
	sz := stat.Size()
	if sz <= metaDataReadDefault {
		buf = metaDataPoolGet()
		buf = buf[:sz]
	} else {
		buf = make([]byte, sz)
	}

	// Read file...
	_, err = io.ReadFull(f, buf)

	return buf, stat.ModTime().UTC(), osErrToFileErr(err)
}
  1795  
  1796  // ReadAll is a raw call, reads content at any path and returns the buffer.
  1797  func (s *xlStorage) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
  1798  	// Specific optimization to avoid re-read from the drives for `format.json`
  1799  	// in-case the caller is a network operation.
  1800  	if volume == minioMetaBucket && path == formatConfigFile {
  1801  		s.RLock()
  1802  		formatData := make([]byte, len(s.formatData))
  1803  		copy(formatData, s.formatData)
  1804  		s.RUnlock()
  1805  		if len(formatData) > 0 {
  1806  			return formatData, nil
  1807  		}
  1808  	}
  1809  	volumeDir, err := s.getVolDir(volume)
  1810  	if err != nil {
  1811  		return nil, err
  1812  	}
  1813  
  1814  	// Validate file path length, before reading.
  1815  	filePath := pathJoin(volumeDir, path)
  1816  	if err = checkPathLength(filePath); err != nil {
  1817  		return nil, err
  1818  	}
  1819  
  1820  	buf, _, err = s.readAllData(ctx, volume, volumeDir, filePath, false)
  1821  	return buf, err
  1822  }
  1823  
  1824  // ReadFile reads exactly len(buf) bytes into buf. It returns the
  1825  // number of bytes copied. The error is EOF only if no bytes were
  1826  // read. On return, n == len(buf) if and only if err == nil. n == 0
  1827  // for io.EOF.
  1828  //
  1829  // If an EOF happens after reading some but not all the bytes,
  1830  // ReadFile returns ErrUnexpectedEOF.
  1831  //
  1832  // If the BitrotVerifier is not nil or not verified ReadFile
  1833  // tries to verify whether the disk has bitrot.
  1834  //
  1835  // Additionally ReadFile also starts reading from an offset. ReadFile
  1836  // semantics are same as io.ReadFull.
  1837  func (s *xlStorage) ReadFile(ctx context.Context, volume string, path string, offset int64, buffer []byte, verifier *BitrotVerifier) (int64, error) {
  1838  	if offset < 0 {
  1839  		return 0, errInvalidArgument
  1840  	}
  1841  
  1842  	volumeDir, err := s.getVolDir(volume)
  1843  	if err != nil {
  1844  		return 0, err
  1845  	}
  1846  
  1847  	var n int
  1848  
  1849  	if !skipAccessChecks(volume) {
  1850  		// Stat a volume entry.
  1851  		if err = Access(volumeDir); err != nil {
  1852  			return 0, convertAccessError(err, errFileAccessDenied)
  1853  		}
  1854  	}
  1855  
  1856  	// Validate effective path length before reading.
  1857  	filePath := pathJoin(volumeDir, path)
  1858  	if err = checkPathLength(filePath); err != nil {
  1859  		return 0, err
  1860  	}
  1861  
  1862  	// Open the file for reading.
  1863  	file, err := OpenFile(filePath, readMode, 0o666)
  1864  	if err != nil {
  1865  		switch {
  1866  		case osIsNotExist(err):
  1867  			return 0, errFileNotFound
  1868  		case osIsPermission(err):
  1869  			return 0, errFileAccessDenied
  1870  		case isSysErrNotDir(err):
  1871  			return 0, errFileAccessDenied
  1872  		case isSysErrIO(err):
  1873  			return 0, errFaultyDisk
  1874  		case isSysErrTooManyFiles(err):
  1875  			return 0, errTooManyOpenFiles
  1876  		default:
  1877  			return 0, err
  1878  		}
  1879  	}
  1880  
  1881  	// Close the file descriptor.
  1882  	defer file.Close()
  1883  
  1884  	st, err := file.Stat()
  1885  	if err != nil {
  1886  		return 0, err
  1887  	}
  1888  
  1889  	// Verify it is a regular file, otherwise subsequent Seek is
  1890  	// undefined.
  1891  	if !st.Mode().IsRegular() {
  1892  		return 0, errIsNotRegular
  1893  	}
  1894  
  1895  	if verifier == nil {
  1896  		n, err = file.ReadAt(buffer, offset)
  1897  		return int64(n), err
  1898  	}
  1899  
  1900  	h := verifier.algorithm.New()
  1901  	if _, err = io.Copy(h, io.LimitReader(file, offset)); err != nil {
  1902  		return 0, err
  1903  	}
  1904  
  1905  	if n, err = io.ReadFull(file, buffer); err != nil {
  1906  		return int64(n), err
  1907  	}
  1908  
  1909  	if _, err = h.Write(buffer); err != nil {
  1910  		return 0, err
  1911  	}
  1912  
  1913  	if _, err = io.Copy(h, file); err != nil {
  1914  		return 0, err
  1915  	}
  1916  
  1917  	if !bytes.Equal(h.Sum(nil), verifier.sum) {
  1918  		return 0, errFileCorrupt
  1919  	}
  1920  
  1921  	return int64(len(buffer)), nil
  1922  }
  1923  
  1924  func (s *xlStorage) openFileDirect(path string, mode int) (f *os.File, err error) {
  1925  	w, err := OpenFileDirectIO(path, mode, 0o666)
  1926  	if err != nil {
  1927  		switch {
  1928  		case isSysErrInvalidArg(err):
  1929  			return nil, errUnsupportedDisk
  1930  		case osIsPermission(err):
  1931  			return nil, errDiskAccessDenied
  1932  		case isSysErrIO(err):
  1933  			return nil, errFaultyDisk
  1934  		case isSysErrNotDir(err):
  1935  			return nil, errDiskNotDir
  1936  		case os.IsNotExist(err):
  1937  			return nil, errDiskNotFound
  1938  		}
  1939  	}
  1940  
  1941  	return w, nil
  1942  }
  1943  
  1944  func (s *xlStorage) openFileSync(filePath string, mode int) (f *os.File, err error) {
  1945  	return s.openFile(filePath, mode|writeMode)
  1946  }
  1947  
  1948  func (s *xlStorage) openFile(filePath string, mode int) (f *os.File, err error) {
  1949  	// Create top level directories if they don't exist.
  1950  	// with mode 0777 mkdir honors system umask.
  1951  	if err = mkdirAll(pathutil.Dir(filePath), 0o777, s.drivePath); err != nil {
  1952  		return nil, osErrToFileErr(err)
  1953  	}
  1954  
  1955  	w, err := OpenFile(filePath, mode, 0o666)
  1956  	if err != nil {
  1957  		// File path cannot be verified since one of the parents is a file.
  1958  		switch {
  1959  		case isSysErrIsDir(err):
  1960  			return nil, errIsNotRegular
  1961  		case osIsPermission(err):
  1962  			return nil, errFileAccessDenied
  1963  		case isSysErrNotDir(err):
  1964  			return nil, errFileAccessDenied
  1965  		case isSysErrIO(err):
  1966  			return nil, errFaultyDisk
  1967  		case isSysErrTooManyFiles(err):
  1968  			return nil, errTooManyOpenFiles
  1969  		default:
  1970  			return nil, err
  1971  		}
  1972  	}
  1973  
  1974  	return w, nil
  1975  }
  1976  
// sendFileReader bundles an io.Reader and an io.Closer into one value
// that satisfies io.ReadCloser. ReadFileStream uses it to pair a
// length-limited reader with the Close method of the underlying file.
type sendFileReader struct {
	io.Reader
	io.Closer
}
  1981  
  1982  // ReadFileStream - Returns the read stream of the file.
  1983  func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
  1984  	if offset < 0 {
  1985  		return nil, errInvalidArgument
  1986  	}
  1987  
  1988  	volumeDir, err := s.getVolDir(volume)
  1989  	if err != nil {
  1990  		return nil, err
  1991  	}
  1992  
  1993  	// Validate effective path length before reading.
  1994  	filePath := pathJoin(volumeDir, path)
  1995  	if err = checkPathLength(filePath); err != nil {
  1996  		return nil, err
  1997  	}
  1998  
  1999  	file, err := OpenFile(filePath, readMode, 0o666)
  2000  	if err != nil {
  2001  		switch {
  2002  		case osIsNotExist(err):
  2003  			if !skipAccessChecks(volume) {
  2004  				if err = Access(volumeDir); err != nil && osIsNotExist(err) {
  2005  					return nil, errVolumeNotFound
  2006  				}
  2007  			}
  2008  			return nil, errFileNotFound
  2009  		case osIsPermission(err):
  2010  			return nil, errFileAccessDenied
  2011  		case isSysErrNotDir(err):
  2012  			return nil, errFileAccessDenied
  2013  		case isSysErrIO(err):
  2014  			return nil, errFaultyDisk
  2015  		case isSysErrTooManyFiles(err):
  2016  			return nil, errTooManyOpenFiles
  2017  		case isSysErrInvalidArg(err):
  2018  			return nil, errUnsupportedDisk
  2019  		default:
  2020  			return nil, err
  2021  		}
  2022  	}
  2023  
  2024  	if length < 0 {
  2025  		return file, nil
  2026  	}
  2027  
  2028  	st, err := file.Stat()
  2029  	if err != nil {
  2030  		file.Close()
  2031  		return nil, err
  2032  	}
  2033  
  2034  	// Verify it is a regular file, otherwise subsequent Seek is
  2035  	// undefined.
  2036  	if !st.Mode().IsRegular() {
  2037  		file.Close()
  2038  		return nil, errIsNotRegular
  2039  	}
  2040  
  2041  	if st.Size() < offset+length {
  2042  		// Expected size cannot be satisfied for
  2043  		// requested offset and length
  2044  		file.Close()
  2045  		return nil, errFileCorrupt
  2046  	}
  2047  
  2048  	if offset > 0 {
  2049  		if _, err = file.Seek(offset, io.SeekStart); err != nil {
  2050  			file.Close()
  2051  			return nil, err
  2052  		}
  2053  	}
  2054  
  2055  	return &sendFileReader{Reader: io.LimitReader(file, length), Closer: file}, nil
  2056  }
  2057  
  2058  // closeWrapper converts a function to an io.Closer
  2059  type closeWrapper func() error
  2060  
  2061  // Close calls the wrapped function.
  2062  func (c closeWrapper) Close() error {
  2063  	return c()
  2064  }
  2065  
  2066  // CreateFile - creates the file.
  2067  func (s *xlStorage) CreateFile(ctx context.Context, origvolume, volume, path string, fileSize int64, r io.Reader) (err error) {
  2068  	if origvolume != "" {
  2069  		origvolumeDir, err := s.getVolDir(origvolume)
  2070  		if err != nil {
  2071  			return err
  2072  		}
  2073  
  2074  		if !skipAccessChecks(origvolume) {
  2075  			// Stat a volume entry.
  2076  			if err = Access(origvolumeDir); err != nil {
  2077  				return convertAccessError(err, errVolumeAccessDenied)
  2078  			}
  2079  		}
  2080  	}
  2081  
  2082  	volumeDir, err := s.getVolDir(volume)
  2083  	if err != nil {
  2084  		return err
  2085  	}
  2086  
  2087  	filePath := pathJoin(volumeDir, path)
  2088  	if err = checkPathLength(filePath); err != nil {
  2089  		return err
  2090  	}
  2091  
  2092  	parentFilePath := pathutil.Dir(filePath)
  2093  	defer func() {
  2094  		if err != nil {
  2095  			if volume == minioMetaTmpBucket {
  2096  				// only cleanup parent path if the
  2097  				// parent volume name is minioMetaTmpBucket
  2098  				removeAll(parentFilePath)
  2099  			}
  2100  		}
  2101  	}()
  2102  
  2103  	return s.writeAllDirect(ctx, filePath, fileSize, r, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
  2104  }
  2105  
  2106  func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSize int64, r io.Reader, flags int) (err error) {
  2107  	if contextCanceled(ctx) {
  2108  		return ctx.Err()
  2109  	}
  2110  
  2111  	// Create top level directories if they don't exist.
  2112  	// with mode 0777 mkdir honors system umask.
  2113  	parentFilePath := pathutil.Dir(filePath)
  2114  	if err = mkdirAll(parentFilePath, 0o777, s.drivePath); err != nil {
  2115  		return osErrToFileErr(err)
  2116  	}
  2117  
  2118  	odirectEnabled := globalAPIConfig.odirectEnabled() && s.oDirect && fileSize > 0
  2119  
  2120  	var w *os.File
  2121  	if odirectEnabled {
  2122  		w, err = OpenFileDirectIO(filePath, flags, 0o666)
  2123  	} else {
  2124  		w, err = OpenFile(filePath, flags, 0o666)
  2125  	}
  2126  	if err != nil {
  2127  		return osErrToFileErr(err)
  2128  	}
  2129  
  2130  	var bufp *[]byte
  2131  	switch {
  2132  	case fileSize > 0 && fileSize >= xioutil.BlockSizeReallyLarge:
  2133  		// use a larger 4MiB buffer for a really large streams.
  2134  		bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte)
  2135  		defer xioutil.ODirectPoolXLarge.Put(bufp)
  2136  	case fileSize <= xioutil.BlockSizeSmall:
  2137  		bufp = xioutil.ODirectPoolSmall.Get().(*[]byte)
  2138  		defer xioutil.ODirectPoolSmall.Put(bufp)
  2139  	default:
  2140  		bufp = xioutil.ODirectPoolLarge.Get().(*[]byte)
  2141  		defer xioutil.ODirectPoolLarge.Put(bufp)
  2142  	}
  2143  
  2144  	var written int64
  2145  	if odirectEnabled {
  2146  		written, err = xioutil.CopyAligned(diskHealthWriter(ctx, w), r, *bufp, fileSize, w)
  2147  	} else {
  2148  		written, err = io.CopyBuffer(diskHealthWriter(ctx, w), r, *bufp)
  2149  	}
  2150  	if err != nil {
  2151  		w.Close()
  2152  		return err
  2153  	}
  2154  
  2155  	if written < fileSize && fileSize >= 0 {
  2156  		w.Close()
  2157  		return errLessData
  2158  	} else if written > fileSize && fileSize >= 0 {
  2159  		w.Close()
  2160  		return errMoreData
  2161  	}
  2162  
  2163  	// Only interested in flushing the size_t not mtime/atime
  2164  	if err = Fdatasync(w); err != nil {
  2165  		w.Close()
  2166  		return err
  2167  	}
  2168  
  2169  	// Dealing with error returns from close() - 'man 2 close'
  2170  	//
  2171  	// A careful programmer will check the return value of close(), since it is quite possible that
  2172  	// errors on a previous write(2) operation are reported only on the final close() that releases
  2173  	// the open file descriptor.
  2174  	//
  2175  	// Failing to check the return value when closing a file may lead to silent loss of data.
  2176  	// This can especially be observed with NFS and with disk quota.
  2177  	return w.Close()
  2178  }
  2179  
  2180  func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) {
  2181  	if contextCanceled(ctx) {
  2182  		return ctx.Err()
  2183  	}
  2184  
  2185  	volumeDir, err := s.getVolDir(volume)
  2186  	if err != nil {
  2187  		return err
  2188  	}
  2189  
  2190  	filePath := pathJoin(volumeDir, path)
  2191  	if err = checkPathLength(filePath); err != nil {
  2192  		return err
  2193  	}
  2194  
  2195  	flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
  2196  
  2197  	var w *os.File
  2198  	if sync {
  2199  		// Perform directIO along with fdatasync for larger xl.meta, mostly when
  2200  		// xl.meta has "inlined data" we prefer writing O_DIRECT and then doing
  2201  		// fdatasync() at the end instead of opening the file with O_DSYNC.
  2202  		//
  2203  		// This is an optimization mainly to ensure faster I/O.
  2204  		if len(b) > xioutil.DirectioAlignSize {
  2205  			r := bytes.NewReader(b)
  2206  			return s.writeAllDirect(ctx, filePath, r.Size(), r, flags)
  2207  		}
  2208  		w, err = s.openFileSync(filePath, flags)
  2209  	} else {
  2210  		w, err = s.openFile(filePath, flags)
  2211  	}
  2212  	if err != nil {
  2213  		return err
  2214  	}
  2215  
  2216  	n, err := w.Write(b)
  2217  	if err != nil {
  2218  		w.Close()
  2219  		return err
  2220  	}
  2221  
  2222  	if n != len(b) {
  2223  		w.Close()
  2224  		return io.ErrShortWrite
  2225  	}
  2226  
  2227  	// Dealing with error returns from close() - 'man 2 close'
  2228  	//
  2229  	// A careful programmer will check the return value of close(), since it is quite possible that
  2230  	// errors on a previous write(2) operation are reported only on the final close() that releases
  2231  	// the open file descriptor.
  2232  	//
  2233  	// Failing to check the return value when closing a file may lead to silent loss of data.
  2234  	// This can especially be observed with NFS and with disk quota.
  2235  	return w.Close()
  2236  }
  2237  
  2238  func (s *xlStorage) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) {
  2239  	return s.writeAll(ctx, volume, path, b, true)
  2240  }
  2241  
  2242  // AppendFile - append a byte array at path, if file doesn't exist at
  2243  // path this call explicitly creates it.
  2244  func (s *xlStorage) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) {
  2245  	volumeDir, err := s.getVolDir(volume)
  2246  	if err != nil {
  2247  		return err
  2248  	}
  2249  
  2250  	if !skipAccessChecks(volume) {
  2251  		// Stat a volume entry.
  2252  		if err = Access(volumeDir); err != nil {
  2253  			return convertAccessError(err, errVolumeAccessDenied)
  2254  		}
  2255  	}
  2256  
  2257  	filePath := pathJoin(volumeDir, path)
  2258  	if err = checkPathLength(filePath); err != nil {
  2259  		return err
  2260  	}
  2261  
  2262  	var w *os.File
  2263  	// Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
  2264  	// AppendFile() is only used by healing code to heal objects written in old format.
  2265  	w, err = s.openFileSync(filePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
  2266  	if err != nil {
  2267  		return err
  2268  	}
  2269  	defer w.Close()
  2270  
  2271  	n, err := w.Write(buf)
  2272  	if err != nil {
  2273  		return err
  2274  	}
  2275  
  2276  	if n != len(buf) {
  2277  		return io.ErrShortWrite
  2278  	}
  2279  
  2280  	return nil
  2281  }
  2282  
  2283  // CheckParts check if path has necessary parts available.
  2284  func (s *xlStorage) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) error {
  2285  	volumeDir, err := s.getVolDir(volume)
  2286  	if err != nil {
  2287  		return err
  2288  	}
  2289  
  2290  	for _, part := range fi.Parts {
  2291  		partPath := pathJoin(path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
  2292  		filePath := pathJoin(volumeDir, partPath)
  2293  		if err = checkPathLength(filePath); err != nil {
  2294  			return err
  2295  		}
  2296  		st, err := Lstat(filePath)
  2297  		if err != nil {
  2298  			if osIsNotExist(err) {
  2299  				if !skipAccessChecks(volume) {
  2300  					// Stat a volume entry.
  2301  					if verr := Access(volumeDir); verr != nil {
  2302  						if osIsNotExist(verr) {
  2303  							return errVolumeNotFound
  2304  						}
  2305  						return verr
  2306  					}
  2307  				}
  2308  			}
  2309  			return osErrToFileErr(err)
  2310  		}
  2311  		if st.Mode().IsDir() {
  2312  			return errFileNotFound
  2313  		}
  2314  		// Check if shard is truncated.
  2315  		if st.Size() < fi.Erasure.ShardFileSize(part.Size) {
  2316  			return errFileCorrupt
  2317  		}
  2318  	}
  2319  
  2320  	return nil
  2321  }
  2322  
  2323  // deleteFile deletes a file or a directory if its empty unless recursive
  2324  // is set to true. If the target is successfully deleted, it will recursively
  2325  // move up the tree, deleting empty parent directories until it finds one
  2326  // with files in it. Returns nil for a non-empty directory even when
  2327  // recursive is set to false.
  2328  func (s *xlStorage) deleteFile(basePath, deletePath string, recursive, immediate bool) error {
  2329  	if basePath == "" || deletePath == "" {
  2330  		return nil
  2331  	}
  2332  
  2333  	bp := pathutil.Clean(basePath) // do not override basepath / or deletePath /
  2334  	dp := pathutil.Clean(deletePath)
  2335  	if !strings.HasPrefix(dp, bp) || dp == bp {
  2336  		return nil
  2337  	}
  2338  
  2339  	var err error
  2340  	if recursive {
  2341  		err = s.moveToTrash(deletePath, true, immediate)
  2342  	} else {
  2343  		err = Remove(deletePath)
  2344  	}
  2345  	if err != nil {
  2346  		switch {
  2347  		case isSysErrNotEmpty(err):
  2348  			// if object is a directory, but if its not empty
  2349  			// return FileNotFound to indicate its an empty prefix.
  2350  			if HasSuffix(deletePath, SlashSeparator) {
  2351  				return errFileNotFound
  2352  			}
  2353  			// if we have .DS_Store only on macOS
  2354  			if runtime.GOOS == globalMacOSName {
  2355  				storeFilePath := pathJoin(deletePath, ".DS_Store")
  2356  				_, err := Stat(storeFilePath)
  2357  				// .DS_Store exists
  2358  				if err == nil {
  2359  					// delete first
  2360  					Remove(storeFilePath)
  2361  					// try again
  2362  					Remove(deletePath)
  2363  				}
  2364  			}
  2365  			// Ignore errors if the directory is not empty. The server relies on
  2366  			// this functionality, and sometimes uses recursion that should not
  2367  			// error on parent directories.
  2368  			return nil
  2369  		case osIsNotExist(err):
  2370  			return nil
  2371  		case errors.Is(err, errFileNotFound):
  2372  			return nil
  2373  		case osIsPermission(err):
  2374  			return errFileAccessDenied
  2375  		case isSysErrIO(err):
  2376  			return errFaultyDisk
  2377  		default:
  2378  			return err
  2379  		}
  2380  	}
  2381  
  2382  	// Delete parent directory obviously not recursively. Errors for
  2383  	// parent directories shouldn't trickle down.
  2384  	s.deleteFile(basePath, pathutil.Dir(pathutil.Clean(deletePath)), false, false)
  2385  
  2386  	return nil
  2387  }
  2388  
  2389  // DeleteFile - delete a file at path.
  2390  func (s *xlStorage) Delete(ctx context.Context, volume string, path string, deleteOpts DeleteOptions) (err error) {
  2391  	volumeDir, err := s.getVolDir(volume)
  2392  	if err != nil {
  2393  		return err
  2394  	}
  2395  
  2396  	if !skipAccessChecks(volume) {
  2397  		// Stat a volume entry.
  2398  		if err = Access(volumeDir); err != nil {
  2399  			return convertAccessError(err, errVolumeAccessDenied)
  2400  		}
  2401  	}
  2402  
  2403  	// Following code is needed so that we retain SlashSeparator suffix if any in
  2404  	// path argument.
  2405  	filePath := pathJoin(volumeDir, path)
  2406  	if err = checkPathLength(filePath); err != nil {
  2407  		return err
  2408  	}
  2409  
  2410  	// Delete file and delete parent directory as well if it's empty.
  2411  	return s.deleteFile(volumeDir, filePath, deleteOpts.Recursive, deleteOpts.Immediate)
  2412  }
  2413  
  2414  func skipAccessChecks(volume string) (ok bool) {
  2415  	for _, prefix := range []string{
  2416  		minioMetaTmpDeletedBucket,
  2417  		minioMetaTmpBucket,
  2418  		minioMetaMultipartBucket,
  2419  		minioMetaBucket,
  2420  	} {
  2421  		if strings.HasPrefix(volume, prefix) {
  2422  			return true
  2423  		}
  2424  	}
  2425  	return ok
  2426  }
  2427  
// RenameData - rename source path to destination path atomically, metadata and data directory.
//
// The steps, in order:
//  1. Resolve and (unless skipped) access-check both volume directories.
//  2. Load any existing xl.meta at the destination; legacy metadata is
//     renamed/converted and legacy part files are moved into
//     legacyDataDir so they are preserved.
//  3. Merge fi into the loaded metadata as a new version, write the
//     merged metadata to the source location, then rename the data
//     directory (if any, and not inlined) and finally the metadata file
//     to the destination.
//  4. Purge the data directory of the version that was replaced and
//     clean up the source directory.
//
// On success sign is the xxh3 hash of the concatenated header signatures
// of all versions in the resulting metadata. On failure sign is 0.
//
// On return, errors other than those listed in ignoredErrs are logged
// unless ctx is canceled, and globalSync() is called when s.globalSync
// is set.
func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
	defer func() {
		// Errors that are expected in normal operation and not logged.
		ignoredErrs := []error{
			errFileNotFound,
			errVolumeNotFound,
			errFileVersionNotFound,
			errDiskNotFound,
			errUnformattedDisk,
			errMaxVersionsExceeded,
			errFileAccessDenied,
		}
		if err != nil && !IsErr(err, ignoredErrs...) && !contextCanceled(ctx) {
			// Only log these errors if context is not yet canceled.
			logger.LogOnceIf(ctx, fmt.Errorf("drive:%s, srcVolume: %s, srcPath: %s, dstVolume: %s:, dstPath: %s - error %v",
				s.drivePath,
				srcVolume, srcPath,
				dstVolume, dstPath,
				err), "xl-storage-rename-data-"+dstVolume)
		}
		if s.globalSync {
			globalSync()
		}
	}()

	srcVolumeDir, err := s.getVolDir(srcVolume)
	if err != nil {
		return 0, err
	}

	dstVolumeDir, err := s.getVolDir(dstVolume)
	if err != nil {
		return 0, err
	}

	if !skipAccessChecks(srcVolume) {
		// Stat a volume entry.
		if err = Access(srcVolumeDir); err != nil {
			return 0, convertAccessError(err, errVolumeAccessDenied)
		}
	}

	if !skipAccessChecks(dstVolume) {
		if err = Access(dstVolumeDir); err != nil {
			return 0, convertAccessError(err, errVolumeAccessDenied)
		}
	}

	// Full paths of the metadata file at the source and destination.
	srcFilePath := pathutil.Join(srcVolumeDir, pathJoin(srcPath, xlStorageFormatFile))
	dstFilePath := pathutil.Join(dstVolumeDir, pathJoin(dstPath, xlStorageFormatFile))

	var srcDataPath string
	var dstDataPath string
	var dataDir string
	// Remote objects leave dataDir empty, so no data directory is
	// renamed for them.
	if !fi.IsRemote() {
		dataDir = retainSlash(fi.DataDir)
	}
	if dataDir != "" {
		srcDataPath = retainSlash(pathJoin(srcVolumeDir, srcPath, dataDir))
		// make sure to always use path.Join here, do not use pathJoin as
		// it would additionally add `/` at the end and it comes in the
		// way of renameAll(), parentDir creation.
		dstDataPath = pathutil.Join(dstVolumeDir, dstPath, dataDir)
	}

	if err = checkPathLength(srcFilePath); err != nil {
		return 0, err
	}

	if err = checkPathLength(dstFilePath); err != nil {
		return 0, err
	}

	// Load the metadata currently stored at the destination, if any.
	dstBuf, err := xioutil.ReadFile(dstFilePath)
	if err != nil {
		// handle situations when dstFilePath is 'file'
		// for example such as someone is trying to
		// upload an object such as `prefix/object/xl.meta`
		// where `prefix/object` is already an object
		if isSysErrNotDir(err) && runtime.GOOS != globalWindowsOSName {
			// NOTE: On windows the error happens at
			// next line and returns appropriate error.
			return 0, errFileAccessDenied
		}
		if !osIsNotExist(err) {
			return 0, osErrToFileErr(err)
		}
		// errFileNotFound comes here.
		err = s.renameLegacyMetadata(dstVolumeDir, dstPath)
		if err != nil && err != errFileNotFound {
			return 0, err
		}
		if err == nil {
			// Legacy metadata was renamed; read it from its new name.
			dstBuf, err = xioutil.ReadFile(dstFilePath)
			if err != nil && !osIsNotExist(err) {
				return 0, osErrToFileErr(err)
			}
		}
	}

	var xlMeta xlMetaV2
	// legacyPreserved is set when legacy content exists at the
	// destination and has to be moved into legacyDataDir.
	var legacyPreserved bool
	if len(dstBuf) > 0 {
		if isXL2V1Format(dstBuf) {
			if err = xlMeta.Load(dstBuf); err != nil {
				// Data appears corrupt. Drop data.
				xlMeta = xlMetaV2{}
			}
		} else {
			// This code-path is to preserve the legacy data.
			xlMetaLegacy := &xlMetaV1Object{}
			json := jsoniter.ConfigCompatibleWithStandardLibrary
			if err := json.Unmarshal(dstBuf, xlMetaLegacy); err != nil {
				logger.LogOnceIf(ctx, err, "read-data-unmarshal-"+dstFilePath)
				// Data appears corrupt. Drop data.
			} else {
				xlMetaLegacy.DataDir = legacyDataDir
				if err = xlMeta.AddLegacy(xlMetaLegacy); err != nil {
					logger.LogOnceIf(ctx, err, "read-data-add-legacy-"+dstFilePath)
				}
				legacyPreserved = true
			}
		}
	} else {
		s.RLock()
		formatLegacy := s.formatLegacy
		s.RUnlock()
		// It is possible that some drives may not have `xl.meta` file
		// in such scenarios verify if at least `part.1` files exist
		// to verify for legacy version.
		if formatLegacy {
			// We only need this code if we are moving
			// from `xl.json` to `xl.meta`, we can avoid
			// one extra readdir operation here for all
			// new deployments.
			currentDataPath := pathJoin(dstVolumeDir, dstPath)
			entries, err := readDirN(currentDataPath, 1)
			if err != nil && err != errFileNotFound {
				return 0, osErrToFileErr(err)
			}
			for _, entry := range entries {
				if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
					continue
				}
				if strings.HasPrefix(entry, "part.") {
					legacyPreserved = true
					break
				}
			}
		}
	}

	legacyDataPath := pathJoin(dstVolumeDir, dstPath, legacyDataDir)
	if legacyPreserved {
		// Preserve all the legacy data, could be slow, but at max there can be 10,000 parts.
		currentDataPath := pathJoin(dstVolumeDir, dstPath)
		entries, err := readDir(currentDataPath)
		if err != nil {
			return 0, osErrToFileErr(err)
		}

		// legacy data dir means its old content, honor system umask.
		if err = mkdirAll(legacyDataPath, 0o777, dstVolumeDir); err != nil {
			// any failed mkdir-calls delete them.
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
			return 0, osErrToFileErr(err)
		}

		for _, entry := range entries {
			// Skip xl.meta renames further, also ignore any directories such as `legacyDataDir`
			if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
				continue
			}

			if err = Rename(pathJoin(currentDataPath, entry), pathJoin(legacyDataPath, entry)); err != nil {
				// Any failed rename calls un-roll previous transaction.
				s.deleteFile(dstVolumeDir, legacyDataPath, true, false)

				return 0, osErrToFileErr(err)
			}
		}
	}

	var oldDstDataPath, reqVID string

	// An empty version ID addresses the null version.
	if fi.VersionID == "" {
		reqVID = nullVersionID
	} else {
		reqVID = fi.VersionID
	}

	// Replace the data of null version or any other existing version-id
	_, ver, err := xlMeta.findVersionStr(reqVID)
	if err == nil {
		dataDir := ver.getDataDir()
		// Only purge the old data directory when no other version
		// shares it.
		if dataDir != "" && (xlMeta.SharedDataDirCountStr(reqVID, dataDir) == 0) {
			// Purge the destination path as we are not preserving anything
			// versioned object was not requested.
			oldDstDataPath = pathJoin(dstVolumeDir, dstPath, dataDir)
			// if old destination path is same as new destination path
			// there is nothing to purge, this is true in case of healing
			// avoid setting oldDstDataPath at that point.
			if oldDstDataPath == dstDataPath {
				oldDstDataPath = ""
			} else {
				xlMeta.data.remove(reqVID, dataDir)
			}
		}
	}

	// Empty fi.VersionID indicates that versioning is either
	// suspended or disabled on this bucket. RenameData will replace
	// the 'null' version. We add a free-version to track its tiered
	// content for asynchronous deletion.
	//
	// Note: RestoreObject and HealObject requests don't end up replacing the
	// null version and therefore don't require the free-version to track
	// anything
	if fi.VersionID == "" && !fi.IsRestoreObjReq() && !fi.Healing() {
		// Note: Restore object request reuses PutObject/Multipart
		// upload to copy back its data from the remote tier. This
		// doesn't replace the existing version, so we don't need to add
		// a free-version.
		xlMeta.AddFreeVersion(fi)
	}

	// indicates if RenameData() is called by healing.
	// healing doesn't preserve the dataDir as 'legacy'
	healing := fi.XLV1 && fi.DataDir != legacyDataDir

	if err = xlMeta.AddVersion(fi); err != nil {
		if legacyPreserved {
			// Any failed rename calls un-roll previous transaction.
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		return 0, err
	}

	// Compute the returned signature from the header signatures of all
	// versions now present in the metadata.
	var sbuf bytes.Buffer
	for _, ver := range xlMeta.versions {
		sbuf.Write(ver.header.Signature[:])
	}
	sign = xxh3.Hash(sbuf.Bytes())

	// Serialize the merged metadata into a pooled buffer; the buffer is
	// returned to the pool when this function exits.
	dstBuf, err = xlMeta.AppendTo(metaDataPoolGet())
	defer metaDataPoolPut(dstBuf)
	if err != nil {
		if legacyPreserved {
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		return 0, errFileCorrupt
	}

	// The merged metadata is first written at the source location and
	// only later renamed to the destination.
	if err = s.WriteAll(ctx, srcVolume, pathJoin(srcPath, xlStorageFormatFile), dstBuf); err != nil {
		if legacyPreserved {
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		return 0, osErrToFileErr(err)
	}
	diskHealthCheckOK(ctx, err)

	if srcDataPath != "" && len(fi.Data) == 0 && fi.Size > 0 {
		// renameAll only for objects that have xl.meta not saved inline.
		s.moveToTrash(dstDataPath, true, false)
		if healing {
			// If we are healing we should purge any legacyDataPath content,
			// that was previously preserved during PutObject() call
			// on a versioned bucket.
			s.moveToTrash(legacyDataPath, true, false)
		}
		if err = renameAll(srcDataPath, dstDataPath, dstVolumeDir); err != nil {
			if legacyPreserved {
				// Any failed rename calls un-roll previous transaction.
				s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
			}
			s.deleteFile(dstVolumeDir, dstDataPath, false, false)
			return 0, osErrToFileErr(err)
		}
	}

	// Commit meta-file
	if err = renameAll(srcFilePath, dstFilePath, dstVolumeDir); err != nil {
		if legacyPreserved {
			// Any failed rename calls un-roll previous transaction.
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		s.deleteFile(dstVolumeDir, dstDataPath, false, false)
		return 0, osErrToFileErr(err)
	}

	// additionally only purge older data at the end of the transaction of new data-dir
	// movement, this is to ensure that previous data references can co-exist for
	// any recoverability.
	if oldDstDataPath != "" {
		s.moveToTrash(oldDstDataPath, true, false)
	}

	if srcVolume != minioMetaMultipartBucket {
		// srcFilePath is some-times minioMetaTmpBucket, an attempt to
		// remove the temporary folder is enough since at this point
		// ideally all transaction should be complete.
		Remove(pathutil.Dir(srcFilePath))
	} else {
		s.deleteFile(srcVolumeDir, pathutil.Dir(srcFilePath), true, false)
	}
	return sign, nil
}
  2735  
  2736  // RenameFile - rename source path to destination path atomically.
  2737  func (s *xlStorage) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
  2738  	srcVolumeDir, err := s.getVolDir(srcVolume)
  2739  	if err != nil {
  2740  		return err
  2741  	}
  2742  	dstVolumeDir, err := s.getVolDir(dstVolume)
  2743  	if err != nil {
  2744  		return err
  2745  	}
  2746  	if !skipAccessChecks(srcVolume) {
  2747  		// Stat a volume entry.
  2748  		if err = Access(srcVolumeDir); err != nil {
  2749  			if osIsNotExist(err) {
  2750  				return errVolumeNotFound
  2751  			} else if isSysErrIO(err) {
  2752  				return errFaultyDisk
  2753  			}
  2754  			return err
  2755  		}
  2756  	}
  2757  	if !skipAccessChecks(dstVolume) {
  2758  		if err = Access(dstVolumeDir); err != nil {
  2759  			if osIsNotExist(err) {
  2760  				return errVolumeNotFound
  2761  			} else if isSysErrIO(err) {
  2762  				return errFaultyDisk
  2763  			}
  2764  			return err
  2765  		}
  2766  	}
  2767  	srcIsDir := HasSuffix(srcPath, SlashSeparator)
  2768  	dstIsDir := HasSuffix(dstPath, SlashSeparator)
  2769  	// Either src and dst have to be directories or files, else return error.
  2770  	if !(srcIsDir && dstIsDir || !srcIsDir && !dstIsDir) {
  2771  		return errFileAccessDenied
  2772  	}
  2773  	srcFilePath := pathutil.Join(srcVolumeDir, srcPath)
  2774  	if err = checkPathLength(srcFilePath); err != nil {
  2775  		return err
  2776  	}
  2777  	dstFilePath := pathutil.Join(dstVolumeDir, dstPath)
  2778  	if err = checkPathLength(dstFilePath); err != nil {
  2779  		return err
  2780  	}
  2781  	if srcIsDir {
  2782  		// If source is a directory, we expect the destination to be non-existent but we
  2783  		// we still need to allow overwriting an empty directory since it represents
  2784  		// an object empty directory.
  2785  		dirInfo, err := Lstat(dstFilePath)
  2786  		if isSysErrIO(err) {
  2787  			return errFaultyDisk
  2788  		}
  2789  		if err != nil {
  2790  			if !osIsNotExist(err) {
  2791  				return err
  2792  			}
  2793  		} else {
  2794  			if !dirInfo.IsDir() {
  2795  				return errFileAccessDenied
  2796  			}
  2797  			if err = Remove(dstFilePath); err != nil {
  2798  				if isSysErrNotEmpty(err) || isSysErrNotDir(err) {
  2799  					return errFileAccessDenied
  2800  				} else if isSysErrIO(err) {
  2801  					return errFaultyDisk
  2802  				}
  2803  				return err
  2804  			}
  2805  		}
  2806  	}
  2807  
  2808  	if err = renameAll(srcFilePath, dstFilePath, dstVolumeDir); err != nil {
  2809  		if isSysErrNotEmpty(err) || isSysErrNotDir(err) {
  2810  			return errFileAccessDenied
  2811  		}
  2812  		return osErrToFileErr(err)
  2813  	}
  2814  
  2815  	// Remove parent dir of the source file if empty
  2816  	parentDir := pathutil.Dir(srcFilePath)
  2817  	s.deleteFile(srcVolumeDir, parentDir, false, false)
  2818  
  2819  	return nil
  2820  }
  2821  
  2822  func (s *xlStorage) bitrotVerify(ctx context.Context, partPath string, partSize int64, algo BitrotAlgorithm, sum []byte, shardSize int64) error {
  2823  	// Open the file for reading.
  2824  	file, err := OpenFile(partPath, readMode, 0o666)
  2825  	if err != nil {
  2826  		return osErrToFileErr(err)
  2827  	}
  2828  
  2829  	// Close the file descriptor.
  2830  	defer file.Close()
  2831  	fi, err := file.Stat()
  2832  	if err != nil {
  2833  		// Unable to stat on the file, return an expected error
  2834  		// for healing code to fix this file.
  2835  		return err
  2836  	}
  2837  	return bitrotVerify(diskHealthReader(ctx, file), fi.Size(), partSize, algo, sum, shardSize)
  2838  }
  2839  
  2840  func (s *xlStorage) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) (err error) {
  2841  	volumeDir, err := s.getVolDir(volume)
  2842  	if err != nil {
  2843  		return err
  2844  	}
  2845  
  2846  	if !skipAccessChecks(volume) {
  2847  		// Stat a volume entry.
  2848  		if err = Access(volumeDir); err != nil {
  2849  			return convertAccessError(err, errVolumeAccessDenied)
  2850  		}
  2851  	}
  2852  
  2853  	erasure := fi.Erasure
  2854  	for _, part := range fi.Parts {
  2855  		checksumInfo := erasure.GetChecksumInfo(part.Number)
  2856  		partPath := pathJoin(volumeDir, path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
  2857  		if err := s.bitrotVerify(ctx, partPath,
  2858  			erasure.ShardFileSize(part.Size),
  2859  			checksumInfo.Algorithm,
  2860  			checksumInfo.Hash, erasure.ShardSize()); err != nil {
  2861  			if !IsErr(err, []error{
  2862  				errFileNotFound,
  2863  				errVolumeNotFound,
  2864  				errFileCorrupt,
  2865  				errFileAccessDenied,
  2866  				errFileVersionNotFound,
  2867  			}...) {
  2868  				logger.GetReqInfo(ctx).AppendTags("disk", s.String())
  2869  				logger.LogOnceIf(ctx, err, partPath)
  2870  			}
  2871  			return err
  2872  		}
  2873  	}
  2874  
  2875  	return nil
  2876  }
  2877  
  2878  // ReadMultiple will read multiple files and send each back as response.
  2879  // Files are read and returned in the given order.
  2880  // The resp channel is closed before the call returns.
  2881  // Only a canceled context will return an error.
  2882  func (s *xlStorage) ReadMultiple(ctx context.Context, req ReadMultipleReq, resp chan<- ReadMultipleResp) error {
  2883  	defer xioutil.SafeClose(resp)
  2884  
  2885  	volumeDir := pathJoin(s.drivePath, req.Bucket)
  2886  	found := 0
  2887  	for _, f := range req.Files {
  2888  		if contextCanceled(ctx) {
  2889  			return ctx.Err()
  2890  		}
  2891  		r := ReadMultipleResp{
  2892  			Bucket: req.Bucket,
  2893  			Prefix: req.Prefix,
  2894  			File:   f,
  2895  		}
  2896  		var data []byte
  2897  		var mt time.Time
  2898  		fullPath := pathJoin(volumeDir, req.Prefix, f)
  2899  		w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
  2900  		if err := w.Run(func() (err error) {
  2901  			if req.MetadataOnly {
  2902  				data, mt, err = s.readMetadataWithDMTime(ctx, fullPath)
  2903  			} else {
  2904  				data, mt, err = s.readAllData(ctx, req.Bucket, volumeDir, fullPath, true)
  2905  			}
  2906  			return err
  2907  		}); err != nil {
  2908  			if !IsErr(err, errFileNotFound, errVolumeNotFound) {
  2909  				r.Exists = true
  2910  				r.Error = err.Error()
  2911  			}
  2912  			select {
  2913  			case <-ctx.Done():
  2914  				return ctx.Err()
  2915  			case resp <- r:
  2916  			}
  2917  			if req.AbortOn404 && !r.Exists {
  2918  				// We stop at first file not found.
  2919  				// We have already reported the error, return nil.
  2920  				return nil
  2921  			}
  2922  			continue
  2923  		}
  2924  		diskHealthCheckOK(ctx, nil)
  2925  		if req.MaxSize > 0 && int64(len(data)) > req.MaxSize {
  2926  			r.Exists = true
  2927  			r.Error = fmt.Sprintf("max size (%d) exceeded: %d", req.MaxSize, len(data))
  2928  			select {
  2929  			case <-ctx.Done():
  2930  				return ctx.Err()
  2931  			case resp <- r:
  2932  				continue
  2933  			}
  2934  		}
  2935  		found++
  2936  		r.Exists = true
  2937  		r.Data = data
  2938  		r.Modtime = mt
  2939  		select {
  2940  		case <-ctx.Done():
  2941  			return ctx.Err()
  2942  		case resp <- r:
  2943  		}
  2944  		if req.MaxResults > 0 && found >= req.MaxResults {
  2945  			return nil
  2946  		}
  2947  	}
  2948  	return nil
  2949  }
  2950  
  2951  func (s *xlStorage) StatInfoFile(ctx context.Context, volume, path string, glob bool) (stat []StatInfo, err error) {
  2952  	volumeDir, err := s.getVolDir(volume)
  2953  	if err != nil {
  2954  		return stat, err
  2955  	}
  2956  
  2957  	files := []string{pathJoin(volumeDir, path)}
  2958  	if glob {
  2959  		files, err = filepathx.Glob(filepath.Join(volumeDir, path))
  2960  		if err != nil {
  2961  			return nil, err
  2962  		}
  2963  	}
  2964  	for _, filePath := range files {
  2965  		if err := checkPathLength(filePath); err != nil {
  2966  			return stat, err
  2967  		}
  2968  		st, _ := Lstat(filePath)
  2969  		if st == nil {
  2970  			if !skipAccessChecks(volume) {
  2971  				// Stat a volume entry.
  2972  				if verr := Access(volumeDir); verr != nil {
  2973  					return stat, convertAccessError(verr, errVolumeAccessDenied)
  2974  				}
  2975  			}
  2976  			return stat, errPathNotFound
  2977  		}
  2978  		name, err := filepath.Rel(volumeDir, filePath)
  2979  		if err != nil {
  2980  			name = filePath
  2981  		}
  2982  		stat = append(stat, StatInfo{
  2983  			Name:    filepath.ToSlash(name),
  2984  			Size:    st.Size(),
  2985  			Dir:     st.IsDir(),
  2986  			Mode:    uint32(st.Mode()),
  2987  			ModTime: st.ModTime(),
  2988  		})
  2989  	}
  2990  	return stat, nil
  2991  }
  2992  
  2993  // CleanAbandonedData will read metadata of the object on disk
  2994  // and delete any data directories and inline data that isn't referenced in metadata.
  2995  // Metadata itself is not modified, only inline data.
  2996  func (s *xlStorage) CleanAbandonedData(ctx context.Context, volume string, path string) error {
  2997  	if volume == "" || path == "" {
  2998  		return nil // Ignore
  2999  	}
  3000  
  3001  	volumeDir, err := s.getVolDir(volume)
  3002  	if err != nil {
  3003  		return err
  3004  	}
  3005  	baseDir := pathJoin(volumeDir, path+slashSeparator)
  3006  	metaPath := pathutil.Join(baseDir, xlStorageFormatFile)
  3007  	buf, _, err := s.readAllData(ctx, volume, volumeDir, metaPath, true)
  3008  	if err != nil {
  3009  		return err
  3010  	}
  3011  	defer metaDataPoolPut(buf)
  3012  
  3013  	if !isXL2V1Format(buf) {
  3014  		return nil
  3015  	}
  3016  	var xl xlMetaV2
  3017  	err = xl.LoadOrConvert(buf)
  3018  	if err != nil {
  3019  		return err
  3020  	}
  3021  	foundDirs := make(map[string]struct{}, len(xl.versions))
  3022  	err = readDirFn(baseDir, func(name string, typ os.FileMode) error {
  3023  		if !typ.IsDir() {
  3024  			return nil
  3025  		}
  3026  		// See if directory has a UUID name.
  3027  		base := filepath.Base(name)
  3028  		_, err := uuid.Parse(base)
  3029  		if err == nil {
  3030  			foundDirs[base] = struct{}{}
  3031  		}
  3032  		return nil
  3033  	})
  3034  	if err != nil {
  3035  		return err
  3036  	}
  3037  	wantDirs, err := xl.getDataDirs()
  3038  	if err != nil {
  3039  		return err
  3040  	}
  3041  
  3042  	// Delete all directories we expect to be there.
  3043  	for _, dir := range wantDirs {
  3044  		delete(foundDirs, dir)
  3045  	}
  3046  
  3047  	// Delete excessive directories.
  3048  	// Do not abort on context errors.
  3049  	for dir := range foundDirs {
  3050  		toRemove := pathJoin(volumeDir, path, dir+SlashSeparator)
  3051  		err := s.deleteFile(volumeDir, toRemove, true, true)
  3052  		diskHealthCheckOK(ctx, err)
  3053  	}
  3054  
  3055  	// Do the same for inline data
  3056  	dirs, err := xl.data.list()
  3057  	if err != nil {
  3058  		return err
  3059  	}
  3060  	// Clear and repopulate
  3061  	for k := range foundDirs {
  3062  		delete(foundDirs, k)
  3063  	}
  3064  	// Populate into map
  3065  	for _, k := range dirs {
  3066  		foundDirs[k] = struct{}{}
  3067  	}
  3068  	// Delete all directories we expect to be there.
  3069  	for _, dir := range wantDirs {
  3070  		delete(foundDirs, dir)
  3071  	}
  3072  
  3073  	// Delete excessive inline entries.
  3074  	if len(foundDirs) > 0 {
  3075  		// Convert to slice.
  3076  		dirs = dirs[:0]
  3077  		for dir := range foundDirs {
  3078  			dirs = append(dirs, dir)
  3079  		}
  3080  		if xl.data.remove(dirs...) {
  3081  			newBuf, err := xl.AppendTo(metaDataPoolGet())
  3082  			if err == nil {
  3083  				defer metaDataPoolPut(newBuf)
  3084  				return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
  3085  			}
  3086  		}
  3087  	}
  3088  	return nil
  3089  }
  3090  
  3091  func convertAccessError(err, permErr error) error {
  3092  	switch {
  3093  	case osIsNotExist(err):
  3094  		return errVolumeNotFound
  3095  	case isSysErrIO(err):
  3096  		return errFaultyDisk
  3097  	case osIsPermission(err):
  3098  		return permErr
  3099  	default:
  3100  		return err
  3101  	}
  3102  }