// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"bytes"
	"context"
	"crypto/rand"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"net/url"
	"os"
	pathutil "path"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/google/uuid"
	jsoniter "github.com/json-iterator/go"
	"github.com/klauspost/filepathx"
	"github.com/minio/madmin-go/v3"
	"github.com/minio/minio/internal/bucket/lifecycle"
	"github.com/minio/minio/internal/cachevalue"
	"github.com/minio/minio/internal/config/storageclass"
	"github.com/minio/minio/internal/disk"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
	"github.com/pkg/xattr"
	"github.com/zeebo/xxh3"
)

const (
	// nullVersionID is the version id used for non-versioned (null) object versions.
	nullVersionID = "null"

	// Small file threshold below which data accompanies metadata from storage layer.
	smallFileThreshold = 128 * humanize.KiByte // Optimized for NVMe/SSDs

	// For hard drives it is possible to set this to a lower value to avoid any
	// spike in latency. But currently we are simply keeping it optimal for SSDs.

	// bigFileThreshold is the point where we add readahead to put operations.
	bigFileThreshold = 128 * humanize.MiByte

	// XL metadata file carries per object metadata.
	xlStorageFormatFile = "xl.meta"
)

// alignedBuf is a direct-IO-aligned scratch buffer filled with random bytes,
// used by checkODirectDiskSupport to probe whether the backend accepts O_DIRECT writes.
var alignedBuf []byte

func init() {
	alignedBuf = disk.AlignedBlock(xioutil.DirectioAlignSize)
	// Best-effort fill with random data; the contents are never read back.
	_, _ = rand.Read(alignedBuf)
}

// isValidVolname verifies a volname name in accordance with object
// layer requirements.
func isValidVolname(volname string) bool {
	// Volume (bucket) names shorter than 3 characters are rejected.
	if len(volname) < 3 {
		return false
	}

	if runtime.GOOS == "windows" {
		// Volname shouldn't have reserved characters in Windows.
		return !strings.ContainsAny(volname, `\:*?\"<>|`)
	}

	return true
}

// xlStorage - implements StorageAPI interface.
type xlStorage struct {
	// Indicates if NSScanner is in progress in this disk.
	// Accessed atomically (see NSScanner / DiskInfo).
	scanning int32

	drivePath string
	endpoint  Endpoint

	globalSync bool
	oDirect    bool // indicates if this disk supports ODirect

	diskID string

	// Indexes, will be -1 until assigned a set.
	poolIndex, setIndex, diskIndex int

	formatFileInfo  os.FileInfo
	formatFile      string
	formatLegacy    bool
	formatLastCheck time.Time

	diskInfoCache *cachevalue.Cache[DiskInfo]
	sync.RWMutex
	formatData []byte

	nrRequests   uint64
	major, minor uint32

	// Queue of trash entries to remove immediately in the background.
	immediatePurge chan string

	rotational bool
	// Mutexes to prevent concurrent read operations overloading walks.
	// Only allocated (non-nil) for rotational (HDD) drives.
	walkMu     *sync.Mutex
	walkReadMu *sync.Mutex
}

// checkPathLength - returns error if given path name length more than 255
func checkPathLength(pathName string) error {
	// Apple OS X path length is limited to 1016
	if runtime.GOOS == "darwin" && len(pathName) > 1016 {
		return errFileNameTooLong
	}

	// Disallow more than 1024 characters on windows, there
	// are no known name_max limits on Windows.
	if runtime.GOOS == "windows" && len(pathName) > 1024 {
		return errFileNameTooLong
	}

	// On Unix we reject paths if they are just '.', '..' or '/'
	if pathName == "." || pathName == ".." || pathName == slashSeparator {
		return errFileAccessDenied
	}

	// Check each path segment length is > 255 on all Unix
	// platforms, look for this value as NAME_MAX in
	// /usr/include/linux/limits.h
	var count int64
	for _, p := range pathName {
		switch p {
		case '/':
			count = 0 // Reset
		case '\\':
			// Backslash is a separator only on Windows.
			if runtime.GOOS == globalWindowsOSName {
				count = 0
			}
		default:
			count++
			if count > 255 {
				return errFileNameTooLong
			}
		}
	} // Success.
	return nil
}

// getValidPath resolves 'path' to an absolute path, creates the directory
// if it does not exist, and verifies that it is indeed a directory.
func getValidPath(path string) (string, error) {
	if path == "" {
		return path, errInvalidArgument
	}

	var err error
	// Disallow relative paths, figure out absolute paths.
	path, err = filepath.Abs(path)
	if err != nil {
		return path, err
	}

	fi, err := Lstat(path)
	if err != nil && !osIsNotExist(err) {
		return path, err
	}
	if osIsNotExist(err) {
		// Disk not found create it.
		// Mode 0o777 honors the system umask.
		if err = mkdirAll(path, 0o777, ""); err != nil {
			return path, err
		}
	}
	if fi != nil && !fi.IsDir() {
		return path, errDiskNotDir
	}

	return path, nil
}

// Initialize a new storage disk.
func newLocalXLStorage(path string) (*xlStorage, error) {
	u := url.URL{Path: path}
	return newXLStorage(Endpoint{
		URL:     &u,
		IsLocal: true,
	}, true)
}

// Make Erasure backend meta volumes.
func makeFormatErasureMetaVolumes(disk StorageAPI) error {
	if disk == nil {
		return errDiskNotFound
	}
	volumes := []string{
		minioMetaTmpDeletedBucket, // creates .minio.sys/tmp as well as .minio.sys/tmp/.trash
		minioMetaMultipartBucket,  // creates .minio.sys/multipart
		dataUsageBucket,           // creates .minio.sys/buckets
		minioConfigBucket,         // creates .minio.sys/config
	}
	// Attempt to create MinIO internal buckets.
	return disk.MakeVolBulk(context.TODO(), volumes...)
}

// Initialize a new storage disk.
func newXLStorage(ep Endpoint, cleanUp bool) (s *xlStorage, err error) {
	immediatePurgeQueue := 100000
	if globalIsTesting || globalIsCICD {
		// Keep the queue tiny under tests/CI to exercise the blocking path.
		immediatePurgeQueue = 1
	}
	s = &xlStorage{
		drivePath:      ep.Path,
		endpoint:       ep,
		globalSync:     globalFSOSync,
		diskInfoCache:  cachevalue.New[DiskInfo](),
		poolIndex:      -1,
		setIndex:       -1,
		diskIndex:      -1,
		immediatePurge: make(chan string, immediatePurgeQueue),
	}

	// Start the background trash purger only when initialization succeeds.
	defer func() {
		if err == nil {
			go s.cleanupTrashImmediateCallers(GlobalContext)
		}
	}()

	s.drivePath, err = getValidPath(ep.Path)
	if err != nil {
		s.drivePath = ep.Path
		return s, err
	}

	info, err := disk.GetInfo(s.drivePath, true)
	if err != nil {
		return s, err
	}
	s.major = info.Major
	s.minor = info.Minor

	if !globalIsCICD && !globalIsErasureSD {
		var rootDrive bool
		if globalRootDiskThreshold > 0 {
			// Use MINIO_ROOTDISK_THRESHOLD_SIZE to figure out if
			// this disk is a root disk. treat those disks with
			// size less than or equal to the threshold as rootDrives.
			rootDrive = info.Total <= globalRootDiskThreshold
		} else {
			rootDrive, err = disk.IsRootDisk(s.drivePath, SlashSeparator)
			if err != nil {
				return nil, err
			}
		}
		// Refuse to use the OS root drive as object storage.
		if rootDrive {
			return s, errDriveIsRoot
		}
	}

	// Sanitize before setting it
	if info.NRRequests > 0 {
		s.nrRequests = info.NRRequests
	}

	// We stagger listings only on HDDs.
	// A nil Rotational (unknown) is conservatively treated as rotational.
	if info.Rotational == nil || *info.Rotational {
		s.rotational = true
		s.walkMu = &sync.Mutex{}
		s.walkReadMu = &sync.Mutex{}
	}

	if cleanUp {
		bgFormatErasureCleanupTmp(s.drivePath) // cleanup any old data.
	}

	formatData, formatFi, err := formatErasureMigrate(s.drivePath)
	if err != nil && !errors.Is(err, os.ErrNotExist) {
		if os.IsPermission(err) {
			return s, errDiskAccessDenied
		} else if isSysErrIO(err) {
			return s, errFaultyDisk
		}
		return s, err
	}
	s.formatData = formatData
	s.formatFileInfo = formatFi
	s.formatFile = pathJoin(s.drivePath, minioMetaBucket, formatConfigFile)

	// Create all necessary bucket folders if possible.
	if err = makeFormatErasureMetaVolumes(s); err != nil {
		return s, err
	}

	if len(s.formatData) > 0 {
		format := &formatErasureV3{}
		json := jsoniter.ConfigCompatibleWithStandardLibrary
		if err = json.Unmarshal(s.formatData, &format); err != nil {
			return s, errCorruptedFormat
		}
		s.diskID = format.Erasure.This
		s.formatLastCheck = time.Now()
		s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
	}

	// Return an error if ODirect is not supported. Single disk will have
	// oDirect off.
	if globalIsErasureSD || !disk.ODirectPlatform {
		s.oDirect = false
	} else if err := s.checkODirectDiskSupport(info.FSType); err == nil {
		s.oDirect = true
	} else {
		return s, err
	}

	// Initialize DiskInfo cache
	s.diskInfoCache.InitOnce(time.Second, cachevalue.Opts{},
		func() (DiskInfo, error) {
			dcinfo := DiskInfo{}
			di, err := getDiskInfo(s.drivePath)
			if err != nil {
				return dcinfo, err
			}
			dcinfo.Major = di.Major
			dcinfo.Minor = di.Minor
			dcinfo.Total = di.Total
			dcinfo.Free = di.Free
			dcinfo.Used = di.Used
			dcinfo.UsedInodes = di.Files - di.Ffree
			dcinfo.FreeInodes = di.Ffree
			dcinfo.FSType = di.FSType
			diskID, err := s.GetDiskID()
			// Healing is 'true' when
			// - if we found an unformatted disk (no 'format.json')
			// - if we found healing tracker 'healing.bin'
			dcinfo.Healing = errors.Is(err, errUnformattedDisk) || (s.Healing() != nil)
			dcinfo.ID = diskID
			return dcinfo, err
		},
	)

	// Success.
	return s, nil
}

// getDiskInfo returns given disk information.
func getDiskInfo(drivePath string) (di disk.Info, err error) {
	if err = checkPathLength(drivePath); err == nil {
		di, err = disk.GetInfo(drivePath, false)
	}
	// Normalize common system errors to MinIO sentinel errors.
	switch {
	case osIsNotExist(err):
		err = errDiskNotFound
	case isSysErrTooLong(err):
		err = errFileNameTooLong
	case isSysErrIO(err):
		err = errFaultyDisk
	}

	return di, err
}

// Implements stringer compatible interface.
func (s *xlStorage) String() string {
	return s.drivePath
}

// Hostname returns the host part of this drive's endpoint.
func (s *xlStorage) Hostname() string {
	return s.endpoint.Host
}

// Endpoint returns this drive's endpoint.
func (s *xlStorage) Endpoint() Endpoint {
	return s.endpoint
}

// Close is a no-op for local storage.
func (*xlStorage) Close() error {
	return nil
}

// IsOnline is always true for local storage.
func (s *xlStorage) IsOnline() bool {
	return true
}

// LastConn returns the zero time; connection tracking does not apply to local drives.
func (s *xlStorage) LastConn() time.Time {
	return time.Time{}
}

// IsLocal is always true for xlStorage.
func (s *xlStorage) IsLocal() bool {
	return true
}

// Retrieve location indexes.
func (s *xlStorage) GetDiskLoc() (poolIdx, setIdx, diskIdx int) {
	// If unset, see if we can locate it.
	if s.poolIndex < 0 || s.setIndex < 0 || s.diskIndex < 0 {
		return getXLDiskLoc(s.diskID)
	}
	return s.poolIndex, s.setIndex, s.diskIndex
}

// SetFormatData replaces the cached raw `format.json` contents under lock.
func (s *xlStorage) SetFormatData(b []byte) {
	s.Lock()
	defer s.Unlock()
	s.formatData = b
}

// Set location indexes.
func (s *xlStorage) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
	s.poolIndex = poolIdx
	s.setIndex = setIdx
	s.diskIndex = diskIdx
}

// Healing returns the current healing tracker from 'healing.bin',
// or nil if the tracker file does not exist (drive not healing).
func (s *xlStorage) Healing() *healingTracker {
	healingFile := pathJoin(s.drivePath, minioMetaBucket,
		bucketMetaPrefix, healingTrackerFilename)
	b, err := os.ReadFile(healingFile)
	if err != nil {
		return nil
	}
	h := newHealingTracker()
	// NOTE(review): an unmarshal error is logged but a (possibly partial)
	// tracker is still returned — confirm callers tolerate this.
	_, err = h.UnmarshalMsg(b)
	logger.LogIf(GlobalContext, err)
	return h
}

// checkODirectDiskSupport asks the disk to write some data
// with O_DIRECT support, return an error if any and return
// errUnsupportedDisk if there is no O_DIRECT support
func (s *xlStorage) checkODirectDiskSupport(fsType string) error {
	if !disk.ODirectPlatform {
		return errUnsupportedDisk
	}

	// We know XFS already supports O_DIRECT no need to check.
	if fsType == "XFS" {
		return nil
	}

	// For all other FS pay the price of not using our recommended filesystem.

	// Check if backend is writable and supports O_DIRECT
	uuid := mustGetUUID()
	filePath := pathJoin(s.drivePath, minioMetaTmpDeletedBucket, ".writable-check-"+uuid+".tmp")

	// Create top level directories if they don't exist.
	// with mode 0o777 mkdir honors system umask.
	mkdirAll(pathutil.Dir(filePath), 0o777, s.drivePath) // don't need to fail here

	w, err := s.openFileDirect(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
	if err != nil {
		return err
	}
	_, err = w.Write(alignedBuf)
	w.Close()
	if err != nil {
		// EINVAL from a direct write means O_DIRECT is unsupported here.
		if isSysErrInvalidArg(err) {
			err = errUnsupportedDisk
		}
	}
	return err
}

// readMetadataWithDMTime reads `xl.meta` at itemPath and returns its
// contents along with the file's modification time (UTC).
func (s *xlStorage) readMetadataWithDMTime(ctx context.Context, itemPath string) ([]byte, time.Time, error) {
	if contextCanceled(ctx) {
		return nil, time.Time{}, ctx.Err()
	}

	if err := checkPathLength(itemPath); err != nil {
		return nil, time.Time{}, err
	}

	f, err := OpenFile(itemPath, readMode, 0o666)
	if err != nil {
		return nil, time.Time{}, err
	}
	defer f.Close()
	stat, err := f.Stat()
	if err != nil {
		return nil, time.Time{}, err
	}
	if stat.IsDir() {
		return nil, time.Time{}, &os.PathError{
			Op:   "open",
			Path: itemPath,
			Err:  syscall.EISDIR,
		}
	}
	buf, err := readXLMetaNoData(f, stat.Size())
	if err != nil {
		// Annotate the error with the path for easier diagnosis.
		return nil, stat.ModTime().UTC(), fmt.Errorf("%w -> %s", err, itemPath)
	}
	return buf, stat.ModTime().UTC(), err
}

// readMetadata reads `xl.meta` under the configured drive deadline.
func (s *xlStorage) readMetadata(ctx context.Context, itemPath string) ([]byte, error) {
	return xioutil.WithDeadline[[]byte](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) ([]byte, error) {
		buf, _, err := s.readMetadataWithDMTime(ctx, itemPath)
		return buf, err
	})
}

// NSScanner scans this drive's namespace for data-usage accounting,
// applying lifecycle/replication actions along the way. 'updates' is
// closed before return; results stream through it as entries are scanned.
func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode, weSleep func() bool) (dataUsageCache, error) {
	atomic.AddInt32(&s.scanning, 1)
	defer atomic.AddInt32(&s.scanning, -1)

	var err error
	stopFn := globalScannerMetrics.log(scannerMetricScanBucketDrive, s.drivePath, cache.Info.Name)
	defer func() {
		res := make(map[string]string)
		if err != nil {
			res["err"] = err.Error()
		}
		stopFn(res)
	}()

	// Updates must be closed before we return.
	defer xioutil.SafeClose(updates)
	var lc *lifecycle.Lifecycle

	// Check if the current bucket has a configured lifecycle policy
	if globalLifecycleSys != nil {
		lc, err = globalLifecycleSys.Get(cache.Info.Name)
		if err == nil && lc.HasActiveRules("") {
			cache.Info.lifeCycle = lc
		}
	}

	// Check if the current bucket has replication configuration
	if rcfg, _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, cache.Info.Name); err == nil {
		if rcfg.HasActiveRules("", true) {
			tgts, err := globalBucketTargetSys.ListBucketTargets(ctx, cache.Info.Name)
			if err == nil {
				cache.Info.replication = replicationConfig{
					Config:  rcfg,
					remotes: tgts,
				}
			}
		}
	}

	vcfg, _ := globalBucketVersioningSys.Get(cache.Info.Name)

	// return initialized object layer
	objAPI := newObjectLayerFn()
	// object layer not initialized, return.
	if objAPI == nil {
		return cache, errServerNotInitialized
	}

	poolIdx, setIdx, _ := s.GetDiskLoc()

	disks, err := objAPI.GetDisks(poolIdx, setIdx)
	if err != nil {
		return cache, err
	}

	cache.Info.updates = updates

	dataUsageInfo, err := scanDataFolder(ctx, disks, s.drivePath, cache, func(item scannerItem) (sizeSummary, error) {
		// Look for `xl.meta/xl.json' at the leaf.
		if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
			!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
			// if no xl.meta/xl.json found, skip the file.
			return sizeSummary{}, errSkipFile
		}
		stopFn := globalScannerMetrics.log(scannerMetricScanObject, s.drivePath, pathJoin(item.bucket, item.objectPath()))
		res := make(map[string]string, 8)
		defer func() {
			stopFn(res)
		}()

		doneSz := globalScannerMetrics.timeSize(scannerMetricReadMetadata)
		buf, err := s.readMetadata(ctx, item.Path)
		doneSz(len(buf))
		res["metasize"] = strconv.Itoa(len(buf))
		if err != nil {
			res["err"] = err.Error()
			return sizeSummary{}, errSkipFile
		}

		// Remove filename which is the meta file.
		item.transformMetaDir()

		fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath(), false)
		metaDataPoolPut(buf)
		if err != nil {
			res["err"] = err.Error()
			return sizeSummary{}, errSkipFile
		}

		// Pre-seed tier stats with every configured tier plus the
		// built-in storage classes so per-tier accounting has buckets.
		sizeS := sizeSummary{}
		for _, tier := range globalTierConfigMgr.ListTiers() {
			if sizeS.tiers == nil {
				sizeS.tiers = make(map[string]tierStats)
			}
			sizeS.tiers[tier.Name] = tierStats{}
		}
		if sizeS.tiers != nil {
			sizeS.tiers[storageclass.STANDARD] = tierStats{}
			sizeS.tiers[storageclass.RRS] = tierStats{}
		}

		done := globalScannerMetrics.time(scannerMetricApplyAll)
		objInfos, err := item.applyVersionActions(ctx, objAPI, fivs.Versions, globalExpiryState)
		done()

		if err != nil {
			res["err"] = err.Error()
			return sizeSummary{}, errSkipFile
		}

		versioned := vcfg != nil && vcfg.Versioned(item.objectPath())

		var objDeleted bool
		for _, oi := range objInfos {
			done = globalScannerMetrics.time(scannerMetricApplyVersion)
			var sz int64
			objDeleted, sz = item.applyActions(ctx, objAPI, oi, &sizeS)
			done()

			// DeleteAllVersionsAction: The object and all its
			// versions are expired and
			// doesn't contribute toward data usage.
			if objDeleted {
				break
			}
			actualSz, err := oi.GetActualSize()
			if err != nil {
				continue
			}

			if oi.DeleteMarker {
				sizeS.deleteMarkers++
			}
			if oi.VersionID != "" && sz == actualSz {
				sizeS.versions++
			}
			sizeS.totalSize += sz

			// Skip tier accounting if object version is a delete-marker or a free-version
			// tracking deleted transitioned objects
			switch {
			case oi.DeleteMarker, oi.TransitionedObject.FreeVersion:
				continue
			}
			tier := oi.StorageClass
			if tier == "" {
				tier = storageclass.STANDARD // no SC means "STANDARD"
			}
			if oi.TransitionedObject.Status == lifecycle.TransitionComplete {
				tier = oi.TransitionedObject.Tier
			}
			if sizeS.tiers != nil {
				if st, ok := sizeS.tiers[tier]; ok {
					sizeS.tiers[tier] = st.add(oi.tierStats())
				}
			}
		}

		// apply tier sweep action on free versions
		for _, freeVersion := range fivs.FreeVersions {
			oi := freeVersion.ToObjectInfo(item.bucket, item.objectPath(), versioned)
			done = globalScannerMetrics.time(scannerMetricTierObjSweep)
			globalExpiryState.enqueueFreeVersion(oi)
			done()
		}

		// These are rather expensive. Skip if nobody listens.
		if globalTrace.NumSubscribers(madmin.TraceScanner) > 0 {
			if len(fivs.FreeVersions) > 0 {
				res["free-versions"] = strconv.Itoa(len(fivs.FreeVersions))
			}

			if sizeS.versions > 0 {
				res["versions"] = strconv.FormatUint(sizeS.versions, 10)
			}
			res["size"] = strconv.FormatInt(sizeS.totalSize, 10)
			for name, tier := range sizeS.tiers {
				res["tier-size-"+name] = strconv.FormatUint(tier.TotalSize, 10)
				res["tier-versions-"+name] = strconv.Itoa(tier.NumVersions)
			}
			if sizeS.failedCount > 0 {
				res["repl-failed"] = fmt.Sprintf("%d versions, %d bytes", sizeS.failedCount, sizeS.failedSize)
			}
			if sizeS.pendingCount > 0 {
				res["repl-pending"] = fmt.Sprintf("%d versions, %d bytes", sizeS.pendingCount, sizeS.pendingSize)
			}
			for tgt, st := range sizeS.replTargetStats {
				res["repl-size-"+tgt] = strconv.FormatInt(st.replicatedSize, 10)
				res["repl-count-"+tgt] = strconv.FormatInt(st.replicatedCount, 10)
				if st.failedCount > 0 {
					res["repl-failed-"+tgt] = fmt.Sprintf("%d versions, %d bytes", st.failedCount, st.failedSize)
				}
				if st.pendingCount > 0 {
					res["repl-pending-"+tgt] = fmt.Sprintf("%d versions, %d bytes", st.pendingCount, st.pendingSize)
				}
			}
		}
		if objDeleted {
			// we return errIgnoreFileContrib to signal this function's
			// callers to skip this object's contribution towards
			// usage.
			return sizeSummary{}, errIgnoreFileContrib
		}
		return sizeS, nil
	}, scanMode, weSleep)
	if err != nil {
		return dataUsageInfo, err
	}

	dataUsageInfo.Info.LastUpdate = time.Now()
	return dataUsageInfo, nil
}

// getDeleteAttribute reads the cumulative delete counter stored as an
// extended attribute on the format file.
func (s *xlStorage) getDeleteAttribute() uint64 {
	attr := "user.total_deletes"
	buf, err := xattr.LGet(s.formatFile, attr)
	if err != nil {
		// We start off with '0' if we cannot read the attributes
		return 0
	}
	// NOTE(review): assumes the attribute value is at least 8 bytes — confirm
	// it is only ever written by setDeleteAttribute below.
	return binary.LittleEndian.Uint64(buf[:8])
}

// getWriteAttribute reads the cumulative write counter stored as an
// extended attribute on the format file.
func (s *xlStorage) getWriteAttribute() uint64 {
	attr := "user.total_writes"
	buf, err := xattr.LGet(s.formatFile, attr)
	if err != nil {
		// We start off with '0' if we cannot read the attributes
		return 0
	}

	// NOTE(review): assumes the attribute value is at least 8 bytes — confirm
	// it is only ever written by setWriteAttribute below.
	return binary.LittleEndian.Uint64(buf[:8])
}

// setDeleteAttribute persists the delete counter as a little-endian uint64 xattr.
func (s *xlStorage) setDeleteAttribute(deleteCount uint64) error {
	attr := "user.total_deletes"

	data := make([]byte, 8)
	binary.LittleEndian.PutUint64(data, deleteCount)
	return xattr.LSet(s.formatFile, attr, data)
}

// setWriteAttribute persists the write counter as a little-endian uint64 xattr.
func (s *xlStorage) setWriteAttribute(writeCount uint64) error {
	attr := "user.total_writes"

	data := make([]byte, 8)
	binary.LittleEndian.PutUint64(data, writeCount)
	return xattr.LSet(s.formatFile, attr, data)
}

// DiskInfo provides current information about disk space usage,
// total free inodes and underlying filesystem.
func (s *xlStorage) DiskInfo(_ context.Context, _ DiskInfoOptions) (info DiskInfo, err error) {
	info, err = s.diskInfoCache.Get()
	// Fields below are populated even when the cached lookup errored.
	info.NRRequests = s.nrRequests
	info.Rotational = s.rotational
	info.MountPath = s.drivePath
	info.Endpoint = s.endpoint.String()
	info.Scanning = atomic.LoadInt32(&s.scanning) == 1
	return info, err
}

// getVolDir - will convert incoming volume names to
// corresponding valid volume names on the backend in a platform
// compatible way for all operating systems.
If volume is not found 784 // an error is generated. 785 func (s *xlStorage) getVolDir(volume string) (string, error) { 786 if volume == "" || volume == "." || volume == ".." { 787 return "", errVolumeNotFound 788 } 789 volumeDir := pathJoin(s.drivePath, volume) 790 return volumeDir, nil 791 } 792 793 func (s *xlStorage) checkFormatJSON() (os.FileInfo, error) { 794 fi, err := Lstat(s.formatFile) 795 if err != nil { 796 // If the disk is still not initialized. 797 if osIsNotExist(err) { 798 if err = Access(s.drivePath); err == nil { 799 // Disk is present but missing `format.json` 800 return nil, errUnformattedDisk 801 } 802 if osIsNotExist(err) { 803 return nil, errDiskNotFound 804 } else if osIsPermission(err) { 805 return nil, errDiskAccessDenied 806 } 807 logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors 808 return nil, errCorruptedBackend 809 } else if osIsPermission(err) { 810 return nil, errDiskAccessDenied 811 } 812 logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors 813 return nil, errCorruptedBackend 814 } 815 return fi, nil 816 } 817 818 // GetDiskID - returns the cached disk uuid 819 func (s *xlStorage) GetDiskID() (string, error) { 820 s.RLock() 821 diskID := s.diskID 822 fileInfo := s.formatFileInfo 823 lastCheck := s.formatLastCheck 824 825 // check if we have a valid disk ID that is less than 1 seconds old. 826 if fileInfo != nil && diskID != "" && time.Since(lastCheck) <= 1*time.Second { 827 s.RUnlock() 828 return diskID, nil 829 } 830 s.RUnlock() 831 832 fi, err := s.checkFormatJSON() 833 if err != nil { 834 return "", err 835 } 836 837 if xioutil.SameFile(fi, fileInfo) && diskID != "" { 838 s.Lock() 839 // If the file has not changed, just return the cached diskID information. 840 s.formatLastCheck = time.Now() 841 s.Unlock() 842 return diskID, nil 843 } 844 845 b, err := os.ReadFile(s.formatFile) 846 if err != nil { 847 // If the disk is still not initialized. 
848 if osIsNotExist(err) { 849 if err = Access(s.drivePath); err == nil { 850 // Disk is present but missing `format.json` 851 return "", errUnformattedDisk 852 } 853 if osIsNotExist(err) { 854 return "", errDiskNotFound 855 } else if osIsPermission(err) { 856 return "", errDiskAccessDenied 857 } 858 logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors 859 return "", errCorruptedBackend 860 } else if osIsPermission(err) { 861 return "", errDiskAccessDenied 862 } 863 logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors 864 return "", errCorruptedBackend 865 } 866 867 format := &formatErasureV3{} 868 json := jsoniter.ConfigCompatibleWithStandardLibrary 869 if err = json.Unmarshal(b, &format); err != nil { 870 logger.LogOnceIf(GlobalContext, err, "check-format-json") // log unexpected errors 871 return "", errCorruptedFormat 872 } 873 874 s.Lock() 875 defer s.Unlock() 876 s.formatData = b 877 s.diskID = format.Erasure.This 878 s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1 879 s.formatFileInfo = fi 880 s.formatLastCheck = time.Now() 881 return s.diskID, nil 882 } 883 884 // Make a volume entry. 885 func (s *xlStorage) SetDiskID(id string) { 886 // NO-OP for xlStorage as it is handled either by xlStorageDiskIDCheck{} for local disks or 887 // storage rest server for remote disks. 888 } 889 890 func (s *xlStorage) MakeVolBulk(ctx context.Context, volumes ...string) error { 891 for _, volume := range volumes { 892 err := s.MakeVol(ctx, volume) 893 if err != nil && !errors.Is(err, errVolumeExists) { 894 return err 895 } 896 diskHealthCheckOK(ctx, err) 897 } 898 return nil 899 } 900 901 // Make a volume entry. 
func (s *xlStorage) MakeVol(ctx context.Context, volume string) error {
	if !isValidVolname(volume) {
		return errInvalidArgument
	}

	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	if err = Access(volumeDir); err != nil {
		// Volume does not exist we proceed to create.
		if osIsNotExist(err) {
			// Make a volume entry, with mode 0777 mkdir honors system umask.
			err = mkdirAll(volumeDir, 0o777, s.drivePath)
		}
		if osIsPermission(err) {
			return errDiskAccessDenied
		} else if isSysErrIO(err) {
			return errFaultyDisk
		}
		// nil here means mkdirAll succeeded.
		return err
	}

	// Stat succeeds we return errVolumeExists.
	return errVolumeExists
}

// ListVols - list volumes.
func (s *xlStorage) ListVols(ctx context.Context) (volsInfo []VolInfo, err error) {
	return listVols(ctx, s.drivePath)
}

// List all the volumes from drivePath.
func listVols(ctx context.Context, dirPath string) ([]VolInfo, error) {
	if err := checkPathLength(dirPath); err != nil {
		return nil, err
	}
	entries, err := readDir(dirPath)
	if err != nil {
		if errors.Is(err, errFileAccessDenied) {
			return nil, errDiskAccessDenied
		} else if errors.Is(err, errFileNotFound) {
			return nil, errDiskNotFound
		}
		return nil, err
	}
	volsInfo := make([]VolInfo, 0, len(entries))
	for _, entry := range entries {
		if !HasSuffix(entry, SlashSeparator) || !isValidVolname(pathutil.Clean(entry)) {
			// Skip if entry is neither a directory not a valid volume name.
			continue
		}
		volsInfo = append(volsInfo, VolInfo{
			Name: pathutil.Clean(entry),
		})
	}
	return volsInfo, nil
}

// StatVol - get volume info.
func (s *xlStorage) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) {
	// Verify if volume is valid and it exists.
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return VolInfo{}, err
	}

	// Stat a volume entry.
	var st os.FileInfo
	st, err = Lstat(volumeDir)
	if err != nil {
		switch {
		case osIsNotExist(err):
			return VolInfo{}, errVolumeNotFound
		case osIsPermission(err):
			return VolInfo{}, errDiskAccessDenied
		case isSysErrIO(err):
			return VolInfo{}, errFaultyDisk
		default:
			return VolInfo{}, err
		}
	}
	// As os.Lstat() doesn't carry other than ModTime(), use ModTime()
	// as CreatedTime.
	createdTime := st.ModTime()
	return VolInfo{
		Name:    volume,
		Created: createdTime,
	}, nil
}

// DeleteVol - delete a volume.
func (s *xlStorage) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) {
	// Verify if volume is valid and it exists.
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	if forceDelete {
		// Force delete moves the whole volume to trash (recursive).
		err = s.moveToTrash(volumeDir, true, true)
	} else {
		// Non-force delete only succeeds on an empty volume directory.
		err = Remove(volumeDir)
	}

	if err != nil {
		switch {
		case errors.Is(err, errFileNotFound):
			return errVolumeNotFound
		case osIsNotExist(err):
			return errVolumeNotFound
		case isSysErrNotEmpty(err):
			return errVolumeNotEmpty
		case osIsPermission(err):
			return errDiskAccessDenied
		case isSysErrIO(err):
			return errFaultyDisk
		default:
			return err
		}
	}
	return nil
}

// ListDir - return all the entries at the given directory path.
// If an entry is a directory it will be returned with a trailing SlashSeparator.
func (s *xlStorage) ListDir(ctx context.Context, origvolume, volume, dirPath string, count int) (entries []string, err error) {
	if contextCanceled(ctx) {
		return nil, ctx.Err()
	}

	if origvolume != "" {
		if !skipAccessChecks(origvolume) {
			origvolumeDir, err := s.getVolDir(origvolume)
			if err != nil {
				return nil, err
			}
			if err = Access(origvolumeDir); err != nil {
				return nil, convertAccessError(err, errVolumeAccessDenied)
			}
		}
	}

	// Verify if volume is valid and it exists.
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return nil, err
	}

	dirPathAbs := pathJoin(volumeDir, dirPath)
	// count <= 0 means list everything.
	if count > 0 {
		entries, err = readDirN(dirPathAbs, count)
	} else {
		entries, err = readDir(dirPathAbs)
	}
	if err != nil {
		// Distinguish "directory missing" from "volume missing".
		if errors.Is(err, errFileNotFound) && !skipAccessChecks(volume) {
			if ierr := Access(volumeDir); ierr != nil {
				return nil, convertAccessError(ierr, errVolumeAccessDenied)
			}
		}
		return nil, err
	}

	return entries, nil
}

// deleteVersions removes the given versions from the object's `xl.meta`,
// moving any now-orphaned data directories to trash; when the last version
// is removed the meta file itself is deleted.
func (s *xlStorage) deleteVersions(ctx context.Context, volume, path string, fis ...FileInfo) error {
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	discard := true

	var legacyJSON bool
	buf, _, err := s.readAllData(ctx, volume, volumeDir, pathJoin(volumeDir, path, xlStorageFormatFile), discard)
	if err != nil {
		if !errors.Is(err, errFileNotFound) {
			return err
		}

		// Fall back to legacy `xl.json` only on drives formatted with the
		// legacy distribution algorithm.
		s.RLock()
		legacy := s.formatLegacy
		s.RUnlock()
		if legacy {
			buf, _, err = s.readAllData(ctx, volume, volumeDir, pathJoin(volumeDir, path, xlStorageFormatFileV1), discard)
			if err != nil {
				return err
			}
			legacyJSON = true
		}
	}

	if len(buf) == 0 {
		if errors.Is(err, errFileNotFound) && !skipAccessChecks(volume) {
			if aerr := Access(volumeDir); aerr != nil && osIsNotExist(aerr) {
				return errVolumeNotFound
			}
		}
		return errFileNotFound
	}

	if legacyJSON {
		// Delete the meta file, if there are no more versions the
		// top level parent is automatically removed.
		return s.deleteFile(volumeDir, pathJoin(volumeDir, path), true, false)
	}

	var xlMeta xlMetaV2
	if err = xlMeta.LoadOrConvert(buf); err != nil {
		return err
	}

	for _, fi := range fis {
		dataDir, err := xlMeta.DeleteVersion(fi)
		if err != nil {
			if !fi.Deleted && (err == errFileNotFound || err == errFileVersionNotFound) {
				// Ignore these since they do not exist
				continue
			}
			return err
		}
		if dataDir != "" {
			versionID := fi.VersionID
			if versionID == "" {
				versionID = nullVersionID
			}

			// PR #11758 used DataDir, preserve it
			// for users who might have used master
			// branch
			xlMeta.data.remove(versionID, dataDir)

			// We need to attempt delete "dataDir" on the disk
			// due to a CopyObject() bug where it might have
			// inlined the data incorrectly, to avoid a situation
			// where we potentially leave "DataDir"
			filePath := pathJoin(volumeDir, path, dataDir)
			if err = checkPathLength(filePath); err != nil {
				return err
			}
			if err = s.moveToTrash(filePath, true, false); err != nil {
				if err != errFileNotFound {
					return err
				}
			}
		}
	}

	lastVersion := len(xlMeta.versions) == 0
	if !lastVersion {
		// Versions remain: rewrite the updated metadata in place.
		buf, err = xlMeta.AppendTo(metaDataPoolGet())
		defer metaDataPoolPut(buf)
		if err != nil {
			return err
		}

		return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
	}

	// Last version removed: delete the meta file itself.
	return s.deleteFile(volumeDir, pathJoin(volumeDir, path, xlStorageFormatFile), true, false)
}

// DeleteVersions deletes slice of versions, it can be same object
// or multiple objects.
1169 func (s *xlStorage) DeleteVersions(ctx context.Context, volume string, versions []FileInfoVersions, opts DeleteOptions) []error { 1170 errs := make([]error, len(versions)) 1171 1172 for i, fiv := range versions { 1173 if contextCanceled(ctx) { 1174 errs[i] = ctx.Err() 1175 continue 1176 } 1177 w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout()) 1178 if err := w.Run(func() error { return s.deleteVersions(ctx, volume, fiv.Name, fiv.Versions...) }); err != nil { 1179 errs[i] = err 1180 } 1181 diskHealthCheckOK(ctx, errs[i]) 1182 } 1183 1184 return errs 1185 } 1186 1187 func (s *xlStorage) cleanupTrashImmediateCallers(ctx context.Context) { 1188 for { 1189 select { 1190 case <-ctx.Done(): 1191 return 1192 case entry := <-s.immediatePurge: 1193 removeAll(entry) 1194 } 1195 } 1196 } 1197 1198 const almostFilledPercent = 0.05 1199 1200 func (s *xlStorage) diskAlmostFilled() bool { 1201 info, err := s.diskInfoCache.Get() 1202 if err != nil { 1203 return false 1204 } 1205 if info.Used == 0 || info.UsedInodes == 0 { 1206 return false 1207 } 1208 return (float64(info.Free)/float64(info.Used)) < almostFilledPercent || (float64(info.FreeInodes)/float64(info.UsedInodes)) < almostFilledPercent 1209 } 1210 1211 func (s *xlStorage) moveToTrash(filePath string, recursive, immediatePurge bool) (err error) { 1212 pathUUID := mustGetUUID() 1213 targetPath := pathutil.Join(s.drivePath, minioMetaTmpDeletedBucket, pathUUID) 1214 1215 if recursive { 1216 err = renameAll(filePath, targetPath, pathutil.Join(s.drivePath, minioMetaBucket)) 1217 } else { 1218 err = Rename(filePath, targetPath) 1219 } 1220 1221 var targetPath2 string 1222 if immediatePurge && HasSuffix(filePath, SlashSeparator) { 1223 // With immediate purge also attempt deleting for `__XL_DIR__` folder/directory objects. 
1224 targetPath2 = pathutil.Join(s.drivePath, minioMetaTmpDeletedBucket, mustGetUUID()) 1225 renameAll(encodeDirObject(filePath), targetPath2, pathutil.Join(s.drivePath, minioMetaBucket)) 1226 } 1227 1228 // ENOSPC is a valid error from rename(); remove instead of rename in that case 1229 if errors.Is(err, errDiskFull) || isSysErrNoSpace(err) { 1230 if recursive { 1231 err = removeAll(filePath) 1232 } else { 1233 err = Remove(filePath) 1234 } 1235 return err // Avoid the immediate purge since not needed 1236 } 1237 1238 if err != nil { 1239 return err 1240 } 1241 1242 if !immediatePurge && s.diskAlmostFilled() { 1243 immediatePurge = true 1244 } 1245 1246 // immediately purge the target 1247 if immediatePurge { 1248 for _, target := range []string{ 1249 targetPath, 1250 targetPath2, 1251 } { 1252 if target == "" { 1253 continue 1254 } 1255 select { 1256 case s.immediatePurge <- target: 1257 default: 1258 // Too much back pressure, we will perform the delete 1259 // blocking at this point we need to serialize operations. 1260 removeAll(target) 1261 } 1262 } 1263 } 1264 1265 return nil 1266 } 1267 1268 // DeleteVersion - deletes FileInfo metadata for path at `xl.meta`. forceDelMarker 1269 // will force creating a new `xl.meta` to create a new delete marker 1270 func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool, opts DeleteOptions) (err error) { 1271 if HasSuffix(path, SlashSeparator) { 1272 return s.Delete(ctx, volume, path, DeleteOptions{ 1273 Recursive: false, 1274 Immediate: false, 1275 }) 1276 } 1277 1278 volumeDir, err := s.getVolDir(volume) 1279 if err != nil { 1280 return err 1281 } 1282 1283 // Validate file path length, before reading. 
1284 filePath := pathJoin(volumeDir, path) 1285 if err = checkPathLength(filePath); err != nil { 1286 return err 1287 } 1288 1289 var legacyJSON bool 1290 buf, _, err := s.readAllData(ctx, volume, volumeDir, pathJoin(filePath, xlStorageFormatFile), true) 1291 if err != nil { 1292 if !errors.Is(err, errFileNotFound) { 1293 return err 1294 } 1295 metaDataPoolPut(buf) // Never used, return it 1296 if fi.Deleted && forceDelMarker { 1297 // Create a new xl.meta with a delete marker in it 1298 return s.WriteMetadata(ctx, "", volume, path, fi) 1299 } 1300 1301 s.RLock() 1302 legacy := s.formatLegacy 1303 s.RUnlock() 1304 if legacy { 1305 buf, err = s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFileV1)) 1306 if err != nil { 1307 if errors.Is(err, errFileNotFound) && fi.VersionID != "" { 1308 return errFileVersionNotFound 1309 } 1310 return err 1311 } 1312 legacyJSON = true 1313 } 1314 } 1315 1316 if len(buf) == 0 { 1317 if fi.VersionID != "" { 1318 return errFileVersionNotFound 1319 } 1320 return errFileNotFound 1321 } 1322 1323 if legacyJSON { 1324 // Delete the meta file, if there are no more versions the 1325 // top level parent is automatically removed. 
1326 return s.deleteFile(volumeDir, pathJoin(volumeDir, path), true, false) 1327 } 1328 1329 var xlMeta xlMetaV2 1330 if err = xlMeta.LoadOrConvert(buf); err != nil { 1331 return err 1332 } 1333 1334 dataDir, err := xlMeta.DeleteVersion(fi) 1335 if err != nil { 1336 return err 1337 } 1338 if dataDir != "" { 1339 versionID := fi.VersionID 1340 if versionID == "" { 1341 versionID = nullVersionID 1342 } 1343 // PR #11758 used DataDir, preserve it 1344 // for users who might have used master 1345 // branch 1346 xlMeta.data.remove(versionID, dataDir) 1347 1348 // We need to attempt delete "dataDir" on the disk 1349 // due to a CopyObject() bug where it might have 1350 // inlined the data incorrectly, to avoid a situation 1351 // where we potentially leave "DataDir" 1352 filePath := pathJoin(volumeDir, path, dataDir) 1353 if err = checkPathLength(filePath); err != nil { 1354 return err 1355 } 1356 if err = s.moveToTrash(filePath, true, false); err != nil { 1357 if err != errFileNotFound { 1358 return err 1359 } 1360 } 1361 } 1362 1363 if len(xlMeta.versions) != 0 { 1364 // xl.meta must still exist for other versions, dataDir is purged. 1365 buf, err = xlMeta.AppendTo(metaDataPoolGet()) 1366 defer metaDataPoolPut(buf) 1367 if err != nil { 1368 return err 1369 } 1370 1371 return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf) 1372 } 1373 1374 return s.deleteFile(volumeDir, pathJoin(volumeDir, path, xlStorageFormatFile), true, false) 1375 } 1376 1377 // Updates only metadata for a given version. 
1378 func (s *xlStorage) UpdateMetadata(ctx context.Context, volume, path string, fi FileInfo, opts UpdateMetadataOpts) (err error) { 1379 if len(fi.Metadata) == 0 { 1380 return errInvalidArgument 1381 } 1382 1383 buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile)) 1384 if err != nil { 1385 if err == errFileNotFound { 1386 if fi.VersionID != "" { 1387 return errFileVersionNotFound 1388 } 1389 } 1390 return err 1391 } 1392 defer metaDataPoolPut(buf) 1393 1394 if !isXL2V1Format(buf) { 1395 return errFileVersionNotFound 1396 } 1397 1398 var xlMeta xlMetaV2 1399 if err = xlMeta.Load(buf); err != nil { 1400 return err 1401 } 1402 1403 if err = xlMeta.UpdateObjectVersion(fi); err != nil { 1404 return err 1405 } 1406 1407 wbuf, err := xlMeta.AppendTo(metaDataPoolGet()) 1408 if err != nil { 1409 return err 1410 } 1411 defer metaDataPoolPut(wbuf) 1412 1413 return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), wbuf, !opts.NoPersistence) 1414 } 1415 1416 // WriteMetadata - writes FileInfo metadata for path at `xl.meta` 1417 func (s *xlStorage) WriteMetadata(ctx context.Context, origvolume, volume, path string, fi FileInfo) (err error) { 1418 if fi.Fresh { 1419 if origvolume != "" { 1420 origvolumeDir, err := s.getVolDir(origvolume) 1421 if err != nil { 1422 return err 1423 } 1424 1425 if !skipAccessChecks(origvolume) { 1426 // Stat a volume entry. 1427 if err = Access(origvolumeDir); err != nil { 1428 return convertAccessError(err, errVolumeAccessDenied) 1429 } 1430 } 1431 } 1432 1433 var xlMeta xlMetaV2 1434 if err := xlMeta.AddVersion(fi); err != nil { 1435 return err 1436 } 1437 buf, err := xlMeta.AppendTo(metaDataPoolGet()) 1438 defer metaDataPoolPut(buf) 1439 if err != nil { 1440 return err 1441 } 1442 // First writes for special situations do not write to stable storage. 1443 // this is currently used by 1444 // - emphemeral objects such as objects created during listObjects() calls 1445 // - newMultipartUpload() call.. 
1446 return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf, false) 1447 } 1448 1449 buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile)) 1450 if err != nil && err != errFileNotFound { 1451 return err 1452 } 1453 defer metaDataPoolPut(buf) 1454 1455 var xlMeta xlMetaV2 1456 if !isXL2V1Format(buf) { 1457 // This is both legacy and without proper version. 1458 if err = xlMeta.AddVersion(fi); err != nil { 1459 return err 1460 } 1461 1462 buf, err = xlMeta.AppendTo(metaDataPoolGet()) 1463 defer metaDataPoolPut(buf) 1464 if err != nil { 1465 return err 1466 } 1467 } else { 1468 if err = xlMeta.Load(buf); err != nil { 1469 // Corrupted data, reset and write. 1470 xlMeta = xlMetaV2{} 1471 } 1472 1473 if err = xlMeta.AddVersion(fi); err != nil { 1474 return err 1475 } 1476 1477 buf, err = xlMeta.AppendTo(metaDataPoolGet()) 1478 defer metaDataPoolPut(buf) 1479 if err != nil { 1480 return err 1481 } 1482 } 1483 1484 return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf) 1485 } 1486 1487 func (s *xlStorage) renameLegacyMetadata(volumeDir, path string) (err error) { 1488 s.RLock() 1489 legacy := s.formatLegacy 1490 s.RUnlock() 1491 if !legacy { 1492 // if its not a legacy backend then this function is 1493 // a no-op always returns errFileNotFound 1494 return errFileNotFound 1495 } 1496 1497 // Validate file path length, before reading. 1498 filePath := pathJoin(volumeDir, path) 1499 if err = checkPathLength(filePath); err != nil { 1500 return err 1501 } 1502 1503 srcFilePath := pathJoin(filePath, xlStorageFormatFileV1) 1504 dstFilePath := pathJoin(filePath, xlStorageFormatFile) 1505 1506 // Renaming xl.json to xl.meta should be fully synced to disk. 1507 defer func() { 1508 if err == nil && s.globalSync { 1509 // Sync to disk only upon success. 
1510 globalSync() 1511 } 1512 }() 1513 1514 if err = Rename(srcFilePath, dstFilePath); err != nil { 1515 switch { 1516 case isSysErrNotDir(err): 1517 return errFileNotFound 1518 case isSysErrPathNotFound(err): 1519 return errFileNotFound 1520 case isSysErrCrossDevice(err): 1521 return fmt.Errorf("%w (%s)->(%s)", errCrossDeviceLink, srcFilePath, dstFilePath) 1522 case osIsNotExist(err): 1523 return errFileNotFound 1524 case osIsExist(err): 1525 // This is returned only when destination is a directory and we 1526 // are attempting a rename from file to directory. 1527 return errIsNotRegular 1528 default: 1529 return err 1530 } 1531 } 1532 return nil 1533 } 1534 1535 func (s *xlStorage) readRaw(ctx context.Context, volume, volumeDir, filePath string, readData bool) (buf []byte, dmTime time.Time, err error) { 1536 if filePath == "" { 1537 return nil, dmTime, errFileNotFound 1538 } 1539 1540 xlPath := pathJoin(filePath, xlStorageFormatFile) 1541 if readData { 1542 buf, dmTime, err = s.readAllData(ctx, volume, volumeDir, xlPath, false) 1543 } else { 1544 buf, dmTime, err = s.readMetadataWithDMTime(ctx, xlPath) 1545 if err != nil { 1546 if osIsNotExist(err) { 1547 if !skipAccessChecks(volume) { 1548 if aerr := Access(volumeDir); aerr != nil && osIsNotExist(aerr) { 1549 return nil, time.Time{}, errVolumeNotFound 1550 } 1551 } 1552 } 1553 err = osErrToFileErr(err) 1554 } 1555 } 1556 1557 s.RLock() 1558 legacy := s.formatLegacy 1559 s.RUnlock() 1560 1561 if err != nil && errors.Is(err, errFileNotFound) && legacy { 1562 buf, dmTime, err = s.readAllData(ctx, volume, volumeDir, pathJoin(filePath, xlStorageFormatFileV1), false) 1563 if err != nil { 1564 return nil, time.Time{}, err 1565 } 1566 } 1567 1568 if len(buf) == 0 { 1569 if err != nil { 1570 return nil, time.Time{}, err 1571 } 1572 return nil, time.Time{}, errFileNotFound 1573 } 1574 1575 return buf, dmTime, nil 1576 } 1577 1578 // ReadXL reads from path/xl.meta, does not interpret the data it read. 
This 1579 // is a raw call equivalent of ReadVersion(). 1580 func (s *xlStorage) ReadXL(ctx context.Context, volume, path string, readData bool) (RawFileInfo, error) { 1581 volumeDir, err := s.getVolDir(volume) 1582 if err != nil { 1583 return RawFileInfo{}, err 1584 } 1585 1586 // Validate file path length, before reading. 1587 filePath := pathJoin(volumeDir, path) 1588 if err = checkPathLength(filePath); err != nil { 1589 return RawFileInfo{}, err 1590 } 1591 1592 buf, _, err := s.readRaw(ctx, volume, volumeDir, filePath, readData) 1593 return RawFileInfo{ 1594 Buf: buf, 1595 }, err 1596 } 1597 1598 // ReadOptions optional inputs for ReadVersion 1599 type ReadOptions struct { 1600 ReadData bool 1601 Healing bool 1602 } 1603 1604 // ReadVersion - reads metadata and returns FileInfo at path `xl.meta` 1605 // for all objects less than `32KiB` this call returns data as well 1606 // along with metadata. 1607 func (s *xlStorage) ReadVersion(ctx context.Context, origvolume, volume, path, versionID string, opts ReadOptions) (fi FileInfo, err error) { 1608 if origvolume != "" { 1609 origvolumeDir, err := s.getVolDir(origvolume) 1610 if err != nil { 1611 return fi, err 1612 } 1613 1614 if !skipAccessChecks(origvolume) { 1615 // Stat a volume entry. 1616 if err = Access(origvolumeDir); err != nil { 1617 return fi, convertAccessError(err, errVolumeAccessDenied) 1618 } 1619 } 1620 } 1621 1622 volumeDir, err := s.getVolDir(volume) 1623 if err != nil { 1624 return fi, err 1625 } 1626 1627 // Validate file path length, before reading. 
1628 filePath := pathJoin(volumeDir, path) 1629 if err = checkPathLength(filePath); err != nil { 1630 return fi, err 1631 } 1632 1633 readData := opts.ReadData 1634 1635 buf, _, err := s.readRaw(ctx, volume, volumeDir, filePath, readData) 1636 if err != nil { 1637 if err == errFileNotFound { 1638 if versionID != "" { 1639 return fi, errFileVersionNotFound 1640 } 1641 } 1642 return fi, err 1643 } 1644 1645 fi, err = getFileInfo(buf, volume, path, versionID, readData, true) 1646 if err != nil { 1647 return fi, err 1648 } 1649 1650 if len(fi.Data) == 0 { 1651 // We did not read inline data, so we have no references. 1652 defer metaDataPoolPut(buf) 1653 } 1654 1655 if readData { 1656 if len(fi.Data) > 0 || fi.Size == 0 { 1657 if fi.InlineData() { 1658 // If written with header we are fine. 1659 return fi, nil 1660 } 1661 if fi.Size == 0 || !(fi.VersionID != "" && fi.VersionID != nullVersionID) { 1662 // If versioned we have no conflicts. 1663 fi.SetInlineData() 1664 return fi, nil 1665 } 1666 1667 // For overwritten objects without header we might have a 1668 // conflict with data written later. Check the data path 1669 // if there is a part with data. 1670 partPath := fmt.Sprintf("part.%d", fi.Parts[0].Number) 1671 dataPath := pathJoin(path, fi.DataDir, partPath) 1672 _, lerr := Lstat(pathJoin(volumeDir, dataPath)) 1673 if lerr != nil { 1674 // Set the inline header, our inlined data is fine. 1675 fi.SetInlineData() 1676 return fi, nil 1677 } 1678 // Data exists on disk, remove the version from metadata. 
1679 fi.Data = nil 1680 } 1681 1682 // Reading data for small objects when 1683 // - object has not yet transitioned 1684 // - object size lesser than 128KiB 1685 // - object has maximum of 1 parts 1686 if fi.TransitionStatus == "" && 1687 fi.DataDir != "" && fi.Size <= smallFileThreshold && 1688 len(fi.Parts) == 1 { 1689 partPath := fmt.Sprintf("part.%d", fi.Parts[0].Number) 1690 dataPath := pathJoin(volumeDir, path, fi.DataDir, partPath) 1691 fi.Data, _, err = s.readAllData(ctx, volume, volumeDir, dataPath, false) 1692 if err != nil { 1693 return FileInfo{}, err 1694 } 1695 } 1696 } 1697 1698 if !skipAccessChecks(volume) && !opts.Healing && fi.TransitionStatus == "" && !fi.InlineData() && len(fi.Data) == 0 && fi.DataDir != "" && fi.DataDir != emptyUUID && fi.VersionPurgeStatus().Empty() { 1699 // Verify if the dataDir is present or not when the data 1700 // is not inlined to make sure we return correct errors 1701 // during HeadObject(). 1702 1703 // Healing must not come here and return error, since healing 1704 // deals with dataDirs directly, let healing fix things automatically. 1705 if lerr := Access(pathJoin(volumeDir, path, fi.DataDir)); lerr != nil { 1706 if os.IsNotExist(lerr) { 1707 // Data dir is missing we must return errFileCorrupted 1708 return FileInfo{}, errFileCorrupt 1709 } 1710 return FileInfo{}, osErrToFileErr(lerr) 1711 } 1712 } 1713 1714 return fi, nil 1715 } 1716 1717 func (s *xlStorage) readAllData(ctx context.Context, volume, volumeDir string, filePath string, discard bool) (buf []byte, dmTime time.Time, err error) { 1718 if filePath == "" { 1719 return nil, dmTime, errFileNotFound 1720 } 1721 1722 if contextCanceled(ctx) { 1723 return nil, time.Time{}, ctx.Err() 1724 } 1725 1726 f, err := OpenFile(filePath, readMode, 0o666) 1727 if err != nil { 1728 switch { 1729 case osIsNotExist(err): 1730 // Check if the object doesn't exist because its bucket 1731 // is missing in order to return the correct error. 
1732 if !skipAccessChecks(volume) { 1733 if err = Access(volumeDir); err != nil && osIsNotExist(err) { 1734 return nil, dmTime, errVolumeNotFound 1735 } 1736 } 1737 return nil, dmTime, errFileNotFound 1738 case osIsPermission(err): 1739 return nil, dmTime, errFileAccessDenied 1740 case isSysErrNotDir(err) || isSysErrIsDir(err): 1741 return nil, dmTime, errFileNotFound 1742 case isSysErrHandleInvalid(err): 1743 // This case is special and needs to be handled for windows. 1744 return nil, dmTime, errFileNotFound 1745 case isSysErrIO(err): 1746 return nil, dmTime, errFaultyDisk 1747 case isSysErrTooManyFiles(err): 1748 return nil, dmTime, errTooManyOpenFiles 1749 case isSysErrInvalidArg(err): 1750 st, _ := Lstat(filePath) 1751 if st != nil && st.IsDir() { 1752 // Linux returns InvalidArg for directory O_DIRECT 1753 // we need to keep this fallback code to return correct 1754 // errors upwards. 1755 return nil, dmTime, errFileNotFound 1756 } 1757 return nil, dmTime, errUnsupportedDisk 1758 } 1759 return nil, dmTime, err 1760 } 1761 1762 if discard { 1763 // This discard is mostly true for DELETEEs 1764 // so we need to make sure we do not keep 1765 // page-cache references after. 1766 defer disk.Fdatasync(f) 1767 } 1768 1769 defer f.Close() 1770 1771 // Get size for precise allocation. 1772 stat, err := f.Stat() 1773 if err != nil { 1774 buf, err = io.ReadAll(f) 1775 return buf, dmTime, osErrToFileErr(err) 1776 } 1777 if stat.IsDir() { 1778 return nil, dmTime, errFileNotFound 1779 } 1780 1781 // Read into appropriate buffer. 1782 sz := stat.Size() 1783 if sz <= metaDataReadDefault { 1784 buf = metaDataPoolGet() 1785 buf = buf[:sz] 1786 } else { 1787 buf = make([]byte, sz) 1788 } 1789 1790 // Read file... 1791 _, err = io.ReadFull(f, buf) 1792 1793 return buf, stat.ModTime().UTC(), osErrToFileErr(err) 1794 } 1795 1796 // ReadAll is a raw call, reads content at any path and returns the buffer. 
1797 func (s *xlStorage) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) { 1798 // Specific optimization to avoid re-read from the drives for `format.json` 1799 // in-case the caller is a network operation. 1800 if volume == minioMetaBucket && path == formatConfigFile { 1801 s.RLock() 1802 formatData := make([]byte, len(s.formatData)) 1803 copy(formatData, s.formatData) 1804 s.RUnlock() 1805 if len(formatData) > 0 { 1806 return formatData, nil 1807 } 1808 } 1809 volumeDir, err := s.getVolDir(volume) 1810 if err != nil { 1811 return nil, err 1812 } 1813 1814 // Validate file path length, before reading. 1815 filePath := pathJoin(volumeDir, path) 1816 if err = checkPathLength(filePath); err != nil { 1817 return nil, err 1818 } 1819 1820 buf, _, err = s.readAllData(ctx, volume, volumeDir, filePath, false) 1821 return buf, err 1822 } 1823 1824 // ReadFile reads exactly len(buf) bytes into buf. It returns the 1825 // number of bytes copied. The error is EOF only if no bytes were 1826 // read. On return, n == len(buf) if and only if err == nil. n == 0 1827 // for io.EOF. 1828 // 1829 // If an EOF happens after reading some but not all the bytes, 1830 // ReadFile returns ErrUnexpectedEOF. 1831 // 1832 // If the BitrotVerifier is not nil or not verified ReadFile 1833 // tries to verify whether the disk has bitrot. 1834 // 1835 // Additionally ReadFile also starts reading from an offset. ReadFile 1836 // semantics are same as io.ReadFull. 1837 func (s *xlStorage) ReadFile(ctx context.Context, volume string, path string, offset int64, buffer []byte, verifier *BitrotVerifier) (int64, error) { 1838 if offset < 0 { 1839 return 0, errInvalidArgument 1840 } 1841 1842 volumeDir, err := s.getVolDir(volume) 1843 if err != nil { 1844 return 0, err 1845 } 1846 1847 var n int 1848 1849 if !skipAccessChecks(volume) { 1850 // Stat a volume entry. 
1851 if err = Access(volumeDir); err != nil { 1852 return 0, convertAccessError(err, errFileAccessDenied) 1853 } 1854 } 1855 1856 // Validate effective path length before reading. 1857 filePath := pathJoin(volumeDir, path) 1858 if err = checkPathLength(filePath); err != nil { 1859 return 0, err 1860 } 1861 1862 // Open the file for reading. 1863 file, err := OpenFile(filePath, readMode, 0o666) 1864 if err != nil { 1865 switch { 1866 case osIsNotExist(err): 1867 return 0, errFileNotFound 1868 case osIsPermission(err): 1869 return 0, errFileAccessDenied 1870 case isSysErrNotDir(err): 1871 return 0, errFileAccessDenied 1872 case isSysErrIO(err): 1873 return 0, errFaultyDisk 1874 case isSysErrTooManyFiles(err): 1875 return 0, errTooManyOpenFiles 1876 default: 1877 return 0, err 1878 } 1879 } 1880 1881 // Close the file descriptor. 1882 defer file.Close() 1883 1884 st, err := file.Stat() 1885 if err != nil { 1886 return 0, err 1887 } 1888 1889 // Verify it is a regular file, otherwise subsequent Seek is 1890 // undefined. 
1891 if !st.Mode().IsRegular() { 1892 return 0, errIsNotRegular 1893 } 1894 1895 if verifier == nil { 1896 n, err = file.ReadAt(buffer, offset) 1897 return int64(n), err 1898 } 1899 1900 h := verifier.algorithm.New() 1901 if _, err = io.Copy(h, io.LimitReader(file, offset)); err != nil { 1902 return 0, err 1903 } 1904 1905 if n, err = io.ReadFull(file, buffer); err != nil { 1906 return int64(n), err 1907 } 1908 1909 if _, err = h.Write(buffer); err != nil { 1910 return 0, err 1911 } 1912 1913 if _, err = io.Copy(h, file); err != nil { 1914 return 0, err 1915 } 1916 1917 if !bytes.Equal(h.Sum(nil), verifier.sum) { 1918 return 0, errFileCorrupt 1919 } 1920 1921 return int64(len(buffer)), nil 1922 } 1923 1924 func (s *xlStorage) openFileDirect(path string, mode int) (f *os.File, err error) { 1925 w, err := OpenFileDirectIO(path, mode, 0o666) 1926 if err != nil { 1927 switch { 1928 case isSysErrInvalidArg(err): 1929 return nil, errUnsupportedDisk 1930 case osIsPermission(err): 1931 return nil, errDiskAccessDenied 1932 case isSysErrIO(err): 1933 return nil, errFaultyDisk 1934 case isSysErrNotDir(err): 1935 return nil, errDiskNotDir 1936 case os.IsNotExist(err): 1937 return nil, errDiskNotFound 1938 } 1939 } 1940 1941 return w, nil 1942 } 1943 1944 func (s *xlStorage) openFileSync(filePath string, mode int) (f *os.File, err error) { 1945 return s.openFile(filePath, mode|writeMode) 1946 } 1947 1948 func (s *xlStorage) openFile(filePath string, mode int) (f *os.File, err error) { 1949 // Create top level directories if they don't exist. 1950 // with mode 0777 mkdir honors system umask. 1951 if err = mkdirAll(pathutil.Dir(filePath), 0o777, s.drivePath); err != nil { 1952 return nil, osErrToFileErr(err) 1953 } 1954 1955 w, err := OpenFile(filePath, mode, 0o666) 1956 if err != nil { 1957 // File path cannot be verified since one of the parents is a file. 
1958 switch { 1959 case isSysErrIsDir(err): 1960 return nil, errIsNotRegular 1961 case osIsPermission(err): 1962 return nil, errFileAccessDenied 1963 case isSysErrNotDir(err): 1964 return nil, errFileAccessDenied 1965 case isSysErrIO(err): 1966 return nil, errFaultyDisk 1967 case isSysErrTooManyFiles(err): 1968 return nil, errTooManyOpenFiles 1969 default: 1970 return nil, err 1971 } 1972 } 1973 1974 return w, nil 1975 } 1976 1977 type sendFileReader struct { 1978 io.Reader 1979 io.Closer 1980 } 1981 1982 // ReadFileStream - Returns the read stream of the file. 1983 func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) { 1984 if offset < 0 { 1985 return nil, errInvalidArgument 1986 } 1987 1988 volumeDir, err := s.getVolDir(volume) 1989 if err != nil { 1990 return nil, err 1991 } 1992 1993 // Validate effective path length before reading. 1994 filePath := pathJoin(volumeDir, path) 1995 if err = checkPathLength(filePath); err != nil { 1996 return nil, err 1997 } 1998 1999 file, err := OpenFile(filePath, readMode, 0o666) 2000 if err != nil { 2001 switch { 2002 case osIsNotExist(err): 2003 if !skipAccessChecks(volume) { 2004 if err = Access(volumeDir); err != nil && osIsNotExist(err) { 2005 return nil, errVolumeNotFound 2006 } 2007 } 2008 return nil, errFileNotFound 2009 case osIsPermission(err): 2010 return nil, errFileAccessDenied 2011 case isSysErrNotDir(err): 2012 return nil, errFileAccessDenied 2013 case isSysErrIO(err): 2014 return nil, errFaultyDisk 2015 case isSysErrTooManyFiles(err): 2016 return nil, errTooManyOpenFiles 2017 case isSysErrInvalidArg(err): 2018 return nil, errUnsupportedDisk 2019 default: 2020 return nil, err 2021 } 2022 } 2023 2024 if length < 0 { 2025 return file, nil 2026 } 2027 2028 st, err := file.Stat() 2029 if err != nil { 2030 file.Close() 2031 return nil, err 2032 } 2033 2034 // Verify it is a regular file, otherwise subsequent Seek is 2035 // undefined. 
2036 if !st.Mode().IsRegular() { 2037 file.Close() 2038 return nil, errIsNotRegular 2039 } 2040 2041 if st.Size() < offset+length { 2042 // Expected size cannot be satisfied for 2043 // requested offset and length 2044 file.Close() 2045 return nil, errFileCorrupt 2046 } 2047 2048 if offset > 0 { 2049 if _, err = file.Seek(offset, io.SeekStart); err != nil { 2050 file.Close() 2051 return nil, err 2052 } 2053 } 2054 2055 return &sendFileReader{Reader: io.LimitReader(file, length), Closer: file}, nil 2056 } 2057 2058 // closeWrapper converts a function to an io.Closer 2059 type closeWrapper func() error 2060 2061 // Close calls the wrapped function. 2062 func (c closeWrapper) Close() error { 2063 return c() 2064 } 2065 2066 // CreateFile - creates the file. 2067 func (s *xlStorage) CreateFile(ctx context.Context, origvolume, volume, path string, fileSize int64, r io.Reader) (err error) { 2068 if origvolume != "" { 2069 origvolumeDir, err := s.getVolDir(origvolume) 2070 if err != nil { 2071 return err 2072 } 2073 2074 if !skipAccessChecks(origvolume) { 2075 // Stat a volume entry. 
			// Stat the source-volume entry; map raw os errors to
			// volume-level errors (not-found, faulty disk, permission).
			if err = Access(origvolumeDir); err != nil {
				return convertAccessError(err, errVolumeAccessDenied)
			}
		}
	}

	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	filePath := pathJoin(volumeDir, path)
	if err = checkPathLength(filePath); err != nil {
		return err
	}

	parentFilePath := pathutil.Dir(filePath)
	defer func() {
		if err != nil {
			if volume == minioMetaTmpBucket {
				// only cleanup parent path if the
				// parent volume name is minioMetaTmpBucket
				removeAll(parentFilePath)
			}
		}
	}()

	// O_EXCL: callers expect to create a fresh file, never to clobber
	// an existing one.
	return s.writeAllDirect(ctx, filePath, fileSize, r, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
}

// writeAllDirect streams fileSize bytes from r into filePath, creating
// parent directories as needed. When the drive and server config allow it
// (and fileSize is known and positive) the file is written with O_DIRECT
// using a pooled aligned buffer, followed by an fdatasync; otherwise a
// plain buffered copy is used. Returns errLessData/errMoreData when the
// reader produced fewer/more bytes than the declared fileSize
// (fileSize < 0 means "unknown", which skips the size check).
func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSize int64, r io.Reader, flags int) (err error) {
	if contextCanceled(ctx) {
		return ctx.Err()
	}

	// Create top level directories if they don't exist.
	// with mode 0777 mkdir honors system umask.
	parentFilePath := pathutil.Dir(filePath)
	if err = mkdirAll(parentFilePath, 0o777, s.drivePath); err != nil {
		return osErrToFileErr(err)
	}

	odirectEnabled := globalAPIConfig.odirectEnabled() && s.oDirect && fileSize > 0

	var w *os.File
	if odirectEnabled {
		w, err = OpenFileDirectIO(filePath, flags, 0o666)
	} else {
		w, err = OpenFile(filePath, flags, 0o666)
	}
	if err != nil {
		return osErrToFileErr(err)
	}

	// Pick a pooled copy buffer sized to the stream; note that an
	// unknown size (fileSize <= 0) falls into the small-buffer case.
	var bufp *[]byte
	switch {
	case fileSize > 0 && fileSize >= xioutil.BlockSizeReallyLarge:
		// use a larger 4MiB buffer for a really large streams.
		bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte)
		defer xioutil.ODirectPoolXLarge.Put(bufp)
	case fileSize <= xioutil.BlockSizeSmall:
		bufp = xioutil.ODirectPoolSmall.Get().(*[]byte)
		defer xioutil.ODirectPoolSmall.Put(bufp)
	default:
		bufp = xioutil.ODirectPoolLarge.Get().(*[]byte)
		defer xioutil.ODirectPoolLarge.Put(bufp)
	}

	var written int64
	if odirectEnabled {
		// CopyAligned handles the O_DIRECT alignment requirements and
		// the unaligned tail of the stream.
		written, err = xioutil.CopyAligned(diskHealthWriter(ctx, w), r, *bufp, fileSize, w)
	} else {
		written, err = io.CopyBuffer(diskHealthWriter(ctx, w), r, *bufp)
	}
	if err != nil {
		w.Close()
		return err
	}

	if written < fileSize && fileSize >= 0 {
		w.Close()
		return errLessData
	} else if written > fileSize && fileSize >= 0 {
		w.Close()
		return errMoreData
	}

	// Only interested in flushing the size_t not mtime/atime
	if err = Fdatasync(w); err != nil {
		w.Close()
		return err
	}

	// Dealing with error returns from close() - 'man 2 close'
	//
	// A careful programmer will check the return value of close(), since it is quite possible that
	// errors on a previous write(2) operation are reported only on the final close() that releases
	// the open file descriptor.
	//
	// Failing to check the return value when closing a file may lead to silent loss of data.
	// This can especially be observed with NFS and with disk quota.
	return w.Close()
}

// writeAll writes b to volume/path, truncating any existing content.
// With sync=true the write is durably flushed: large payloads (bigger
// than the directio alignment) go through writeAllDirect (O_DIRECT +
// fdatasync), smaller ones use an O_SYNC-style open via openFileSync.
func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) {
	if contextCanceled(ctx) {
		return ctx.Err()
	}

	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	filePath := pathJoin(volumeDir, path)
	if err = checkPathLength(filePath); err != nil {
		return err
	}

	flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC

	var w *os.File
	if sync {
		// Perform directIO along with fdatasync for larger xl.meta, mostly when
		// xl.meta has "inlined data" we prefer writing O_DIRECT and then doing
		// fdatasync() at the end instead of opening the file with O_DSYNC.
		//
		// This is an optimization mainly to ensure faster I/O.
		if len(b) > xioutil.DirectioAlignSize {
			r := bytes.NewReader(b)
			return s.writeAllDirect(ctx, filePath, r.Size(), r, flags)
		}
		w, err = s.openFileSync(filePath, flags)
	} else {
		w, err = s.openFile(filePath, flags)
	}
	if err != nil {
		return err
	}

	n, err := w.Write(b)
	if err != nil {
		w.Close()
		return err
	}

	if n != len(b) {
		w.Close()
		return io.ErrShortWrite
	}

	// Dealing with error returns from close() - 'man 2 close'
	//
	// A careful programmer will check the return value of close(), since it is quite possible that
	// errors on a previous write(2) operation are reported only on the final close() that releases
	// the open file descriptor.
	//
	// Failing to check the return value when closing a file may lead to silent loss of data.
	// This can especially be observed with NFS and with disk quota.
	return w.Close()
}

// WriteAll - writes b to volume/path with sync enabled (durable write).
func (s *xlStorage) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) {
	return s.writeAll(ctx, volume, path, b, true)
}

// AppendFile - append a byte array at path, if file doesn't exist at
// path this call explicitly creates it.
func (s *xlStorage) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) {
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	if !skipAccessChecks(volume) {
		// Stat a volume entry.
		if err = Access(volumeDir); err != nil {
			return convertAccessError(err, errVolumeAccessDenied)
		}
	}

	filePath := pathJoin(volumeDir, path)
	if err = checkPathLength(filePath); err != nil {
		return err
	}

	var w *os.File
	// Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
	// AppendFile() is only used by healing code to heal objects written in old format.
	w, err = s.openFileSync(filePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
	if err != nil {
		return err
	}
	defer w.Close()

	n, err := w.Write(buf)
	if err != nil {
		return err
	}

	if n != len(buf) {
		return io.ErrShortWrite
	}

	return nil
}

// CheckParts check if path has necessary parts available.
func (s *xlStorage) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) error {
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	for _, part := range fi.Parts {
		partPath := pathJoin(path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
		filePath := pathJoin(volumeDir, partPath)
		if err = checkPathLength(filePath); err != nil {
			return err
		}
		st, err := Lstat(filePath)
		if err != nil {
			if osIsNotExist(err) {
				// Distinguish "part missing" from "whole volume
				// missing" before reporting the file-level error.
				if !skipAccessChecks(volume) {
					// Stat a volume entry.
					if verr := Access(volumeDir); verr != nil {
						if osIsNotExist(verr) {
							return errVolumeNotFound
						}
						return verr
					}
				}
			}
			return osErrToFileErr(err)
		}
		// A directory where a part file is expected means the part
		// does not exist as a regular file.
		if st.Mode().IsDir() {
			return errFileNotFound
		}
		// Check if shard is truncated.
		if st.Size() < fi.Erasure.ShardFileSize(part.Size) {
			return errFileCorrupt
		}
	}

	return nil
}

// deleteFile deletes a file or a directory if its empty unless recursive
// is set to true. If the target is successfully deleted, it will recursively
// move up the tree, deleting empty parent directories until it finds one
// with files in it. Returns nil for a non-empty directory even when
// recursive is set to false.
func (s *xlStorage) deleteFile(basePath, deletePath string, recursive, immediate bool) error {
	if basePath == "" || deletePath == "" {
		return nil
	}

	// Refuse to delete anything outside of (or equal to) basePath.
	bp := pathutil.Clean(basePath) // do not override basepath / or deletePath /
	dp := pathutil.Clean(deletePath)
	if !strings.HasPrefix(dp, bp) || dp == bp {
		return nil
	}

	var err error
	if recursive {
		// Recursive deletes go through the trash so the actual
		// removal can happen asynchronously (or immediately).
		err = s.moveToTrash(deletePath, true, immediate)
	} else {
		err = Remove(deletePath)
	}
	if err != nil {
		switch {
		case isSysErrNotEmpty(err):
			// if object is a directory, but if its not empty
			// return FileNotFound to indicate its an empty prefix.
			if HasSuffix(deletePath, SlashSeparator) {
				return errFileNotFound
			}
			// if we have .DS_Store only on macOS
			if runtime.GOOS == globalMacOSName {
				storeFilePath := pathJoin(deletePath, ".DS_Store")
				_, err := Stat(storeFilePath)
				// .DS_Store exists
				if err == nil {
					// delete first
					Remove(storeFilePath)
					// try again
					Remove(deletePath)
				}
			}
			// Ignore errors if the directory is not empty. The server relies on
			// this functionality, and sometimes uses recursion that should not
			// error on parent directories.
			return nil
		case osIsNotExist(err):
			return nil
		case errors.Is(err, errFileNotFound):
			return nil
		case osIsPermission(err):
			return errFileAccessDenied
		case isSysErrIO(err):
			return errFaultyDisk
		default:
			return err
		}
	}

	// Delete parent directory obviously not recursively. Errors for
	// parent directories shouldn't trickle down.
	s.deleteFile(basePath, pathutil.Dir(pathutil.Clean(deletePath)), false, false)

	return nil
}

// DeleteFile - delete a file at path.
func (s *xlStorage) Delete(ctx context.Context, volume string, path string, deleteOpts DeleteOptions) (err error) {
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	if !skipAccessChecks(volume) {
		// Stat a volume entry.
		if err = Access(volumeDir); err != nil {
			return convertAccessError(err, errVolumeAccessDenied)
		}
	}

	// Following code is needed so that we retain SlashSeparator suffix if any in
	// path argument.
	filePath := pathJoin(volumeDir, path)
	if err = checkPathLength(filePath); err != nil {
		return err
	}

	// Delete file and delete parent directory as well if it's empty.
	return s.deleteFile(volumeDir, filePath, deleteOpts.Recursive, deleteOpts.Immediate)
}

// skipAccessChecks reports whether access (volume existence) checks can
// be skipped for the given volume; internal minio meta buckets are
// always assumed to exist.
func skipAccessChecks(volume string) (ok bool) {
	for _, prefix := range []string{
		minioMetaTmpDeletedBucket,
		minioMetaTmpBucket,
		minioMetaMultipartBucket,
		minioMetaBucket,
	} {
		if strings.HasPrefix(volume, prefix) {
			return true
		}
	}
	return ok
}

// RenameData - rename source path to destination path atomically, metadata and data directory.
func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
	defer func() {
		// Errors in this list are expected in normal operation and
		// should not be logged.
		ignoredErrs := []error{
			errFileNotFound,
			errVolumeNotFound,
			errFileVersionNotFound,
			errDiskNotFound,
			errUnformattedDisk,
			errMaxVersionsExceeded,
			errFileAccessDenied,
		}
		if err != nil && !IsErr(err, ignoredErrs...) && !contextCanceled(ctx) {
			// Only log these errors if context is not yet canceled.
			logger.LogOnceIf(ctx, fmt.Errorf("drive:%s, srcVolume: %s, srcPath: %s, dstVolume: %s:, dstPath: %s - error %v",
				s.drivePath,
				srcVolume, srcPath,
				dstVolume, dstPath,
				err), "xl-storage-rename-data-"+dstVolume)
		}
		if s.globalSync {
			globalSync()
		}
	}()

	srcVolumeDir, err := s.getVolDir(srcVolume)
	if err != nil {
		return 0, err
	}

	dstVolumeDir, err := s.getVolDir(dstVolume)
	if err != nil {
		return 0, err
	}

	if !skipAccessChecks(srcVolume) {
		// Stat a volume entry.
		if err = Access(srcVolumeDir); err != nil {
			return 0, convertAccessError(err, errVolumeAccessDenied)
		}
	}

	if !skipAccessChecks(dstVolume) {
		if err = Access(dstVolumeDir); err != nil {
			return 0, convertAccessError(err, errVolumeAccessDenied)
		}
	}

	srcFilePath := pathutil.Join(srcVolumeDir, pathJoin(srcPath, xlStorageFormatFile))
	dstFilePath := pathutil.Join(dstVolumeDir, pathJoin(dstPath, xlStorageFormatFile))

	var srcDataPath string
	var dstDataPath string
	var dataDir string
	if !fi.IsRemote() {
		dataDir = retainSlash(fi.DataDir)
	}
	if dataDir != "" {
		srcDataPath = retainSlash(pathJoin(srcVolumeDir, srcPath, dataDir))
		// make sure to always use path.Join here, do not use pathJoin as
		// it would additionally add `/` at the end and it comes in the
		// way of renameAll(), parentDir creation.
		dstDataPath = pathutil.Join(dstVolumeDir, dstPath, dataDir)
	}

	if err = checkPathLength(srcFilePath); err != nil {
		return 0, err
	}

	if err = checkPathLength(dstFilePath); err != nil {
		return 0, err
	}

	// Read any existing destination metadata so the new version can be
	// merged into it.
	dstBuf, err := xioutil.ReadFile(dstFilePath)
	if err != nil {
		// handle situations when dstFilePath is 'file'
		// for example such as someone is trying to
		// upload an object such as `prefix/object/xl.meta`
		// where `prefix/object` is already an object
		if isSysErrNotDir(err) && runtime.GOOS != globalWindowsOSName {
			// NOTE: On windows the error happens at
			// next line and returns appropriate error.
			return 0, errFileAccessDenied
		}
		if !osIsNotExist(err) {
			return 0, osErrToFileErr(err)
		}
		// errFileNotFound comes here.
		err = s.renameLegacyMetadata(dstVolumeDir, dstPath)
		if err != nil && err != errFileNotFound {
			return 0, err
		}
		if err == nil {
			dstBuf, err = xioutil.ReadFile(dstFilePath)
			if err != nil && !osIsNotExist(err) {
				return 0, osErrToFileErr(err)
			}
		}
	}

	var xlMeta xlMetaV2
	var legacyPreserved bool
	if len(dstBuf) > 0 {
		if isXL2V1Format(dstBuf) {
			if err = xlMeta.Load(dstBuf); err != nil {
				// Data appears corrupt. Drop data.
				xlMeta = xlMetaV2{}
			}
		} else {
			// This code-path is to preserve the legacy data.
			xlMetaLegacy := &xlMetaV1Object{}
			json := jsoniter.ConfigCompatibleWithStandardLibrary
			if err := json.Unmarshal(dstBuf, xlMetaLegacy); err != nil {
				logger.LogOnceIf(ctx, err, "read-data-unmarshal-"+dstFilePath)
				// Data appears corrupt. Drop data.
			} else {
				xlMetaLegacy.DataDir = legacyDataDir
				if err = xlMeta.AddLegacy(xlMetaLegacy); err != nil {
					logger.LogOnceIf(ctx, err, "read-data-add-legacy-"+dstFilePath)
				}
				legacyPreserved = true
			}
		}
	} else {
		s.RLock()
		formatLegacy := s.formatLegacy
		s.RUnlock()
		// It is possible that some drives may not have `xl.meta` file
		// in such scenarios verify if at least `part.1` files exist
		// to verify for legacy version.
		if formatLegacy {
			// We only need this code if we are moving
			// from `xl.json` to `xl.meta`, we can avoid
			// one extra readdir operation here for all
			// new deployments.
			currentDataPath := pathJoin(dstVolumeDir, dstPath)
			entries, err := readDirN(currentDataPath, 1)
			if err != nil && err != errFileNotFound {
				return 0, osErrToFileErr(err)
			}
			for _, entry := range entries {
				if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
					continue
				}
				if strings.HasPrefix(entry, "part.") {
					legacyPreserved = true
					break
				}
			}
		}
	}

	legacyDataPath := pathJoin(dstVolumeDir, dstPath, legacyDataDir)
	if legacyPreserved {
		// Preserve all the legacy data, could be slow, but at max there can be 10,000 parts.
		currentDataPath := pathJoin(dstVolumeDir, dstPath)
		entries, err := readDir(currentDataPath)
		if err != nil {
			return 0, osErrToFileErr(err)
		}

		// legacy data dir means its old content, honor system umask.
		if err = mkdirAll(legacyDataPath, 0o777, dstVolumeDir); err != nil {
			// any failed mkdir-calls delete them.
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
			return 0, osErrToFileErr(err)
		}

		for _, entry := range entries {
			// Skip xl.meta renames further, also ignore any directories such as `legacyDataDir`
			if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
				continue
			}

			if err = Rename(pathJoin(currentDataPath, entry), pathJoin(legacyDataPath, entry)); err != nil {
				// Any failed rename calls un-roll previous transaction.
				s.deleteFile(dstVolumeDir, legacyDataPath, true, false)

				return 0, osErrToFileErr(err)
			}
		}
	}

	var oldDstDataPath, reqVID string

	if fi.VersionID == "" {
		reqVID = nullVersionID
	} else {
		reqVID = fi.VersionID
	}

	// Replace the data of null version or any other existing version-id
	_, ver, err := xlMeta.findVersionStr(reqVID)
	if err == nil {
		dataDir := ver.getDataDir()
		if dataDir != "" && (xlMeta.SharedDataDirCountStr(reqVID, dataDir) == 0) {
			// Purge the destination path as we are not preserving anything
			// versioned object was not requested.
			oldDstDataPath = pathJoin(dstVolumeDir, dstPath, dataDir)
			// if old destination path is same as new destination path
			// there is nothing to purge, this is true in case of healing
			// avoid setting oldDstDataPath at that point.
			if oldDstDataPath == dstDataPath {
				oldDstDataPath = ""
			} else {
				xlMeta.data.remove(reqVID, dataDir)
			}
		}
	}

	// Empty fi.VersionID indicates that versioning is either
	// suspended or disabled on this bucket. RenameData will replace
	// the 'null' version. We add a free-version to track its tiered
	// content for asynchronous deletion.
	//
	// Note: RestoreObject and HealObject requests don't end up replacing the
	// null version and therefore don't require the free-version to track
	// anything
	if fi.VersionID == "" && !fi.IsRestoreObjReq() && !fi.Healing() {
		// Note: Restore object request reuses PutObject/Multipart
		// upload to copy back its data from the remote tier. This
		// doesn't replace the existing version, so we don't need to add
		// a free-version.
		xlMeta.AddFreeVersion(fi)
	}

	// indicates if RenameData() is called by healing.
	// healing doesn't preserve the dataDir as 'legacy'
	healing := fi.XLV1 && fi.DataDir != legacyDataDir

	if err = xlMeta.AddVersion(fi); err != nil {
		if legacyPreserved {
			// Any failed rename calls un-roll previous transaction.
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		return 0, err
	}

	// Signature over all version headers lets the caller detect
	// concurrent metadata changes across drives.
	var sbuf bytes.Buffer
	for _, ver := range xlMeta.versions {
		sbuf.Write(ver.header.Signature[:])
	}
	sign = xxh3.Hash(sbuf.Bytes())

	dstBuf, err = xlMeta.AppendTo(metaDataPoolGet())
	defer metaDataPoolPut(dstBuf)
	if err != nil {
		if legacyPreserved {
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		return 0, errFileCorrupt
	}

	// Persist the merged metadata at the *source* first; the final
	// commit below renames it into place at the destination.
	if err = s.WriteAll(ctx, srcVolume, pathJoin(srcPath, xlStorageFormatFile), dstBuf); err != nil {
		if legacyPreserved {
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		return 0, osErrToFileErr(err)
	}
	diskHealthCheckOK(ctx, err)

	if srcDataPath != "" && len(fi.Data) == 0 && fi.Size > 0 {
		// renameAll only for objects that have xl.meta not saved inline.
		s.moveToTrash(dstDataPath, true, false)
		if healing {
			// If we are healing we should purge any legacyDataPath content,
			// that was previously preserved during PutObject() call
			// on a versioned bucket.
			s.moveToTrash(legacyDataPath, true, false)
		}
		if err = renameAll(srcDataPath, dstDataPath, dstVolumeDir); err != nil {
			if legacyPreserved {
				// Any failed rename calls un-roll previous transaction.
				s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
			}
			s.deleteFile(dstVolumeDir, dstDataPath, false, false)
			return 0, osErrToFileErr(err)
		}
	}

	// Commit meta-file
	if err = renameAll(srcFilePath, dstFilePath, dstVolumeDir); err != nil {
		if legacyPreserved {
			// Any failed rename calls un-roll previous transaction.
			s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
		}
		s.deleteFile(dstVolumeDir, dstDataPath, false, false)
		return 0, osErrToFileErr(err)
	}

	// additionally only purge older data at the end of the transaction of new data-dir
	// movement, this is to ensure that previous data references can co-exist for
	// any recoverability.
	if oldDstDataPath != "" {
		s.moveToTrash(oldDstDataPath, true, false)
	}

	if srcVolume != minioMetaMultipartBucket {
		// srcFilePath is some-times minioMetaTmpBucket, an attempt to
		// remove the temporary folder is enough since at this point
		// ideally all transaction should be complete.
		Remove(pathutil.Dir(srcFilePath))
	} else {
		s.deleteFile(srcVolumeDir, pathutil.Dir(srcFilePath), true, false)
	}
	return sign, nil
}

// RenameFile - rename source path to destination path atomically.
func (s *xlStorage) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
	srcVolumeDir, err := s.getVolDir(srcVolume)
	if err != nil {
		return err
	}
	dstVolumeDir, err := s.getVolDir(dstVolume)
	if err != nil {
		return err
	}
	if !skipAccessChecks(srcVolume) {
		// Stat a volume entry.
		if err = Access(srcVolumeDir); err != nil {
			if osIsNotExist(err) {
				return errVolumeNotFound
			} else if isSysErrIO(err) {
				return errFaultyDisk
			}
			return err
		}
	}
	if !skipAccessChecks(dstVolume) {
		if err = Access(dstVolumeDir); err != nil {
			if osIsNotExist(err) {
				return errVolumeNotFound
			} else if isSysErrIO(err) {
				return errFaultyDisk
			}
			return err
		}
	}
	srcIsDir := HasSuffix(srcPath, SlashSeparator)
	dstIsDir := HasSuffix(dstPath, SlashSeparator)
	// Either src and dst have to be directories or files, else return error.
	if !(srcIsDir && dstIsDir || !srcIsDir && !dstIsDir) {
		return errFileAccessDenied
	}
	srcFilePath := pathutil.Join(srcVolumeDir, srcPath)
	if err = checkPathLength(srcFilePath); err != nil {
		return err
	}
	dstFilePath := pathutil.Join(dstVolumeDir, dstPath)
	if err = checkPathLength(dstFilePath); err != nil {
		return err
	}
	if srcIsDir {
		// If source is a directory, we expect the destination to be non-existent but we
		// we still need to allow overwriting an empty directory since it represents
		// an object empty directory.
		dirInfo, err := Lstat(dstFilePath)
		if isSysErrIO(err) {
			return errFaultyDisk
		}
		if err != nil {
			if !osIsNotExist(err) {
				return err
			}
		} else {
			if !dirInfo.IsDir() {
				return errFileAccessDenied
			}
			// Remove the empty destination directory so the rename
			// below can take its place; a non-empty directory is
			// reported as access-denied.
			if err = Remove(dstFilePath); err != nil {
				if isSysErrNotEmpty(err) || isSysErrNotDir(err) {
					return errFileAccessDenied
				} else if isSysErrIO(err) {
					return errFaultyDisk
				}
				return err
			}
		}
	}

	if err = renameAll(srcFilePath, dstFilePath, dstVolumeDir); err != nil {
		if isSysErrNotEmpty(err) || isSysErrNotDir(err) {
			return errFileAccessDenied
		}
		return osErrToFileErr(err)
	}

	// Remove parent dir of the source file if empty
	parentDir := pathutil.Dir(srcFilePath)
	s.deleteFile(srcVolumeDir, parentDir, false, false)

	return nil
}

// bitrotVerify checks the on-disk part at partPath against the expected
// bitrot checksum/shard layout; errors indicate the healing code should
// repair the file.
func (s *xlStorage) bitrotVerify(ctx context.Context, partPath string, partSize int64, algo BitrotAlgorithm, sum []byte, shardSize int64) error {
	// Open the file for reading.
	file, err := OpenFile(partPath, readMode, 0o666)
	if err != nil {
		return osErrToFileErr(err)
	}

	// Close the file descriptor.
	defer file.Close()
	fi, err := file.Stat()
	if err != nil {
		// Unable to stat on the file, return an expected error
		// for healing code to fix this file.
		return err
	}
	return bitrotVerify(diskHealthReader(ctx, file), fi.Size(), partSize, algo, sum, shardSize)
}

// VerifyFile runs bitrot verification over every erasure part of the
// given object version; the first failing part aborts the walk.
func (s *xlStorage) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) (err error) {
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return err
	}

	if !skipAccessChecks(volume) {
		// Stat a volume entry.
		if err = Access(volumeDir); err != nil {
			return convertAccessError(err, errVolumeAccessDenied)
		}
	}

	erasure := fi.Erasure
	for _, part := range fi.Parts {
		checksumInfo := erasure.GetChecksumInfo(part.Number)
		partPath := pathJoin(volumeDir, path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
		if err := s.bitrotVerify(ctx, partPath,
			erasure.ShardFileSize(part.Size),
			checksumInfo.Algorithm,
			checksumInfo.Hash, erasure.ShardSize()); err != nil {
			// Expected not-found/corruption errors are returned to the
			// caller without logging; anything else is logged once.
			if !IsErr(err, []error{
				errFileNotFound,
				errVolumeNotFound,
				errFileCorrupt,
				errFileAccessDenied,
				errFileVersionNotFound,
			}...) {
				logger.GetReqInfo(ctx).AppendTags("disk", s.String())
				logger.LogOnceIf(ctx, err, partPath)
			}
			return err
		}
	}

	return nil
}

// ReadMultiple will read multiple files and send each back as response.
// Files are read and returned in the given order.
// The resp channel is closed before the call returns.
// Only a canceled context will return an error.
func (s *xlStorage) ReadMultiple(ctx context.Context, req ReadMultipleReq, resp chan<- ReadMultipleResp) error {
	defer xioutil.SafeClose(resp)

	volumeDir := pathJoin(s.drivePath, req.Bucket)
	found := 0
	for _, f := range req.Files {
		if contextCanceled(ctx) {
			return ctx.Err()
		}
		r := ReadMultipleResp{
			Bucket: req.Bucket,
			Prefix: req.Prefix,
			File:   f,
		}
		var data []byte
		var mt time.Time
		fullPath := pathJoin(volumeDir, req.Prefix, f)
		// Bound each read with the drive's max timeout so a hung drive
		// cannot stall the whole request.
		w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
		if err := w.Run(func() (err error) {
			if req.MetadataOnly {
				data, mt, err = s.readMetadataWithDMTime(ctx, fullPath)
			} else {
				data, mt, err = s.readAllData(ctx, req.Bucket, volumeDir, fullPath, true)
			}
			return err
		}); err != nil {
			// not-found style errors leave r.Exists=false; any other
			// error is reported on the response entry.
			if !IsErr(err, errFileNotFound, errVolumeNotFound) {
				r.Exists = true
				r.Error = err.Error()
			}
			select {
			case <-ctx.Done():
				return ctx.Err()
			case resp <- r:
			}
			if req.AbortOn404 && !r.Exists {
				// We stop at first file not found.
				// We have already reported the error, return nil.
				return nil
			}
			continue
		}
		diskHealthCheckOK(ctx, nil)
		if req.MaxSize > 0 && int64(len(data)) > req.MaxSize {
			r.Exists = true
			r.Error = fmt.Sprintf("max size (%d) exceeded: %d", req.MaxSize, len(data))
			select {
			case <-ctx.Done():
				return ctx.Err()
			case resp <- r:
				continue
			}
		}
		found++
		r.Exists = true
		r.Data = data
		r.Modtime = mt
		select {
		case <-ctx.Done():
			return ctx.Err()
		case resp <- r:
		}
		if req.MaxResults > 0 && found >= req.MaxResults {
			return nil
		}
	}
	return nil
}

// StatInfoFile returns stat information for the given path (or, with
// glob=true, for every path matching the pattern) inside the volume.
func (s *xlStorage) StatInfoFile(ctx context.Context, volume, path string, glob bool) (stat []StatInfo, err error) {
	volumeDir, err := s.getVolDir(volume)
	if err != nil {
		return stat, err
	}

	files := []string{pathJoin(volumeDir, path)}
	if glob {
		files, err = filepathx.Glob(filepath.Join(volumeDir, path))
		if err != nil {
			return nil, err
		}
	}
	for _, filePath := range files {
		if err := checkPathLength(filePath); err != nil {
			return stat, err
		}
		st, _ := Lstat(filePath)
		if st == nil {
			// Distinguish "path missing" from "volume missing"
			// before returning errPathNotFound.
			if !skipAccessChecks(volume) {
				// Stat a volume entry.
				if verr := Access(volumeDir); verr != nil {
					return stat, convertAccessError(verr, errVolumeAccessDenied)
				}
			}
			return stat, errPathNotFound
		}
		name, err := filepath.Rel(volumeDir, filePath)
		if err != nil {
			name = filePath
		}
		stat = append(stat, StatInfo{
			Name:    filepath.ToSlash(name),
			Size:    st.Size(),
			Dir:     st.IsDir(),
			Mode:    uint32(st.Mode()),
			ModTime: st.ModTime(),
		})
	}
	return stat, nil
}

// CleanAbandonedData will read metadata of the object on disk
// and delete any data directories and inline data that isn't referenced in metadata.
// Metadata itself is not modified, only inline data.
2996 func (s *xlStorage) CleanAbandonedData(ctx context.Context, volume string, path string) error { 2997 if volume == "" || path == "" { 2998 return nil // Ignore 2999 } 3000 3001 volumeDir, err := s.getVolDir(volume) 3002 if err != nil { 3003 return err 3004 } 3005 baseDir := pathJoin(volumeDir, path+slashSeparator) 3006 metaPath := pathutil.Join(baseDir, xlStorageFormatFile) 3007 buf, _, err := s.readAllData(ctx, volume, volumeDir, metaPath, true) 3008 if err != nil { 3009 return err 3010 } 3011 defer metaDataPoolPut(buf) 3012 3013 if !isXL2V1Format(buf) { 3014 return nil 3015 } 3016 var xl xlMetaV2 3017 err = xl.LoadOrConvert(buf) 3018 if err != nil { 3019 return err 3020 } 3021 foundDirs := make(map[string]struct{}, len(xl.versions)) 3022 err = readDirFn(baseDir, func(name string, typ os.FileMode) error { 3023 if !typ.IsDir() { 3024 return nil 3025 } 3026 // See if directory has a UUID name. 3027 base := filepath.Base(name) 3028 _, err := uuid.Parse(base) 3029 if err == nil { 3030 foundDirs[base] = struct{}{} 3031 } 3032 return nil 3033 }) 3034 if err != nil { 3035 return err 3036 } 3037 wantDirs, err := xl.getDataDirs() 3038 if err != nil { 3039 return err 3040 } 3041 3042 // Delete all directories we expect to be there. 3043 for _, dir := range wantDirs { 3044 delete(foundDirs, dir) 3045 } 3046 3047 // Delete excessive directories. 3048 // Do not abort on context errors. 3049 for dir := range foundDirs { 3050 toRemove := pathJoin(volumeDir, path, dir+SlashSeparator) 3051 err := s.deleteFile(volumeDir, toRemove, true, true) 3052 diskHealthCheckOK(ctx, err) 3053 } 3054 3055 // Do the same for inline data 3056 dirs, err := xl.data.list() 3057 if err != nil { 3058 return err 3059 } 3060 // Clear and repopulate 3061 for k := range foundDirs { 3062 delete(foundDirs, k) 3063 } 3064 // Populate into map 3065 for _, k := range dirs { 3066 foundDirs[k] = struct{}{} 3067 } 3068 // Delete all directories we expect to be there. 
3069 for _, dir := range wantDirs { 3070 delete(foundDirs, dir) 3071 } 3072 3073 // Delete excessive inline entries. 3074 if len(foundDirs) > 0 { 3075 // Convert to slice. 3076 dirs = dirs[:0] 3077 for dir := range foundDirs { 3078 dirs = append(dirs, dir) 3079 } 3080 if xl.data.remove(dirs...) { 3081 newBuf, err := xl.AppendTo(metaDataPoolGet()) 3082 if err == nil { 3083 defer metaDataPoolPut(newBuf) 3084 return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf) 3085 } 3086 } 3087 } 3088 return nil 3089 } 3090 3091 func convertAccessError(err, permErr error) error { 3092 switch { 3093 case osIsNotExist(err): 3094 return errVolumeNotFound 3095 case isSysErrIO(err): 3096 return errFaultyDisk 3097 case osIsPermission(err): 3098 return permErr 3099 default: 3100 return err 3101 } 3102 }