storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/erasure-healing.go

/*
 * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"sync"
	"time"

	"storj.io/minio/cmd/logger"
	"storj.io/minio/pkg/bucket/lifecycle"
	"storj.io/minio/pkg/madmin"
	"storj.io/minio/pkg/sync/errgroup"
)

// HealBucket heals a bucket if it doesn't exist on one of the disks, and
// additionally heals missing entries for the bucket metadata files
// `policy.json, notification.xml, listeners.json`.
func (er erasureObjects) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (
	result madmin.HealResultItem, err error) {
	if !opts.DryRun {
		defer ObjectPathUpdated(bucket)
	}

	storageDisks := er.getDisks()
	storageEndpoints := er.getEndpoints()

	// Get write quorum for an object.
	writeQuorum := len(storageDisks) - er.defaultParityCount
	if writeQuorum == er.defaultParityCount {
		writeQuorum++
	}

	// Heal bucket.
	return healBucket(ctx, storageDisks, storageEndpoints, bucket, writeQuorum, opts)
}

// healBucket - creates the bucket on disks where it does not exist.
func healBucket(ctx context.Context, storageDisks []StorageAPI, storageEndpoints []string, bucket string, writeQuorum int,
	opts madmin.HealOpts) (res madmin.HealResultItem, err error) {

	// Initialize sync waitgroup.
	g := errgroup.WithNErrs(len(storageDisks))

	// Disk state slices.
	beforeState := make([]string, len(storageDisks))
	afterState := make([]string, len(storageDisks))

	// Make a volume entry on all underlying storage disks.
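	// This first pass only inspects the bucket on every disk and records the
	// per-disk before/after drive states; disks where the bucket is missing
	// report errVolumeNotFound and are only created in the second pass further
	// below (and never during a dry-run).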
	for index := range storageDisks {
		index := index
		g.Go(func() error {
			if storageDisks[index] == nil {
				beforeState[index] = madmin.DriveStateOffline
				afterState[index] = madmin.DriveStateOffline
				return errDiskNotFound
			}
			if _, serr := storageDisks[index].StatVol(ctx, bucket); serr != nil {
				if serr == errDiskNotFound {
					beforeState[index] = madmin.DriveStateOffline
					afterState[index] = madmin.DriveStateOffline
					return serr
				}
				if serr != errVolumeNotFound {
					beforeState[index] = madmin.DriveStateCorrupt
					afterState[index] = madmin.DriveStateCorrupt
					return serr
				}

				beforeState[index] = madmin.DriveStateMissing
				afterState[index] = madmin.DriveStateMissing

				// Mutate only if not a dry-run.
				if opts.DryRun {
					return nil
				}

				return serr
			}
			beforeState[index] = madmin.DriveStateOk
			afterState[index] = madmin.DriveStateOk
			return nil
		}, index)
	}

	errs := g.Wait()

	// Initialize heal result info.
	res = madmin.HealResultItem{
		Type:         madmin.HealItemBucket,
		Bucket:       bucket,
		DiskCount:    len(storageDisks),
		ParityBlocks: len(storageDisks) / 2,
		DataBlocks:   len(storageDisks) / 2,
	}

	for i := range beforeState {
		res.Before.Drives = append(res.Before.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[i],
			State:    beforeState[i],
		})
	}

	reducedErr := reduceWriteQuorumErrs(ctx, errs, bucketOpIgnoredErrs, writeQuorum-1)
	if errors.Is(reducedErr, errVolumeNotFound) && !opts.Recreate {
		for i := range beforeState {
			res.After.Drives = append(res.After.Drives, madmin.HealDriveInfo{
				UUID:     "",
				Endpoint: storageEndpoints[i],
				State:    madmin.DriveStateOk,
			})
		}
		return res, nil
	}

	// Initialize sync waitgroup.
	g = errgroup.WithNErrs(len(storageDisks))

	// Create the bucket on the disks where it was found missing.
	for index := range storageDisks {
		index := index
		g.Go(func() error {
			if beforeState[index] == madmin.DriveStateMissing {
				makeErr := storageDisks[index].MakeVol(ctx, bucket)
				if makeErr == nil {
					afterState[index] = madmin.DriveStateOk
				}
				return makeErr
			}
			return errs[index]
		}, index)
	}

	errs = g.Wait()

	reducedErr = reduceWriteQuorumErrs(ctx, errs, bucketOpIgnoredErrs, writeQuorum)
	if reducedErr != nil {
		return res, reducedErr
	}

	for i := range afterState {
		res.After.Drives = append(res.After.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[i],
			State:    afterState[i],
		})
	}
	return res, nil
}

// listAllBuckets lists all buckets from all disks. It also
// records the occurrence of each bucket across disks.
func listAllBuckets(ctx context.Context, storageDisks []StorageAPI, healBuckets map[string]VolInfo) error {
	g := errgroup.WithNErrs(len(storageDisks))
	var mu sync.Mutex
	for index := range storageDisks {
		index := index
		g.Go(func() error {
			if storageDisks[index] == nil {
				// We ignore disk-not-found errors.
				return nil
			}
			volsInfo, err := storageDisks[index].ListVols(ctx)
			if err != nil {
				return err
			}
			for _, volInfo := range volsInfo {
				// StorageAPI can send volume names which are
				// incompatible with buckets - these are
				// skipped, like the meta-bucket.
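				// Only the first disk that reports a given bucket records its
				// VolInfo below; subsequent disks merely confirm that the
				// bucket exists somewhere.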
				if isReservedOrInvalidBucket(volInfo.Name, false) {
					continue
				}
				mu.Lock()
				if _, ok := healBuckets[volInfo.Name]; !ok {
					healBuckets[volInfo.Name] = volInfo
				}
				mu.Unlock()
			}
			return nil
		}, index)
	}
	return reduceReadQuorumErrs(ctx, g.Wait(), bucketMetadataOpIgnoredErrs, len(storageDisks)/2)
}

// Only heal on disks where we are sure that healing is needed. We can expand
// this list as and when we figure out that more errors can be safely added to it.
func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime time.Time) bool {
	switch {
	case errors.Is(erErr, errFileNotFound) || errors.Is(erErr, errFileVersionNotFound):
		return true
	case errors.Is(erErr, errCorruptedFormat):
		return true
	}
	if erErr == nil {
		// xl.meta was read fine, but there may be a problem with the part.N files.
		if IsErr(dataErr, []error{
			errFileNotFound,
			errFileVersionNotFound,
			errFileCorrupt,
		}...) {
			return true
		}
		if !quorumModTime.Equal(meta.ModTime) {
			return true
		}
		if meta.XLV1 {
			return true
		}
	}
	return false
}

// Heals an object by re-writing corrupt/missing erasure blocks.
func (er erasureObjects) healObject(ctx context.Context, bucket string, object string, versionID string, opts madmin.HealOpts) (result madmin.HealResultItem, err error) {

	dryRun := opts.DryRun
	scanMode := opts.ScanMode

	storageDisks := er.getDisks()
	storageEndpoints := er.getEndpoints()

	// Initialize heal result object.
	result = madmin.HealResultItem{
		Type:         madmin.HealItemObject,
		Bucket:       bucket,
		Object:       object,
		DiskCount:    len(storageDisks),
		ParityBlocks: er.defaultParityCount,
		DataBlocks:   len(storageDisks) - er.defaultParityCount,
	}

	lk := er.NewNSLock(bucket, object)
	if ctx, err = lk.GetLock(ctx, globalOperationTimeout); err != nil {
		return result, err
	}
	defer lk.Unlock()

	// Re-read when we have the lock...
	partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true)

	// List of disks having the latest version of the object er.meta
	// (by modtime).
	latestDisks, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs)

	// List of disks having all parts as per the latest er.meta.
	availableDisks, dataErrs := disksWithAllParts(ctx, latestDisks, partsMetadata, errs, bucket, object, scanMode)

	// Loop to find the number of disks with valid data, the per-drive
	// data state and a list of outdated disks on which data needs
	// to be healed.
	outDatedDisks := make([]StorageAPI, len(storageDisks))
	numAvailableDisks := 0
	disksToHealCount := 0
	for i, v := range availableDisks {
		driveState := ""
		switch {
		case v != nil:
			driveState = madmin.DriveStateOk
			numAvailableDisks++
			// If data is sane on any one disk, we can
			// extract the correct object size.
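			// Erasure block counts in the result are likewise taken from
			// this disk's metadata whenever they are present.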
			result.ObjectSize = partsMetadata[i].Size
			if partsMetadata[i].Erasure.ParityBlocks > 0 && partsMetadata[i].Erasure.DataBlocks > 0 {
				result.ParityBlocks = partsMetadata[i].Erasure.ParityBlocks
				result.DataBlocks = partsMetadata[i].Erasure.DataBlocks
			}
		case errs[i] == errDiskNotFound, dataErrs[i] == errDiskNotFound:
			driveState = madmin.DriveStateOffline
		case errs[i] == errFileNotFound, errs[i] == errFileVersionNotFound, errs[i] == errVolumeNotFound:
			fallthrough
		case dataErrs[i] == errFileNotFound, dataErrs[i] == errFileVersionNotFound, dataErrs[i] == errVolumeNotFound:
			driveState = madmin.DriveStateMissing
		default:
			// All remaining cases imply corrupt data/metadata.
			driveState = madmin.DriveStateCorrupt
		}

		if shouldHealObjectOnDisk(errs[i], dataErrs[i], partsMetadata[i], modTime) {
			outDatedDisks[i] = storageDisks[i]
			disksToHealCount++
			result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
				UUID:     "",
				Endpoint: storageEndpoints[i],
				State:    driveState,
			})
			result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
				UUID:     "",
				Endpoint: storageEndpoints[i],
				State:    driveState,
			})
			continue
		}
		result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[i],
			State:    driveState,
		})
		result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[i],
			State:    driveState,
		})
	}

	if isAllNotFound(errs) {
		err = toObjectErr(errFileNotFound, bucket, object)
		if versionID != "" {
			err = toObjectErr(errFileVersionNotFound, bucket, object, versionID)
		}
		// File is fully gone, fileInfo is empty.
		return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object, versionID, er.defaultParityCount), err
	}

	// If fewer than read-quorum disks have all the parts
	// of the data, we can't reconstruct the erasure-coded data.
	if numAvailableDisks < result.DataBlocks {
		return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, dataErrs, opts)
	}

	if disksToHealCount == 0 {
		// Nothing to heal!
		return result, nil
	}

	// After this point, we only have to repair data on disk - so
	// return if it is a dry-run.
	if dryRun {
		return result, nil
	}

	// Latest FileInfo for reference. If valid metadata is not
	// present, it is as good as the object not being found.
	latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, dataDir, result.DataBlocks)
	if err != nil {
		return result, toObjectErr(err, bucket, object, versionID)
	}
	defer ObjectPathUpdated(pathJoin(bucket, object))

	cleanFileInfo := func(fi FileInfo) FileInfo {
		// Returns a copy of 'fi' with checksums and parts nil'ed out.
		nfi := fi
		nfi.Erasure.Index = 0
		nfi.Erasure.Checksums = nil
		nfi.Parts = nil
		return nfi
	}

	// We write at a temporary location and then rename to the final location.
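	// tmpID names the scratch object under minioMetaTmpBucket that holds the
	// healed parts; migrateDataDir becomes the destination data dir only when
	// migrating legacy (XLV1) metadata to the new layout.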
	tmpID := mustGetUUID()
	migrateDataDir := mustGetUUID()

	copyPartsMetadata := make([]FileInfo, len(partsMetadata))
	for i := range outDatedDisks {
		if outDatedDisks[i] == nil {
			continue
		}
		copyPartsMetadata[i] = partsMetadata[i]
		partsMetadata[i] = cleanFileInfo(latestMeta)
	}

	// The source data dir shall be empty in case of XLV1;
	// differentiate it from dstDataDir for readability.
	// srcDataDir is the one used with newBitrotReader()
	// to read existing content.
	srcDataDir := latestMeta.DataDir
	dstDataDir := latestMeta.DataDir
	if latestMeta.XLV1 {
		dstDataDir = migrateDataDir
	}

	var inlineBuffers []*bytes.Buffer
	if len(latestMeta.Parts) <= 1 && latestMeta.Size < smallFileThreshold {
		inlineBuffers = make([]*bytes.Buffer, len(outDatedDisks))
	}

	if !latestMeta.Deleted || latestMeta.TransitionStatus != lifecycle.TransitionComplete {
		result.DataBlocks = latestMeta.Erasure.DataBlocks
		result.ParityBlocks = latestMeta.Erasure.ParityBlocks

		// Reorder so that we have data disks first and parity disks next.
		latestDisks = shuffleDisks(availableDisks, latestMeta.Erasure.Distribution)
		outDatedDisks = shuffleDisks(outDatedDisks, latestMeta.Erasure.Distribution)
		partsMetadata = shufflePartsMetadata(partsMetadata, latestMeta.Erasure.Distribution)
		copyPartsMetadata = shufflePartsMetadata(copyPartsMetadata, latestMeta.Erasure.Distribution)

		// Heal each part. erasureHealFile() will write the healed
		// part to .minio/tmp/uuid/ which needs to be renamed later to
		// the final location.
		erasure, err := NewErasure(ctx, latestMeta.Erasure.DataBlocks,
			latestMeta.Erasure.ParityBlocks, latestMeta.Erasure.BlockSize)
		if err != nil {
			return result, toObjectErr(err, bucket, object)
		}

		erasureInfo := latestMeta.Erasure

		for partIndex := 0; partIndex < len(latestMeta.Parts); partIndex++ {
			partSize := latestMeta.Parts[partIndex].Size
			partActualSize := latestMeta.Parts[partIndex].ActualSize
			partNumber := latestMeta.Parts[partIndex].Number
			tillOffset := erasure.ShardFileOffset(0, partSize, partSize)
			readers := make([]io.ReaderAt, len(latestDisks))
			checksumAlgo := erasureInfo.GetChecksumInfo(partNumber).Algorithm
			for i, disk := range latestDisks {
				if disk == OfflineDisk {
					continue
				}
				checksumInfo := copyPartsMetadata[i].Erasure.GetChecksumInfo(partNumber)
				partPath := pathJoin(object, srcDataDir, fmt.Sprintf("part.%d", partNumber))
				readers[i] = newBitrotReader(disk, partsMetadata[i].Data, bucket, partPath, tillOffset, checksumAlgo, checksumInfo.Hash, erasure.ShardSize())
			}
			writers := make([]io.Writer, len(outDatedDisks))
			for i, disk := range outDatedDisks {
				if disk == OfflineDisk {
					continue
				}
				partPath := pathJoin(tmpID, dstDataDir, fmt.Sprintf("part.%d", partNumber))
				if len(inlineBuffers) > 0 {
					inlineBuffers[i] = bytes.NewBuffer(make([]byte, 0, erasure.ShardFileSize(latestMeta.Size)))
					writers[i] = newStreamingBitrotWriterBuffer(inlineBuffers[i], DefaultBitrotAlgorithm, erasure.ShardSize())
				} else {
					writers[i] = newBitrotWriter(disk, minioMetaTmpBucket, partPath,
						tillOffset, DefaultBitrotAlgorithm, erasure.ShardSize(), true)
				}
			}
			err = erasure.Heal(ctx, readers, writers, partSize)
			closeBitrotReaders(readers)
			closeBitrotWriters(writers)
			if err != nil {
				return result, toObjectErr(err, bucket, object)
			}
			// outDatedDisks that had write errors should not be
			// written to for the remaining parts, so we nil them out.
			for i, disk := range outDatedDisks {
				if disk == OfflineDisk {
					continue
				}

				// A non-nil stale disk which did not receive
				// a healed part checksum had a write error.
				if writers[i] == nil {
					outDatedDisks[i] = nil
					disksToHealCount--
					continue
				}

				partsMetadata[i].DataDir = dstDataDir
				partsMetadata[i].AddObjectPart(partNumber, "", partSize, partActualSize)
				partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{
					PartNumber: partNumber,
					Algorithm:  checksumAlgo,
					Hash:       bitrotWriterSum(writers[i]),
				})
				if len(inlineBuffers) > 0 && inlineBuffers[i] != nil {
					partsMetadata[i].Data = inlineBuffers[i].Bytes()
				} else {
					partsMetadata[i].Data = nil
				}
			}

			// If all disks had errors, we give up.
			if disksToHealCount == 0 {
				return result, fmt.Errorf("all disks had write errors, unable to heal")
			}
		}
	}

	defer er.deleteObject(context.Background(), minioMetaTmpBucket, tmpID, len(storageDisks)/2+1)

	// Rename from the tmp location to the actual location.
	for i, disk := range outDatedDisks {
		if disk == OfflineDisk {
			continue
		}

		// Record the index of the updated disks.
		partsMetadata[i].Erasure.Index = i + 1

		// Attempt a rename now from healed data to the final location.
		if err = disk.RenameData(ctx, minioMetaTmpBucket, tmpID, partsMetadata[i], bucket, object); err != nil {
			logger.LogIf(ctx, err)
			return result, toObjectErr(err, bucket, object)
		}

		for i, v := range result.Before.Drives {
			if v.Endpoint == disk.String() {
				result.After.Drives[i].State = madmin.DriveStateOk
			}
		}
	}

	// Set the size of the object in the heal result.
	result.ObjectSize = latestMeta.Size

	return result, nil
}

// healObjectDir - heals an object directory specifically; this special call
// is needed since we do not have a special backend format for directories.
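// A missing directory is recreated via MakeVol of the object prefix on the
// disks where it was not found; a dangling directory is instead deleted when
// `remove` is set and this is not a dry-run.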
func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object string, dryRun bool, remove bool) (hr madmin.HealResultItem, err error) {
	storageDisks := er.getDisks()
	storageEndpoints := er.getEndpoints()

	// Initialize heal result object.
	hr = madmin.HealResultItem{
		Type:         madmin.HealItemObject,
		Bucket:       bucket,
		Object:       object,
		DiskCount:    len(storageDisks),
		ParityBlocks: er.defaultParityCount,
		DataBlocks:   len(storageDisks) - er.defaultParityCount,
		ObjectSize:   0,
	}

	hr.Before.Drives = make([]madmin.HealDriveInfo, len(storageDisks))
	hr.After.Drives = make([]madmin.HealDriveInfo, len(storageDisks))

	errs := statAllDirs(ctx, storageDisks, bucket, object)
	danglingObject := isObjectDirDangling(errs)
	if danglingObject {
		if !dryRun && remove {
			var wg sync.WaitGroup
			// Remove versions in bulk for each disk.
			for index, disk := range storageDisks {
				if disk == nil {
					continue
				}
				wg.Add(1)
				go func(index int, disk StorageAPI) {
					defer wg.Done()
					_ = disk.Delete(ctx, bucket, object, false)
				}(index, disk)
			}
			wg.Wait()
			ObjectPathUpdated(pathJoin(bucket, object))
		}
	}

	// Prepare object creation on all disks.
	for i, err := range errs {
		drive := storageEndpoints[i]
		switch err {
		case nil:
			hr.Before.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateOk}
			hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateOk}
		case errDiskNotFound:
			hr.Before.Drives[i] = madmin.HealDriveInfo{State: madmin.DriveStateOffline}
			hr.After.Drives[i] = madmin.HealDriveInfo{State: madmin.DriveStateOffline}
		case errVolumeNotFound, errFileNotFound:
			// Bucket or prefix/directory not found.
			hr.Before.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateMissing}
			hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateMissing}
		default:
			hr.Before.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateCorrupt}
			hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateCorrupt}
		}
	}
	if dryRun || danglingObject || isAllNotFound(errs) {
		// Nothing to do, file is already gone.
		return hr, toObjectErr(errFileNotFound, bucket, object)
	}
	for i, err := range errs {
		if err == errVolumeNotFound || err == errFileNotFound {
			// Bucket or prefix/directory not found.
			merr := storageDisks[i].MakeVol(ctx, pathJoin(bucket, object))
			switch merr {
			case nil, errVolumeExists:
				hr.After.Drives[i].State = madmin.DriveStateOk
			case errDiskNotFound:
				hr.After.Drives[i].State = madmin.DriveStateOffline
			default:
				logger.LogIf(ctx, merr)
				hr.After.Drives[i].State = madmin.DriveStateCorrupt
			}
		}
	}
	return hr, nil
}

// defaultHealResult populates default heal result item entries with possible values when we are returning prematurely.
// This is to ensure that under no circumstance do we return empty arrays with wrong values.
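// Drive states are derived purely from the per-disk errors (nil disk: offline,
// not-found errors: missing, anything else: corrupt), and the erasure block
// counts fall back to the default parity when no valid FileInfo is available.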
func defaultHealResult(lfi FileInfo, storageDisks []StorageAPI, storageEndpoints []string, errs []error, bucket, object, versionID string, defaultParityCount int) madmin.HealResultItem {
	// Initialize heal result object.
	result := madmin.HealResultItem{
		Type:      madmin.HealItemObject,
		Bucket:    bucket,
		Object:    object,
		VersionID: versionID,
		DiskCount: len(storageDisks),
	}
	if lfi.IsValid() {
		result.ObjectSize = lfi.Size
	}

	for index, disk := range storageDisks {
		if disk == nil {
			result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
				UUID:     "",
				Endpoint: storageEndpoints[index],
				State:    madmin.DriveStateOffline,
			})
			result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
				UUID:     "",
				Endpoint: storageEndpoints[index],
				State:    madmin.DriveStateOffline,
			})
			continue
		}
		driveState := madmin.DriveStateCorrupt
		switch errs[index] {
		case errFileNotFound, errVolumeNotFound:
			driveState = madmin.DriveStateMissing
		}
		result.Before.Drives = append(result.Before.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[index],
			State:    driveState,
		})
		result.After.Drives = append(result.After.Drives, madmin.HealDriveInfo{
			UUID:     "",
			Endpoint: storageEndpoints[index],
			State:    driveState,
		})
	}

	if !lfi.IsValid() {
		// Default to the most common configuration for erasure blocks.
		result.ParityBlocks = defaultParityCount
		result.DataBlocks = len(storageDisks) - defaultParityCount
	} else {
		result.ParityBlocks = lfi.Erasure.ParityBlocks
		result.DataBlocks = lfi.Erasure.DataBlocks
	}

	return result
}

// Stat all directories.
func statAllDirs(ctx context.Context, storageDisks []StorageAPI, bucket, prefix string) []error {
	g := errgroup.WithNErrs(len(storageDisks))
	for index, disk := range storageDisks {
		if disk == nil {
			continue
		}
		index := index
		g.Go(func() error {
			entries, err := storageDisks[index].ListDir(ctx, bucket, prefix, 1)
			if err != nil {
				return err
			}
			if len(entries) > 0 {
				return errVolumeNotEmpty
			}
			return nil
		}, index)
	}

	return g.Wait()
}

// isAllNotFound returns true only if every element of the error slice is one of
// errFileNotFound, errFileVersionNotFound or errVolumeNotFound.
// A zero-length slice always returns false.
func isAllNotFound(errs []error) bool {
	for _, err := range errs {
		if errors.Is(err, errFileNotFound) || errors.Is(err, errVolumeNotFound) || errors.Is(err, errFileVersionNotFound) {
			continue
		}
		return false
	}
	return len(errs) > 0
}

// An object directory is considered dangling/corrupted if and only if the
// number of disks on which it was found (including found-but-not-empty and
// other errors) is smaller than the number of disks on which it was not found,
// provided it was found on at least one disk.
// If no files were found, false is returned.
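// For example, with 4 disks where one disk still lists the directory and the
// other three return errFileNotFound: found(1) < notFound(3) and found > 0,
// so the directory is treated as dangling.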
func isObjectDirDangling(errs []error) (ok bool) {
	var found int
	var notFound int
	var foundNotEmpty int
	var otherFound int
	for _, readErr := range errs {
		if readErr == nil {
			found++
		} else if readErr == errFileNotFound || readErr == errVolumeNotFound {
			notFound++
		} else if readErr == errVolumeNotEmpty {
			foundNotEmpty++
		} else {
			otherFound++
		}
	}
	found = found + foundNotEmpty + otherFound
	return found < notFound && found > 0
}

func (er erasureObjects) purgeObjectDangling(ctx context.Context, bucket, object, versionID string,
	metaArr []FileInfo, errs []error, dataErrs []error, opts madmin.HealOpts) (madmin.HealResultItem, error) {

	storageDisks := er.getDisks()
	storageEndpoints := er.getEndpoints()

	// Check if the object is dangling; if yes and the user requested
	// removal, we simply delete it from the namespace.
	m, ok := isObjectDangling(metaArr, errs, dataErrs)
	if ok {
		writeQuorum := m.Erasure.DataBlocks
		if m.Erasure.DataBlocks == 0 || m.Erasure.DataBlocks == m.Erasure.ParityBlocks {
			writeQuorum++
		}
		var err error
		var returnNotFound bool
		if !opts.DryRun && opts.Remove {
			if versionID == "" {
				err = er.deleteObject(ctx, bucket, object, writeQuorum)
			} else {
				err = er.deleteObjectVersion(ctx, bucket, object, writeQuorum, FileInfo{VersionID: versionID}, false)
			}

			// If the delete was successful, make sure to return the appropriate error
			// and a heal result consistent with the delete's error messages.
			errs = make([]error, len(errs))
			for i := range errs {
				errs[i] = err
			}
			if err == nil {
				// Dangling object successfully purged, size is '0'.
				m.Size = 0
			}

			// Delete successfully purged dangling content; return ObjectNotFound/VersionNotFound instead.
			if countErrs(errs, nil) == len(errs) {
				returnNotFound = true
			}
		}
		if returnNotFound {
			err = toObjectErr(errFileNotFound, bucket, object)
			if versionID != "" {
				err = toObjectErr(errFileVersionNotFound, bucket, object, versionID)
			}
			return defaultHealResult(m, storageDisks, storageEndpoints, errs, bucket, object, versionID, er.defaultParityCount), err
		}
		return defaultHealResult(m, storageDisks, storageEndpoints, errs, bucket, object, versionID, er.defaultParityCount), toObjectErr(err, bucket, object, versionID)
	}

	readQuorum := len(storageDisks) - er.defaultParityCount

	err := toObjectErr(reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum), bucket, object, versionID)
	return defaultHealResult(m, storageDisks, storageEndpoints, errs, bucket, object, versionID, er.defaultParityCount), err
}

// An object is considered dangling/corrupted if and only if the number of
// disks with intact data - total disks minus the combination of corrupted and
// missing files - is smaller than the number of data blocks.
func isObjectDangling(metaArr []FileInfo, errs []error, dataErrs []error) (validMeta FileInfo, ok bool) {
	// We can consider the object data unreliable when er.meta is not found
	// on read-quorum disks, or when er.meta is not readable on read-quorum disks.
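	// In short: a delete marker or a fully transitioned object is dangling when
	// more than half of the er.meta reads failed; any other object is dangling
	// when the combined er.meta and part failures exceed its parity block count.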
	var notFoundErasureMeta, corruptedErasureMeta int
	for _, readErr := range errs {
		if errors.Is(readErr, errFileNotFound) || errors.Is(readErr, errFileVersionNotFound) {
			notFoundErasureMeta++
		} else if errors.Is(readErr, errCorruptedFormat) {
			corruptedErasureMeta++
		}
	}
	var notFoundParts int
	for i := range dataErrs {
		// Only count part errors if the error is not the
		// same as the er.meta error. This is to avoid
		// double counting when both the parts and er.meta
		// are not available.
		if errs[i] != dataErrs[i] {
			if IsErr(dataErrs[i], []error{
				errFileNotFound,
				errFileVersionNotFound,
			}...) {
				notFoundParts++
			}
		}
	}

	for _, m := range metaArr {
		if !m.IsValid() {
			continue
		}
		validMeta = m
		break
	}

	if validMeta.Deleted || validMeta.TransitionStatus == lifecycle.TransitionComplete {
		// notFoundParts is ignored since a
		// - delete marker does not have any parts
		// - transition status of complete has no parts
		return validMeta, corruptedErasureMeta+notFoundErasureMeta > len(errs)/2
	}

	// We couldn't find any valid meta, so we are indeed corrupted - return true right away.
	if validMeta.Erasure.DataBlocks == 0 {
		return validMeta, true
	}

	// We have valid meta, now verify whether we have enough files with parity blocks.
	return validMeta, corruptedErasureMeta+notFoundErasureMeta+notFoundParts > validMeta.Erasure.ParityBlocks
}

// HealObject - heals the given object, automatically deleting it if it is stale/corrupted and `remove` is true.
func (er erasureObjects) HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (hr madmin.HealResultItem, err error) {
	// Create a context that also contains information about the object and bucket.
	// The top-level handler might not have this information.
	reqInfo := logger.GetReqInfo(ctx)
	var newReqInfo *logger.ReqInfo
	if reqInfo != nil {
		newReqInfo = logger.NewReqInfo(reqInfo.RemoteHost, reqInfo.UserAgent, reqInfo.DeploymentID, reqInfo.RequestID, reqInfo.API, bucket, object)
	} else {
		newReqInfo = logger.NewReqInfo("", "", globalDeploymentID, "", "Heal", bucket, object)
	}
	healCtx := logger.SetReqInfo(GlobalContext, newReqInfo)

	// Healing of directories is handled separately.
	if HasSuffix(object, SlashSeparator) {
		return er.healObjectDir(healCtx, bucket, object, opts.DryRun, opts.Remove)
	}

	storageDisks := er.getDisks()
	storageEndpoints := er.getEndpoints()

	// Read metadata files from all the disks.

	// When versionID is empty, we read directly from the `null` versionID for healing.
	if versionID == "" {
		versionID = nullVersionID
	}

	partsMetadata, errs := readAllFileInfo(healCtx, storageDisks, bucket, object, versionID, false)

	if isAllNotFound(errs) {
		err = toObjectErr(errFileNotFound, bucket, object)
		if versionID != "" {
			err = toObjectErr(errFileVersionNotFound, bucket, object, versionID)
		}
		// Nothing to do, file is already gone.
		return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object, versionID, er.defaultParityCount), err
	}

	_, err = getLatestFileInfo(healCtx, partsMetadata, errs)
	if err != nil {
		return er.purgeObjectDangling(healCtx, bucket, object, versionID, partsMetadata, errs, []error{}, opts)
	}

	// Heal the object.
	return er.healObject(healCtx, bucket, object, versionID, opts)
}
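// Illustrative usage only (not part of this file): a caller might request a
// deep-scan heal of a single object version, removing it if it turns out to be
// dangling; bucket and object names below are placeholders.
//
//	opts := madmin.HealOpts{ScanMode: madmin.HealDeepScan, Remove: true}
//	res, err := er.HealObject(ctx, "mybucket", "path/to/object", "", opts)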