github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/global-heal.go

// Copyright (c) 2015-2022 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"context"
	"fmt"
	"runtime"
	"sort"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/minio/madmin-go/v3"
	"github.com/minio/minio/internal/color"
	"github.com/minio/minio/internal/config/storageclass"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
	"github.com/minio/pkg/v2/console"
	"github.com/minio/pkg/v2/wildcard"
	"github.com/minio/pkg/v2/workers"
)

const (
	bgHealingUUID = "0000-0000-0000-0000"
)

// newBgHealSequence creates a background healing sequence
// operation which scans all objects and heals them.
func newBgHealSequence() *healSequence {
	reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
	ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))

	hs := madmin.HealOpts{
		// Remove objects that do not have read-quorum
		Remove: healDeleteDangling,
	}

	return &healSequence{
		startTime:   UTCNow(),
		clientToken: bgHealingUUID,
		// run background heal with the reserved bucket
		bucket:   minioReservedBucket,
		settings: hs,
		currentStatus: healSequenceStatus{
			Summary:      healNotStartedStatus,
			HealSettings: hs,
		},
		cancelCtx:          cancelCtx,
		ctx:                ctx,
		reportProgress:     false,
		scannedItemsMap:    make(map[madmin.HealItemType]int64),
		healedItemsMap:     make(map[madmin.HealItemType]int64),
		healFailedItemsMap: make(map[string]int64),
	}
}

// getLocalBackgroundHealStatus will return the heal status of the local node
func getLocalBackgroundHealStatus(ctx context.Context, o ObjectLayer) (madmin.BgHealState, bool) {
	if globalBackgroundHealState == nil {
		return madmin.BgHealState{}, false
	}

	bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
	if !ok {
		return madmin.BgHealState{}, false
	}

	status := madmin.BgHealState{
		ScannedItemsCount: bgSeq.getScannedItemsCount(),
	}

	healDisksMap := map[string]struct{}{}
	for _, ep := range getLocalDisksToHeal() {
		healDisksMap[ep.String()] = struct{}{}
	}

	if o == nil {
		healing := globalBackgroundHealState.getLocalHealingDisks()
		for _, disk := range healing {
			status.HealDisks = append(status.HealDisks, disk.Endpoint)
		}

		return status, true
	}

	si := o.LocalStorageInfo(ctx, true)

	indexed := make(map[string][]madmin.Disk)
	for _, disk := range si.Disks {
		setIdx := fmt.Sprintf("%d-%d", disk.PoolIndex, disk.SetIndex)
		indexed[setIdx] = append(indexed[setIdx], disk)
	}

	for id, disks := range indexed {
		ss := madmin.SetStatus{
			ID:        id,
			SetIndex:  disks[0].SetIndex,
			PoolIndex: disks[0].PoolIndex,
		}
		for _, disk := range disks {
			ss.Disks = append(ss.Disks, disk)
			if disk.Healing {
				ss.HealStatus = "Healing"
				ss.HealPriority = "high"
				status.HealDisks = append(status.HealDisks, disk.Endpoint)
			}
		}
		sortDisks(ss.Disks)
		status.Sets = append(status.Sets, ss)
	}
	sort.Slice(status.Sets, func(i, j int) bool {
		return status.Sets[i].ID < status.Sets[j].ID
	})

	backendInfo := o.BackendInfo()
	status.SCParity = make(map[string]int)
	status.SCParity[storageclass.STANDARD] = backendInfo.StandardSCParity
	status.SCParity[storageclass.RRS] = backendInfo.RRSCParity

	return status, true
}

// healErasureSet lists and heals all objects in a specific erasure set
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, tracker *healingTracker) error {
	scanMode := madmin.HealNormalScan

	// Make sure to copy, since the `buckets` slice
	// is modified in place by the tracker.
	healBuckets := make([]string, len(buckets))
	copy(healBuckets, buckets)

	objAPI := newObjectLayerFn()
	if objAPI == nil {
		return errServerNotInitialized
	}

	for _, bucket := range healBuckets {
		_, err := objAPI.HealBucket(ctx, bucket, madmin.HealOpts{ScanMode: scanMode})
		if err != nil {
			// Log bucket healing errors if any; we shall retry again.
			logger.LogIf(ctx, err)
		}
	}

	info, err := tracker.disk.DiskInfo(ctx, DiskInfoOptions{})
	if err != nil {
		return fmt.Errorf("unable to get disk information before healing it: %w", err)
	}
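
	// Size the healing worker pool from the drive's queue depth:
	// roughly a quarter of min(NRRequests, GOMAXPROCS), with a floor
	// of four workers so healing always makes progress; the heal
	// configuration below may override this value.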
	var numHealers uint64

	if numCores := uint64(runtime.GOMAXPROCS(0)); info.NRRequests > numCores {
		numHealers = numCores / 4
	} else {
		numHealers = info.NRRequests / 4
	}
	if numHealers < 4 {
		numHealers = 4
	}
	// Allow overriding this value as well.
	if v := globalHealConfig.GetWorkers(); v > 0 {
		numHealers = uint64(v)
	}

	logger.Event(ctx, fmt.Sprintf("Healing drive '%s' - use %d parallel workers.", tracker.disk.String(), numHealers))

	jt, _ := workers.New(int(numHealers))

	var retErr error
	// Heal all buckets with all objects
	for _, bucket := range healBuckets {
		if tracker.isHealed(bucket) {
			continue
		}
		var forwardTo string
		// If we resume into the same bucket, forward to the last known item.
		if b := tracker.getBucket(); b != "" {
			if b == bucket {
				forwardTo = tracker.getObject()
			} else {
				// Reset to where the last bucket ended if resuming.
				tracker.resume()
			}
		}
		tracker.setObject("")
		tracker.setBucket(bucket)
		// Heal the current bucket again in case it failed
		// at the beginning of the erasure set healing.
		if _, err := objAPI.HealBucket(ctx, bucket, madmin.HealOpts{
			ScanMode: scanMode,
		}); err != nil {
			logger.LogIf(ctx, err)
			continue
		}

		vc, _ := globalBucketVersioningSys.Get(bucket)

		// Check if the current bucket has a configured lifecycle policy
		lc, _ := globalLifecycleSys.Get(bucket)

		// Check if bucket is object locked.
		lr, _ := globalBucketObjectLockSys.Get(bucket)
		rcfg, _ := getReplicationConfig(ctx, bucket)

		if serverDebugLog {
			console.Debugf(color.Green("healDrive:")+" healing bucket %s content on %s erasure set\n",
				bucket, humanize.Ordinal(er.setIndex+1))
		}

		disks, _ := er.getOnlineDisksWithHealing(false)
		if len(disks) == 0 {
			// No object healing necessary
			tracker.bucketDone(bucket)
			logger.LogIf(ctx, tracker.update(ctx))
			continue
		}

		// Limit listing to 3 drives.
		if len(disks) > 3 {
			disks = disks[:3]
		}

		type healEntryResult struct {
			bytes     uint64
			success   bool
			skipped   bool
			entryDone bool
			name      string
		}
		healEntryDone := func(name string) healEntryResult {
			return healEntryResult{
				entryDone: true,
				name:      name,
			}
		}
		healEntrySuccess := func(sz uint64) healEntryResult {
			return healEntryResult{
				bytes:   sz,
				success: true,
			}
		}
		healEntryFailure := func(sz uint64) healEntryResult {
			return healEntryResult{
				bytes: sz,
			}
		}
		healEntrySkipped := func(sz uint64) healEntryResult {
			return healEntryResult{
				bytes:   sz,
				skipped: true,
			}
		}
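
		// filterLifecycle reports whether this version has expired per the
		// bucket lifecycle configuration; expired versions are queued for
		// deletion and counted as skipped rather than healed.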
		filterLifecycle := func(bucket, object string, fi FileInfo) bool {
			if lc == nil {
				return false
			}
			versioned := vc != nil && vc.Versioned(object)
			objInfo := fi.ToObjectInfo(bucket, object, versioned)

			evt := evalActionFromLifecycle(ctx, *lc, lr, rcfg, objInfo)
			switch {
			case evt.Action.DeleteRestored(): // if restored copy has expired, delete it synchronously
				applyExpiryOnTransitionedObject(ctx, newObjectLayerFn(), objInfo, evt, lcEventSrc_Heal)
				return false
			case evt.Action.Delete():
				globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_Heal)
				return true
			default:
				return false
			}
		}

		// Collect updates to tracker from concurrent healEntry calls
		results := make(chan healEntryResult, 1000)
		go func() {
			for res := range results {
				if res.entryDone {
					tracker.setObject(res.name)
					if time.Since(tracker.getLastUpdate()) > time.Minute {
						logger.LogIf(ctx, tracker.update(ctx))
					}
					continue
				}

				tracker.updateProgress(res.success, res.skipped, res.bytes)
			}
		}()

		send := func(result healEntryResult) bool {
			select {
			case <-ctx.Done():
				if !contextCanceled(ctx) {
					logger.LogIf(ctx, ctx.Err())
				}
				return false
			case results <- result:
				return true
			}
		}
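
		// healEntry heals one listed entry: it decodes the entry's versions,
		// applies lifecycle filtering, heals each remaining version, and
		// reports per-version results on the results channel.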
		// Note: updates from healEntry to tracker must be sent on results channel.
		healEntry := func(bucket string, entry metaCacheEntry) {
			defer jt.Give()

			if entry.name == "" && len(entry.metadata) == 0 {
				// ignore entries that don't have metadata.
				return
			}
			if entry.isDir() {
				// ignore entries with a `/` suffix, they are directories.
				return
			}

			// We might land at .metacache, .trash or .multipart entries;
			// no need to heal them, skip them. This applies only when
			// the bucket is '.minio.sys'.
			if bucket == minioMetaBucket {
				if wildcard.Match("buckets/*/.metacache/*", entry.name) {
					return
				}
				if wildcard.Match("tmp/.trash/*", entry.name) {
					return
				}
				if wildcard.Match("multipart/*", entry.name) {
					return
				}
			}

			// erasureObjects layer needs object names to be encoded
			encodedEntryName := encodeDirObject(entry.name)

			var result healEntryResult
			fivs, err := entry.fileInfoVersions(bucket)
			if err != nil {
				_, err := er.HealObject(ctx, bucket, encodedEntryName, "",
					madmin.HealOpts{
						ScanMode: scanMode,
						Remove:   healDeleteDangling,
					})
				if err != nil {
					if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
						// queueing happens across namespace, ignore
						// objects that are not found.
						return
					}
					result = healEntryFailure(0)
					logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, entry.name, err))
				} else {
					result = healEntrySuccess(0)
				}

				send(result)
				return
			}

			var versionNotFound int
			for _, version := range fivs.Versions {
				// Ignore a version with a modtime newer than the healing start time.
				if version.ModTime.After(tracker.Started) {
					continue
				}

				// Apply lifecycle rules on the objects that are expired.
				if filterLifecycle(bucket, version.Name, version) {
					versionNotFound++
					if !send(healEntrySkipped(uint64(version.Size))) {
						return
					}
					continue
				}

				if _, err := er.HealObject(ctx, bucket, encodedEntryName,
					version.VersionID, madmin.HealOpts{
						ScanMode: scanMode,
						Remove:   healDeleteDangling,
					}); err != nil {
					if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
						// queueing happens across namespace, ignore
						// objects that are not found.
						versionNotFound++
						continue
					}
					// If not deleted, assume they failed.
					result = healEntryFailure(uint64(version.Size))
					if version.VersionID != "" {
						logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s-v(%s): %w", bucket, version.Name, version.VersionID, err))
					} else {
						logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, version.Name, err))
					}
				} else {
					result = healEntrySuccess(uint64(version.Size))
				}

				if !send(result) {
					return
				}
			}
			// All versions resulted in 'ObjectNotFound/VersionNotFound'
			if versionNotFound == len(fivs.Versions) {
				return
			}
			select {
			case <-ctx.Done():
				return
			case results <- healEntryDone(entry.name):
			}

			// Wait and proceed if there are active requests
			waitForLowHTTPReq()
		}

		actualBucket, prefix := path2BucketObject(bucket)
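
		// Listing quorum of one: any single drive reporting an entry is
		// enough to schedule it for healing, since healing wants the union
		// of entries across drives rather than only entries they agree on.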
		// How to resolve partial results.
		resolver := metadataResolutionParams{
			dirQuorum: 1,
			objQuorum: 1,
			bucket:    actualBucket,
		}

		err := listPathRaw(ctx, listPathRawOptions{
			disks:          disks,
			bucket:         actualBucket,
			path:           prefix,
			recursive:      true,
			forwardTo:      forwardTo,
			minDisks:       1,
			reportNotFound: false,
			agreed: func(entry metaCacheEntry) {
				jt.Take()
				go healEntry(actualBucket, entry)
			},
			partial: func(entries metaCacheEntries, _ []error) {
				entry, ok := entries.resolve(&resolver)
				if !ok {
					// Check if we can get at least one entry
					// and proceed to heal nonetheless.
					entry, _ = entries.firstFound()
				}
				jt.Take()
				go healEntry(actualBucket, *entry)
			},
			finished: nil,
		})
		jt.Wait() // synchronize all the concurrent heal jobs
		xioutil.SafeClose(results)
		if err != nil {
			// Set this such that when we return from this function,
			// the caller retries this disk again for the buckets
			// it failed to list.
			retErr = err
			logger.LogIf(ctx, err)
			continue
		}

		select {
		// If context is canceled don't mark as done...
		case <-ctx.Done():
			return ctx.Err()
		default:
			tracker.bucketDone(bucket)
			logger.LogIf(ctx, tracker.update(ctx))
		}
	}

	tracker.setObject("")
	tracker.setBucket("")

	return retErr
}

// healBucket sends the given bucket to the background healing workers
func healBucket(bucket string, scan madmin.HealScanMode) error {
	// Get background heal sequence to send elements to heal
	bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
	if ok {
		return bgSeq.queueHealTask(healSource{bucket: bucket}, madmin.HealItemBucket)
	}
	return nil
}

// healObject sends the given object/version to the background healing workers
func healObject(bucket, object, versionID string, scan madmin.HealScanMode) error {
	// Get background heal sequence to send elements to heal
	bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
	if ok {
		return bgSeq.queueHealTask(healSource{
			bucket:    bucket,
			object:    object,
			versionID: versionID,
			noWait:    true, // do not block callers.
			opts: &madmin.HealOpts{
				Remove:   healDeleteDangling, // if found dangling purge it.
				ScanMode: scan,
			},
		}, madmin.HealItemObject)
	}
	return nil
}