github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/global-heal.go (about)

     1  // Copyright (c) 2015-2022 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"runtime"
    24  	"sort"
    25  	"time"
    26  
    27  	"github.com/dustin/go-humanize"
    28  	"github.com/minio/madmin-go/v3"
    29  	"github.com/minio/minio/internal/color"
    30  	"github.com/minio/minio/internal/config/storageclass"
    31  	xioutil "github.com/minio/minio/internal/ioutil"
    32  	"github.com/minio/minio/internal/logger"
    33  	"github.com/minio/pkg/v2/console"
    34  	"github.com/minio/pkg/v2/wildcard"
    35  	"github.com/minio/pkg/v2/workers"
    36  )
    37  
    38  const (
    39  	bgHealingUUID = "0000-0000-0000-0000"
    40  )
    41  
    42  // NewBgHealSequence creates a background healing sequence
    43  // operation which scans all objects and heal them.
    44  func newBgHealSequence() *healSequence {
    45  	reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
    46  	ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))
    47  
    48  	hs := madmin.HealOpts{
    49  		// Remove objects that do not have read-quorum
    50  		Remove: healDeleteDangling,
    51  	}
    52  
    53  	return &healSequence{
    54  		startTime:   UTCNow(),
    55  		clientToken: bgHealingUUID,
    56  		// run-background heal with reserved bucket
    57  		bucket:   minioReservedBucket,
    58  		settings: hs,
    59  		currentStatus: healSequenceStatus{
    60  			Summary:      healNotStartedStatus,
    61  			HealSettings: hs,
    62  		},
    63  		cancelCtx:          cancelCtx,
    64  		ctx:                ctx,
    65  		reportProgress:     false,
    66  		scannedItemsMap:    make(map[madmin.HealItemType]int64),
    67  		healedItemsMap:     make(map[madmin.HealItemType]int64),
    68  		healFailedItemsMap: make(map[string]int64),
    69  	}
    70  }
    71  
    72  // getLocalBackgroundHealStatus will return the heal status of the local node
    73  func getLocalBackgroundHealStatus(ctx context.Context, o ObjectLayer) (madmin.BgHealState, bool) {
    74  	if globalBackgroundHealState == nil {
    75  		return madmin.BgHealState{}, false
    76  	}
    77  
    78  	bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
    79  	if !ok {
    80  		return madmin.BgHealState{}, false
    81  	}
    82  
    83  	status := madmin.BgHealState{
    84  		ScannedItemsCount: bgSeq.getScannedItemsCount(),
    85  	}
    86  
    87  	healDisksMap := map[string]struct{}{}
    88  	for _, ep := range getLocalDisksToHeal() {
    89  		healDisksMap[ep.String()] = struct{}{}
    90  	}
    91  
    92  	if o == nil {
    93  		healing := globalBackgroundHealState.getLocalHealingDisks()
    94  		for _, disk := range healing {
    95  			status.HealDisks = append(status.HealDisks, disk.Endpoint)
    96  		}
    97  
    98  		return status, true
    99  	}
   100  
   101  	si := o.LocalStorageInfo(ctx, true)
   102  
   103  	indexed := make(map[string][]madmin.Disk)
   104  	for _, disk := range si.Disks {
   105  		setIdx := fmt.Sprintf("%d-%d", disk.PoolIndex, disk.SetIndex)
   106  		indexed[setIdx] = append(indexed[setIdx], disk)
   107  	}
   108  
   109  	for id, disks := range indexed {
   110  		ss := madmin.SetStatus{
   111  			ID:        id,
   112  			SetIndex:  disks[0].SetIndex,
   113  			PoolIndex: disks[0].PoolIndex,
   114  		}
   115  		for _, disk := range disks {
   116  			ss.Disks = append(ss.Disks, disk)
   117  			if disk.Healing {
   118  				ss.HealStatus = "Healing"
   119  				ss.HealPriority = "high"
   120  				status.HealDisks = append(status.HealDisks, disk.Endpoint)
   121  			}
   122  		}
   123  		sortDisks(ss.Disks)
   124  		status.Sets = append(status.Sets, ss)
   125  	}
   126  	sort.Slice(status.Sets, func(i, j int) bool {
   127  		return status.Sets[i].ID < status.Sets[j].ID
   128  	})
   129  
   130  	backendInfo := o.BackendInfo()
   131  	status.SCParity = make(map[string]int)
   132  	status.SCParity[storageclass.STANDARD] = backendInfo.StandardSCParity
   133  	status.SCParity[storageclass.RRS] = backendInfo.RRSCParity
   134  
   135  	return status, true
   136  }
   137  
// healErasureSet lists and heals all objects in a specific erasure set.
// It first heals bucket metadata for every bucket, then walks each bucket's
// listing (from up to 3 online disks) and hands every entry to a bounded
// pool of heal workers. Progress is persisted via the supplied
// healingTracker so an interrupted heal can resume from the last
// bucket/object. The returned error, if any, is the last listing failure so
// the caller can retry this disk for the buckets that failed to list.
func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, tracker *healingTracker) error {
	scanMode := madmin.HealNormalScan

	// Make sure to copy since `buckets slice`
	// is modified in place by tracker.
	healBuckets := make([]string, len(buckets))
	copy(healBuckets, buckets)

	objAPI := newObjectLayerFn()
	if objAPI == nil {
		return errServerNotInitialized
	}

	// First pass: heal bucket metadata for all buckets; object contents
	// are healed in the main loop below.
	for _, bucket := range healBuckets {
		_, err := objAPI.HealBucket(ctx, bucket, madmin.HealOpts{ScanMode: scanMode})
		if err != nil {
			// Log bucket healing error if any, we shall retry again.
			logger.LogIf(ctx, err)
		}
	}

	info, err := tracker.disk.DiskInfo(ctx, DiskInfoOptions{})
	if err != nil {
		return fmt.Errorf("unable to get disk information before healing it: %w", err)
	}

	// Size the worker pool from the drive's request queue depth
	// (NRRequests), capped by the CPU count; never fewer than 4 workers.
	var numHealers uint64

	if numCores := uint64(runtime.GOMAXPROCS(0)); info.NRRequests > numCores {
		numHealers = numCores / 4
	} else {
		numHealers = info.NRRequests / 4
	}
	if numHealers < 4 {
		numHealers = 4
	}
	// allow overriding this value as well..
	if v := globalHealConfig.GetWorkers(); v > 0 {
		numHealers = uint64(v)
	}

	logger.Event(ctx, fmt.Sprintf("Healing drive '%s' - use %d parallel workers.", tracker.disk.String(), numHealers))

	jt, _ := workers.New(int(numHealers))

	var retErr error
	// Heal all buckets with all objects
	for _, bucket := range healBuckets {
		if tracker.isHealed(bucket) {
			continue
		}
		var forwardTo string
		// If we resume to the same bucket, forward to last known item.
		if b := tracker.getBucket(); b != "" {
			if b == bucket {
				forwardTo = tracker.getObject()
			} else {
				// Reset to where last bucket ended if resuming.
				tracker.resume()
			}
		}
		tracker.setObject("")
		tracker.setBucket(bucket)
		// Heal current bucket again in case if it is failed
		// in the beginning of erasure set healing
		if _, err := objAPI.HealBucket(ctx, bucket, madmin.HealOpts{
			ScanMode: scanMode,
		}); err != nil {
			logger.LogIf(ctx, err)
			continue
		}

		// Bucket configuration snapshots used by filterLifecycle below.
		vc, _ := globalBucketVersioningSys.Get(bucket)

		// Check if the current bucket has a configured lifecycle policy
		lc, _ := globalLifecycleSys.Get(bucket)

		// Check if bucket is object locked.
		lr, _ := globalBucketObjectLockSys.Get(bucket)
		rcfg, _ := getReplicationConfig(ctx, bucket)

		if serverDebugLog {
			console.Debugf(color.Green("healDrive:")+" healing bucket %s content on %s erasure set\n",
				bucket, humanize.Ordinal(er.setIndex+1))
		}

		disks, _ := er.getOnlineDisksWithHealing(false)
		if len(disks) == 0 {
			// No object healing necessary
			tracker.bucketDone(bucket)
			logger.LogIf(ctx, tracker.update(ctx))
			continue
		}

		// Limit listing to 3 drives.
		if len(disks) > 3 {
			disks = disks[:3]
		}

		// healEntryResult carries one unit of progress back to the
		// tracker goroutine: either a per-version byte count outcome, or
		// an entryDone marker that advances the resume cursor to `name`.
		type healEntryResult struct {
			bytes     uint64
			success   bool
			skipped   bool
			entryDone bool
			name      string
		}
		healEntryDone := func(name string) healEntryResult {
			return healEntryResult{
				entryDone: true,
				name:      name,
			}
		}
		healEntrySuccess := func(sz uint64) healEntryResult {
			return healEntryResult{
				bytes:   sz,
				success: true,
			}
		}
		healEntryFailure := func(sz uint64) healEntryResult {
			return healEntryResult{
				bytes: sz,
			}
		}
		healEntrySkipped := func(sz uint64) healEntryResult {
			return healEntryResult{
				bytes:   sz,
				skipped: true,
			}
		}

		// filterLifecycle reports true when the version is due for
		// lifecycle deletion (and enqueues that deletion), meaning
		// healing should skip it. Expired restored copies are deleted
		// synchronously and still report false.
		filterLifecycle := func(bucket, object string, fi FileInfo) bool {
			if lc == nil {
				return false
			}
			versioned := vc != nil && vc.Versioned(object)
			objInfo := fi.ToObjectInfo(bucket, object, versioned)

			evt := evalActionFromLifecycle(ctx, *lc, lr, rcfg, objInfo)
			switch {
			case evt.Action.DeleteRestored(): // if restored copy has expired,delete it synchronously
				applyExpiryOnTransitionedObject(ctx, newObjectLayerFn(), objInfo, evt, lcEventSrc_Heal)
				return false
			case evt.Action.Delete():
				globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_Heal)
				return true
			default:
				return false
			}
		}

		// Collect updates to tracker from concurrent healEntry calls
		results := make(chan healEntryResult, 1000)
		go func() {
			for res := range results {
				if res.entryDone {
					tracker.setObject(res.name)
					// Persist the tracker at most once a minute.
					if time.Since(tracker.getLastUpdate()) > time.Minute {
						logger.LogIf(ctx, tracker.update(ctx))
					}
					continue
				}

				tracker.updateProgress(res.success, res.skipped, res.bytes)
			}
		}()

		// send forwards one result to the tracker goroutine; it returns
		// false when the context was canceled, signaling callers to stop.
		send := func(result healEntryResult) bool {
			select {
			case <-ctx.Done():
				if !contextCanceled(ctx) {
					logger.LogIf(ctx, ctx.Err())
				}
				return false
			case results <- result:
				return true
			}
		}

		// Note: updates from healEntry to tracker must be sent on results channel.
		healEntry := func(bucket string, entry metaCacheEntry) {
			// Release the worker slot taken by the caller (jt.Take()).
			defer jt.Give()

			if entry.name == "" && len(entry.metadata) == 0 {
				// ignore entries that don't have metadata.
				return
			}
			if entry.isDir() {
				// ignore healing entry.name's with `/` suffix.
				return
			}

			// We might land at .metacache, .trash, .multipart
			// no need to heal them skip, only when bucket
			// is '.minio.sys'
			if bucket == minioMetaBucket {
				if wildcard.Match("buckets/*/.metacache/*", entry.name) {
					return
				}
				if wildcard.Match("tmp/.trash/*", entry.name) {
					return
				}
				if wildcard.Match("multipart/*", entry.name) {
					return
				}
			}

			// erasureObjects layer needs object names to be encoded
			encodedEntryName := encodeDirObject(entry.name)

			var result healEntryResult
			fivs, err := entry.fileInfoVersions(bucket)
			if err != nil {
				// Metadata could not be parsed: attempt a whole-object
				// heal, which may also purge a dangling object.
				_, err := er.HealObject(ctx, bucket, encodedEntryName, "",
					madmin.HealOpts{
						ScanMode: scanMode,
						Remove:   healDeleteDangling,
					})
				if err != nil {
					if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
						// queueing happens across namespace, ignore
						// objects that are not found.
						return
					}
					result = healEntryFailure(0)
					logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, entry.name, err))
				} else {
					result = healEntrySuccess(0)
				}

				send(result)
				return
			}

			var versionNotFound int
			for _, version := range fivs.Versions {
				// Ignore a version with a modtime newer than healing start time.
				if version.ModTime.After(tracker.Started) {
					continue
				}

				// Apply lifecycle rules on the objects that are expired.
				if filterLifecycle(bucket, version.Name, version) {
					versionNotFound++
					if !send(healEntrySkipped(uint64(version.Size))) {
						return
					}
					continue
				}

				if _, err := er.HealObject(ctx, bucket, encodedEntryName,
					version.VersionID, madmin.HealOpts{
						ScanMode: scanMode,
						Remove:   healDeleteDangling,
					}); err != nil {
					if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
						// queueing happens across namespace, ignore
						// objects that are not found.
						versionNotFound++
						continue
					}
					// If not deleted, assume they failed.
					result = healEntryFailure(uint64(version.Size))
					if version.VersionID != "" {
						logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s-v(%s): %w", bucket, version.Name, version.VersionID, err))
					} else {
						logger.LogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, version.Name, err))
					}
				} else {
					result = healEntrySuccess(uint64(version.Size))
				}

				if !send(result) {
					return
				}
			}
			// All versions resulted in 'ObjectNotFound/VersionNotFound'
			if versionNotFound == len(fivs.Versions) {
				return
			}
			// Mark this entry done so the resume cursor advances,
			// unless the context was canceled.
			select {
			case <-ctx.Done():
				return
			case results <- healEntryDone(entry.name):
			}

			// Wait and proceed if there are active requests
			waitForLowHTTPReq()
		}

		// A heal "bucket" entry may embed a prefix path; split it back
		// into the real bucket and listing prefix.
		actualBucket, prefix := path2BucketObject(bucket)

		// How to resolve partial results.
		resolver := metadataResolutionParams{
			dirQuorum: 1,
			objQuorum: 1,
			bucket:    actualBucket,
		}

		err := listPathRaw(ctx, listPathRawOptions{
			disks:          disks,
			bucket:         actualBucket,
			path:           prefix,
			recursive:      true,
			forwardTo:      forwardTo,
			minDisks:       1,
			reportNotFound: false,
			agreed: func(entry metaCacheEntry) {
				jt.Take()
				go healEntry(actualBucket, entry)
			},
			partial: func(entries metaCacheEntries, _ []error) {
				entry, ok := entries.resolve(&resolver)
				if !ok {
					// check if we can get one entry at least
					// proceed to heal nonetheless.
					entry, _ = entries.firstFound()
				}
				jt.Take()
				go healEntry(actualBucket, *entry)
			},
			finished: nil,
		})
		jt.Wait() // synchronize all the concurrent heal jobs
		xioutil.SafeClose(results)
		if err != nil {
			// Set this such that when we return this function
			// we let the caller retry this disk again for the
			// buckets it failed to list.
			retErr = err
			logger.LogIf(ctx, err)
			continue
		}

		select {
		// If context is canceled don't mark as done...
		case <-ctx.Done():
			return ctx.Err()
		default:
			tracker.bucketDone(bucket)
			logger.LogIf(ctx, tracker.update(ctx))
		}
	}

	tracker.setObject("")
	tracker.setBucket("")

	return retErr
}
   487  
   488  func healBucket(bucket string, scan madmin.HealScanMode) error {
   489  	// Get background heal sequence to send elements to heal
   490  	bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
   491  	if ok {
   492  		return bgSeq.queueHealTask(healSource{bucket: bucket}, madmin.HealItemBucket)
   493  	}
   494  	return nil
   495  }
   496  
   497  // healObject sends the given object/version to the background healing workers
   498  func healObject(bucket, object, versionID string, scan madmin.HealScanMode) error {
   499  	// Get background heal sequence to send elements to heal
   500  	bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
   501  	if ok {
   502  		return bgSeq.queueHealTask(healSource{
   503  			bucket:    bucket,
   504  			object:    object,
   505  			versionID: versionID,
   506  			noWait:    true, // do not block callers.
   507  			opts: &madmin.HealOpts{
   508  				Remove:   healDeleteDangling, // if found dangling purge it.
   509  				ScanMode: scan,
   510  			},
   511  		}, madmin.HealItemObject)
   512  	}
   513  	return nil
   514  }