storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/background-newdisks-heal-ops.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2019 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"encoding/json"
    23  	"errors"
    24  	"fmt"
    25  	"io"
    26  	"sort"
    27  	"strings"
    28  	"sync"
    29  	"time"
    30  
    31  	"github.com/dustin/go-humanize"
    32  	"github.com/minio/minio-go/v7/pkg/set"
    33  
    34  	"storj.io/minio/cmd/logger"
    35  	"storj.io/minio/pkg/color"
    36  	"storj.io/minio/pkg/console"
    37  	"storj.io/minio/pkg/madmin"
    38  )
    39  
    40  const (
    41  	defaultMonitorNewDiskInterval = time.Second * 10
    42  	healingTrackerFilename        = ".healing.bin"
    43  )
    44  
    45  //go:generate msgp -file $GOFILE -unexported
    46  
// healingTracker is used to persist healing information during a heal.
//
// The tracker is encoded with its MarshalMsg/UnmarshalMsg methods and written
// to the disk it describes (see save and loadHealingTracker). Fields tagged
// `json:"-"` are left out of the JSON rendering produced by printTo.
type healingTracker struct {
	// disk is the drive the tracker is bound to. It is excluded from the
	// msgp encoding and is set by loadHealingTracker/newHealingTracker.
	disk StorageAPI `msg:"-"`

	// ID is the disk ID; PoolIndex/SetIndex/DiskIndex are the disk location
	// as reported by GetDiskLoc. Path and Endpoint are captured from the disk
	// when the tracker is created, Started is the creation time (UTC) and
	// LastUpdate is refreshed on every save. The Objects*/Bytes* fields are
	// the running progress counters that resume rewinds and bucketDone
	// snapshots.
	ID            string
	PoolIndex     int
	SetIndex      int
	DiskIndex     int
	Path          string
	Endpoint      string
	Started       time.Time
	LastUpdate    time.Time
	ObjectsHealed uint64
	ObjectsFailed uint64
	BytesDone     uint64
	BytesFailed   uint64

	// Last object scanned.
	Bucket string `json:"-"`
	Object string `json:"-"`

	// Numbers when current bucket started healing,
	// for resuming with correct numbers.
	// They are copied from the progress counters by bucketDone and copied
	// back into them by resume.
	ResumeObjectsHealed uint64 `json:"-"`
	ResumeObjectsFailed uint64 `json:"-"`
	ResumeBytesDone     uint64 `json:"-"`
	ResumeBytesFailed   uint64 `json:"-"`

	// Filled on startup/restarts.
	// Rebuilt by setQueuedBuckets; entries are removed by bucketDone.
	QueuedBuckets []string

	// Filled during heal.
	// bucketDone appends each finished bucket here.
	HealedBuckets []string
	// Add future tracking capabilities
	// Be sure that they are included in toHealingDisk
}
    83  
    84  // loadHealingTracker will load the healing tracker from the supplied disk.
    85  // The disk ID will be validated against the loaded one.
    86  func loadHealingTracker(ctx context.Context, disk StorageAPI) (*healingTracker, error) {
    87  	if disk == nil {
    88  		return nil, errors.New("loadHealingTracker: nil disk given")
    89  	}
    90  	diskID, err := disk.GetDiskID()
    91  	if err != nil {
    92  		return nil, err
    93  	}
    94  	b, err := disk.ReadAll(ctx, minioMetaBucket,
    95  		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename))
    96  	if err != nil {
    97  		return nil, err
    98  	}
    99  	var h healingTracker
   100  	_, err = h.UnmarshalMsg(b)
   101  	if err != nil {
   102  		return nil, err
   103  	}
   104  	if h.ID != diskID && h.ID != "" {
   105  		return nil, fmt.Errorf("loadHealingTracker: disk id mismatch expected %s, got %s", h.ID, diskID)
   106  	}
   107  	h.disk = disk
   108  	h.ID = diskID
   109  	return &h, nil
   110  }
   111  
   112  // newHealingTracker will create a new healing tracker for the disk.
   113  func newHealingTracker(disk StorageAPI) *healingTracker {
   114  	diskID, _ := disk.GetDiskID()
   115  	h := healingTracker{
   116  		disk:     disk,
   117  		ID:       diskID,
   118  		Path:     disk.String(),
   119  		Endpoint: disk.Endpoint().String(),
   120  		Started:  time.Now().UTC(),
   121  	}
   122  	h.PoolIndex, h.SetIndex, h.DiskIndex = disk.GetDiskLoc()
   123  	return &h
   124  }
   125  
   126  // update will update the tracker on the disk.
   127  // If the tracker has been deleted an error is returned.
   128  func (h *healingTracker) update(ctx context.Context) error {
   129  	if h.disk.Healing() == nil {
   130  		return fmt.Errorf("healingTracker: disk %q is not marked as healing", h.ID)
   131  	}
   132  	if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
   133  		h.ID, _ = h.disk.GetDiskID()
   134  		h.PoolIndex, h.SetIndex, h.DiskIndex = h.disk.GetDiskLoc()
   135  	}
   136  	return h.save(ctx)
   137  }
   138  
   139  // save will unconditionally save the tracker and will be created if not existing.
   140  func (h *healingTracker) save(ctx context.Context) error {
   141  	if h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
   142  		// Attempt to get location.
   143  		if api := newObjectLayerFn(); api != nil {
   144  			if ep, ok := api.(*erasureServerPools); ok {
   145  				h.PoolIndex, h.SetIndex, h.DiskIndex, _ = ep.getPoolAndSet(h.ID)
   146  			}
   147  		}
   148  	}
   149  	h.LastUpdate = time.Now().UTC()
   150  	htrackerBytes, err := h.MarshalMsg(nil)
   151  	if err != nil {
   152  		return err
   153  	}
   154  	globalBackgroundHealState.updateHealStatus(h)
   155  	return h.disk.WriteAll(ctx, minioMetaBucket,
   156  		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename),
   157  		htrackerBytes)
   158  }
   159  
   160  // delete the tracker on disk.
   161  func (h *healingTracker) delete(ctx context.Context) error {
   162  	return h.disk.Delete(ctx, minioMetaBucket,
   163  		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename),
   164  		false)
   165  }
   166  
   167  func (h *healingTracker) isHealed(bucket string) bool {
   168  	for _, v := range h.HealedBuckets {
   169  		if v == bucket {
   170  			return true
   171  		}
   172  	}
   173  	return false
   174  }
   175  
   176  // resume will reset progress to the numbers at the start of the bucket.
   177  func (h *healingTracker) resume() {
   178  	h.ObjectsHealed = h.ResumeObjectsHealed
   179  	h.ObjectsFailed = h.ResumeObjectsFailed
   180  	h.BytesDone = h.ResumeBytesDone
   181  	h.BytesFailed = h.ResumeBytesFailed
   182  }
   183  
   184  // bucketDone should be called when a bucket is done healing.
   185  // Adds the bucket to the list of healed buckets and updates resume numbers.
   186  func (h *healingTracker) bucketDone(bucket string) {
   187  	h.ResumeObjectsHealed = h.ObjectsHealed
   188  	h.ResumeObjectsFailed = h.ObjectsFailed
   189  	h.ResumeBytesDone = h.BytesDone
   190  	h.ResumeBytesFailed = h.BytesFailed
   191  	h.HealedBuckets = append(h.HealedBuckets, bucket)
   192  	for i, b := range h.QueuedBuckets {
   193  		if b == bucket {
   194  			// Delete...
   195  			h.QueuedBuckets = append(h.QueuedBuckets[:i], h.QueuedBuckets[i+1:]...)
   196  		}
   197  	}
   198  }
   199  
   200  // setQueuedBuckets will add buckets, but exclude any that is already in h.HealedBuckets.
   201  // Order is preserved.
   202  func (h *healingTracker) setQueuedBuckets(buckets []BucketInfo) {
   203  	s := set.CreateStringSet(h.HealedBuckets...)
   204  	h.QueuedBuckets = make([]string, 0, len(buckets))
   205  	for _, b := range buckets {
   206  		if !s.Contains(b.Name) {
   207  			h.QueuedBuckets = append(h.QueuedBuckets, b.Name)
   208  		}
   209  	}
   210  }
   211  
   212  func (h *healingTracker) printTo(writer io.Writer) {
   213  	b, err := json.MarshalIndent(h, "", "  ")
   214  	if err != nil {
   215  		writer.Write([]byte(err.Error()))
   216  	}
   217  	writer.Write(b)
   218  }
   219  
   220  // toHealingDisk converts the information to madmin.HealingDisk
   221  func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
   222  	return madmin.HealingDisk{
   223  		ID:            h.ID,
   224  		Endpoint:      h.Endpoint,
   225  		PoolIndex:     h.PoolIndex,
   226  		SetIndex:      h.SetIndex,
   227  		DiskIndex:     h.DiskIndex,
   228  		Path:          h.Path,
   229  		Started:       h.Started.UTC(),
   230  		LastUpdate:    h.LastUpdate.UTC(),
   231  		ObjectsHealed: h.ObjectsHealed,
   232  		ObjectsFailed: h.ObjectsFailed,
   233  		BytesDone:     h.BytesDone,
   234  		BytesFailed:   h.BytesFailed,
   235  		Bucket:        h.Bucket,
   236  		Object:        h.Object,
   237  		QueuedBuckets: h.QueuedBuckets,
   238  		HealedBuckets: h.HealedBuckets,
   239  	}
   240  }
   241  
   242  func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
   243  	z, ok := objAPI.(*erasureServerPools)
   244  	if !ok {
   245  		return
   246  	}
   247  
   248  	initBackgroundHealing(ctx, objAPI) // start quick background healing
   249  
   250  	bgSeq := mustGetHealSequence(ctx)
   251  
   252  	globalBackgroundHealState.pushHealLocalDisks(getLocalDisksToHeal()...)
   253  
   254  	if drivesToHeal := globalBackgroundHealState.healDriveCount(); drivesToHeal > 0 {
   255  		logger.Info(fmt.Sprintf("Found drives to heal %d, waiting until %s to heal the content...",
   256  			drivesToHeal, defaultMonitorNewDiskInterval))
   257  
   258  		// Heal any disk format and metadata early, if possible.
   259  		// Start with format healing
   260  		if err := bgSeq.healDiskFormat(); err != nil {
   261  			if newObjectLayerFn() != nil {
   262  				// log only in situations, when object layer
   263  				// has fully initialized.
   264  				logger.LogIf(bgSeq.ctx, err)
   265  			}
   266  		}
   267  	}
   268  
   269  	if err := bgSeq.healDiskMeta(objAPI); err != nil {
   270  		if newObjectLayerFn() != nil {
   271  			// log only in situations, when object layer
   272  			// has fully initialized.
   273  			logger.LogIf(bgSeq.ctx, err)
   274  		}
   275  	}
   276  
   277  	go monitorLocalDisksAndHeal(ctx, z, bgSeq)
   278  }
   279  
   280  func getLocalDisksToHeal() (disksToHeal Endpoints) {
   281  	for _, ep := range globalEndpoints {
   282  		for _, endpoint := range ep.Endpoints {
   283  			if !endpoint.IsLocal {
   284  				continue
   285  			}
   286  			// Try to connect to the current endpoint
   287  			// and reformat if the current disk is not formatted
   288  			disk, _, err := connectEndpoint(endpoint)
   289  			if errors.Is(err, errUnformattedDisk) {
   290  				disksToHeal = append(disksToHeal, endpoint)
   291  			} else if err == nil && disk != nil && disk.Healing() != nil {
   292  				disksToHeal = append(disksToHeal, disk.Endpoint())
   293  			}
   294  		}
   295  	}
   296  	return disksToHeal
   297  
   298  }
   299  
   300  func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
   301  	// Run the background healer
   302  	globalBackgroundHealRoutine = newHealRoutine()
   303  	go globalBackgroundHealRoutine.run(ctx, objAPI)
   304  
   305  	globalBackgroundHealState.LaunchNewHealSequence(newBgHealSequence(), objAPI)
   306  }
   307  
   308  // monitorLocalDisksAndHeal - ensures that detected new disks are healed
   309  //  1. Only the concerned erasure set will be listed and healed
   310  //  2. Only the node hosting the disk is responsible to perform the heal
   311  func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq *healSequence) {
   312  	// Perform automatic disk healing when a disk is replaced locally.
   313  	diskCheckTimer := time.NewTimer(defaultMonitorNewDiskInterval)
   314  	defer diskCheckTimer.Stop()
   315  
   316  	for {
   317  		select {
   318  		case <-ctx.Done():
   319  			return
   320  		case <-diskCheckTimer.C:
   321  			// Reset to next interval.
   322  			diskCheckTimer.Reset(defaultMonitorNewDiskInterval)
   323  
   324  			var erasureSetInPoolDisksToHeal []map[int][]StorageAPI
   325  
   326  			healDisks := globalBackgroundHealState.getHealLocalDiskEndpoints()
   327  			if len(healDisks) > 0 {
   328  				// Reformat disks
   329  				bgSeq.sourceCh <- healSource{bucket: SlashSeparator}
   330  
   331  				// Ensure that reformatting disks is finished
   332  				bgSeq.sourceCh <- healSource{bucket: nopHeal}
   333  
   334  				logger.Info(fmt.Sprintf("Found drives to heal %d, proceeding to heal content...",
   335  					len(healDisks)))
   336  
   337  				erasureSetInPoolDisksToHeal = make([]map[int][]StorageAPI, len(z.serverPools))
   338  				for i := range z.serverPools {
   339  					erasureSetInPoolDisksToHeal[i] = map[int][]StorageAPI{}
   340  				}
   341  			}
   342  
   343  			if serverDebugLog {
   344  				console.Debugf(color.Green("healDisk:")+" disk check timer fired, attempting to heal %d drives\n", len(healDisks))
   345  			}
   346  
   347  			// heal only if new disks found.
   348  			for _, endpoint := range healDisks {
   349  				disk, format, err := connectEndpoint(endpoint)
   350  				if err != nil {
   351  					printEndpointError(endpoint, err, true)
   352  					continue
   353  				}
   354  
   355  				poolIdx := globalEndpoints.GetLocalPoolIdx(disk.Endpoint())
   356  				if poolIdx < 0 {
   357  					continue
   358  				}
   359  
   360  				// Calculate the set index where the current endpoint belongs
   361  				z.serverPools[poolIdx].erasureDisksMu.RLock()
   362  				// Protect reading reference format.
   363  				setIndex, _, err := findDiskIndex(z.serverPools[poolIdx].format, format)
   364  				z.serverPools[poolIdx].erasureDisksMu.RUnlock()
   365  				if err != nil {
   366  					printEndpointError(endpoint, err, false)
   367  					continue
   368  				}
   369  
   370  				erasureSetInPoolDisksToHeal[poolIdx][setIndex] = append(erasureSetInPoolDisksToHeal[poolIdx][setIndex], disk)
   371  			}
   372  
   373  			buckets, _ := z.ListBuckets(ctx)
   374  
   375  			buckets = append(buckets, BucketInfo{
   376  				Name: pathJoin(minioMetaBucket, minioConfigPrefix),
   377  			})
   378  
   379  			// Buckets data are dispersed in multiple zones/sets, make
   380  			// sure to heal all bucket metadata configuration.
   381  			buckets = append(buckets, []BucketInfo{
   382  				{Name: pathJoin(minioMetaBucket, bucketMetaPrefix)},
   383  			}...)
   384  
   385  			// Heal latest buckets first.
   386  			sort.Slice(buckets, func(i, j int) bool {
   387  				a, b := strings.HasPrefix(buckets[i].Name, minioMetaBucket), strings.HasPrefix(buckets[j].Name, minioMetaBucket)
   388  				if a != b {
   389  					return a
   390  				}
   391  				return buckets[i].Created.After(buckets[j].Created)
   392  			})
   393  
   394  			// TODO(klauspost): This will block until all heals are done,
   395  			// in the future this should be able to start healing other sets at once.
   396  			var wg sync.WaitGroup
   397  			for i, setMap := range erasureSetInPoolDisksToHeal {
   398  				i := i
   399  				for setIndex, disks := range setMap {
   400  					if len(disks) == 0 {
   401  						continue
   402  					}
   403  					wg.Add(1)
   404  					go func(setIndex int, disks []StorageAPI) {
   405  						defer wg.Done()
   406  						for _, disk := range disks {
   407  							logger.Info("Healing disk '%v' on %s pool", disk, humanize.Ordinal(i+1))
   408  
   409  							// So someone changed the drives underneath, healing tracker missing.
   410  							tracker, err := loadHealingTracker(ctx, disk)
   411  							if err != nil {
   412  								logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool", disk, humanize.Ordinal(i+1))
   413  								tracker = newHealingTracker(disk)
   414  							}
   415  
   416  							tracker.PoolIndex, tracker.SetIndex, tracker.DiskIndex = disk.GetDiskLoc()
   417  							tracker.setQueuedBuckets(buckets)
   418  							if err := tracker.save(ctx); err != nil {
   419  								logger.LogIf(ctx, err)
   420  								// Unable to write healing tracker, permission denied or some
   421  								// other unexpected error occurred. Proceed to look for new
   422  								// disks to be healed again, we cannot proceed further.
   423  								return
   424  							}
   425  
   426  							err = z.serverPools[i].sets[setIndex].healErasureSet(ctx, buckets, tracker)
   427  							if err != nil {
   428  								logger.LogIf(ctx, err)
   429  								continue
   430  							}
   431  
   432  							logger.Info("Healing disk '%s' on %s pool complete", disk, humanize.Ordinal(i+1))
   433  							var buf bytes.Buffer
   434  							tracker.printTo(&buf)
   435  							logger.Info("Summary:\n%s", buf.String())
   436  							logger.LogIf(ctx, tracker.delete(ctx))
   437  
   438  							// Only upon success pop the healed disk.
   439  							globalBackgroundHealState.popHealLocalDisks(disk.Endpoint())
   440  						}
   441  					}(setIndex, disks)
   442  				}
   443  			}
   444  			wg.Wait()
   445  		}
   446  	}
   447  }