storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/erasure-healing-common.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2016-2019 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"time"
    23  
    24  	"storj.io/minio/pkg/madmin"
    25  )
    26  
    27  // commonTime returns a maximally occurring time from a list of time.
    28  func commonTime(modTimes []time.Time, dataDirs []string) (modTime time.Time, dataDir string) {
    29  	var maxima int // Counter for remembering max occurrence of elements.
    30  
    31  	timeOccurenceMap := make(map[int64]int, len(modTimes))
    32  	dataDirOccurenceMap := make(map[string]int, len(dataDirs))
    33  	// Ignore the uuid sentinel and count the rest.
    34  	for _, time := range modTimes {
    35  		if time.Equal(timeSentinel) {
    36  			continue
    37  		}
    38  		timeOccurenceMap[time.UnixNano()]++
    39  	}
    40  
    41  	for _, dataDir := range dataDirs {
    42  		if dataDir == "" {
    43  			continue
    44  		}
    45  		dataDirOccurenceMap[dataDir]++
    46  	}
    47  
    48  	// Find the common cardinality from previously collected
    49  	// occurrences of elements.
    50  	for nano, count := range timeOccurenceMap {
    51  		t := time.Unix(0, nano)
    52  		if count > maxima || (count == maxima && t.After(modTime)) {
    53  			maxima = count
    54  			modTime = t
    55  		}
    56  	}
    57  
    58  	// Find the common cardinality from the previously collected
    59  	// occurrences of elements.
    60  	var dmaxima int
    61  	for ddataDir, count := range dataDirOccurenceMap {
    62  		if count > dmaxima {
    63  			dmaxima = count
    64  			dataDir = ddataDir
    65  		}
    66  	}
    67  
    68  	// Return the collected common uuid.
    69  	return modTime, dataDir
    70  }
    71  
    72  // Beginning of unix time is treated as sentinel value here.
    73  var timeSentinel = time.Unix(0, 0).UTC()
    74  
    75  // Boot modTimes up to disk count, setting the value to time sentinel.
    76  func bootModtimes(diskCount int) []time.Time {
    77  	modTimes := make([]time.Time, diskCount)
    78  	// Boots up all the modtimes.
    79  	for i := range modTimes {
    80  		modTimes[i] = timeSentinel
    81  	}
    82  	return modTimes
    83  }
    84  
    85  // Extracts list of times from FileInfo slice and returns, skips
    86  // slice elements which have errors.
    87  func listObjectModtimes(partsMetadata []FileInfo, errs []error) (modTimes []time.Time) {
    88  	modTimes = bootModtimes(len(partsMetadata))
    89  	for index, metadata := range partsMetadata {
    90  		if errs[index] != nil {
    91  			continue
    92  		}
    93  		// Once the file is found, save the uuid saved on disk.
    94  		modTimes[index] = metadata.ModTime
    95  	}
    96  	return modTimes
    97  }
    98  
    99  // Notes:
   100  // There are 5 possible states a disk could be in,
   101  // 1. __online__             - has the latest copy of xl.meta - returned by listOnlineDisks
   102  //
   103  // 2. __offline__            - err == errDiskNotFound
   104  //
   105  // 3. __availableWithParts__ - has the latest copy of xl.meta and has all
   106  //                             parts with checksums matching; returned by disksWithAllParts
   107  //
   108  // 4. __outdated__           - returned by outDatedDisks, provided []StorageAPI
   109  //                             returned by disksWithAllParts is passed for latestDisks.
   110  //    - has an old copy of xl.meta
   111  //    - doesn't have xl.meta (errFileNotFound)
   112  //    - has the latest xl.meta but one or more parts are corrupt
   113  //
   114  // 5. __missingParts__       - has the latest copy of xl.meta but has some parts
   115  // missing.  This is identified separately since this may need manual
   116  // inspection to understand the root cause. E.g, this could be due to
   117  // backend filesystem corruption.
   118  
   119  // listOnlineDisks - returns
   120  // - a slice of disks where disk having 'older' xl.meta (or nothing)
   121  // are set to nil.
   122  // - latest (in time) of the maximally occurring modTime(s).
   123  func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error) (onlineDisks []StorageAPI, modTime time.Time, dataDir string) {
   124  	onlineDisks = make([]StorageAPI, len(disks))
   125  
   126  	// List all the file commit ids from parts metadata.
   127  	modTimes := listObjectModtimes(partsMetadata, errs)
   128  
   129  	dataDirs := make([]string, len(partsMetadata))
   130  	for idx, fi := range partsMetadata {
   131  		if errs[idx] != nil {
   132  			continue
   133  		}
   134  		dataDirs[idx] = fi.DataDir
   135  	}
   136  
   137  	// Reduce list of UUIDs to a single common value.
   138  	modTime, dataDir = commonTime(modTimes, dataDirs)
   139  
   140  	// Create a new online disks slice, which have common uuid.
   141  	for index, t := range modTimes {
   142  		if partsMetadata[index].IsValid() && t.Equal(modTime) && partsMetadata[index].DataDir == dataDir {
   143  			onlineDisks[index] = disks[index]
   144  		} else {
   145  			onlineDisks[index] = nil
   146  		}
   147  	}
   148  
   149  	return onlineDisks, modTime, dataDir
   150  }
   151  
   152  // Returns the latest updated FileInfo files and error in case of failure.
   153  func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []error) (FileInfo, error) {
   154  	// There should be atleast half correct entries, if not return failure
   155  	if reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, len(partsMetadata)/2); reducedErr != nil {
   156  		return FileInfo{}, reducedErr
   157  	}
   158  
   159  	// List all the file commit ids from parts metadata.
   160  	modTimes := listObjectModtimes(partsMetadata, errs)
   161  
   162  	dataDirs := make([]string, len(partsMetadata))
   163  	for idx, fi := range partsMetadata {
   164  		if errs[idx] != nil {
   165  			continue
   166  		}
   167  		dataDirs[idx] = fi.DataDir
   168  	}
   169  
   170  	// Count all latest updated FileInfo values
   171  	var count int
   172  	var latestFileInfo FileInfo
   173  
   174  	// Reduce list of UUIDs to a single common value - i.e. the last updated Time
   175  	modTime, dataDir := commonTime(modTimes, dataDirs)
   176  
   177  	// Interate through all the modTimes and count the FileInfo(s) with latest time.
   178  	for index, t := range modTimes {
   179  		if partsMetadata[index].IsValid() && t.Equal(modTime) && dataDir == partsMetadata[index].DataDir {
   180  			latestFileInfo = partsMetadata[index]
   181  			count++
   182  		}
   183  	}
   184  	if count < len(partsMetadata)/2 {
   185  		return FileInfo{}, errErasureReadQuorum
   186  	}
   187  
   188  	return latestFileInfo, nil
   189  }
   190  
// disksWithAllParts - This function needs to be called with
// []StorageAPI returned by listOnlineDisks. Returns,
//
// - disks which have all parts specified in the latest xl.meta.
//
// - slice of errors about the state of data files on disk - can have
//   a not-found error or a hash-mismatch error.
//
// NOTE: may overwrite entries of partsMetadata with an empty FileInfo
// (marking them for healing) when their erasure metadata is inconsistent.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo, errs []error, bucket,
	object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
	availableDisks := make([]StorageAPI, len(onlineDisks))
	dataErrs := make([]error, len(onlineDisks))

	// First pass: count metadata entries whose recorded erasure
	// distribution disagrees with their slot, to decide whether
	// erasure.Distribution can be trusted at all.
	inconsistent := 0
	for i, meta := range partsMetadata {
		if !meta.IsValid() {
			// Since for majority of the cases erasure.Index matches with erasure.Distribution we can
			// consider the offline disks as consistent.
			continue
		}
		if len(meta.Erasure.Distribution) != len(onlineDisks) {
			// Erasure distribution seems to have lesser
			// number of items than number of online disks.
			inconsistent++
			continue
		}
		if meta.Erasure.Distribution[i] != meta.Erasure.Index {
			// Mismatch indexes with distribution order
			inconsistent++
		}
	}

	erasureDistributionReliable := true
	if inconsistent > len(partsMetadata)/2 {
		// If there are too many inconsistent files, then we can't trust erasure.Distribution (most likely
		// because of bugs found in CopyObject/PutObjectTags) https://github.com/minio/minio/pull/10772
		erasureDistributionReliable = false
	}

	// Second pass: classify each disk, recording per-disk data errors and
	// collecting the disks whose data fully verifies.
	for i, onlineDisk := range onlineDisks {
		// Propagate the metadata read error for this disk as-is.
		if errs[i] != nil {
			dataErrs[i] = errs[i]
			continue
		}
		if onlineDisk == nil {
			dataErrs[i] = errDiskNotFound
			continue
		}
		meta := partsMetadata[i]
		if erasureDistributionReliable {
			if !meta.IsValid() {
				continue
			}

			if len(meta.Erasure.Distribution) != len(onlineDisks) {
				// Erasure distribution is not the same as onlineDisks
				// attempt a fix if possible, assuming other entries
				// might have the right erasure distribution.
				partsMetadata[i] = FileInfo{}
				dataErrs[i] = errFileCorrupt
				continue
			}

			// Since erasure.Distribution is trustable we can fix the mismatching erasure.Index
			if meta.Erasure.Distribution[i] != meta.Erasure.Index {
				partsMetadata[i] = FileInfo{}
				dataErrs[i] = errFileCorrupt
				continue
			}
		}

		// Always check data, if we got it.
		// Fast path: the object's data is inlined in the metadata (or the
		// object is empty), so bitrot-verify the inline bytes directly
		// instead of going back to the disk.
		if (len(meta.Data) > 0 || meta.Size == 0) && len(meta.Parts) > 0 {
			checksumInfo := meta.Erasure.GetChecksumInfo(meta.Parts[0].Number)
			dataErrs[i] = bitrotVerify(bytes.NewBuffer(meta.Data),
				int64(len(meta.Data)),
				meta.Erasure.ShardFileSize(meta.Size),
				checksumInfo.Algorithm,
				checksumInfo.Hash, meta.Erasure.ShardSize())
			if dataErrs[i] == nil {
				// All parts verified, mark it as all data available.
				availableDisks[i] = onlineDisk
			}
			continue
		}

		// Slow path: consult the disk itself. Deep scan re-verifies part
		// contents; normal scan only checks that the parts exist.
		switch scanMode {
		case madmin.HealDeepScan:
			// disk has a valid xl.meta but may not have all the
			// parts. This is considered an outdated disk, since
			// it needs healing too.
			dataErrs[i] = onlineDisk.VerifyFile(ctx, bucket, object, partsMetadata[i])
		case madmin.HealNormalScan:
			dataErrs[i] = onlineDisk.CheckParts(ctx, bucket, object, partsMetadata[i])
		}

		if dataErrs[i] == nil {
			// All parts verified, mark it as all data available.
			availableDisks[i] = onlineDisk
		}
	}

	return availableDisks, dataErrs
}