github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/erasure-metadata-utils.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"context"
    22  	"errors"
    23  	"hash/crc32"
    24  
    25  	"github.com/minio/minio/internal/logger"
    26  	"github.com/minio/pkg/v2/sync/errgroup"
    27  )
    28  
    29  // figure out the most commonVersions across disk that satisfies
    30  // the 'writeQuorum' this function returns '0' if quorum cannot
    31  // be achieved and disks have too many inconsistent versions.
    32  func reduceCommonVersions(diskVersions []uint64, writeQuorum int) (commonVersions uint64) {
    33  	diskVersionsCount := make(map[uint64]int)
    34  	for _, versions := range diskVersions {
    35  		diskVersionsCount[versions]++
    36  	}
    37  
    38  	max := 0
    39  	for versions, count := range diskVersionsCount {
    40  		if max < count {
    41  			max = count
    42  			commonVersions = versions
    43  		}
    44  	}
    45  
    46  	if max >= writeQuorum {
    47  		return commonVersions
    48  	}
    49  
    50  	return 0
    51  }
    52  
    53  // Returns number of errors that occurred the most (incl. nil) and the
    54  // corresponding error value. NB When there is more than one error value that
    55  // occurs maximum number of times, the error value returned depends on how
    56  // golang's map orders keys. This doesn't affect correctness as long as quorum
    57  // value is greater than or equal to simple majority, since none of the equally
    58  // maximal values would occur quorum or more number of times.
    59  func reduceErrs(errs []error, ignoredErrs []error) (maxCount int, maxErr error) {
    60  	errorCounts := make(map[error]int)
    61  	for _, err := range errs {
    62  		if IsErrIgnored(err, ignoredErrs...) {
    63  			continue
    64  		}
    65  		// Errors due to context cancellation may be wrapped - group them by context.Canceled.
    66  		if errors.Is(err, context.Canceled) {
    67  			errorCounts[context.Canceled]++
    68  			continue
    69  		}
    70  		errorCounts[err]++
    71  	}
    72  
    73  	max := 0
    74  	for err, count := range errorCounts {
    75  		switch {
    76  		case max < count:
    77  			max = count
    78  			maxErr = err
    79  
    80  		// Prefer `nil` over other error values with the same
    81  		// number of occurrences.
    82  		case max == count && err == nil:
    83  			maxErr = err
    84  		}
    85  	}
    86  	return max, maxErr
    87  }
    88  
    89  // reduceQuorumErrs behaves like reduceErrs by only for returning
    90  // values of maximally occurring errors validated against a generic
    91  // quorum number that can be read or write quorum depending on usage.
    92  func reduceQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, quorum int, quorumErr error) error {
    93  	if contextCanceled(ctx) {
    94  		return context.Canceled
    95  	}
    96  	maxCount, maxErr := reduceErrs(errs, ignoredErrs)
    97  	if maxCount >= quorum {
    98  		return maxErr
    99  	}
   100  	return quorumErr
   101  }
   102  
   103  // reduceReadQuorumErrs behaves like reduceErrs but only for returning
   104  // values of maximally occurring errors validated against readQuorum.
   105  func reduceReadQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, readQuorum int) (maxErr error) {
   106  	return reduceQuorumErrs(ctx, errs, ignoredErrs, readQuorum, errErasureReadQuorum)
   107  }
   108  
   109  // reduceWriteQuorumErrs behaves like reduceErrs but only for returning
   110  // values of maximally occurring errors validated against writeQuorum.
   111  func reduceWriteQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, writeQuorum int) (maxErr error) {
   112  	return reduceQuorumErrs(ctx, errs, ignoredErrs, writeQuorum, errErasureWriteQuorum)
   113  }
   114  
   115  // Similar to 'len(slice)' but returns the actual elements count
   116  // skipping the unallocated elements.
   117  func diskCount(disks []StorageAPI) int {
   118  	diskCount := 0
   119  	for _, disk := range disks {
   120  		if disk == nil {
   121  			continue
   122  		}
   123  		diskCount++
   124  	}
   125  	return diskCount
   126  }
   127  
   128  // hashOrder - hashes input key to return consistent
   129  // hashed integer slice. Returned integer order is salted
   130  // with an input key. This results in consistent order.
   131  // NOTE: collisions are fine, we are not looking for uniqueness
   132  // in the slices returned.
   133  func hashOrder(key string, cardinality int) []int {
   134  	if cardinality <= 0 {
   135  		// Returns an empty int slice for cardinality < 0.
   136  		return nil
   137  	}
   138  
   139  	nums := make([]int, cardinality)
   140  	keyCrc := crc32.Checksum([]byte(key), crc32.IEEETable)
   141  
   142  	start := int(keyCrc % uint32(cardinality))
   143  	for i := 1; i <= cardinality; i++ {
   144  		nums[i-1] = 1 + ((start + i) % cardinality)
   145  	}
   146  	return nums
   147  }
   148  
   149  // Reads all `xl.meta` metadata as a FileInfo slice.
   150  // Returns error slice indicating the failed metadata reads.
   151  func readAllFileInfo(ctx context.Context, disks []StorageAPI, origbucket string, bucket, object, versionID string, readData, healing bool) ([]FileInfo, []error) {
   152  	metadataArray := make([]FileInfo, len(disks))
   153  
   154  	opts := ReadOptions{
   155  		ReadData: readData,
   156  		Healing:  healing,
   157  	}
   158  
   159  	g := errgroup.WithNErrs(len(disks))
   160  	// Read `xl.meta` in parallel across disks.
   161  	for index := range disks {
   162  		index := index
   163  		g.Go(func() (err error) {
   164  			if disks[index] == nil {
   165  				return errDiskNotFound
   166  			}
   167  			metadataArray[index], err = disks[index].ReadVersion(ctx, origbucket, bucket, object, versionID, opts)
   168  			return err
   169  		}, index)
   170  	}
   171  
   172  	return metadataArray, g.Wait()
   173  }
   174  
   175  // shuffleDisksAndPartsMetadataByIndex this function should be always used by GetObjectNInfo()
   176  // and CompleteMultipartUpload code path, it is not meant to be used with PutObject,
   177  // NewMultipartUpload metadata shuffling.
   178  func shuffleDisksAndPartsMetadataByIndex(disks []StorageAPI, metaArr []FileInfo, fi FileInfo) (shuffledDisks []StorageAPI, shuffledPartsMetadata []FileInfo) {
   179  	shuffledDisks = make([]StorageAPI, len(disks))
   180  	shuffledPartsMetadata = make([]FileInfo, len(disks))
   181  	distribution := fi.Erasure.Distribution
   182  
   183  	var inconsistent int
   184  	for i, meta := range metaArr {
   185  		if disks[i] == nil {
   186  			// Assuming offline drives as inconsistent,
   187  			// to be safe and fallback to original
   188  			// distribution order.
   189  			inconsistent++
   190  			continue
   191  		}
   192  		if !meta.IsValid() {
   193  			inconsistent++
   194  			continue
   195  		}
   196  		if meta.XLV1 != fi.XLV1 {
   197  			inconsistent++
   198  			continue
   199  		}
   200  		// check if erasure distribution order matches the index
   201  		// position if this is not correct we discard the disk
   202  		// and move to collect others
   203  		if distribution[i] != meta.Erasure.Index {
   204  			inconsistent++ // keep track of inconsistent entries
   205  			continue
   206  		}
   207  		shuffledDisks[meta.Erasure.Index-1] = disks[i]
   208  		shuffledPartsMetadata[meta.Erasure.Index-1] = metaArr[i]
   209  	}
   210  
   211  	// Inconsistent meta info is with in the limit of
   212  	// expected quorum, proceed with EcIndex based
   213  	// disk order.
   214  	if inconsistent < fi.Erasure.ParityBlocks {
   215  		return shuffledDisks, shuffledPartsMetadata
   216  	}
   217  
   218  	// fall back to original distribution based order.
   219  	return shuffleDisksAndPartsMetadata(disks, metaArr, fi)
   220  }
   221  
   222  // Return shuffled partsMetadata depending on fi.Distribution.
   223  // additional validation is attempted and invalid metadata is
   224  // automatically skipped only when fi.ModTime is non-zero
   225  // indicating that this is called during read-phase
   226  func shuffleDisksAndPartsMetadata(disks []StorageAPI, partsMetadata []FileInfo, fi FileInfo) (shuffledDisks []StorageAPI, shuffledPartsMetadata []FileInfo) {
   227  	shuffledDisks = make([]StorageAPI, len(disks))
   228  	shuffledPartsMetadata = make([]FileInfo, len(partsMetadata))
   229  	distribution := fi.Erasure.Distribution
   230  
   231  	init := fi.ModTime.IsZero()
   232  	// Shuffle slice xl metadata for expected distribution.
   233  	for index := range partsMetadata {
   234  		if disks[index] == nil {
   235  			continue
   236  		}
   237  		if !init && !partsMetadata[index].IsValid() {
   238  			// Check for parts metadata validity for only
   239  			// fi.ModTime is not empty - ModTime is always set,
   240  			// if object was ever written previously.
   241  			continue
   242  		}
   243  		if !init && fi.XLV1 != partsMetadata[index].XLV1 {
   244  			continue
   245  		}
   246  		blockIndex := distribution[index]
   247  		shuffledPartsMetadata[blockIndex-1] = partsMetadata[index]
   248  		shuffledDisks[blockIndex-1] = disks[index]
   249  	}
   250  	return shuffledDisks, shuffledPartsMetadata
   251  }
   252  
   253  // Return shuffled partsMetadata depending on distribution.
   254  func shufflePartsMetadata(partsMetadata []FileInfo, distribution []int) (shuffledPartsMetadata []FileInfo) {
   255  	if distribution == nil {
   256  		return partsMetadata
   257  	}
   258  	shuffledPartsMetadata = make([]FileInfo, len(partsMetadata))
   259  	// Shuffle slice xl metadata for expected distribution.
   260  	for index := range partsMetadata {
   261  		blockIndex := distribution[index]
   262  		shuffledPartsMetadata[blockIndex-1] = partsMetadata[index]
   263  	}
   264  	return shuffledPartsMetadata
   265  }
   266  
   267  // shuffleDisks - shuffle input disks slice depending on the
   268  // erasure distribution. Return shuffled slice of disks with
   269  // their expected distribution.
   270  func shuffleDisks(disks []StorageAPI, distribution []int) (shuffledDisks []StorageAPI) {
   271  	if distribution == nil {
   272  		return disks
   273  	}
   274  	shuffledDisks = make([]StorageAPI, len(disks))
   275  	// Shuffle disks for expected distribution.
   276  	for index := range disks {
   277  		blockIndex := distribution[index]
   278  		shuffledDisks[blockIndex-1] = disks[index]
   279  	}
   280  	return shuffledDisks
   281  }
   282  
   283  // evalDisks - returns a new slice of disks where nil is set if
   284  // the corresponding error in errs slice is not nil
   285  func evalDisks(disks []StorageAPI, errs []error) []StorageAPI {
   286  	if len(errs) != len(disks) {
   287  		logger.LogIf(GlobalContext, errors.New("unexpected drives/errors slice length"))
   288  		return nil
   289  	}
   290  	newDisks := make([]StorageAPI, len(disks))
   291  	for index := range errs {
   292  		if errs[index] == nil {
   293  			newDisks[index] = disks[index]
   294  		} else {
   295  			newDisks[index] = nil
   296  		}
   297  	}
   298  	return newDisks
   299  }
   300  
// Errors specifically generated by calculatePartSizeFromIdx function.
var (
	// errPartSizeZero is returned when the supplied part size is zero.
	errPartSizeZero  = errors.New("Part size cannot be zero")
	// errPartSizeIndex is returned when the supplied part index is smaller than 1.
	errPartSizeIndex = errors.New("Part index cannot be smaller than 1")
)
   306  
   307  // calculatePartSizeFromIdx calculates the part size according to input index.
   308  // returns error if totalSize is -1, partSize is 0, partIndex is 0.
   309  func calculatePartSizeFromIdx(ctx context.Context, totalSize int64, partSize int64, partIndex int) (currPartSize int64, err error) {
   310  	if totalSize < -1 {
   311  		return 0, errInvalidArgument
   312  	}
   313  	if partSize == 0 {
   314  		return 0, errPartSizeZero
   315  	}
   316  	if partIndex < 1 {
   317  		return 0, errPartSizeIndex
   318  	}
   319  	if totalSize == -1 {
   320  		return -1, nil
   321  	}
   322  	if totalSize > 0 {
   323  		// Compute the total count of parts
   324  		partsCount := totalSize/partSize + 1
   325  		// Return the part's size
   326  		switch {
   327  		case int64(partIndex) < partsCount:
   328  			currPartSize = partSize
   329  		case int64(partIndex) == partsCount:
   330  			// Size of last part
   331  			currPartSize = totalSize % partSize
   332  		default:
   333  			currPartSize = 0
   334  		}
   335  	}
   336  	return currPartSize, nil
   337  }