github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/erasure-metadata-utils.go (about) 1 // Copyright (c) 2015-2021 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "context" 22 "errors" 23 "hash/crc32" 24 25 "github.com/minio/minio/internal/logger" 26 "github.com/minio/pkg/v2/sync/errgroup" 27 ) 28 29 // figure out the most commonVersions across disk that satisfies 30 // the 'writeQuorum' this function returns '0' if quorum cannot 31 // be achieved and disks have too many inconsistent versions. 32 func reduceCommonVersions(diskVersions []uint64, writeQuorum int) (commonVersions uint64) { 33 diskVersionsCount := make(map[uint64]int) 34 for _, versions := range diskVersions { 35 diskVersionsCount[versions]++ 36 } 37 38 max := 0 39 for versions, count := range diskVersionsCount { 40 if max < count { 41 max = count 42 commonVersions = versions 43 } 44 } 45 46 if max >= writeQuorum { 47 return commonVersions 48 } 49 50 return 0 51 } 52 53 // Returns number of errors that occurred the most (incl. nil) and the 54 // corresponding error value. NB When there is more than one error value that 55 // occurs maximum number of times, the error value returned depends on how 56 // golang's map orders keys. This doesn't affect correctness as long as quorum 57 // value is greater than or equal to simple majority, since none of the equally 58 // maximal values would occur quorum or more number of times. 59 func reduceErrs(errs []error, ignoredErrs []error) (maxCount int, maxErr error) { 60 errorCounts := make(map[error]int) 61 for _, err := range errs { 62 if IsErrIgnored(err, ignoredErrs...) { 63 continue 64 } 65 // Errors due to context cancellation may be wrapped - group them by context.Canceled. 66 if errors.Is(err, context.Canceled) { 67 errorCounts[context.Canceled]++ 68 continue 69 } 70 errorCounts[err]++ 71 } 72 73 max := 0 74 for err, count := range errorCounts { 75 switch { 76 case max < count: 77 max = count 78 maxErr = err 79 80 // Prefer `nil` over other error values with the same 81 // number of occurrences. 82 case max == count && err == nil: 83 maxErr = err 84 } 85 } 86 return max, maxErr 87 } 88 89 // reduceQuorumErrs behaves like reduceErrs by only for returning 90 // values of maximally occurring errors validated against a generic 91 // quorum number that can be read or write quorum depending on usage. 92 func reduceQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, quorum int, quorumErr error) error { 93 if contextCanceled(ctx) { 94 return context.Canceled 95 } 96 maxCount, maxErr := reduceErrs(errs, ignoredErrs) 97 if maxCount >= quorum { 98 return maxErr 99 } 100 return quorumErr 101 } 102 103 // reduceReadQuorumErrs behaves like reduceErrs but only for returning 104 // values of maximally occurring errors validated against readQuorum. 105 func reduceReadQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, readQuorum int) (maxErr error) { 106 return reduceQuorumErrs(ctx, errs, ignoredErrs, readQuorum, errErasureReadQuorum) 107 } 108 109 // reduceWriteQuorumErrs behaves like reduceErrs but only for returning 110 // values of maximally occurring errors validated against writeQuorum. 111 func reduceWriteQuorumErrs(ctx context.Context, errs []error, ignoredErrs []error, writeQuorum int) (maxErr error) { 112 return reduceQuorumErrs(ctx, errs, ignoredErrs, writeQuorum, errErasureWriteQuorum) 113 } 114 115 // Similar to 'len(slice)' but returns the actual elements count 116 // skipping the unallocated elements. 117 func diskCount(disks []StorageAPI) int { 118 diskCount := 0 119 for _, disk := range disks { 120 if disk == nil { 121 continue 122 } 123 diskCount++ 124 } 125 return diskCount 126 } 127 128 // hashOrder - hashes input key to return consistent 129 // hashed integer slice. Returned integer order is salted 130 // with an input key. This results in consistent order. 131 // NOTE: collisions are fine, we are not looking for uniqueness 132 // in the slices returned. 133 func hashOrder(key string, cardinality int) []int { 134 if cardinality <= 0 { 135 // Returns an empty int slice for cardinality < 0. 136 return nil 137 } 138 139 nums := make([]int, cardinality) 140 keyCrc := crc32.Checksum([]byte(key), crc32.IEEETable) 141 142 start := int(keyCrc % uint32(cardinality)) 143 for i := 1; i <= cardinality; i++ { 144 nums[i-1] = 1 + ((start + i) % cardinality) 145 } 146 return nums 147 } 148 149 // Reads all `xl.meta` metadata as a FileInfo slice. 150 // Returns error slice indicating the failed metadata reads. 151 func readAllFileInfo(ctx context.Context, disks []StorageAPI, origbucket string, bucket, object, versionID string, readData, healing bool) ([]FileInfo, []error) { 152 metadataArray := make([]FileInfo, len(disks)) 153 154 opts := ReadOptions{ 155 ReadData: readData, 156 Healing: healing, 157 } 158 159 g := errgroup.WithNErrs(len(disks)) 160 // Read `xl.meta` in parallel across disks. 161 for index := range disks { 162 index := index 163 g.Go(func() (err error) { 164 if disks[index] == nil { 165 return errDiskNotFound 166 } 167 metadataArray[index], err = disks[index].ReadVersion(ctx, origbucket, bucket, object, versionID, opts) 168 return err 169 }, index) 170 } 171 172 return metadataArray, g.Wait() 173 } 174 175 // shuffleDisksAndPartsMetadataByIndex this function should be always used by GetObjectNInfo() 176 // and CompleteMultipartUpload code path, it is not meant to be used with PutObject, 177 // NewMultipartUpload metadata shuffling. 178 func shuffleDisksAndPartsMetadataByIndex(disks []StorageAPI, metaArr []FileInfo, fi FileInfo) (shuffledDisks []StorageAPI, shuffledPartsMetadata []FileInfo) { 179 shuffledDisks = make([]StorageAPI, len(disks)) 180 shuffledPartsMetadata = make([]FileInfo, len(disks)) 181 distribution := fi.Erasure.Distribution 182 183 var inconsistent int 184 for i, meta := range metaArr { 185 if disks[i] == nil { 186 // Assuming offline drives as inconsistent, 187 // to be safe and fallback to original 188 // distribution order. 189 inconsistent++ 190 continue 191 } 192 if !meta.IsValid() { 193 inconsistent++ 194 continue 195 } 196 if meta.XLV1 != fi.XLV1 { 197 inconsistent++ 198 continue 199 } 200 // check if erasure distribution order matches the index 201 // position if this is not correct we discard the disk 202 // and move to collect others 203 if distribution[i] != meta.Erasure.Index { 204 inconsistent++ // keep track of inconsistent entries 205 continue 206 } 207 shuffledDisks[meta.Erasure.Index-1] = disks[i] 208 shuffledPartsMetadata[meta.Erasure.Index-1] = metaArr[i] 209 } 210 211 // Inconsistent meta info is with in the limit of 212 // expected quorum, proceed with EcIndex based 213 // disk order. 214 if inconsistent < fi.Erasure.ParityBlocks { 215 return shuffledDisks, shuffledPartsMetadata 216 } 217 218 // fall back to original distribution based order. 219 return shuffleDisksAndPartsMetadata(disks, metaArr, fi) 220 } 221 222 // Return shuffled partsMetadata depending on fi.Distribution. 223 // additional validation is attempted and invalid metadata is 224 // automatically skipped only when fi.ModTime is non-zero 225 // indicating that this is called during read-phase 226 func shuffleDisksAndPartsMetadata(disks []StorageAPI, partsMetadata []FileInfo, fi FileInfo) (shuffledDisks []StorageAPI, shuffledPartsMetadata []FileInfo) { 227 shuffledDisks = make([]StorageAPI, len(disks)) 228 shuffledPartsMetadata = make([]FileInfo, len(partsMetadata)) 229 distribution := fi.Erasure.Distribution 230 231 init := fi.ModTime.IsZero() 232 // Shuffle slice xl metadata for expected distribution. 233 for index := range partsMetadata { 234 if disks[index] == nil { 235 continue 236 } 237 if !init && !partsMetadata[index].IsValid() { 238 // Check for parts metadata validity for only 239 // fi.ModTime is not empty - ModTime is always set, 240 // if object was ever written previously. 241 continue 242 } 243 if !init && fi.XLV1 != partsMetadata[index].XLV1 { 244 continue 245 } 246 blockIndex := distribution[index] 247 shuffledPartsMetadata[blockIndex-1] = partsMetadata[index] 248 shuffledDisks[blockIndex-1] = disks[index] 249 } 250 return shuffledDisks, shuffledPartsMetadata 251 } 252 253 // Return shuffled partsMetadata depending on distribution. 254 func shufflePartsMetadata(partsMetadata []FileInfo, distribution []int) (shuffledPartsMetadata []FileInfo) { 255 if distribution == nil { 256 return partsMetadata 257 } 258 shuffledPartsMetadata = make([]FileInfo, len(partsMetadata)) 259 // Shuffle slice xl metadata for expected distribution. 260 for index := range partsMetadata { 261 blockIndex := distribution[index] 262 shuffledPartsMetadata[blockIndex-1] = partsMetadata[index] 263 } 264 return shuffledPartsMetadata 265 } 266 267 // shuffleDisks - shuffle input disks slice depending on the 268 // erasure distribution. Return shuffled slice of disks with 269 // their expected distribution. 270 func shuffleDisks(disks []StorageAPI, distribution []int) (shuffledDisks []StorageAPI) { 271 if distribution == nil { 272 return disks 273 } 274 shuffledDisks = make([]StorageAPI, len(disks)) 275 // Shuffle disks for expected distribution. 276 for index := range disks { 277 blockIndex := distribution[index] 278 shuffledDisks[blockIndex-1] = disks[index] 279 } 280 return shuffledDisks 281 } 282 283 // evalDisks - returns a new slice of disks where nil is set if 284 // the corresponding error in errs slice is not nil 285 func evalDisks(disks []StorageAPI, errs []error) []StorageAPI { 286 if len(errs) != len(disks) { 287 logger.LogIf(GlobalContext, errors.New("unexpected drives/errors slice length")) 288 return nil 289 } 290 newDisks := make([]StorageAPI, len(disks)) 291 for index := range errs { 292 if errs[index] == nil { 293 newDisks[index] = disks[index] 294 } else { 295 newDisks[index] = nil 296 } 297 } 298 return newDisks 299 } 300 301 // Errors specifically generated by calculatePartSizeFromIdx function. 302 var ( 303 errPartSizeZero = errors.New("Part size cannot be zero") 304 errPartSizeIndex = errors.New("Part index cannot be smaller than 1") 305 ) 306 307 // calculatePartSizeFromIdx calculates the part size according to input index. 308 // returns error if totalSize is -1, partSize is 0, partIndex is 0. 309 func calculatePartSizeFromIdx(ctx context.Context, totalSize int64, partSize int64, partIndex int) (currPartSize int64, err error) { 310 if totalSize < -1 { 311 return 0, errInvalidArgument 312 } 313 if partSize == 0 { 314 return 0, errPartSizeZero 315 } 316 if partIndex < 1 { 317 return 0, errPartSizeIndex 318 } 319 if totalSize == -1 { 320 return -1, nil 321 } 322 if totalSize > 0 { 323 // Compute the total count of parts 324 partsCount := totalSize/partSize + 1 325 // Return the part's size 326 switch { 327 case int64(partIndex) < partsCount: 328 currPartSize = partSize 329 case int64(partIndex) == partsCount: 330 // Size of last part 331 currPartSize = totalSize % partSize 332 default: 333 currPartSize = 0 334 } 335 } 336 return currPartSize, nil 337 }