storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/erasure-healing-common.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2016-2019 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cmd 18 19 import ( 20 "bytes" 21 "context" 22 "time" 23 24 "storj.io/minio/pkg/madmin" 25 ) 26 27 // commonTime returns a maximally occurring time from a list of time. 28 func commonTime(modTimes []time.Time, dataDirs []string) (modTime time.Time, dataDir string) { 29 var maxima int // Counter for remembering max occurrence of elements. 30 31 timeOccurenceMap := make(map[int64]int, len(modTimes)) 32 dataDirOccurenceMap := make(map[string]int, len(dataDirs)) 33 // Ignore the uuid sentinel and count the rest. 34 for _, time := range modTimes { 35 if time.Equal(timeSentinel) { 36 continue 37 } 38 timeOccurenceMap[time.UnixNano()]++ 39 } 40 41 for _, dataDir := range dataDirs { 42 if dataDir == "" { 43 continue 44 } 45 dataDirOccurenceMap[dataDir]++ 46 } 47 48 // Find the common cardinality from previously collected 49 // occurrences of elements. 50 for nano, count := range timeOccurenceMap { 51 t := time.Unix(0, nano) 52 if count > maxima || (count == maxima && t.After(modTime)) { 53 maxima = count 54 modTime = t 55 } 56 } 57 58 // Find the common cardinality from the previously collected 59 // occurrences of elements. 
60 var dmaxima int 61 for ddataDir, count := range dataDirOccurenceMap { 62 if count > dmaxima { 63 dmaxima = count 64 dataDir = ddataDir 65 } 66 } 67 68 // Return the collected common uuid. 69 return modTime, dataDir 70 } 71 72 // Beginning of unix time is treated as sentinel value here. 73 var timeSentinel = time.Unix(0, 0).UTC() 74 75 // Boot modTimes up to disk count, setting the value to time sentinel. 76 func bootModtimes(diskCount int) []time.Time { 77 modTimes := make([]time.Time, diskCount) 78 // Boots up all the modtimes. 79 for i := range modTimes { 80 modTimes[i] = timeSentinel 81 } 82 return modTimes 83 } 84 85 // Extracts list of times from FileInfo slice and returns, skips 86 // slice elements which have errors. 87 func listObjectModtimes(partsMetadata []FileInfo, errs []error) (modTimes []time.Time) { 88 modTimes = bootModtimes(len(partsMetadata)) 89 for index, metadata := range partsMetadata { 90 if errs[index] != nil { 91 continue 92 } 93 // Once the file is found, save the uuid saved on disk. 94 modTimes[index] = metadata.ModTime 95 } 96 return modTimes 97 } 98 99 // Notes: 100 // There are 5 possible states a disk could be in, 101 // 1. __online__ - has the latest copy of xl.meta - returned by listOnlineDisks 102 // 103 // 2. __offline__ - err == errDiskNotFound 104 // 105 // 3. __availableWithParts__ - has the latest copy of xl.meta and has all 106 // parts with checksums matching; returned by disksWithAllParts 107 // 108 // 4. __outdated__ - returned by outDatedDisk, provided []StorageAPI 109 // returned by diskWithAllParts is passed for latestDisks. 110 // - has an old copy of xl.meta 111 // - doesn't have xl.meta (errFileNotFound) 112 // - has the latest xl.meta but one or more parts are corrupt 113 // 114 // 5. __missingParts__ - has the latest copy of xl.meta but has some parts 115 // missing. This is identified separately since this may need manual 116 // inspection to understand the root cause. 
// E.g. this could be due to
// backend filesystem corruption.

// listOnlineDisks - returns
// - a slice of disks where disks having 'older' xl.meta (or nothing)
// are set to nil.
// - latest (in time) of the maximally occurring modTime(s), and the
// corresponding common dataDir.
func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error) (onlineDisks []StorageAPI, modTime time.Time, dataDir string) {
	onlineDisks = make([]StorageAPI, len(disks))

	// List all the file commit ids from parts metadata.
	modTimes := listObjectModtimes(partsMetadata, errs)

	// Collect per-disk dataDirs; entries with read errors stay "".
	dataDirs := make([]string, len(partsMetadata))
	for idx, fi := range partsMetadata {
		if errs[idx] != nil {
			continue
		}
		dataDirs[idx] = fi.DataDir
	}

	// Reduce the lists to a single common (maximally occurring) value.
	modTime, dataDir = commonTime(modTimes, dataDirs)

	// Create a new online disks slice keeping only disks whose metadata
	// matches both the common modTime and the common dataDir; all others
	// are considered outdated and set to nil.
	for index, t := range modTimes {
		if partsMetadata[index].IsValid() && t.Equal(modTime) && partsMetadata[index].DataDir == dataDir {
			onlineDisks[index] = disks[index]
		} else {
			onlineDisks[index] = nil
		}
	}

	return onlineDisks, modTime, dataDir
}

// getLatestFileInfo returns the latest updated FileInfo (the one agreeing
// with the common modTime/dataDir) and an error in case of failure.
// Fails with errErasureReadQuorum when fewer than half of the entries
// agree with the latest metadata.
func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, errs []error) (FileInfo, error) {
	// There should be at least half correct entries, if not return failure
	if reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, len(partsMetadata)/2); reducedErr != nil {
		return FileInfo{}, reducedErr
	}

	// List all the file commit ids from parts metadata.
	modTimes := listObjectModtimes(partsMetadata, errs)

	// Collect per-disk dataDirs; entries with read errors stay "".
	dataDirs := make([]string, len(partsMetadata))
	for idx, fi := range partsMetadata {
		if errs[idx] != nil {
			continue
		}
		dataDirs[idx] = fi.DataDir
	}

	// Count all latest updated FileInfo values
	var count int
	var latestFileInfo FileInfo

	// Reduce the lists to a single common value - i.e. the last updated Time
	modTime, dataDir := commonTime(modTimes, dataDirs)

	// Iterate through all the modTimes and count the FileInfo(s) with latest time.
	for index, t := range modTimes {
		if partsMetadata[index].IsValid() && t.Equal(modTime) && dataDir == partsMetadata[index].DataDir {
			latestFileInfo = partsMetadata[index]
			count++
		}
	}
	// Require at least half the entries to agree on the latest metadata.
	if count < len(partsMetadata)/2 {
		return FileInfo{}, errErasureReadQuorum
	}

	return latestFileInfo, nil
}

// disksWithAllParts - This function needs to be called with
// []StorageAPI returned by listOnlineDisks. Returns,
//
// - disks which have all parts specified in the latest xl.meta.
//
// - slice of errors about the state of data files on disk - can have
//   a not-found error or a hash-mismatch error.
func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo, errs []error, bucket,
	object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
	availableDisks := make([]StorageAPI, len(onlineDisks))
	dataErrs := make([]error, len(onlineDisks))

	// First pass: count metadata entries whose erasure.Distribution does
	// not line up with the disk layout, to decide whether the field can
	// be trusted at all.
	inconsistent := 0
	for i, meta := range partsMetadata {
		if !meta.IsValid() {
			// Since for majority of the cases erasure.Index matches with erasure.Distribution we can
			// consider the offline disks as consistent.
			continue
		}
		if len(meta.Erasure.Distribution) != len(onlineDisks) {
			// Erasure distribution seems to have lesser
			// number of items than number of online disks.
			inconsistent++
			continue
		}
		if meta.Erasure.Distribution[i] != meta.Erasure.Index {
			// Mismatch indexes with distribution order
			inconsistent++
		}
	}

	erasureDistributionReliable := true
	if inconsistent > len(partsMetadata)/2 {
		// If there are too many inconsistent files, then we can't trust erasure.Distribution (most likely
		// because of bugs found in CopyObject/PutObjectTags) https://github.com/minio/minio/pull/10772
		erasureDistributionReliable = false
	}

	// Second pass: classify each disk as available (all parts verified)
	// or record the reason it is not in dataErrs.
	for i, onlineDisk := range onlineDisks {
		if errs[i] != nil {
			dataErrs[i] = errs[i]
			continue
		}
		if onlineDisk == nil {
			dataErrs[i] = errDiskNotFound
			continue
		}
		meta := partsMetadata[i]
		if erasureDistributionReliable {
			if !meta.IsValid() {
				continue
			}

			if len(meta.Erasure.Distribution) != len(onlineDisks) {
				// Erasure distribution is not the same as onlineDisks
				// attempt a fix if possible, assuming other entries
				// might have the right erasure distribution.
				partsMetadata[i] = FileInfo{}
				dataErrs[i] = errFileCorrupt
				continue
			}

			// Since erasure.Distribution is trustable we can fix the mismatching erasure.Index
			if meta.Erasure.Distribution[i] != meta.Erasure.Index {
				partsMetadata[i] = FileInfo{}
				dataErrs[i] = errFileCorrupt
				continue
			}
		}

		// Always check data, if we got it. Inline data (meta.Data) can be
		// bitrot-verified directly without touching the disk; only the
		// first part's checksum is consulted here.
		if (len(meta.Data) > 0 || meta.Size == 0) && len(meta.Parts) > 0 {
			checksumInfo := meta.Erasure.GetChecksumInfo(meta.Parts[0].Number)
			dataErrs[i] = bitrotVerify(bytes.NewBuffer(meta.Data),
				int64(len(meta.Data)),
				meta.Erasure.ShardFileSize(meta.Size),
				checksumInfo.Algorithm,
				checksumInfo.Hash, meta.Erasure.ShardSize())
			if dataErrs[i] == nil {
				// All parts verified, mark it as all data available.
				availableDisks[i] = onlineDisk
			}
			continue
		}

		// No inline data: fall back to on-disk verification whose depth
		// depends on the requested scan mode.
		switch scanMode {
		case madmin.HealDeepScan:
			// disk has a valid xl.meta but may not have all the
			// parts. This is considered an outdated disk, since
			// it needs healing too.
			dataErrs[i] = onlineDisk.VerifyFile(ctx, bucket, object, partsMetadata[i])
		case madmin.HealNormalScan:
			dataErrs[i] = onlineDisk.CheckParts(ctx, bucket, object, partsMetadata[i])
		}

		if dataErrs[i] == nil {
			// All parts verified, mark it as all data available.
			availableDisks[i] = onlineDisk
		}
	}

	return availableDisks, dataErrs
}