storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/background-newdisks-heal-ops.go

/*
 * MinIO Cloud Storage, (C) 2019 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/minio/minio-go/v7/pkg/set"

	"storj.io/minio/cmd/logger"
	"storj.io/minio/pkg/color"
	"storj.io/minio/pkg/console"
	"storj.io/minio/pkg/madmin"
)

const (
	defaultMonitorNewDiskInterval = time.Second * 10
	healingTrackerFilename        = ".healing.bin"
)

//go:generate msgp -file $GOFILE -unexported

// healingTracker is used to persist healing information during a heal.
type healingTracker struct {
	disk StorageAPI `msg:"-"`

	ID            string
	PoolIndex     int
	SetIndex      int
	DiskIndex     int
	Path          string
	Endpoint      string
	Started       time.Time
	LastUpdate    time.Time
	ObjectsHealed uint64
	ObjectsFailed uint64
	BytesDone     uint64
	BytesFailed   uint64

	// Last object scanned.
	Bucket string `json:"-"`
	Object string `json:"-"`

	// Numbers when current bucket started healing,
	// for resuming with correct numbers.
	ResumeObjectsHealed uint64 `json:"-"`
	ResumeObjectsFailed uint64 `json:"-"`
	ResumeBytesDone     uint64 `json:"-"`
	ResumeBytesFailed   uint64 `json:"-"`

	// Filled on startup/restarts.
	QueuedBuckets []string

	// Filled during heal.
	HealedBuckets []string
	// Add future tracking capabilities here.
	// Be sure that they are included in toHealingDisk.
}

// loadHealingTracker will load the healing tracker from the supplied disk.
// The disk ID will be validated against the loaded one.
func loadHealingTracker(ctx context.Context, disk StorageAPI) (*healingTracker, error) {
	if disk == nil {
		return nil, errors.New("loadHealingTracker: nil disk given")
	}
	diskID, err := disk.GetDiskID()
	if err != nil {
		return nil, err
	}
	b, err := disk.ReadAll(ctx, minioMetaBucket,
		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename))
	if err != nil {
		return nil, err
	}
	var h healingTracker
	_, err = h.UnmarshalMsg(b)
	if err != nil {
		return nil, err
	}
	if h.ID != diskID && h.ID != "" {
		return nil, fmt.Errorf("loadHealingTracker: disk id mismatch expected %s, got %s", h.ID, diskID)
	}
	h.disk = disk
	h.ID = diskID
	return &h, nil
}

// newHealingTracker will create a new healing tracker for the disk.
func newHealingTracker(disk StorageAPI) *healingTracker {
	diskID, _ := disk.GetDiskID()
	h := healingTracker{
		disk:     disk,
		ID:       diskID,
		Path:     disk.String(),
		Endpoint: disk.Endpoint().String(),
		Started:  time.Now().UTC(),
	}
	h.PoolIndex, h.SetIndex, h.DiskIndex = disk.GetDiskLoc()
	return &h
}
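
// loadHealingTracker and newHealingTracker combine into a load-or-create
// pattern, mirroring how the monitor below recovers from a missing or
// swapped tracker. A minimal sketch (startTrackedHeal is a hypothetical
// helper, not part of this file):
//
//	func startTrackedHeal(ctx context.Context, disk StorageAPI) (*healingTracker, error) {
//		tracker, err := loadHealingTracker(ctx, disk)
//		if err != nil {
//			// No tracker yet (fresh or swapped disk): start a new one.
//			tracker = newHealingTracker(disk)
//		}
//		if err := tracker.save(ctx); err != nil {
//			return nil, err
//		}
//		return tracker, nil
//	}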
// update will update the tracker on the disk.
// If the tracker has been deleted, an error is returned.
func (h *healingTracker) update(ctx context.Context) error {
	if h.disk.Healing() == nil {
		return fmt.Errorf("healingTracker: disk %q is not marked as healing", h.ID)
	}
	if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
		h.ID, _ = h.disk.GetDiskID()
		h.PoolIndex, h.SetIndex, h.DiskIndex = h.disk.GetDiskLoc()
	}
	return h.save(ctx)
}

// save will unconditionally save the tracker; the file is created if it
// does not already exist.
func (h *healingTracker) save(ctx context.Context) error {
	if h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
		// Attempt to get location.
		if api := newObjectLayerFn(); api != nil {
			if ep, ok := api.(*erasureServerPools); ok {
				h.PoolIndex, h.SetIndex, h.DiskIndex, _ = ep.getPoolAndSet(h.ID)
			}
		}
	}
	h.LastUpdate = time.Now().UTC()
	htrackerBytes, err := h.MarshalMsg(nil)
	if err != nil {
		return err
	}
	globalBackgroundHealState.updateHealStatus(h)
	return h.disk.WriteAll(ctx, minioMetaBucket,
		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename),
		htrackerBytes)
}

// delete the tracker on disk.
func (h *healingTracker) delete(ctx context.Context) error {
	return h.disk.Delete(ctx, minioMetaBucket,
		pathJoin(bucketMetaPrefix, slashSeparator, healingTrackerFilename),
		false)
}

func (h *healingTracker) isHealed(bucket string) bool {
	for _, v := range h.HealedBuckets {
		if v == bucket {
			return true
		}
	}
	return false
}

// resume will reset progress to the numbers at the start of the bucket.
func (h *healingTracker) resume() {
	h.ObjectsHealed = h.ResumeObjectsHealed
	h.ObjectsFailed = h.ResumeObjectsFailed
	h.BytesDone = h.ResumeBytesDone
	h.BytesFailed = h.ResumeBytesFailed
}

// bucketDone should be called when a bucket is done healing.
// Adds the bucket to the list of healed buckets and updates resume numbers.
func (h *healingTracker) bucketDone(bucket string) {
	h.ResumeObjectsHealed = h.ObjectsHealed
	h.ResumeObjectsFailed = h.ObjectsFailed
	h.ResumeBytesDone = h.BytesDone
	h.ResumeBytesFailed = h.BytesFailed
	h.HealedBuckets = append(h.HealedBuckets, bucket)
	for i, b := range h.QueuedBuckets {
		if b == bucket {
			// Delete...
			h.QueuedBuckets = append(h.QueuedBuckets[:i], h.QueuedBuckets[i+1:]...)
		}
	}
}
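
// Taken together, resume and bucketDone give bucket-granular checkpointing.
// A minimal sketch of a resumed heal (assuming the tracker was reloaded via
// loadHealingTracker after an interruption):
//
//	tracker.resume() // roll counters back to the last completed bucket
//	for _, b := range append([]string{}, tracker.QueuedBuckets...) {
//		// ... heal objects in b, updating ObjectsHealed/BytesDone ...
//		tracker.bucketDone(b) // checkpoint at the bucket boundary
//		logger.LogIf(ctx, tracker.save(ctx))
//	}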
// setQueuedBuckets will add buckets, but exclude any that are already in
// h.HealedBuckets. Order is preserved.
func (h *healingTracker) setQueuedBuckets(buckets []BucketInfo) {
	s := set.CreateStringSet(h.HealedBuckets...)
	h.QueuedBuckets = make([]string, 0, len(buckets))
	for _, b := range buckets {
		if !s.Contains(b.Name) {
			h.QueuedBuckets = append(h.QueuedBuckets, b.Name)
		}
	}
}

func (h *healingTracker) printTo(writer io.Writer) {
	b, err := json.MarshalIndent(h, "", " ")
	if err != nil {
		writer.Write([]byte(err.Error()))
		return
	}
	writer.Write(b)
}

// toHealingDisk converts the information to madmin.HealingDisk.
func (h *healingTracker) toHealingDisk() madmin.HealingDisk {
	return madmin.HealingDisk{
		ID:            h.ID,
		Endpoint:      h.Endpoint,
		PoolIndex:     h.PoolIndex,
		SetIndex:      h.SetIndex,
		DiskIndex:     h.DiskIndex,
		Path:          h.Path,
		Started:       h.Started.UTC(),
		LastUpdate:    h.LastUpdate.UTC(),
		ObjectsHealed: h.ObjectsHealed,
		ObjectsFailed: h.ObjectsFailed,
		BytesDone:     h.BytesDone,
		BytesFailed:   h.BytesFailed,
		Bucket:        h.Bucket,
		Object:        h.Object,
		QueuedBuckets: h.QueuedBuckets,
		HealedBuckets: h.HealedBuckets,
	}
}

func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
	z, ok := objAPI.(*erasureServerPools)
	if !ok {
		return
	}

	initBackgroundHealing(ctx, objAPI) // start quick background healing

	bgSeq := mustGetHealSequence(ctx)

	globalBackgroundHealState.pushHealLocalDisks(getLocalDisksToHeal()...)

	if drivesToHeal := globalBackgroundHealState.healDriveCount(); drivesToHeal > 0 {
		logger.Info(fmt.Sprintf("Found %d drives to heal, waiting until %s to heal the content...",
			drivesToHeal, defaultMonitorNewDiskInterval))

		// Heal any disk format and metadata early, if possible.
		// Start with format healing.
		if err := bgSeq.healDiskFormat(); err != nil {
			if newObjectLayerFn() != nil {
				// Log only when the object layer has fully initialized.
				logger.LogIf(bgSeq.ctx, err)
			}
		}
	}

	if err := bgSeq.healDiskMeta(objAPI); err != nil {
		if newObjectLayerFn() != nil {
			// Log only when the object layer has fully initialized.
			logger.LogIf(bgSeq.ctx, err)
		}
	}

	go monitorLocalDisksAndHeal(ctx, z, bgSeq)
}

func getLocalDisksToHeal() (disksToHeal Endpoints) {
	for _, ep := range globalEndpoints {
		for _, endpoint := range ep.Endpoints {
			if !endpoint.IsLocal {
				continue
			}
			// Try to connect to the current endpoint
			// and reformat if the current disk is not formatted.
			disk, _, err := connectEndpoint(endpoint)
			if errors.Is(err, errUnformattedDisk) {
				disksToHeal = append(disksToHeal, endpoint)
			} else if err == nil && disk != nil && disk.Healing() != nil {
				disksToHeal = append(disksToHeal, disk.Endpoint())
			}
		}
	}
	return disksToHeal
}

func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
	// Run the background healer.
	globalBackgroundHealRoutine = newHealRoutine()
	go globalBackgroundHealRoutine.run(ctx, objAPI)

	globalBackgroundHealState.LaunchNewHealSequence(newBgHealSequence(), objAPI)
}
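
// The monitor below drives its work from a one-shot timer that is
// explicitly re-armed on every pass, rather than a ticker. Its control-flow
// skeleton, stripped of the healing work itself (a sketch, not additional
// behavior):
//
//	t := time.NewTimer(defaultMonitorNewDiskInterval)
//	defer t.Stop()
//	for {
//		select {
//		case <-ctx.Done():
//			return
//		case <-t.C:
//			t.Reset(defaultMonitorNewDiskInterval)
//			// ... one disk-check and healing pass ...
//		}
//	}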
// monitorLocalDisksAndHeal - ensures that detected new disks are healed.
//  1. Only the concerned erasure set will be listed and healed.
//  2. Only the node hosting the disk is responsible for performing the heal.
func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerPools, bgSeq *healSequence) {
	// Perform automatic disk healing when a disk is replaced locally.
	diskCheckTimer := time.NewTimer(defaultMonitorNewDiskInterval)
	defer diskCheckTimer.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-diskCheckTimer.C:
			// Reset to next interval.
			diskCheckTimer.Reset(defaultMonitorNewDiskInterval)

			var erasureSetInPoolDisksToHeal []map[int][]StorageAPI

			healDisks := globalBackgroundHealState.getHealLocalDiskEndpoints()
			if len(healDisks) > 0 {
				// Reformat disks.
				bgSeq.sourceCh <- healSource{bucket: SlashSeparator}

				// Ensure that reformatting disks is finished.
				bgSeq.sourceCh <- healSource{bucket: nopHeal}

				logger.Info(fmt.Sprintf("Found %d drives to heal, proceeding to heal content...",
					len(healDisks)))

				erasureSetInPoolDisksToHeal = make([]map[int][]StorageAPI, len(z.serverPools))
				for i := range z.serverPools {
					erasureSetInPoolDisksToHeal[i] = map[int][]StorageAPI{}
				}
			}

			if serverDebugLog {
				console.Debugf(color.Green("healDisk:")+" disk check timer fired, attempting to heal %d drives\n", len(healDisks))
			}

			// Heal only if new disks were found.
			for _, endpoint := range healDisks {
				disk, format, err := connectEndpoint(endpoint)
				if err != nil {
					printEndpointError(endpoint, err, true)
					continue
				}

				poolIdx := globalEndpoints.GetLocalPoolIdx(disk.Endpoint())
				if poolIdx < 0 {
					continue
				}

				// Calculate the set index where the current endpoint belongs.
				z.serverPools[poolIdx].erasureDisksMu.RLock()
				// Protect reading reference format.
				setIndex, _, err := findDiskIndex(z.serverPools[poolIdx].format, format)
				z.serverPools[poolIdx].erasureDisksMu.RUnlock()
				if err != nil {
					printEndpointError(endpoint, err, false)
					continue
				}

				erasureSetInPoolDisksToHeal[poolIdx][setIndex] = append(erasureSetInPoolDisksToHeal[poolIdx][setIndex], disk)
			}

			buckets, _ := z.ListBuckets(ctx)

			// Bucket data is dispersed across multiple pools/sets; make
			// sure to heal all bucket metadata configuration as well.
			buckets = append(buckets,
				BucketInfo{Name: pathJoin(minioMetaBucket, minioConfigPrefix)},
				BucketInfo{Name: pathJoin(minioMetaBucket, bucketMetaPrefix)},
			)

			// Heal the latest buckets first.
			sort.Slice(buckets, func(i, j int) bool {
				a, b := strings.HasPrefix(buckets[i].Name, minioMetaBucket), strings.HasPrefix(buckets[j].Name, minioMetaBucket)
				if a != b {
					return a
				}
				return buckets[i].Created.After(buckets[j].Created)
			})

			// TODO(klauspost): This will block until all heals are done;
			// in the future this should be able to start healing other sets at once.
			var wg sync.WaitGroup
			for i, setMap := range erasureSetInPoolDisksToHeal {
				i := i
				for setIndex, disks := range setMap {
					if len(disks) == 0 {
						continue
					}
					wg.Add(1)
					go func(setIndex int, disks []StorageAPI) {
						defer wg.Done()
						for _, disk := range disks {
							logger.Info("Healing disk '%v' on %s pool", disk, humanize.Ordinal(i+1))

							// The healing tracker may be missing if the
							// drive was swapped underneath us.
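							// If it loads, the heal resumes from the recorded
							// bucket queue and resume counters; otherwise a
							// fresh tracker restarts the heal of this disk
							// from the beginning.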
							tracker, err := loadHealingTracker(ctx, disk)
							if err != nil {
								logger.Info("Healing tracker missing on '%s', disk was swapped again on %s pool", disk, humanize.Ordinal(i+1))
								tracker = newHealingTracker(disk)
							}

							tracker.PoolIndex, tracker.SetIndex, tracker.DiskIndex = disk.GetDiskLoc()
							tracker.setQueuedBuckets(buckets)
							if err := tracker.save(ctx); err != nil {
								logger.LogIf(ctx, err)
								// Unable to write the healing tracker (permission
								// denied or some other unexpected error). We cannot
								// proceed with healing this disk; return and look
								// for disks to heal again on the next tick.
								return
							}

							err = z.serverPools[i].sets[setIndex].healErasureSet(ctx, buckets, tracker)
							if err != nil {
								logger.LogIf(ctx, err)
								continue
							}

							logger.Info("Healing disk '%s' on %s pool complete", disk, humanize.Ordinal(i+1))
							var buf bytes.Buffer
							tracker.printTo(&buf)
							logger.Info("Summary:\n%s", buf.String())
							logger.LogIf(ctx, tracker.delete(ctx))

							// Only upon success pop the healed disk.
							globalBackgroundHealState.popHealLocalDisks(disk.Endpoint())
						}
					}(setIndex, disks)
				}
			}
			wg.Wait()
		}
	}
}
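
// A minimal debugging sketch (assuming raw holds the bytes of a
// ".healing.bin" file obtained via StorageAPI.ReadAll, as in
// loadHealingTracker above, and that "os" is imported for os.Stdout):
//
//	var h healingTracker
//	if _, err := h.UnmarshalMsg(raw); err != nil {
//		return err // corrupt or truncated tracker file
//	}
//	h.printTo(os.Stdout) // dumps the tracker state as indented JSON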