github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/xl-storage-disk-id-check.go

// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"context"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/minio/madmin-go/v3"
	"github.com/minio/minio/internal/cachevalue"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
)

//go:generate stringer -type=storageMetric -trimprefix=storageMetric $GOFILE

type storageMetric uint8

const (
	storageMetricMakeVolBulk storageMetric = iota
	storageMetricMakeVol
	storageMetricListVols
	storageMetricStatVol
	storageMetricDeleteVol
	storageMetricWalkDir
	storageMetricListDir
	storageMetricReadFile
	storageMetricAppendFile
	storageMetricCreateFile
	storageMetricReadFileStream
	storageMetricRenameFile
	storageMetricRenameData
	storageMetricCheckParts
	storageMetricDelete
	storageMetricDeleteVersions
	storageMetricVerifyFile
	storageMetricWriteAll
	storageMetricDeleteVersion
	storageMetricWriteMetadata
	storageMetricUpdateMetadata
	storageMetricReadVersion
	storageMetricReadXL
	storageMetricReadAll
	storageMetricStatInfoFile
	storageMetricReadMultiple
	storageMetricDeleteAbandonedParts
	storageMetricDiskInfo

	// .... add more

	storageMetricLast
)

// Detects change in underlying disk.
type xlStorageDiskIDCheck struct {
	totalWrites           atomic.Uint64
	totalDeletes          atomic.Uint64
	totalErrsAvailability atomic.Uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors.
	totalErrsTimeout      atomic.Uint64 // Captures all timeout only errors

	// apiCalls should be placed first so alignment is guaranteed for atomic operations.
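	// (On 32-bit platforms sync/atomic requires 64-bit words to be 8-byte aligned;
	// keeping this array at a fixed, early offset in the struct preserves that guarantee.
	// See the alignment notes in the sync/atomic package documentation.)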
	apiCalls     [storageMetricLast]uint64
	apiLatencies [storageMetricLast]*lockedLastMinuteLatency
	diskID       string
	storage      *xlStorage
	health       *diskHealthTracker
	healthCheck  bool

	metricsCache *cachevalue.Cache[DiskMetrics]
	diskCtx      context.Context
	diskCancel   context.CancelFunc
}

func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
	p.metricsCache.InitOnce(5*time.Second,
		cachevalue.Opts{},
		func() (DiskMetrics, error) {
			diskMetric := DiskMetrics{
				LastMinute: make(map[string]AccElem, len(p.apiLatencies)),
				APICalls:   make(map[string]uint64, len(p.apiCalls)),
			}
			for i, v := range p.apiLatencies {
				diskMetric.LastMinute[storageMetric(i).String()] = v.total()
			}
			for i := range p.apiCalls {
				diskMetric.APICalls[storageMetric(i).String()] = atomic.LoadUint64(&p.apiCalls[i])
			}
			return diskMetric, nil
		},
	)

	diskMetric, _ := p.metricsCache.Get()
	// Do not need this value to be cached.
	diskMetric.TotalErrorsTimeout = p.totalErrsTimeout.Load()
	diskMetric.TotalErrorsAvailability = p.totalErrsAvailability.Load()

	return diskMetric
}

// lockedLastMinuteLatency accumulates per-second totals without locking on the hot path.
type lockedLastMinuteLatency struct {
	cachedSec int64
	cached    atomic.Pointer[AccElem]
	mu        sync.Mutex
	init      sync.Once
	lastMinuteLatency
}

func (e *lockedLastMinuteLatency) add(value time.Duration) {
	e.addSize(value, 0)
}

// addSize will add a duration and size.
func (e *lockedLastMinuteLatency) addSize(value time.Duration, sz int64) {
	// Record the current second; a fresh accumulator is swapped in whenever the second rolls over.
	t := time.Now().Unix()
	e.init.Do(func() {
		e.cached.Store(&AccElem{})
		atomic.StoreInt64(&e.cachedSec, t)
	})
	acc := e.cached.Load()
	if lastT := atomic.LoadInt64(&e.cachedSec); lastT != t {
		// Check if lastT was changed by someone else.
		if atomic.CompareAndSwapInt64(&e.cachedSec, lastT, t) {
			// We won the race; swap in a fresh accumulator.
			newAcc := &AccElem{}
			old := e.cached.Swap(newAcc)
			var a AccElem
			a.Size = atomic.LoadInt64(&old.Size)
			a.Total = atomic.LoadInt64(&old.Total)
			a.N = atomic.LoadInt64(&old.N)
			e.mu.Lock()
			e.lastMinuteLatency.addAll(t-1, a)
			e.mu.Unlock()
			acc = newAcc
		} else {
			// We may be able to grab the new accumulator by yielding.
			runtime.Gosched()
			acc = e.cached.Load()
		}
	}
	atomic.AddInt64(&acc.N, 1)
	atomic.AddInt64(&acc.Total, int64(value))
	atomic.AddInt64(&acc.Size, sz)
}

// total returns the total call count and latency for the last minute.
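// Note that the second currently being accumulated is folded in only when a
// later add rolls it over, so it may not yet be included in the result.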
func (e *lockedLastMinuteLatency) total() AccElem {
	e.mu.Lock()
	defer e.mu.Unlock()
	return e.lastMinuteLatency.getTotal()
}

func newXLStorageDiskIDCheck(storage *xlStorage, healthCheck bool) *xlStorageDiskIDCheck {
	xl := xlStorageDiskIDCheck{
		storage:      storage,
		health:       newDiskHealthTracker(),
		healthCheck:  healthCheck && globalDriveMonitoring,
		metricsCache: cachevalue.New[DiskMetrics](),
	}

	xl.totalWrites.Store(xl.storage.getWriteAttribute())
	xl.totalDeletes.Store(xl.storage.getDeleteAttribute())
	xl.diskCtx, xl.diskCancel = context.WithCancel(context.TODO())
	for i := range xl.apiLatencies[:] {
		xl.apiLatencies[i] = &lockedLastMinuteLatency{}
	}
	if xl.healthCheck {
		go xl.monitorDiskWritable(xl.diskCtx)
	}
	return &xl
}

func (p *xlStorageDiskIDCheck) String() string {
	return p.storage.String()
}

func (p *xlStorageDiskIDCheck) IsOnline() bool {
	storedDiskID, err := p.storage.GetDiskID()
	if err != nil {
		return false
	}
	return storedDiskID == p.diskID
}

func (p *xlStorageDiskIDCheck) LastConn() time.Time {
	return p.storage.LastConn()
}

func (p *xlStorageDiskIDCheck) IsLocal() bool {
	return p.storage.IsLocal()
}

func (p *xlStorageDiskIDCheck) Endpoint() Endpoint {
	return p.storage.Endpoint()
}

func (p *xlStorageDiskIDCheck) Hostname() string {
	return p.storage.Hostname()
}

func (p *xlStorageDiskIDCheck) Healing() *healingTracker {
	return p.storage.Healing()
}

func (p *xlStorageDiskIDCheck) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode, _ func() bool) (dataUsageCache, error) {
	if contextCanceled(ctx) {
		xioutil.SafeClose(updates)
		return dataUsageCache{}, ctx.Err()
	}

	if err := p.checkDiskStale(); err != nil {
		xioutil.SafeClose(updates)
		return dataUsageCache{}, err
	}

	weSleep := func() bool {
		return scannerIdleMode.Load() == 0
	}

	return p.storage.NSScanner(ctx, cache, updates, scanMode, weSleep)
}

func (p *xlStorageDiskIDCheck) SetFormatData(b []byte) {
	p.storage.SetFormatData(b)
}

func (p *xlStorageDiskIDCheck) GetDiskLoc() (poolIdx, setIdx, diskIdx int) {
	return p.storage.GetDiskLoc()
}

func (p *xlStorageDiskIDCheck) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
	p.storage.SetDiskLoc(poolIdx, setIdx, diskIdx)
}

func (p *xlStorageDiskIDCheck) Close() error {
	p.diskCancel()
	return p.storage.Close()
}

func (p *xlStorageDiskIDCheck) GetDiskID() (string, error) {
	return p.storage.GetDiskID()
}

func (p *xlStorageDiskIDCheck) SetDiskID(id string) {
	p.diskID = id
}

func (p *xlStorageDiskIDCheck) checkDiskStale() error {
	if p.diskID == "" {
		// For empty disk-id we allow the call as the server might be
		// coming up and trying to read format.json or create format.json
		return nil
	}
	storedDiskID, err := p.storage.GetDiskID()
	if err != nil {
		// return any error generated while reading `format.json`
		return err
	}
	if err == nil && p.diskID == storedDiskID {
		return nil
	}
	// not the same disk we remember, take it offline.
	return errDiskNotFound
}

func (p *xlStorageDiskIDCheck) DiskInfo(ctx context.Context, opts DiskInfoOptions) (info DiskInfo, err error) {
	if contextCanceled(ctx) {
		return DiskInfo{}, ctx.Err()
	}

	si := p.updateStorageMetrics(storageMetricDiskInfo)
	defer si(&err)

	if opts.NoOp {
		if opts.Metrics {
			info.Metrics = p.getMetrics()
		}
		info.Metrics.TotalWrites = p.totalWrites.Load()
		info.Metrics.TotalDeletes = p.totalDeletes.Load()
		info.Metrics.TotalWaiting = uint32(p.health.waiting.Load())
		info.Metrics.TotalErrorsTimeout = p.totalErrsTimeout.Load()
		info.Metrics.TotalErrorsAvailability = p.totalErrsAvailability.Load()
		if p.health.isFaulty() {
			// if disk is already faulty return faulty for 'mc admin info' output and Prometheus alerts.
			return info, errFaultyDisk
		}
		return info, nil
	}

	defer func() {
		if opts.Metrics {
			info.Metrics = p.getMetrics()
		}
		info.Metrics.TotalWrites = p.totalWrites.Load()
		info.Metrics.TotalDeletes = p.totalDeletes.Load()
		info.Metrics.TotalWaiting = uint32(p.health.waiting.Load())
		info.Metrics.TotalErrorsTimeout = p.totalErrsTimeout.Load()
		info.Metrics.TotalErrorsAvailability = p.totalErrsAvailability.Load()
	}()

	if p.health.isFaulty() {
		// if disk is already faulty return faulty for 'mc admin info' output and Prometheus alerts.
		return info, errFaultyDisk
	}

	info, err = p.storage.DiskInfo(ctx, opts)
	if err != nil {
		return info, err
	}

	// check cached diskID against backend
	// only if it's non-empty.
	if p.diskID != "" && p.diskID != info.ID {
		return info, errDiskNotFound
	}
	return info, nil
}

func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...string) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricMakeVolBulk, volumes...)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.MakeVolBulk(ctx, volumes...) })
}

func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricMakeVol, volume)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.MakeVol(ctx, volume) })
}

func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) (vi []VolInfo, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricListVols, "/")
	if err != nil {
		return nil, err
	}
	defer done(&err)

	return p.storage.ListVols(ctx)
}

func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricStatVol, volume)
	if err != nil {
		return vol, err
	}
	defer done(&err)

	return xioutil.WithDeadline[VolInfo](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result VolInfo, err error) {
		return p.storage.StatVol(ctx, volume)
	})
}

func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteVol, volume)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.DeleteVol(ctx, volume, forceDelete) })
}

func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, origvolume, volume, dirPath string, count int) (s []string, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricListDir, volume, dirPath)
	if err != nil {
		return nil, err
	}
	defer done(&err)

	return p.storage.ListDir(ctx, origvolume, volume, dirPath, count)
}

// Legacy API - does not have any deadlines
func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path string, offset int64, buf []byte, verifier *BitrotVerifier) (n int64, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadFile, volume, path)
	if err != nil {
		return 0, err
	}
	defer done(&err)

	return xioutil.WithDeadline[int64](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result int64, err error) {
		return p.storage.ReadFile(ctx, volume, path, offset, buf, verifier)
	})
}

// Legacy API - does not have any deadlines
func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricAppendFile, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error {
		return p.storage.AppendFile(ctx, volume, path, buf)
	})
}

func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, origvolume, volume, path string, size int64, reader io.Reader) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricCreateFile, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	return p.storage.CreateFile(ctx, origvolume, volume, path, size, io.NopCloser(reader))
}

func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadFileStream, volume, path)
	if err != nil {
		return nil, err
	}
	defer done(&err)

	return xioutil.WithDeadline[io.ReadCloser](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result io.ReadCloser, err error) {
		return p.storage.ReadFileStream(ctx, volume, path, offset, length)
	})
}

func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricRenameFile, srcVolume, srcPath, dstVolume, dstPath)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.RenameFile(ctx, srcVolume, srcPath, dstVolume, dstPath) })
}

func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricRenameData, srcPath, fi.DataDir, dstVolume, dstPath)
	if err != nil {
		return 0, err
	}
	defer func() {
		if err == nil && !skipAccessChecks(dstVolume) {
			p.storage.setWriteAttribute(p.totalWrites.Add(1))
		}
		done(&err)
	}()

	return xioutil.WithDeadline[uint64](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result uint64, err error) {
		return p.storage.RenameData(ctx, srcVolume, srcPath, fi, dstVolume, dstPath, opts)
	})
}

func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricCheckParts, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.CheckParts(ctx, volume, path, fi) })
}

func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, deleteOpts DeleteOptions) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDelete, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.Delete(ctx, volume, path, deleteOpts) })
}

// DeleteVersions deletes a slice of versions; they may belong to the same object
// or to multiple objects.
func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfoVersions, opts DeleteOptions) (errs []error) {
	// Merely for tracing storage
	path := ""
	if len(versions) > 0 {
		path = versions[0].Name
	}
	errs = make([]error, len(versions))
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteVersions, volume, path)
	if err != nil {
		for i := range errs {
			errs[i] = ctx.Err()
		}
		return errs
	}
	defer func() {
		if !skipAccessChecks(volume) {
			var permanentDeletes uint64
			var deleteMarkers uint64

			for i, nerr := range errs {
				if nerr != nil {
					continue
				}
				for _, fi := range versions[i].Versions {
					if fi.Deleted {
						// Delete markers are a write operation, not a permanent delete.
						deleteMarkers++
						continue
					}
					permanentDeletes++
				}
			}
			if deleteMarkers > 0 {
				p.storage.setWriteAttribute(p.totalWrites.Add(deleteMarkers))
			}
			if permanentDeletes > 0 {
				p.storage.setDeleteAttribute(p.totalDeletes.Add(permanentDeletes))
			}
		}
		done(&err)
	}()

	errs = p.storage.DeleteVersions(ctx, volume, versions, opts)
	for i := range errs {
		if errs[i] != nil {
			err = errs[i]
			break
		}
	}

	return errs
}

func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricVerifyFile, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	return p.storage.VerifyFile(ctx, volume, path, fi)
}

func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricWriteAll, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.WriteAll(ctx, volume, path, b) })
}

func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool, opts DeleteOptions) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteVersion, volume, path)
	if err != nil {
		return err
	}
	defer func() {
		defer done(&err)

		if err == nil && !skipAccessChecks(volume) {
			if opts.UndoWrite {
				// Adding ^uint64(0) decrements the counter by one, rolling back the earlier write accounting.
				p.storage.setWriteAttribute(p.totalWrites.Add(^uint64(0)))
				return
			}

			if fi.Deleted {
				// Delete markers are a write operation, not a permanent delete.
				p.storage.setWriteAttribute(p.totalWrites.Add(1))
				return
			}

			p.storage.setDeleteAttribute(p.totalDeletes.Add(1))
		}
	}()

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.DeleteVersion(ctx, volume, path, fi, forceDelMarker, opts) })
}

func (p *xlStorageDiskIDCheck) UpdateMetadata(ctx context.Context, volume, path string, fi FileInfo, opts UpdateMetadataOpts) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricUpdateMetadata, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.UpdateMetadata(ctx, volume, path, fi, opts) })
}

func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, origvolume, volume, path string, fi FileInfo) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricWriteMetadata, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.WriteMetadata(ctx, origvolume, volume, path, fi) })
}

func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, origvolume, volume, path, versionID string, opts ReadOptions) (fi FileInfo, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadVersion, volume, path)
	if err != nil {
		return fi, err
	}
	defer done(&err)

	return xioutil.WithDeadline[FileInfo](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result FileInfo, err error) {
		return p.storage.ReadVersion(ctx, origvolume, volume, path, versionID, opts)
	})
}

func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadAll, volume, path)
	if err != nil {
		return nil, err
	}
	defer done(&err)

	return xioutil.WithDeadline[[]byte](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result []byte, err error) {
		return p.storage.ReadAll(ctx, volume, path)
	})
}

func (p *xlStorageDiskIDCheck) ReadXL(ctx context.Context, volume string, path string, readData bool) (rf RawFileInfo, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadXL, volume, path)
	if err != nil {
		return RawFileInfo{}, err
	}
	defer done(&err)

	return xioutil.WithDeadline[RawFileInfo](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result RawFileInfo, err error) {
		return p.storage.ReadXL(ctx, volume, path, readData)
	})
}

func (p *xlStorageDiskIDCheck) StatInfoFile(ctx context.Context, volume, path string, glob bool) (stat []StatInfo, err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricStatInfoFile, volume, path)
	if err != nil {
		return nil, err
	}
	defer done(&err)

	return p.storage.StatInfoFile(ctx, volume, path, glob)
}

// ReadMultiple will read multiple files and send each file as a response.
// Files are read and returned in the given order.
// The resp channel is closed before the call returns.
// Only a canceled context will return an error.
func (p *xlStorageDiskIDCheck) ReadMultiple(ctx context.Context, req ReadMultipleReq, resp chan<- ReadMultipleResp) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadMultiple, req.Bucket, req.Prefix)
	if err != nil {
		xioutil.SafeClose(resp)
		return err
	}
	defer done(&err)

	return p.storage.ReadMultiple(ctx, req, resp)
}

// CleanAbandonedData will read metadata of the object on disk
// and delete any data directories and inline data that aren't referenced in the metadata.
func (p *xlStorageDiskIDCheck) CleanAbandonedData(ctx context.Context, volume string, path string) (err error) {
	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricDeleteAbandonedParts, volume, path)
	if err != nil {
		return err
	}
	defer done(&err)

	w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout())
	return w.Run(func() error { return p.storage.CleanAbandonedData(ctx, volume, path) })
}

func storageTrace(s storageMetric, startTime time.Time, duration time.Duration, path string, err string, custom map[string]string) madmin.TraceInfo {
	return madmin.TraceInfo{
		TraceType: madmin.TraceStorage,
		Time:      startTime,
		NodeName:  globalLocalNodeName,
		FuncName:  "storage." + s.String(),
		Duration:  duration,
		Path:      path,
		Error:     err,
		Custom:    custom,
	}
}

func scannerTrace(s scannerMetric, startTime time.Time, duration time.Duration, path string, custom map[string]string) madmin.TraceInfo {
	return madmin.TraceInfo{
		TraceType: madmin.TraceScanner,
		Time:      startTime,
		NodeName:  globalLocalNodeName,
		FuncName:  "scanner." + s.String(),
		Duration:  duration,
		Path:      path,
		Custom:    custom,
	}
}

// updateStorageMetrics returns a closure that, when invoked, records the call count,
// latency and error classification for metric s, and publishes a trace if subscribed.
func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...string) func(err *error) {
	startTime := time.Now()
	trace := globalTrace.NumSubscribers(madmin.TraceStorage) > 0
	return func(errp *error) {
		duration := time.Since(startTime)

		var err error
		if errp != nil && *errp != nil {
			err = *errp
		}

		atomic.AddUint64(&p.apiCalls[s], 1)
		if IsErr(err, []error{
			errVolumeAccessDenied,
			errFileAccessDenied,
			errDiskAccessDenied,
			errFaultyDisk,
			errFaultyRemoteDisk,
			context.DeadlineExceeded,
		}...) {
			p.totalErrsAvailability.Add(1)
			if errors.Is(err, context.DeadlineExceeded) {
				p.totalErrsTimeout.Add(1)
			}
		}

		p.apiLatencies[s].add(duration)

		if trace {
			custom := make(map[string]string, 2)
			paths = append([]string{p.String()}, paths...)
			var errStr string
			if err != nil {
				errStr = err.Error()
			}
			custom["total-errs-timeout"] = strconv.FormatUint(p.totalErrsTimeout.Load(), 10)
			custom["total-errs-availability"] = strconv.FormatUint(p.totalErrsAvailability.Load(), 10)
			globalTrace.Publish(storageTrace(s, startTime, duration, strings.Join(paths, " "), errStr, custom))
		}
	}
}

const (
	diskHealthOK int32 = iota
	diskHealthFaulty
)

type diskHealthTracker struct {
	// atomic time of last success
	lastSuccess int64

	// atomic time of last time a token was grabbed.
	lastStarted int64

	// Atomic status of disk.
	status atomic.Int32

	// Atomic number of tracked requests currently in flight; used to flag a potentially hung disk.
	waiting atomic.Int32
}

// newDiskHealthTracker creates a new disk health tracker.
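// The tracker starts out healthy, with both timestamps initialized to the current time.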
func newDiskHealthTracker() *diskHealthTracker {
	d := diskHealthTracker{
		lastSuccess: time.Now().UnixNano(),
		lastStarted: time.Now().UnixNano(),
	}
	d.status.Store(diskHealthOK)
	return &d
}

// logSuccess will update the last successful operation time.
func (d *diskHealthTracker) logSuccess() {
	atomic.StoreInt64(&d.lastSuccess, time.Now().UnixNano())
}

func (d *diskHealthTracker) isFaulty() bool {
	return d.status.Load() == diskHealthFaulty
}

type (
	healthDiskCtxKey   struct{}
	healthDiskCtxValue struct {
		lastSuccess *int64
	}
)

// logSuccess will update the last successful operation time.
func (h *healthDiskCtxValue) logSuccess() {
	atomic.StoreInt64(h.lastSuccess, time.Now().UnixNano())
}

// noopDoneFunc is a no-op done func.
// Can be reused.
var noopDoneFunc = func(_ *error) {}

// TrackDiskHealth for this request.
// When a nil error is returned, 'done' MUST be called
// with the status of the response, if it corresponds to disk health.
// If the pointer sent to done is non-nil AND the error
// is either nil or io.EOF the disk is considered good.
// So if unsure about the disk status, pass nil to done.
// Shadowing will work as long as the return error is named: https://go.dev/play/p/sauq86SsTN2
func (p *xlStorageDiskIDCheck) TrackDiskHealth(ctx context.Context, s storageMetric, paths ...string) (c context.Context, done func(*error), err error) {
	done = noopDoneFunc
	if contextCanceled(ctx) {
		return ctx, done, ctx.Err()
	}

	if p.health.status.Load() != diskHealthOK {
		return ctx, done, errFaultyDisk
	}

	// Verify that the disk is not stale, i.e. either of:
	// - missing format.json (unformatted drive)
	// - format.json is valid but carries a different 'uuid' than the one we remember
	if err = p.checkDiskStale(); err != nil {
		return ctx, done, err
	}

	// Disallow recursive tracking to avoid deadlocks.
	if ctx.Value(healthDiskCtxKey{}) != nil {
		done = p.updateStorageMetrics(s, paths...)
		return ctx, done, nil
	}

	if contextCanceled(ctx) {
		return ctx, done, ctx.Err()
	}

	atomic.StoreInt64(&p.health.lastStarted, time.Now().UnixNano())
	p.health.waiting.Add(1)

	ctx = context.WithValue(ctx, healthDiskCtxKey{}, &healthDiskCtxValue{lastSuccess: &p.health.lastSuccess})
	si := p.updateStorageMetrics(s, paths...)
	var once sync.Once
	return ctx, func(errp *error) {
		p.health.waiting.Add(-1)
		once.Do(func() {
			if errp != nil {
				err := *errp
				if err == nil || errors.Is(err, io.EOF) {
					p.health.logSuccess()
				}
			}
			si(errp)
		})
	}, nil
}

// toWrite is the probe payload used by the health checks: the sparse literal
// allocates 2049 bytes, all zero except the final byte (42).
var toWrite = []byte{2048: 42}

// monitorDiskStatus should be called once when a drive has been marked offline.
// Once the disk has been deemed ok, it will return to online status.
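// It probes the drive every 5 seconds with a write/read/delete cycle against the
// temporary metadata bucket and clears the faulty status once a full cycle succeeds.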
func (p *xlStorageDiskIDCheck) monitorDiskStatus(spent time.Duration, fn string) {
	t := time.NewTicker(5 * time.Second)
	defer t.Stop()

	for range t.C {
		if contextCanceled(p.diskCtx) {
			return
		}

		err := p.storage.WriteAll(context.Background(), minioMetaTmpBucket, fn, toWrite)
		if err != nil {
			continue
		}

		b, err := p.storage.ReadAll(context.Background(), minioMetaTmpBucket, fn)
		if err != nil || len(b) != len(toWrite) {
			continue
		}

		err = p.storage.Delete(context.Background(), minioMetaTmpBucket, fn, DeleteOptions{
			Recursive: false,
			Immediate: false,
		})

		if err == nil {
			logger.Event(context.Background(), "node(%s): Read/Write/Delete successful, bringing drive %s online", globalLocalNodeName, p.storage.String())
			p.health.status.Store(diskHealthOK)
			p.health.waiting.Add(-1)
			return
		}
	}
}

// monitorDiskWritable periodically verifies that the drive can be written to and read back.
// If the probe fails or times out, the drive is taken offline and monitorDiskStatus
// brings it back once it recovers.
func (p *xlStorageDiskIDCheck) monitorDiskWritable(ctx context.Context) {
	var (
		// We check every 15 seconds if the disk is writable and we can read back.
		checkEvery = 15 * time.Second

		// If the disk has completed an operation successfully within the last 5 seconds, don't check it.
		skipIfSuccessBefore = 5 * time.Second
	)

	// If the drive max timeout is at or below the checkEvery window,
	// shorten the check interval to one second less than the timeout.
	if globalDriveConfig.GetMaxTimeout() <= checkEvery {
		checkEvery = globalDriveConfig.GetMaxTimeout() - time.Second
		if checkEvery <= 0 {
			checkEvery = globalDriveConfig.GetMaxTimeout()
		}
	}

	// If the drive max timeout is at or below the skipIfSuccessBefore window,
	// shorten skipIfSuccessBefore to one second less than the timeout.
	if globalDriveConfig.GetMaxTimeout() <= skipIfSuccessBefore {
		skipIfSuccessBefore = globalDriveConfig.GetMaxTimeout() - time.Second
		if skipIfSuccessBefore <= 0 {
			skipIfSuccessBefore = globalDriveConfig.GetMaxTimeout()
		}
	}

	t := time.NewTicker(checkEvery)
	defer t.Stop()
	fn := mustGetUUID()

	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

	monitor := func() bool {
		if contextCanceled(ctx) {
			return false
		}

		if p.health.status.Load() != diskHealthOK {
			return true
		}

		if time.Since(time.Unix(0, atomic.LoadInt64(&p.health.lastSuccess))) < skipIfSuccessBefore {
			// We recently saw a success - no need to check.
			return true
		}

		goOffline := func(err error, spent time.Duration) {
			if p.health.status.CompareAndSwap(diskHealthOK, diskHealthFaulty) {
				logger.LogAlwaysIf(ctx, fmt.Errorf("node(%s): taking drive %s offline: %v", globalLocalNodeName, p.storage.String(), err))
				p.health.waiting.Add(1)
				go p.monitorDiskStatus(spent, fn)
			}
		}

		// Offset checks a bit.
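		// Sleep up to one second of random jitter so drives do not run their probes in lockstep.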
		time.Sleep(time.Duration(rng.Int63n(int64(1 * time.Second))))

		dctx, dcancel := context.WithCancel(ctx)
		started := time.Now()
		go func() {
			timeout := time.NewTimer(globalDriveConfig.GetMaxTimeout())
			select {
			case <-dctx.Done():
				if !timeout.Stop() {
					<-timeout.C
				}
			case <-timeout.C:
				spent := time.Since(started)
				goOffline(fmt.Errorf("unable to write+read for %v", spent.Round(time.Millisecond)), spent)
			}
		}()

		func() {
			defer dcancel()

			err := p.storage.WriteAll(ctx, minioMetaTmpBucket, fn, toWrite)
			if err != nil {
				if osErrToFileErr(err) == errFaultyDisk {
					goOffline(fmt.Errorf("unable to write: %w", err), 0)
				}
				return
			}
			b, err := p.storage.ReadAll(context.Background(), minioMetaTmpBucket, fn)
			if err != nil || len(b) != len(toWrite) {
				if osErrToFileErr(err) == errFaultyDisk {
					goOffline(fmt.Errorf("unable to read: %w", err), 0)
				}
				return
			}
		}()

		// Continue to monitor
		return true
	}

	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			if !monitor() {
				return
			}
		}
	}
}

// checkID will check if the disk ID matches the provided ID.
func (p *xlStorageDiskIDCheck) checkID(wantID string) (err error) {
	if wantID == "" {
		return nil
	}
	id, err := p.storage.GetDiskID()
	if err != nil {
		return err
	}
	if id != wantID {
		return fmt.Errorf("disk ID %s does not match. disk reports %s", wantID, id)
	}
	return nil
}

// diskHealthCheckOK will check if the provided error is nil
// and update the disk status if good.
// For convenience it returns true when the error is nil or io.EOF,
// and false for any other error.
func diskHealthCheckOK(ctx context.Context, err error) bool {
	// Check if context has a disk health check.
	tracker, ok := ctx.Value(healthDiskCtxKey{}).(*healthDiskCtxValue)
	if !ok {
		// No tracker, return
		return err == nil || errors.Is(err, io.EOF)
	}
	if err == nil || errors.Is(err, io.EOF) {
		tracker.logSuccess()
		return true
	}
	return false
}

// diskHealthWrapper provides either an io.Reader or an io.Writer
// that updates the status of the provided tracker.
// Use through diskHealthReader or diskHealthWriter.
type diskHealthWrapper struct {
	tracker *healthDiskCtxValue
	r       io.Reader
	w       io.Writer
}

func (d *diskHealthWrapper) Read(p []byte) (int, error) {
	if d.r == nil {
		return 0, fmt.Errorf("diskHealthWrapper: Read with no reader")
	}
	n, err := d.r.Read(p)
	if err == nil || err == io.EOF && n > 0 {
		d.tracker.logSuccess()
	}
	return n, err
}

func (d *diskHealthWrapper) Write(p []byte) (int, error) {
	if d.w == nil {
		return 0, fmt.Errorf("diskHealthWrapper: Write with no writer")
	}
	n, err := d.w.Write(p)
	if err == nil && n == len(p) {
		d.tracker.logSuccess()
	}
	return n, err
}

// diskHealthReader provides a wrapper that will update disk health on
// ctx, on every successful read.
// This should only be used directly at the os/syscall level,
// otherwise buffered operations may return false health checks.
func diskHealthReader(ctx context.Context, r io.Reader) io.Reader {
	// Check if context has a disk health check.
	tracker, ok := ctx.Value(healthDiskCtxKey{}).(*healthDiskCtxValue)
	if !ok {
		// No need to wrap
		return r
	}
	return &diskHealthWrapper{r: r, tracker: tracker}
}

// diskHealthWriter provides a wrapper that will update disk health on
// ctx, on every successful write.
// This should only be used directly at the os/syscall level,
// otherwise buffered operations may return false health checks.
func diskHealthWriter(ctx context.Context, w io.Writer) io.Writer {
	// Check if context has a disk health check.
	tracker, ok := ctx.Value(healthDiskCtxKey{}).(*healthDiskCtxValue)
	if !ok {
		// No need to wrap
		return w
	}
	return &diskHealthWrapper{w: w, tracker: tracker}
}