storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/erasure.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cmd 18 19 import ( 20 "context" 21 "errors" 22 "fmt" 23 "math/rand" 24 "os" 25 "sort" 26 "sync" 27 "time" 28 29 "storj.io/minio/cmd/logger" 30 "storj.io/minio/pkg/bpool" 31 "storj.io/minio/pkg/color" 32 "storj.io/minio/pkg/dsync" 33 "storj.io/minio/pkg/madmin" 34 "storj.io/minio/pkg/sync/errgroup" 35 ) 36 37 // OfflineDisk represents an unavailable disk. 38 var OfflineDisk StorageAPI // zero value is nil 39 40 // partialOperation is a successful upload/delete of an object 41 // but not written in all disks (having quorum) 42 type partialOperation struct { 43 bucket string 44 object string 45 versionID string 46 failedSet int 47 } 48 49 // erasureObjects - Implements ER object layer. 50 type erasureObjects struct { 51 GatewayUnsupported 52 53 setDriveCount int 54 defaultParityCount int 55 56 setIndex int 57 poolIndex int 58 59 // getDisks returns list of storageAPIs. 60 getDisks func() []StorageAPI 61 62 // getLockers returns list of remote and local lockers. 63 getLockers func() ([]dsync.NetLocker, string) 64 65 // getEndpoints returns list of endpoint strings belonging this set. 66 // some may be local and some remote. 67 getEndpoints func() []string 68 69 // Locker mutex map. 70 nsMutex *nsLockMap 71 72 // Byte pools used for temporary i/o buffers. 73 bp *bpool.BytePoolCap 74 75 mrfOpCh chan partialOperation 76 77 deletedCleanupSleeper *dynamicSleeper 78 } 79 80 // NewNSLock - initialize a new namespace RWLocker instance. 81 func (er erasureObjects) NewNSLock(bucket string, objects ...string) RWLocker { 82 return er.nsMutex.NewNSLock(er.getLockers, bucket, objects...) 83 } 84 85 // Shutdown function for object storage interface. 86 func (er erasureObjects) Shutdown(ctx context.Context) error { 87 // Add any object layer shutdown activities here. 88 closeStorageDisks(er.getDisks()) 89 return nil 90 } 91 92 // byDiskTotal is a collection satisfying sort.Interface. 93 type byDiskTotal []madmin.Disk 94 95 func (d byDiskTotal) Len() int { return len(d) } 96 func (d byDiskTotal) Swap(i, j int) { d[i], d[j] = d[j], d[i] } 97 func (d byDiskTotal) Less(i, j int) bool { 98 return d[i].TotalSpace < d[j].TotalSpace 99 } 100 101 func diskErrToDriveState(err error) (state string) { 102 state = madmin.DriveStateUnknown 103 switch { 104 case errors.Is(err, errDiskNotFound): 105 state = madmin.DriveStateOffline 106 case errors.Is(err, errCorruptedFormat): 107 state = madmin.DriveStateCorrupt 108 case errors.Is(err, errUnformattedDisk): 109 state = madmin.DriveStateUnformatted 110 case errors.Is(err, errDiskAccessDenied): 111 state = madmin.DriveStatePermission 112 case errors.Is(err, errFaultyDisk): 113 state = madmin.DriveStateFaulty 114 case err == nil: 115 state = madmin.DriveStateOk 116 } 117 return 118 } 119 120 func getOnlineOfflineDisksStats(disksInfo []madmin.Disk) (onlineDisks, offlineDisks madmin.BackendDisks) { 121 onlineDisks = make(madmin.BackendDisks) 122 offlineDisks = make(madmin.BackendDisks) 123 124 for _, disk := range disksInfo { 125 ep := disk.Endpoint 126 if _, ok := offlineDisks[ep]; !ok { 127 offlineDisks[ep] = 0 128 } 129 if _, ok := onlineDisks[ep]; !ok { 130 onlineDisks[ep] = 0 131 } 132 } 133 134 // Wait for the routines. 135 for _, disk := range disksInfo { 136 ep := disk.Endpoint 137 state := disk.State 138 if state != madmin.DriveStateOk && state != madmin.DriveStateUnformatted { 139 offlineDisks[ep]++ 140 continue 141 } 142 onlineDisks[ep]++ 143 } 144 145 rootDiskCount := 0 146 for _, di := range disksInfo { 147 if di.RootDisk { 148 rootDiskCount++ 149 } 150 } 151 152 // Count offline disks as well to ensure consistent 153 // reportability of offline drives on local setups. 154 if len(disksInfo) == (rootDiskCount + offlineDisks.Sum()) { 155 // Success. 156 return onlineDisks, offlineDisks 157 } 158 159 // Root disk should be considered offline 160 for i := range disksInfo { 161 ep := disksInfo[i].Endpoint 162 if disksInfo[i].RootDisk { 163 offlineDisks[ep]++ 164 onlineDisks[ep]-- 165 } 166 } 167 168 return onlineDisks, offlineDisks 169 } 170 171 // getDisksInfo - fetch disks info across all other storage API. 172 func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []madmin.Disk, errs []error) { 173 disksInfo = make([]madmin.Disk, len(disks)) 174 175 g := errgroup.WithNErrs(len(disks)) 176 for index := range disks { 177 index := index 178 g.Go(func() error { 179 if disks[index] == OfflineDisk { 180 logger.LogIf(GlobalContext, fmt.Errorf("%s: %s", errDiskNotFound, endpoints[index])) 181 disksInfo[index] = madmin.Disk{ 182 State: diskErrToDriveState(errDiskNotFound), 183 Endpoint: endpoints[index], 184 } 185 // Storage disk is empty, perhaps ignored disk or not available. 186 return errDiskNotFound 187 } 188 info, err := disks[index].DiskInfo(context.TODO()) 189 di := madmin.Disk{ 190 Endpoint: info.Endpoint, 191 DrivePath: info.MountPath, 192 TotalSpace: info.Total, 193 UsedSpace: info.Used, 194 AvailableSpace: info.Free, 195 UUID: info.ID, 196 RootDisk: info.RootDisk, 197 Healing: info.Healing, 198 State: diskErrToDriveState(err), 199 } 200 di.PoolIndex, di.SetIndex, di.DiskIndex = disks[index].GetDiskLoc() 201 if info.Healing { 202 if hi := disks[index].Healing(); hi != nil { 203 hd := hi.toHealingDisk() 204 di.HealInfo = &hd 205 } 206 } 207 di.Metrics = &madmin.DiskMetrics{ 208 APILatencies: make(map[string]string), 209 APICalls: make(map[string]uint64), 210 } 211 for k, v := range info.Metrics.APILatencies { 212 di.Metrics.APILatencies[k] = v 213 } 214 for k, v := range info.Metrics.APICalls { 215 di.Metrics.APICalls[k] = v 216 } 217 if info.Total > 0 { 218 di.Utilization = float64(info.Used / info.Total * 100) 219 } 220 disksInfo[index] = di 221 return err 222 }, index) 223 } 224 225 return disksInfo, g.Wait() 226 } 227 228 // Get an aggregated storage info across all disks. 229 func getStorageInfo(disks []StorageAPI, endpoints []string) (StorageInfo, []error) { 230 disksInfo, errs := getDisksInfo(disks, endpoints) 231 232 // Sort so that the first element is the smallest. 233 sort.Sort(byDiskTotal(disksInfo)) 234 235 storageInfo := StorageInfo{ 236 Disks: disksInfo, 237 } 238 239 storageInfo.Backend.Type = madmin.Erasure 240 return storageInfo, errs 241 } 242 243 // StorageInfo - returns underlying storage statistics. 244 func (er erasureObjects) StorageInfo(ctx context.Context) (StorageInfo, []error) { 245 disks := er.getDisks() 246 endpoints := er.getEndpoints() 247 return getStorageInfo(disks, endpoints) 248 } 249 250 // LocalStorageInfo - returns underlying local storage statistics. 251 func (er erasureObjects) LocalStorageInfo(ctx context.Context) (StorageInfo, []error) { 252 disks := er.getLocalDisks() 253 endpoints := make([]string, len(disks)) 254 for i, disk := range disks { 255 if disk != nil { 256 endpoints[i] = disk.String() 257 } 258 } 259 return getStorageInfo(disks, endpoints) 260 } 261 262 func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, healing bool) { 263 var wg sync.WaitGroup 264 disks := er.getDisks() 265 infos := make([]DiskInfo, len(disks)) 266 for _, i := range hashOrder(UTCNow().String(), len(disks)) { 267 i := i 268 wg.Add(1) 269 go func() { 270 defer wg.Done() 271 272 disk := disks[i-1] 273 274 if disk == nil { 275 infos[i-1].Error = "nil disk" 276 return 277 } 278 279 di, err := disk.DiskInfo(context.Background()) 280 if err != nil { 281 // - Do not consume disks which are not reachable 282 // unformatted or simply not accessible for some reason. 283 // 284 // 285 // - Future: skip busy disks 286 infos[i-1].Error = err.Error() 287 return 288 } 289 290 infos[i-1] = di 291 }() 292 } 293 wg.Wait() 294 295 for i, info := range infos { 296 // Check if one of the drives in the set is being healed. 297 // this information is used by scanner to skip healing 298 // this erasure set while it calculates the usage. 299 if info.Healing || info.Error != "" { 300 healing = true 301 continue 302 } 303 newDisks = append(newDisks, disks[i]) 304 } 305 306 return newDisks, healing 307 } 308 309 // Clean-up previously deleted objects. from .minio.sys/tmp/.trash/ 310 func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) { 311 // run multiple cleanup's local to this server. 312 var wg sync.WaitGroup 313 for _, disk := range er.getLoadBalancedLocalDisks() { 314 if disk != nil { 315 wg.Add(1) 316 go func(disk StorageAPI) { 317 defer wg.Done() 318 diskPath := disk.Endpoint().Path 319 readDirFn(pathJoin(diskPath, minioMetaTmpDeletedBucket), func(ddir string, typ os.FileMode) error { 320 wait := er.deletedCleanupSleeper.Timer(ctx) 321 removeAll(pathJoin(diskPath, minioMetaTmpDeletedBucket, ddir)) 322 wait() 323 return nil 324 }) 325 }(disk) 326 } 327 } 328 wg.Wait() 329 } 330 331 // nsScanner will start scanning buckets and send updated totals as they are traversed. 332 // Updates are sent on a regular basis and the caller *must* consume them. 333 func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error { 334 if len(buckets) == 0 { 335 return nil 336 } 337 338 // Collect disks we can use. 339 disks, healing := er.getOnlineDisksWithHealing() 340 if len(disks) == 0 { 341 logger.Info(color.Green("data-scanner:") + " all disks are offline or being healed, skipping scanner") 342 return nil 343 } 344 345 // Collect disks for healing. 346 allDisks := er.getDisks() 347 allDiskIDs := make([]string, 0, len(allDisks)) 348 for _, disk := range allDisks { 349 if disk == OfflineDisk { 350 // its possible that disk is OfflineDisk 351 continue 352 } 353 id, _ := disk.GetDiskID() 354 if id == "" { 355 // its possible that disk is unformatted 356 // or just went offline 357 continue 358 } 359 allDiskIDs = append(allDiskIDs, id) 360 } 361 362 // Load bucket totals 363 oldCache := dataUsageCache{} 364 if err := oldCache.load(ctx, er, dataUsageCacheName); err != nil { 365 return err 366 } 367 368 // New cache.. 369 cache := dataUsageCache{ 370 Info: dataUsageCacheInfo{ 371 Name: dataUsageRoot, 372 NextCycle: oldCache.Info.NextCycle, 373 }, 374 Cache: make(map[string]dataUsageEntry, len(oldCache.Cache)), 375 } 376 bloom := bf.bytes() 377 378 // Put all buckets into channel. 379 bucketCh := make(chan BucketInfo, len(buckets)) 380 // Add new buckets first 381 for _, b := range buckets { 382 if oldCache.find(b.Name) == nil { 383 bucketCh <- b 384 } 385 } 386 387 // Add existing buckets. 388 for _, b := range buckets { 389 e := oldCache.find(b.Name) 390 if e != nil { 391 cache.replace(b.Name, dataUsageRoot, *e) 392 bucketCh <- b 393 } 394 } 395 396 close(bucketCh) 397 bucketResults := make(chan dataUsageEntryInfo, len(disks)) 398 399 // Start async collector/saver. 400 // This goroutine owns the cache. 401 var saverWg sync.WaitGroup 402 saverWg.Add(1) 403 go func() { 404 // Add jitter to the update time so multiple sets don't sync up. 405 var updateTime = 30*time.Second + time.Duration(float64(10*time.Second)*rand.Float64()) 406 t := time.NewTicker(updateTime) 407 defer t.Stop() 408 defer saverWg.Done() 409 var lastSave time.Time 410 411 for { 412 select { 413 case <-ctx.Done(): 414 // Return without saving. 415 return 416 case <-t.C: 417 if cache.Info.LastUpdate.Equal(lastSave) { 418 continue 419 } 420 logger.LogIf(ctx, cache.save(ctx, er, dataUsageCacheName)) 421 updates <- cache.clone() 422 lastSave = cache.Info.LastUpdate 423 case v, ok := <-bucketResults: 424 if !ok { 425 // Save final state... 426 cache.Info.NextCycle++ 427 cache.Info.LastUpdate = time.Now() 428 logger.LogIf(ctx, cache.save(ctx, er, dataUsageCacheName)) 429 updates <- cache 430 return 431 } 432 cache.replace(v.Name, v.Parent, v.Entry) 433 cache.Info.LastUpdate = time.Now() 434 } 435 } 436 }() 437 438 // Shuffle disks to ensure a total randomness of bucket/disk association to ensure 439 // that objects that are not present in all disks are accounted and ILM applied. 440 r := rand.New(rand.NewSource(time.Now().UnixNano())) 441 r.Shuffle(len(disks), func(i, j int) { disks[i], disks[j] = disks[j], disks[i] }) 442 443 // Start one scanner per disk 444 var wg sync.WaitGroup 445 wg.Add(len(disks)) 446 for i := range disks { 447 go func(i int) { 448 defer wg.Done() 449 disk := disks[i] 450 451 for bucket := range bucketCh { 452 select { 453 case <-ctx.Done(): 454 return 455 default: 456 } 457 458 // Load cache for bucket 459 cacheName := pathJoin(bucket.Name, dataUsageCacheName) 460 cache := dataUsageCache{} 461 logger.LogIf(ctx, cache.load(ctx, er, cacheName)) 462 if cache.Info.Name == "" { 463 cache.Info.Name = bucket.Name 464 } 465 cache.Info.BloomFilter = bloom 466 cache.Info.SkipHealing = healing 467 cache.Disks = allDiskIDs 468 if cache.Info.Name != bucket.Name { 469 logger.LogIf(ctx, fmt.Errorf("cache name mismatch: %s != %s", cache.Info.Name, bucket.Name)) 470 cache.Info = dataUsageCacheInfo{ 471 Name: bucket.Name, 472 LastUpdate: time.Time{}, 473 NextCycle: 0, 474 } 475 } 476 477 // Calc usage 478 before := cache.Info.LastUpdate 479 var err error 480 cache, err = disk.NSScanner(ctx, cache) 481 cache.Info.BloomFilter = nil 482 if err != nil { 483 if !cache.Info.LastUpdate.IsZero() && cache.Info.LastUpdate.After(before) { 484 logger.LogIf(ctx, cache.save(ctx, er, cacheName)) 485 } else { 486 logger.LogIf(ctx, err) 487 } 488 continue 489 } 490 491 var root dataUsageEntry 492 if r := cache.root(); r != nil { 493 root = cache.flatten(*r) 494 } 495 t := time.Now() 496 bucketResults <- dataUsageEntryInfo{ 497 Name: cache.Info.Name, 498 Parent: dataUsageRoot, 499 Entry: root, 500 } 501 // We want to avoid synchronizing up all writes in case 502 // the results are piled up. 503 time.Sleep(time.Duration(float64(time.Since(t)) * rand.Float64())) 504 // Save cache 505 logger.LogIf(ctx, cache.save(ctx, er, cacheName)) 506 } 507 }(i) 508 } 509 wg.Wait() 510 close(bucketResults) 511 saverWg.Wait() 512 513 return nil 514 }