storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/erasure-server-pool.go

/*
 * MinIO Cloud Storage, (C) 2019,2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cmd

import (
    "context"
    "errors"
    "fmt"
    "io"
    "math/rand"
    "net/http"
    "sort"
    "strconv"
    "strings"
    "sync"
    "time"

    "github.com/minio/minio-go/v7/pkg/set"
    "github.com/minio/minio-go/v7/pkg/tags"

    "storj.io/minio/cmd/config/storageclass"
    "storj.io/minio/cmd/logger"
    "storj.io/minio/pkg/madmin"
    "storj.io/minio/pkg/sync/errgroup"
    "storj.io/minio/pkg/wildcard"
)

type erasureServerPools struct {
    GatewayUnsupported

    serverPools []*erasureSets

    // Shut down async operations
    shutdown context.CancelFunc
}

func (z *erasureServerPools) SinglePool() bool {
    return len(z.serverPools) == 1
}

// Initialize new pool of erasure sets.
func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServerPools) (ObjectLayer, error) {
    var (
        deploymentID       string
        distributionAlgo   string
        commonParityDrives int
        err                error

        formats      = make([]*formatErasureV3, len(endpointServerPools))
        storageDisks = make([][]StorageAPI, len(endpointServerPools))
        z            = &erasureServerPools{serverPools: make([]*erasureSets, len(endpointServerPools))}
    )

    var localDrives []string

    local := endpointServerPools.FirstLocal()
    for i, ep := range endpointServerPools {
        for _, endpoint := range ep.Endpoints {
            if endpoint.IsLocal {
                localDrives = append(localDrives, endpoint.Path)
            }
        }

        // If storage class is not set during startup, default values are used
        // -- Default for Reduced Redundancy Storage class is, parity = 2
        // -- Default for Standard Storage class is, parity = 2 - disks 4, 5
        // -- Default for Standard Storage class is, parity = 3 - disks 6, 7
        // -- Default for Standard Storage class is, parity = 4 - disks 8 to 16
        if commonParityDrives == 0 {
            commonParityDrives = ecDrivesNoConfig(ep.DrivesPerSet)
        }

        if err = storageclass.ValidateParity(commonParityDrives, ep.DrivesPerSet); err != nil {
            return nil, fmt.Errorf("All current serverPools should have same parity ratio - expected %d, got %d", commonParityDrives, ecDrivesNoConfig(ep.DrivesPerSet))
        }

        storageDisks[i], formats[i], err = waitForFormatErasure(local, ep.Endpoints, i+1,
            ep.SetCount, ep.DrivesPerSet, deploymentID, distributionAlgo)
        if err != nil {
            return nil, err
        }

        if deploymentID == "" {
            // all zones should have same deployment ID
            deploymentID = formats[i].ID
        }

        if distributionAlgo == "" {
            distributionAlgo = formats[i].Erasure.DistributionAlgo
        }

        // Validate if users brought different DeploymentID pools.
        if deploymentID != formats[i].ID {
            return nil, fmt.Errorf("All serverPools should have same deployment ID expected %s, got %s", deploymentID, formats[i].ID)
        }

        z.serverPools[i], err = newErasureSets(ctx, ep.Endpoints, storageDisks[i], formats[i], commonParityDrives, i)
        if err != nil {
            return nil, err
        }
    }
    ctx, z.shutdown = context.WithCancel(ctx)
    go intDataUpdateTracker.start(ctx, localDrives...)
    return z, nil
}

func (z *erasureServerPools) NewNSLock(bucket string, objects ...string) RWLocker {
    return z.serverPools[0].NewNSLock(bucket, objects...)
}

// GetDisksID will return disks by their ID.
func (z *erasureServerPools) GetDisksID(ids ...string) []StorageAPI {
    idMap := make(map[string]struct{})
    for _, id := range ids {
        idMap[id] = struct{}{}
    }
    res := make([]StorageAPI, 0, len(idMap))
    for _, s := range z.serverPools {
        s.erasureDisksMu.RLock()
        defer s.erasureDisksMu.RUnlock()
        for _, disks := range s.erasureDisks {
            for _, disk := range disks {
                if disk == OfflineDisk {
                    continue
                }
                if id, _ := disk.GetDiskID(); id != "" {
                    if _, ok := idMap[id]; ok {
                        res = append(res, disk)
                    }
                }
            }
        }
    }
    return res
}

func (z *erasureServerPools) SetDriveCounts() []int {
    setDriveCounts := make([]int, len(z.serverPools))
    for i := range z.serverPools {
        setDriveCounts[i] = z.serverPools[i].SetDriveCount()
    }
    return setDriveCounts
}

type serverPoolsAvailableSpace []poolAvailableSpace

type poolAvailableSpace struct {
    Index     int
    Available uint64
}

// TotalAvailable - total available space
func (p serverPoolsAvailableSpace) TotalAvailable() uint64 {
    total := uint64(0)
    for _, z := range p {
        total += z.Available
    }
    return total
}

// getAvailablePoolIdx will return an index that can hold size bytes.
// -1 is returned if no serverPools have available space for the size given.
func (z *erasureServerPools) getAvailablePoolIdx(ctx context.Context, size int64) int {
    serverPools := z.getServerPoolsAvailableSpace(ctx, size)
    total := serverPools.TotalAvailable()
    if total == 0 {
        return -1
    }
    // choose when we reach this many
    choose := rand.Uint64() % total
    atTotal := uint64(0)
    for _, pool := range serverPools {
        atTotal += pool.Available
        if atTotal > choose && pool.Available > 0 {
            return pool.Index
        }
    }
    // Should not happen, but print values just in case.
    logger.LogIf(ctx, fmt.Errorf("reached end of serverPools (total: %v, atTotal: %v, choose: %v)", total, atTotal, choose))
    return -1
}

// getServerPoolsAvailableSpace will return the available space of each pool after storing the content.
// If there is not enough space the pool will return 0 bytes available.
// Negative sizes are seen as 0 bytes.
func (z *erasureServerPools) getServerPoolsAvailableSpace(ctx context.Context, size int64) serverPoolsAvailableSpace {
    if size < 0 {
        size = 0
    }
    var serverPools = make(serverPoolsAvailableSpace, len(z.serverPools))

    storageInfos := make([]StorageInfo, len(z.serverPools))
    g := errgroup.WithNErrs(len(z.serverPools))
    for index := range z.serverPools {
        index := index
        g.Go(func() error {
            storageInfos[index] = z.serverPools[index].StorageUsageInfo(ctx)
            return nil
        }, index)
    }

    // Wait for the go routines.
    g.Wait()

    for i, zinfo := range storageInfos {
        var available uint64
        var total uint64
        for _, disk := range zinfo.Disks {
            total += disk.TotalSpace
            available += disk.TotalSpace - disk.UsedSpace
        }
        // Make sure we can fit "size" on to the disk without getting above the diskFillFraction
        if available < uint64(size) {
            available = 0
        }
        if available > 0 {
            // How much will be left after adding the file.
            available -= uint64(size)

            // wantLeft is how much space there at least must be left.
            wantLeft := uint64(float64(total) * (1.0 - diskFillFraction))
            if available <= wantLeft {
                available = 0
            }
        }
        serverPools[i] = poolAvailableSpace{
            Index:     i,
            Available: available,
        }
    }
    return serverPools
}

// getPoolIdxExisting returns the (first) found object pool index containing an object.
// If the object exists, but the latest version is a delete marker, the index with it is still returned.
// If the object does not exist ObjectNotFound error is returned.
// If any other error is found, it is returned.
// The check is skipped if there is only one pool, and 0, nil is always returned in that case.
func (z *erasureServerPools) getPoolIdxExisting(ctx context.Context, bucket, object string) (idx int, err error) {
    if z.SinglePool() {
        return 0, nil
    }

    errs := make([]error, len(z.serverPools))
    objInfos := make([]ObjectInfo, len(z.serverPools))

    var wg sync.WaitGroup
    for i, pool := range z.serverPools {
        wg.Add(1)
        go func(i int, pool *erasureSets) {
            defer wg.Done()
            objInfos[i], errs[i] = pool.GetObjectInfo(ctx, bucket, object, ObjectOptions{})
        }(i, pool)
    }
    wg.Wait()

    for i, err := range errs {
        if err == nil {
            return i, nil
        }
        if isErrObjectNotFound(err) {
            // No object exists or it's a delete marker,
            // check objInfo to confirm.
            if objInfos[i].DeleteMarker && objInfos[i].Name != "" {
                return i, nil
            }

            // objInfo is not valid, truly the object doesn't
            // exist, proceed to the next pool.
            continue
        }
        return -1, err
    }

    return -1, toObjectErr(errFileNotFound, bucket, object)
}

// getPoolIdx returns the found previous object and its corresponding pool idx;
// if none are found it falls back to the pool with the most available space.
func (z *erasureServerPools) getPoolIdx(ctx context.Context, bucket, object string, size int64) (idx int, err error) {
    if z.SinglePool() {
        return 0, nil
    }

    errs := make([]error, len(z.serverPools))
    objInfos := make([]ObjectInfo, len(z.serverPools))

    var wg sync.WaitGroup
    for i, pool := range z.serverPools {
        wg.Add(1)
        go func(i int, pool *erasureSets) {
            defer wg.Done()
            objInfos[i], errs[i] = pool.GetObjectInfo(ctx, bucket, object, ObjectOptions{})
        }(i, pool)
    }
    wg.Wait()

    for i, err := range errs {
        if err != nil && !isErrObjectNotFound(err) {
            return -1, err
        }
        if isErrObjectNotFound(err) {
            // No object exists or it's a delete marker,
            // check objInfo to confirm.
            if objInfos[i].DeleteMarker && objInfos[i].Name != "" {
                return i, nil
            }
            // objInfo is not valid, truly the object doesn't
            // exist, proceed to the next pool.
            continue
        }
        // object exists at this pool.
        return i, nil
    }

    // We multiply the size by 2 to account for erasure coding.
    idx = z.getAvailablePoolIdx(ctx, size*2)
    if idx < 0 {
        return -1, toObjectErr(errDiskFull)
    }
    return idx, nil
}

func (z *erasureServerPools) Shutdown(ctx context.Context) error {
    defer z.shutdown()

    g := errgroup.WithNErrs(len(z.serverPools))

    for index := range z.serverPools {
        index := index
        g.Go(func() error {
            return z.serverPools[index].Shutdown(ctx)
        }, index)
    }

    for _, err := range g.Wait() {
        if err != nil {
            logger.LogIf(ctx, err)
        }
        // let the rest shut down
    }
    return nil
}

func (z *erasureServerPools) BackendInfo() (b madmin.BackendInfo) {
    b.Type = madmin.Erasure

    scParity := globalStorageClass.GetParityForSC(storageclass.STANDARD)
    if scParity <= 0 {
        scParity = z.serverPools[0].defaultParityCount
    }
    rrSCParity := globalStorageClass.GetParityForSC(storageclass.RRS)

    // Data blocks can vary per pool, but parity is the same.
    for _, setDriveCount := range z.SetDriveCounts() {
        b.StandardSCData = append(b.StandardSCData, setDriveCount-scParity)
        b.RRSCData = append(b.RRSCData, setDriveCount-rrSCParity)
    }

    b.StandardSCParity = scParity
    b.RRSCParity = rrSCParity
    return
}

func (z *erasureServerPools) LocalStorageInfo(ctx context.Context) (StorageInfo, []error) {
    var storageInfo StorageInfo

    storageInfos := make([]StorageInfo, len(z.serverPools))
    storageInfosErrs := make([][]error, len(z.serverPools))
    g := errgroup.WithNErrs(len(z.serverPools))
    for index := range z.serverPools {
        index := index
        g.Go(func() error {
            storageInfos[index], storageInfosErrs[index] = z.serverPools[index].LocalStorageInfo(ctx)
            return nil
        }, index)
    }

    // Wait for the go routines.
    g.Wait()

    storageInfo.Backend = z.BackendInfo()
    for _, lstorageInfo := range storageInfos {
        storageInfo.Disks = append(storageInfo.Disks, lstorageInfo.Disks...)
    }

    var errs []error
    for i := range z.serverPools {
        errs = append(errs, storageInfosErrs[i]...)
    }
    return storageInfo, errs
}

func (z *erasureServerPools) StorageInfo(ctx context.Context) (StorageInfo, []error) {
    var storageInfo StorageInfo

    storageInfos := make([]StorageInfo, len(z.serverPools))
    storageInfosErrs := make([][]error, len(z.serverPools))
    g := errgroup.WithNErrs(len(z.serverPools))
    for index := range z.serverPools {
        index := index
        g.Go(func() error {
            storageInfos[index], storageInfosErrs[index] = z.serverPools[index].StorageInfo(ctx)
            return nil
        }, index)
    }

    // Wait for the go routines.
    g.Wait()

    storageInfo.Backend = z.BackendInfo()
    for _, lstorageInfo := range storageInfos {
        storageInfo.Disks = append(storageInfo.Disks, lstorageInfo.Disks...)
    }

    var errs []error
    for i := range z.serverPools {
        errs = append(errs, storageInfosErrs[i]...)
    }
    return storageInfo, errs
}

func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error {
    ctx, cancel := context.WithCancel(ctx)
    defer cancel()

    var wg sync.WaitGroup
    var mu sync.Mutex
    var results []dataUsageCache
    var firstErr error

    allBuckets, err := z.ListBuckets(ctx)
    if err != nil {
        return err
    }

    if len(allBuckets) == 0 {
        updates <- madmin.DataUsageInfo{} // no buckets found, update data usage to reflect latest state
        return nil
    }

    // Scan the most recently created buckets first.
    sort.Slice(allBuckets, func(i, j int) bool {
        return allBuckets[i].Created.After(allBuckets[j].Created)
    })

    // Collect for each set in serverPools.
    for _, z := range z.serverPools {
        for _, erObj := range z.sets {
            wg.Add(1)
            results = append(results, dataUsageCache{})
            go func(i int, erObj *erasureObjects) {
                updates := make(chan dataUsageCache, 1)
                defer close(updates)
                // Start update collector.
                go func() {
                    defer wg.Done()
                    for info := range updates {
                        mu.Lock()
                        results[i] = info
                        mu.Unlock()
                    }
                }()
                // Start scanner. Blocks until done.
                err := erObj.nsScanner(ctx, allBuckets, bf, updates)
                if err != nil {
                    logger.LogIf(ctx, err)
                    mu.Lock()
                    if firstErr == nil {
                        firstErr = err
                    }
                    // Cancel remaining...
                    cancel()
                    mu.Unlock()
                    return
                }
            }(len(results)-1, erObj)
        }
    }
    updateCloser := make(chan chan struct{})
    go func() {
        updateTicker := time.NewTicker(30 * time.Second)
        defer updateTicker.Stop()
        var lastUpdate time.Time

        // We need to merge since we will get the same buckets from each pool.
        // Therefore to get the exact bucket sizes we must merge before we can convert.
        var allMerged dataUsageCache

        update := func() {
            mu.Lock()
            defer mu.Unlock()

            allMerged = dataUsageCache{Info: dataUsageCacheInfo{Name: dataUsageRoot}}
            for _, info := range results {
                if info.Info.LastUpdate.IsZero() {
                    // Not filled yet.
                    return
                }
                allMerged.merge(info)
            }
            if allMerged.root() != nil && allMerged.Info.LastUpdate.After(lastUpdate) {
                updates <- allMerged.dui(allMerged.Info.Name, allBuckets)
                lastUpdate = allMerged.Info.LastUpdate
            }
        }
        for {
            select {
            case <-ctx.Done():
                return
            case v := <-updateCloser:
                update()
                // Enforce quotas when all is done.
                if firstErr == nil {
                    for _, b := range allBuckets {
                        enforceFIFOQuotaBucket(ctx, z, b.Name, allMerged.bucketUsageInfo(b.Name))
                    }
                }
                close(v)
                return
            case <-updateTicker.C:
                update()
            }
        }
    }()

    wg.Wait()
    ch := make(chan struct{})
    select {
    case updateCloser <- ch:
        <-ch
    case <-ctx.Done():
        if firstErr == nil {
            firstErr = ctx.Err()
        }
    }
    return firstErr
}

// MakeBucketWithLocation - creates a new bucket across all serverPools simultaneously;
// even if one of the sets fails to create the bucket, we proceed with all the
// successful operations.
func (z *erasureServerPools) MakeBucketWithLocation(ctx context.Context, bucket string, opts BucketOptions) error {
    g := errgroup.WithNErrs(len(z.serverPools))

    // Create buckets in parallel across all sets.
    for index := range z.serverPools {
        index := index
        g.Go(func() error {
            return z.serverPools[index].MakeBucketWithLocation(ctx, bucket, opts)
        }, index)
    }

    errs := g.Wait()
    // Return the first encountered error
    for _, err := range errs {
        if err != nil {
            return err
        }
    }

    // If it doesn't exist we get new metadata, so ignore errors
    meta := newBucketMetadata(bucket)
    if opts.LockEnabled {
        meta.VersioningConfigXML = enabledBucketVersioningConfig
        meta.ObjectLockConfigXML = enabledBucketObjectLockConfig
    }

    if err := meta.Save(ctx, z); err != nil {
        return toObjectErr(err, bucket)
    }

    globalBucketMetadataSys.Set(bucket, meta)

    // Success.
    return nil
}

func (z *erasureServerPools) GetObjectNInfo(ctx context.Context, bucket, object string, rs *HTTPRangeSpec, h http.Header, lockType LockType, opts ObjectOptions) (gr *GetObjectReader, err error) {
    if err = checkGetObjArgs(ctx, bucket, object); err != nil {
        return nil, err
    }

    object = encodeDirObject(object)

    if z.SinglePool() {
        return z.serverPools[0].GetObjectNInfo(ctx, bucket, object, rs, h, lockType, opts)
    }

    var unlockOnDefer bool
    var nsUnlocker = func() {}
    defer func() {
        if unlockOnDefer {
            nsUnlocker()
        }
    }()

    // Acquire lock
    if lockType != noLock {
        lock := z.NewNSLock(bucket, object)
        switch lockType {
        case writeLock:
            ctx, err = lock.GetLock(ctx, globalOperationTimeout)
            if err != nil {
                return nil, err
            }
            nsUnlocker = lock.Unlock
        case readLock:
            ctx, err = lock.GetRLock(ctx, globalOperationTimeout)
            if err != nil {
                return nil, err
            }
            nsUnlocker = lock.RUnlock
        }
        unlockOnDefer = true
    }

    errs := make([]error, len(z.serverPools))
    grs := make([]*GetObjectReader, len(z.serverPools))

    lockType = noLock // do not take locks at lower levels
    var wg sync.WaitGroup
    for i, pool := range z.serverPools {
        wg.Add(1)
        go func(i int, pool *erasureSets) {
            defer wg.Done()
            grs[i], errs[i] = pool.GetObjectNInfo(ctx, bucket, object, rs, h, lockType, opts)
        }(i, pool)
    }
    wg.Wait()

    var found int = -1
    for i, err := range errs {
        if err == nil {
            found = i
            break
        }
        if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
            for _, grr := range grs {
                if grr != nil {
                    grr.Close()
                }
            }
            return gr, err
        }
    }

    if found >= 0 {
        return grs[found], nil
    }

    object = decodeDirObject(object)
    if opts.VersionID != "" {
        return gr, VersionNotFound{Bucket: bucket, Object: object, VersionID: opts.VersionID}
    }
    return gr, ObjectNotFound{Bucket: bucket, Object: object}
}

func (z *erasureServerPools) GetObjectInfo(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error) {
    if err = checkGetObjArgs(ctx, bucket, object); err != nil {
        return objInfo, err
    }

    object = encodeDirObject(object)

    if z.SinglePool() {
        return z.serverPools[0].GetObjectInfo(ctx, bucket, object, opts)
    }

    // Lock the object before reading.
    lk := z.NewNSLock(bucket, object)
    ctx, err = lk.GetRLock(ctx, globalOperationTimeout)
    if err != nil {
        return ObjectInfo{}, err
    }
    defer lk.RUnlock()

    errs := make([]error, len(z.serverPools))
    objInfos := make([]ObjectInfo, len(z.serverPools))

    opts.NoLock = true // avoid taking locks at lower levels for multi-pool setups.
    var wg sync.WaitGroup
    for i, pool := range z.serverPools {
        wg.Add(1)
        go func(i int, pool *erasureSets) {
            defer wg.Done()
            objInfos[i], errs[i] = pool.GetObjectInfo(ctx, bucket, object, opts)
        }(i, pool)
    }
    wg.Wait()

    var found int = -1
    for i, err := range errs {
        if err == nil {
            found = i
            break
        }
        if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
            // some errors such as MethodNotAllowed for delete marker
            // should be returned upwards.
            return objInfos[i], err
        }
    }

    if found >= 0 {
        return objInfos[found], nil
    }

    object = decodeDirObject(object)
    if opts.VersionID != "" {
        return objInfo, VersionNotFound{Bucket: bucket, Object: object, VersionID: opts.VersionID}
    }
    return objInfo, ObjectNotFound{Bucket: bucket, Object: object}
}

// PutObject - writes an object to the least used erasure pool.
func (z *erasureServerPools) PutObject(ctx context.Context, bucket string, object string, data *PutObjReader, opts ObjectOptions) (ObjectInfo, error) {
    // Validate put object input args.
    if err := checkPutObjectArgs(ctx, bucket, object, z); err != nil {
        return ObjectInfo{}, err
    }

    object = encodeDirObject(object)

    if z.SinglePool() {
        return z.serverPools[0].PutObject(ctx, bucket, object, data, opts)
    }

    idx, err := z.getPoolIdx(ctx, bucket, object, data.Size())
    if err != nil {
        return ObjectInfo{}, err
    }

    // Overwrite the object at the right pool
    return z.serverPools[idx].PutObject(ctx, bucket, object, data, opts)
}

func (z *erasureServerPools) DeleteObject(ctx context.Context, bucket string, object string, opts ObjectOptions) (objInfo ObjectInfo, err error) {
    if err = checkDelObjArgs(ctx, bucket, object); err != nil {
        return objInfo, err
    }

    object = encodeDirObject(object)
    if z.SinglePool() {
        return z.serverPools[0].DeleteObject(ctx, bucket, object, opts)
    }

    idx, err := z.getPoolIdxExisting(ctx, bucket, object)
    if err != nil {
        return objInfo, err
    }

    return z.serverPools[idx].DeleteObject(ctx, bucket, object, opts)
}

func (z *erasureServerPools) DeleteObjects(ctx context.Context, bucket string, objects []ObjectToDelete, opts ObjectOptions) ([]DeletedObject, []error) {
    derrs := make([]error, len(objects))
    dobjects := make([]DeletedObject, len(objects))
    objSets := set.NewStringSet()
    for i := range derrs {
        objects[i].ObjectName = encodeDirObject(objects[i].ObjectName)

        derrs[i] = checkDelObjArgs(ctx, bucket, objects[i].ObjectName)
        objSets.Add(objects[i].ObjectName)
    }

    poolObjIdxMap := map[int][]ObjectToDelete{}
    origIndexMap := map[int][]int{}
    if !z.SinglePool() {
        for j, obj := range objects {
            idx, err := z.getPoolIdxExisting(ctx, bucket, obj.ObjectName)
            if isErrObjectNotFound(err) {
                derrs[j] = err
                continue
            }
            if err != nil {
                // Unhandled errors return right here.
                for i := range derrs {
                    derrs[i] = err
                }
                return dobjects, derrs
            }
            poolObjIdxMap[idx] = append(poolObjIdxMap[idx], obj)
            origIndexMap[idx] = append(origIndexMap[idx], j)
        }
    }

    var err error
    // Acquire a bulk write lock across 'objects'
    multiDeleteLock := z.NewNSLock(bucket, objSets.ToSlice()...)
    ctx, err = multiDeleteLock.GetLock(ctx, globalOperationTimeout)
    if err != nil {
        for i := range derrs {
            derrs[i] = err
        }
        return dobjects, derrs
    }
    defer multiDeleteLock.Unlock()

    if z.SinglePool() {
        return z.serverPools[0].DeleteObjects(ctx, bucket, objects, opts)
    }

    for idx, pool := range z.serverPools {
        objs := poolObjIdxMap[idx]
        orgIndexes := origIndexMap[idx]
        deletedObjects, errs := pool.DeleteObjects(ctx, bucket, objs, opts)
        for i, derr := range errs {
            if derr != nil {
                derrs[orgIndexes[i]] = derr
            }
            dobjects[orgIndexes[i]] = deletedObjects[i]
        }
    }
    return dobjects, derrs
}

func (z *erasureServerPools) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (objInfo ObjectInfo, err error) {
    srcObject = encodeDirObject(srcObject)
    dstObject = encodeDirObject(dstObject)

    cpSrcDstSame := isStringEqual(pathJoin(srcBucket, srcObject), pathJoin(dstBucket, dstObject))

    poolIdx, err := z.getPoolIdx(ctx, dstBucket, dstObject, srcInfo.Size)
    if err != nil {
        return objInfo, err
    }

    if cpSrcDstSame && srcInfo.metadataOnly {
        // Version ID is set for the destination and source == destination version ID.
        if dstOpts.VersionID != "" && srcOpts.VersionID == dstOpts.VersionID {
            return z.serverPools[poolIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
        }
        // Destination is not versioned and source version ID is empty;
        // perform an in-place update.
        if !dstOpts.Versioned && srcOpts.VersionID == "" {
            return z.serverPools[poolIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
        }
        // Destination is versioned and source is not the destination version.
        // As a special case, check whether the source object is not in the
        // legacy (older) format; legacy objects are rewritten as newer
        // versions using PutObject() - this is an optimization to save space.
        if dstOpts.Versioned && srcOpts.VersionID != dstOpts.VersionID && !srcInfo.Legacy {
            // CopyObject optimization where we don't create an entire copy
            // of the content, instead we add a reference.
            srcInfo.versionOnly = true
            return z.serverPools[poolIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
        }
    }

    putOpts := ObjectOptions{
        ServerSideEncryption: dstOpts.ServerSideEncryption,
        UserDefined:          srcInfo.UserDefined,
        Versioned:            dstOpts.Versioned,
        VersionID:            dstOpts.VersionID,
        MTime:                dstOpts.MTime,
    }

    return z.serverPools[poolIdx].PutObject(ctx, dstBucket, dstObject, srcInfo.PutObjReader, putOpts)
}

func (z *erasureServerPools) ListObjectsV2(ctx context.Context, bucket, prefix, continuationToken, delimiter string, maxKeys int, fetchOwner bool, startAfter string) (ListObjectsV2Info, error) {
    marker := continuationToken
    if marker == "" {
        marker = startAfter
    }

    loi, err := z.ListObjects(ctx, bucket, prefix, marker, delimiter, maxKeys)
    if err != nil {
        return ListObjectsV2Info{}, err
    }

    listObjectsV2Info := ListObjectsV2Info{
        IsTruncated:           loi.IsTruncated,
        ContinuationToken:     continuationToken,
        NextContinuationToken: loi.NextMarker,
        Objects:               loi.Objects,
        Prefixes:              loi.Prefixes,
    }
    return listObjectsV2Info, err
}

func (z *erasureServerPools) ListObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) (ListObjectVersionsInfo, error) {
    loi := ListObjectVersionsInfo{}
    if marker == "" && versionMarker != "" {
        return loi, NotImplemented{}
    }

    opts := listPathOptions{
        Bucket:      bucket,
        Prefix:      prefix,
        Separator:   delimiter,
        Limit:       maxKeysPlusOne(maxKeys, marker != ""),
        Marker:      marker,
        InclDeleted: true,
        AskDisks:    globalAPIConfig.getListQuorum(),
    }

    // Shortcut for APN/1.0 Veeam/1.0 Backup/10.0
    // It requests unique blocks with a specific prefix.
    // We skip scanning the parent directory for
    // more objects matching the prefix.
    ri := logger.GetReqInfo(ctx)
    if ri != nil && strings.Contains(ri.UserAgent, `1.0 Veeam/1.0 Backup`) && strings.HasSuffix(prefix, ".blk") {
        opts.discardResult = true
        opts.Transient = true
    }

    merged, err := z.listPath(ctx, opts)
    if err != nil && err != io.EOF {
        return loi, err
    }
    if versionMarker == "" {
        // If we are not looking for a specific version skip it.
        marker, _ = parseMarker(marker)
        merged.forwardPast(marker)
    }
    objects := merged.fileInfoVersions(bucket, prefix, delimiter, versionMarker)
    loi.IsTruncated = err == nil && len(objects) > 0
    if maxKeys > 0 && len(objects) > maxKeys {
        objects = objects[:maxKeys]
        loi.IsTruncated = true
    }
    for _, obj := range objects {
        if obj.IsDir && obj.ModTime.IsZero() && delimiter != "" {
            loi.Prefixes = append(loi.Prefixes, obj.Name)
        } else {
            loi.Objects = append(loi.Objects, obj)
        }
    }
    if loi.IsTruncated {
        last := objects[len(objects)-1]
        loi.NextMarker = encodeMarker(last.Name, merged.listID)
        loi.NextVersionIDMarker = last.VersionID
    }
    return loi, nil
}

func maxKeysPlusOne(maxKeys int, addOne bool) int {
    if maxKeys < 0 || maxKeys > maxObjectList {
        maxKeys = maxObjectList
    }
    if addOne {
        maxKeys++
    }
    return maxKeys
}

func (z *erasureServerPools) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
    var loi ListObjectsInfo

    merged, err := z.listPath(ctx, listPathOptions{
        Bucket:      bucket,
        Prefix:      prefix,
        Separator:   delimiter,
        Limit:       maxKeysPlusOne(maxKeys, marker != ""),
        Marker:      marker,
        InclDeleted: false,
        AskDisks:    globalAPIConfig.getListQuorum(),
    })
    if err != nil && err != io.EOF {
        logger.LogIf(ctx, err)
        return loi, err
    }
    marker, _ = parseMarker(marker)
    merged.forwardPast(marker)

    // Default is recursive; if delimiter is set then list non-recursively.
    objects := merged.fileInfos(bucket, prefix, delimiter)
    loi.IsTruncated = err == nil && len(objects) > 0
    if maxKeys > 0 && len(objects) > maxKeys {
        objects = objects[:maxKeys]
        loi.IsTruncated = true
    }
    for _, obj := range objects {
        if obj.IsDir && obj.ModTime.IsZero() && delimiter != "" {
            loi.Prefixes = append(loi.Prefixes, obj.Name)
        } else {
            loi.Objects = append(loi.Objects, obj)
        }
    }
    if loi.IsTruncated {
        last := objects[len(objects)-1]
        loi.NextMarker = encodeMarker(last.Name, merged.listID)
    }
    return loi, nil
}

func (z *erasureServerPools) ListMultipartUploads(ctx context.Context, bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
    if err := checkListMultipartArgs(ctx, bucket, prefix, keyMarker, uploadIDMarker, delimiter, z); err != nil {
        return ListMultipartsInfo{}, err
    }

    if z.SinglePool() {
        return z.serverPools[0].ListMultipartUploads(ctx, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
    }

    var poolResult = ListMultipartsInfo{}
    poolResult.MaxUploads = maxUploads
    poolResult.KeyMarker = keyMarker
    poolResult.Prefix = prefix
    poolResult.Delimiter = delimiter
    for _, pool := range z.serverPools {
        result, err := pool.ListMultipartUploads(ctx, bucket, prefix, keyMarker, uploadIDMarker,
            delimiter, maxUploads)
        if err != nil {
            return result, err
        }
        poolResult.Uploads = append(poolResult.Uploads, result.Uploads...)
    }
    return poolResult, nil
}

// Initiate a new multipart upload on a hashedSet based on object name.
func (z *erasureServerPools) NewMultipartUpload(ctx context.Context, bucket, object string, opts ObjectOptions) (string, error) {
    if err := checkNewMultipartArgs(ctx, bucket, object, z); err != nil {
        return "", err
    }

    if z.SinglePool() {
        return z.serverPools[0].NewMultipartUpload(ctx, bucket, object, opts)
    }

    for idx, pool := range z.serverPools {
        result, err := pool.ListMultipartUploads(ctx, bucket, object, "", "", "", maxUploadsList)
        if err != nil {
            return "", err
        }
        // If there is a multipart upload with the same bucket/object name,
        // create the new multipart upload in the same pool; this avoids
        // creating two multipart uploads in two different pools.
        if len(result.Uploads) != 0 {
            return z.serverPools[idx].NewMultipartUpload(ctx, bucket, object, opts)
        }
    }

    // We multiply the size by 2 to account for erasure coding.
    idx := z.getAvailablePoolIdx(ctx, (1<<30)*2)
    if idx < 0 {
        return "", toObjectErr(errDiskFull)
    }

    return z.serverPools[idx].NewMultipartUpload(ctx, bucket, object, opts)
}

// Copies a part of an object from source hashedSet to destination hashedSet.
func (z *erasureServerPools) CopyObjectPart(ctx context.Context, srcBucket, srcObject, destBucket, destObject string, uploadID string, partID int, startOffset int64, length int64, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (PartInfo, error) {
    if err := checkNewMultipartArgs(ctx, srcBucket, srcObject, z); err != nil {
        return PartInfo{}, err
    }

    return z.PutObjectPart(ctx, destBucket, destObject, uploadID, partID,
        NewPutObjReader(srcInfo.Reader), dstOpts)
}

// PutObjectPart - writes part of an object to hashedSet based on the object name.
func (z *erasureServerPools) PutObjectPart(ctx context.Context, bucket, object, uploadID string, partID int, data *PutObjReader, opts ObjectOptions) (PartInfo, error) {
    if err := checkPutObjectPartArgs(ctx, bucket, object, z); err != nil {
        return PartInfo{}, err
    }

    if z.SinglePool() {
        return z.serverPools[0].PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts)
    }

    for _, pool := range z.serverPools {
        _, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
        if err == nil {
            return pool.PutObjectPart(ctx, bucket, object, uploadID, partID, data, opts)
        }
        switch err.(type) {
        case InvalidUploadID:
            // Look for information on the next pool
            continue
        }
        // Any other unhandled errors such as quorum return.
        return PartInfo{}, err
    }

    return PartInfo{}, InvalidUploadID{
        Bucket:   bucket,
        Object:   object,
        UploadID: uploadID,
    }
}

func (z *erasureServerPools) GetMultipartInfo(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) (MultipartInfo, error) {
    if err := checkListPartsArgs(ctx, bucket, object, z); err != nil {
        return MultipartInfo{}, err
    }

    if z.SinglePool() {
        return z.serverPools[0].GetMultipartInfo(ctx, bucket, object, uploadID, opts)
    }
    for _, pool := range z.serverPools {
        mi, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
        if err == nil {
            return mi, nil
        }
        switch err.(type) {
        case InvalidUploadID:
            // upload id not found, continue to the next pool.
            continue
        }
        // any other unhandled error return right here.
        return MultipartInfo{}, err
    }
    return MultipartInfo{}, InvalidUploadID{
        Bucket:   bucket,
        Object:   object,
        UploadID: uploadID,
    }
}

// ListObjectParts - lists all uploaded parts to an object in hashedSet.
func (z *erasureServerPools) ListObjectParts(ctx context.Context, bucket, object, uploadID string, partNumberMarker int, maxParts int, opts ObjectOptions) (ListPartsInfo, error) {
    if err := checkListPartsArgs(ctx, bucket, object, z); err != nil {
        return ListPartsInfo{}, err
    }

    if z.SinglePool() {
        return z.serverPools[0].ListObjectParts(ctx, bucket, object, uploadID, partNumberMarker, maxParts, opts)
    }
    for _, pool := range z.serverPools {
        _, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
        if err == nil {
            return pool.ListObjectParts(ctx, bucket, object, uploadID, partNumberMarker, maxParts, opts)
        }
        switch err.(type) {
        case InvalidUploadID:
            continue
        }
        return ListPartsInfo{}, err
    }
    return ListPartsInfo{}, InvalidUploadID{
        Bucket:   bucket,
        Object:   object,
        UploadID: uploadID,
    }
}

// Aborts an in-progress multipart operation on hashedSet based on the object name.
func (z *erasureServerPools) AbortMultipartUpload(ctx context.Context, bucket, object, uploadID string, opts ObjectOptions) error {
    if err := checkAbortMultipartArgs(ctx, bucket, object, z); err != nil {
        return err
    }

    if z.SinglePool() {
        return z.serverPools[0].AbortMultipartUpload(ctx, bucket, object, uploadID, opts)
    }

    for _, pool := range z.serverPools {
        _, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
        if err == nil {
            return pool.AbortMultipartUpload(ctx, bucket, object, uploadID, opts)
        }
        switch err.(type) {
        case InvalidUploadID:
            // upload id not found, move to the next pool
            continue
        }
        return err
    }
    return InvalidUploadID{
        Bucket:   bucket,
        Object:   object,
        UploadID: uploadID,
    }
}

// CompleteMultipartUpload - completes a pending multipart transaction, on hashedSet based on object name.
func (z *erasureServerPools) CompleteMultipartUpload(ctx context.Context, bucket, object, uploadID string, uploadedParts []CompletePart, opts ObjectOptions) (objInfo ObjectInfo, err error) {
    if err = checkCompleteMultipartArgs(ctx, bucket, object, z); err != nil {
        return objInfo, err
    }

    if z.SinglePool() {
        return z.serverPools[0].CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, opts)
    }

    for _, pool := range z.serverPools {
        _, err := pool.GetMultipartInfo(ctx, bucket, object, uploadID, opts)
        if err == nil {
            return pool.CompleteMultipartUpload(ctx, bucket, object, uploadID, uploadedParts, opts)
        }
    }

    return objInfo, InvalidUploadID{
        Bucket:   bucket,
        Object:   object,
        UploadID: uploadID,
    }
}

// GetBucketInfo - returns bucket info from one of the erasure coded serverPools.
func (z *erasureServerPools) GetBucketInfo(ctx context.Context, bucket string) (bucketInfo BucketInfo, err error) {
    if z.SinglePool() {
        bucketInfo, err = z.serverPools[0].GetBucketInfo(ctx, bucket)
        if err != nil {
            return bucketInfo, err
        }
        meta, err := globalBucketMetadataSys.Get(bucket)
        if err == nil {
            bucketInfo.Created = meta.Created
        }
        return bucketInfo, nil
    }
    for _, pool := range z.serverPools {
        bucketInfo, err = pool.GetBucketInfo(ctx, bucket)
        if err != nil {
            if isErrBucketNotFound(err) {
                continue
            }
            return bucketInfo, err
        }
        meta, err := globalBucketMetadataSys.Get(bucket)
        if err == nil {
            bucketInfo.Created = meta.Created
        }
        return bucketInfo, nil
    }
    return bucketInfo, BucketNotFound{
        Bucket: bucket,
    }
}

// IsNotificationSupported returns whether bucket notification is applicable for this layer.
func (z *erasureServerPools) IsNotificationSupported() bool {
    return true
}

// IsListenSupported returns whether listen bucket notification is applicable for this layer.
func (z *erasureServerPools) IsListenSupported() bool {
    return true
}

// IsEncryptionSupported returns whether server side encryption is implemented for this layer.
func (z *erasureServerPools) IsEncryptionSupported() bool {
    return true
}

// IsCompressionSupported returns whether compression is applicable for this layer.
func (z *erasureServerPools) IsCompressionSupported() bool {
    return true
}

// IsTaggingSupported returns whether object tagging is applicable for this layer.
func (z *erasureServerPools) IsTaggingSupported() bool {
    return true
}

// DeleteBucket - deletes a bucket on all serverPools simultaneously;
// even if one of the serverPools fails to delete the bucket, we proceed to
// undo a successful operation.
func (z *erasureServerPools) DeleteBucket(ctx context.Context, bucket string, forceDelete bool) error {
    if z.SinglePool() {
        return z.serverPools[0].DeleteBucket(ctx, bucket, forceDelete)
    }
    g := errgroup.WithNErrs(len(z.serverPools))

    // Delete buckets in parallel across all serverPools.
    for index := range z.serverPools {
        index := index
        g.Go(func() error {
            return z.serverPools[index].DeleteBucket(ctx, bucket, forceDelete)
        }, index)
    }

    errs := g.Wait()

    // For any write quorum failure, we undo all the delete
    // buckets operation by creating all the buckets again.
    for _, err := range errs {
        if err != nil {
            if _, ok := err.(InsufficientWriteQuorum); ok {
                undoDeleteBucketServerPools(ctx, bucket, z.serverPools, errs)
            }

            return err
        }
    }

    // Success.
    return nil
}

// deleteAll will delete a bucket+prefix unconditionally across all disks.
// Note that set distribution is ignored so it should only be used in cases where
// data is not distributed across sets.
// Errors are logged but individual disk failures are not returned.
func (z *erasureServerPools) deleteAll(ctx context.Context, bucket, prefix string) {
    for _, servers := range z.serverPools {
        for _, set := range servers.sets {
            set.deleteAll(ctx, bucket, prefix)
        }
    }
}

// renameAll will rename bucket+prefix unconditionally across all disks to
// minioMetaTmpBucket + unique uuid.
// Note that set distribution is ignored so it should only be used in cases where
// data is not distributed across sets. Errors are logged but individual
// disk failures are not returned.
func (z *erasureServerPools) renameAll(ctx context.Context, bucket, prefix string) {
    for _, servers := range z.serverPools {
        for _, set := range servers.sets {
            set.renameAll(ctx, bucket, prefix)
        }
    }
}

// This function is used to undo a successful DeleteBucket operation.
func undoDeleteBucketServerPools(ctx context.Context, bucket string, serverPools []*erasureSets, errs []error) {
    g := errgroup.WithNErrs(len(serverPools))

    // Undo previous delete bucket on all underlying serverPools.
    for index := range serverPools {
        index := index
        g.Go(func() error {
            if errs[index] == nil {
                return serverPools[index].MakeBucketWithLocation(ctx, bucket, BucketOptions{})
            }
            return nil
        }, index)
    }

    g.Wait()
}

// List all buckets from one of the serverPools, we are not doing merge
// sort here just for simplification. As per design it is assumed
// that all buckets are present on all serverPools.
func (z *erasureServerPools) ListBuckets(ctx context.Context) (buckets []BucketInfo, err error) {
    if z.SinglePool() {
        buckets, err = z.serverPools[0].ListBuckets(ctx)
    } else {
        for _, pool := range z.serverPools {
            buckets, err = pool.ListBuckets(ctx)
            if err != nil {
                logger.LogIf(ctx, err)
                continue
            }
            break
        }
    }
    if err != nil {
        return nil, err
    }
    for i := range buckets {
        meta, err := globalBucketMetadataSys.Get(buckets[i].Name)
        if err == nil {
            buckets[i].Created = meta.Created
        }
    }
    return buckets, nil
}

func (z *erasureServerPools) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
    var err error
    // Acquire lock on format.json
    formatLock := z.NewNSLock(minioMetaBucket, formatConfigFile)
    ctx, err = formatLock.GetLock(ctx, globalOperationTimeout)
    if err != nil {
        return madmin.HealResultItem{}, err
    }
    defer formatLock.Unlock()

    var r = madmin.HealResultItem{
        Type:   madmin.HealItemMetadata,
        Detail: "disk-format",
    }

    var countNoHeal int
    for _, pool := range z.serverPools {
        result, err := pool.HealFormat(ctx, dryRun)
        if err != nil && !errors.Is(err, errNoHealRequired) {
            logger.LogIf(ctx, err)
            continue
        }
        // Count errNoHealRequired across all serverPools,
        // to return appropriate error to the caller
        if errors.Is(err, errNoHealRequired) {
            countNoHeal++
        }
        r.DiskCount += result.DiskCount
        r.SetCount += result.SetCount
        r.Before.Drives = append(r.Before.Drives, result.Before.Drives...)
        r.After.Drives = append(r.After.Drives, result.After.Drives...)
    }

    // No heal returned by all serverPools, return errNoHealRequired
    if countNoHeal == len(z.serverPools) {
        return r, errNoHealRequired
    }

    return r, nil
}

func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
    var r = madmin.HealResultItem{
        Type:   madmin.HealItemBucket,
        Bucket: bucket,
    }

    // Attempt heal on the bucket metadata, ignore any failures
    _, _ = z.HealObject(ctx, minioMetaBucket, pathJoin(bucketConfigPrefix, bucket, bucketMetadataFile), "", opts)

    for _, pool := range z.serverPools {
        result, err := pool.HealBucket(ctx, bucket, opts)
        if err != nil {
            switch err.(type) {
            case BucketNotFound:
                continue
            }
            return result, err
        }
        r.DiskCount += result.DiskCount
        r.SetCount += result.SetCount
        r.Before.Drives = append(r.Before.Drives, result.Before.Drives...)
        r.After.Drives = append(r.After.Drives, result.After.Drives...)
    }

    return r, nil
}

// Walk a bucket, optionally a prefix, recursively, until we have returned
// all the content to the objectInfo channel. It is the caller's responsibility
// to allocate a receive channel for ObjectInfo; upon any unhandled error the
// walker returns an error. Optionally, if context.Done() is received,
// Walk() stops the walker.
func (z *erasureServerPools) Walk(ctx context.Context, bucket, prefix string, results chan<- ObjectInfo, opts ObjectOptions) error {
    if err := checkListObjsArgs(ctx, bucket, prefix, "", z); err != nil {
        // Upon error close the channel.
        close(results)
        return err
    }

    if opts.WalkVersions {
        go func() {
            defer close(results)

            var marker, versionIDMarker string
            for {
                loi, err := z.ListObjectVersions(ctx, bucket, prefix, marker, versionIDMarker, "", 1000)
                if err != nil {
                    break
                }

                for _, obj := range loi.Objects {
                    results <- obj
                }

                if !loi.IsTruncated {
                    break
                }

                marker = loi.NextMarker
                versionIDMarker = loi.NextVersionIDMarker
            }
        }()
        return nil
    }

    go func() {
        defer close(results)

        var marker string
        for {
            loi, err := z.ListObjects(ctx, bucket, prefix, marker, "", 1000)
            if err != nil {
                break
            }

            for _, obj := range loi.Objects {
                results <- obj
            }

            if !loi.IsTruncated {
                break
            }

            marker = loi.NextMarker
        }
    }()

    return nil
}

// HealObjectFn closure function heals the object.
type HealObjectFn func(bucket, object, versionID string) error

func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix string, opts madmin.HealOpts, healObject HealObjectFn) error {
    errCh := make(chan error)
    ctx, cancel := context.WithCancel(ctx)
    go func() {
        defer close(errCh)
        defer cancel()

        for _, erasureSet := range z.serverPools {
            var wg sync.WaitGroup
            for _, set := range erasureSet.sets {
                set := set
                wg.Add(1)
                go func() {
                    defer wg.Done()

                    disks, _ := set.getOnlineDisksWithHealing()
                    if len(disks) == 0 {
                        errCh <- errors.New("HealObjects: No non-healing disks found")
                        cancel()
                        return
                    }

                    healEntry := func(entry metaCacheEntry) {
                        if entry.isDir() {
                            return
                        }
                        // When the bucket is '.minio.sys' we might land at
                        // .metacache, .trash, .multipart entries; there is no
                        // need to heal them, so skip.
                        if bucket == minioMetaBucket {
                            if wildcard.Match("buckets/*/.metacache/*", entry.name) {
                                return
                            }
                            if wildcard.Match("tmp/*", entry.name) {
                                return
                            }
                            if wildcard.Match("multipart/*", entry.name) {
                                return
                            }
                            if wildcard.Match("tmp-old/*", entry.name) {
                                return
                            }
                        }
                        fivs, err := entry.fileInfoVersions(bucket)
                        if err != nil {
                            errCh <- err
                            cancel()
                            return
                        }
                        waitForLowHTTPReq(globalHealConfig.IOCount, globalHealConfig.Sleep)
                        for _, version := range fivs.Versions {
                            if err := healObject(bucket, version.Name, version.VersionID); err != nil {
                                errCh <- err
                                cancel()
                                return
                            }
                        }
                    }

                    // How to resolve partial results.
                    resolver := metadataResolutionParams{
                        dirQuorum: 1,
                        objQuorum: 1,
                        bucket:    bucket,
                    }

                    path := baseDirFromPrefix(prefix)
                    if path == "" {
                        path = prefix
                    }

                    if err := listPathRaw(ctx, listPathRawOptions{
                        disks:          disks,
                        bucket:         bucket,
                        path:           path,
                        recursive:      true,
                        forwardTo:      "",
                        minDisks:       1,
                        reportNotFound: false,
                        agreed:         healEntry,
                        partial: func(entries metaCacheEntries, nAgreed int, errs []error) {
                            entry, ok := entries.resolve(&resolver)
                            if ok {
                                healEntry(*entry)
                            }
                        },
                        finished: nil,
                    }); err != nil {
                        cancel()
                        return
                    }
                }()
            }
            wg.Wait()
        }
    }()
    return <-errCh
}

func (z *erasureServerPools) HealObject(ctx context.Context, bucket, object, versionID string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
    object = encodeDirObject(object)

    for _, pool := range z.serverPools {
        result, err := pool.HealObject(ctx, bucket, object, versionID, opts)
        result.Object = decodeDirObject(result.Object)
        if err != nil {
            if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
                continue
            }
            return result, err
        }
        return result, nil
    }
    if versionID != "" {
        return madmin.HealResultItem{}, VersionNotFound{
            Bucket:    bucket,
            Object:    object,
            VersionID: versionID,
        }
    }
    return madmin.HealResultItem{}, ObjectNotFound{
        Bucket: bucket,
        Object: object,
    }
}

// GetMetrics - returns metrics of local disks
func (z *erasureServerPools) GetMetrics(ctx context.Context) (*BackendMetrics, error) {
    logger.LogIf(ctx, NotImplemented{})
    return &BackendMetrics{}, NotImplemented{}
}
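// getPoolAndSet maps a disk ID to its pool, set and disk index as recorded in
// each pool's format; it returns errDiskNotFound when no pool knows the ID.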
func (z *erasureServerPools) getPoolAndSet(id string) (poolIdx, setIdx, diskIdx int, err error) {
    for poolIdx := range z.serverPools {
        format := z.serverPools[poolIdx].format
        for setIdx, set := range format.Erasure.Sets {
            for i, diskID := range set {
                if diskID == id {
                    return poolIdx, setIdx, i, nil
                }
            }
        }
    }
    return -1, -1, -1, fmt.Errorf("DiskID(%s) %w", id, errDiskNotFound)
}

// HealthOptions takes input options to return specific information
type HealthOptions struct {
    Maintenance bool
}

// HealthResult returns the current state of the system, also
// additionally with any specific heuristic information which
// was queried
type HealthResult struct {
    Healthy       bool
    HealingDrives int
    PoolID, SetID int
    WriteQuorum   int
}

// ReadHealth returns if the cluster can serve read requests
func (z *erasureServerPools) ReadHealth(ctx context.Context) bool {
    erasureSetUpCount := make([][]int, len(z.serverPools))
    for i := range z.serverPools {
        erasureSetUpCount[i] = make([]int, len(z.serverPools[i].sets))
    }

    diskIDs := GlobalNotificationSys.GetLocalDiskIDs(ctx)
    diskIDs = append(diskIDs, getLocalDiskIDs(z))

    for _, localDiskIDs := range diskIDs {
        for _, id := range localDiskIDs {
            poolIdx, setIdx, _, err := z.getPoolAndSet(id)
            if err != nil {
                logger.LogIf(ctx, err)
                continue
            }
            erasureSetUpCount[poolIdx][setIdx]++
        }
    }

    b := z.BackendInfo()
    readQuorum := b.StandardSCData[0]

    for poolIdx := range erasureSetUpCount {
        for setIdx := range erasureSetUpCount[poolIdx] {
            if erasureSetUpCount[poolIdx][setIdx] < readQuorum {
                return false
            }
        }
    }
    return true
}

// Health - returns current status of the object layer health,
// provides if write access exists across sets, additionally
// can be used to query scenarios if health may be lost
// if this node is taken down by an external orchestrator.
func (z *erasureServerPools) Health(ctx context.Context, opts HealthOptions) HealthResult {
    erasureSetUpCount := make([][]int, len(z.serverPools))
    for i := range z.serverPools {
        erasureSetUpCount[i] = make([]int, len(z.serverPools[i].sets))
    }

    diskIDs := GlobalNotificationSys.GetLocalDiskIDs(ctx)
    if !opts.Maintenance {
        diskIDs = append(diskIDs, getLocalDiskIDs(z))
    }

    for _, localDiskIDs := range diskIDs {
        for _, id := range localDiskIDs {
            poolIdx, setIdx, _, err := z.getPoolAndSet(id)
            if err != nil {
                logger.LogIf(ctx, err)
                continue
            }
            erasureSetUpCount[poolIdx][setIdx]++
        }
    }

    reqInfo := (&logger.ReqInfo{}).AppendTags("maintenance", strconv.FormatBool(opts.Maintenance))

    b := z.BackendInfo()
    writeQuorum := b.StandardSCData[0]
    if writeQuorum == b.StandardSCParity {
        writeQuorum++
    }

    var aggHealStateResult madmin.BgHealState
    if opts.Maintenance {
        // check if local disks are being healed; if they are being healed
        // we need to report healthy status as 'false' so that this server
        // is not taken down for maintenance
        var err error
        aggHealStateResult, err = getAggregatedBackgroundHealState(ctx, nil)
        if err != nil {
            logger.LogIf(logger.SetReqInfo(ctx, reqInfo), fmt.Errorf("Unable to verify global heal status: %w", err))
            return HealthResult{
                Healthy: false,
            }
        }

        if len(aggHealStateResult.HealDisks) > 0 {
            logger.LogIf(logger.SetReqInfo(ctx, reqInfo), fmt.Errorf("Total drives to be healed %d", len(aggHealStateResult.HealDisks)))
        }
    }

    for poolIdx := range erasureSetUpCount {
        for setIdx := range erasureSetUpCount[poolIdx] {
            if erasureSetUpCount[poolIdx][setIdx] < writeQuorum {
                logger.LogIf(logger.SetReqInfo(ctx, reqInfo),
                    fmt.Errorf("Write quorum may be lost on pool: %d, set: %d, expected write quorum: %d",
                        poolIdx, setIdx, writeQuorum))
                return HealthResult{
                    Healthy:       false,
                    HealingDrives: len(aggHealStateResult.HealDisks),
                    PoolID:        poolIdx,
                    SetID:         setIdx,
                    WriteQuorum:   writeQuorum,
                }
            }
        }
    }

    // when maintenance is not specified we don't have
    // to look at the healing side of the code.
    if !opts.Maintenance {
        return HealthResult{
            Healthy:     true,
            WriteQuorum: writeQuorum,
        }
    }

    return HealthResult{
        Healthy:       len(aggHealStateResult.HealDisks) == 0,
        HealingDrives: len(aggHealStateResult.HealDisks),
        WriteQuorum:   writeQuorum,
    }
}

// PutObjectMetadata - replace or add metadata on an existing object
func (z *erasureServerPools) PutObjectMetadata(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) {
    object = encodeDirObject(object)
    if z.SinglePool() {
        return z.serverPools[0].PutObjectMetadata(ctx, bucket, object, opts)
    }

    // Find the pool that already holds the object.
    idx, err := z.getPoolIdxExisting(ctx, bucket, object)
    if err != nil {
        return ObjectInfo{}, err
    }

    return z.serverPools[idx].PutObjectMetadata(ctx, bucket, object, opts)
}

// PutObjectTags - replace or add tags to an existing object
func (z *erasureServerPools) PutObjectTags(ctx context.Context, bucket, object string, tags string, opts ObjectOptions) (ObjectInfo, error) {
    object = encodeDirObject(object)
    if z.SinglePool() {
        return z.serverPools[0].PutObjectTags(ctx, bucket, object, tags, opts)
    }

    // Find the pool that already holds the object.
    idx, err := z.getPoolIdxExisting(ctx, bucket, object)
    if err != nil {
        return ObjectInfo{}, err
    }

    return z.serverPools[idx].PutObjectTags(ctx, bucket, object, tags, opts)
}

// DeleteObjectTags - delete object tags from an existing object
func (z *erasureServerPools) DeleteObjectTags(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) {
    object = encodeDirObject(object)
    if z.SinglePool() {
        return z.serverPools[0].DeleteObjectTags(ctx, bucket, object, opts)
    }

    idx, err := z.getPoolIdxExisting(ctx, bucket, object)
    if err != nil {
        return ObjectInfo{}, err
    }

    return z.serverPools[idx].DeleteObjectTags(ctx, bucket, object, opts)
}

// GetObjectTags - get object tags from an existing object
func (z *erasureServerPools) GetObjectTags(ctx context.Context, bucket, object string, opts ObjectOptions) (*tags.Tags, error) {
    object = encodeDirObject(object)
    if z.SinglePool() {
        return z.serverPools[0].GetObjectTags(ctx, bucket, object, opts)
    }

    idx, err := z.getPoolIdxExisting(ctx, bucket, object)
    if err != nil {
        return nil, err
    }

    return z.serverPools[idx].GetObjectTags(ctx, bucket, object, opts)
}