github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/erasure-server-pool-decom.go

// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"context"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"sort"
	"strings"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/minio/madmin-go/v3"
	"github.com/minio/minio/internal/hash"
	"github.com/minio/minio/internal/logger"
	"github.com/minio/pkg/v2/console"
	"github.com/minio/pkg/v2/env"
	"github.com/minio/pkg/v2/workers"
)

// PoolDecommissionInfo captures the currently running decommission's information
type PoolDecommissionInfo struct {
	StartTime   time.Time `json:"startTime" msg:"st"`
	StartSize   int64     `json:"startSize" msg:"ss"`
	TotalSize   int64     `json:"totalSize" msg:"ts"`
	CurrentSize int64     `json:"currentSize" msg:"cs"`

	Complete bool `json:"complete" msg:"cmp"`
	Failed   bool `json:"failed" msg:"fl"`
	Canceled bool `json:"canceled" msg:"cnl"`

	// Internal information.
	QueuedBuckets         []string `json:"-" msg:"bkts"`
	DecommissionedBuckets []string `json:"-" msg:"dbkts"`

	// Last bucket/object decommissioned.
	Bucket string `json:"-" msg:"bkt"`
	// Captures prefix that is currently being
	// decommissioned inside the 'Bucket'
	Prefix string `json:"-" msg:"pfx"`
	Object string `json:"-" msg:"obj"`

	// Verbose information
	ItemsDecommissioned     int64 `json:"objectsDecommissioned" msg:"id"`
	ItemsDecommissionFailed int64 `json:"objectsDecommissionedFailed" msg:"idf"`
	BytesDone               int64 `json:"bytesDecommissioned" msg:"bd"`
	BytesFailed             int64 `json:"bytesDecommissionedFailed" msg:"bf"`
}
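
// For illustration, with the JSON tags above the exported fields serialize
// roughly as follows (values are hypothetical; fields tagged `json:"-"`, i.e.
// the queued buckets and resume markers, never appear in this output):
//
//	{
//	  "startTime": "2024-03-28T21:37:42Z",
//	  "startSize": 1099511627776,
//	  "totalSize": 4398046511104,
//	  "currentSize": 2199023255552,
//	  "complete": false,
//	  "failed": false,
//	  "canceled": false,
//	  "objectsDecommissioned": 15000,
//	  "objectsDecommissionedFailed": 0,
//	  "bytesDecommissioned": 1099511627776,
//	  "bytesDecommissionedFailed": 0
//	}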

// Clone makes a copy of PoolDecommissionInfo
func (pd *PoolDecommissionInfo) Clone() *PoolDecommissionInfo {
	if pd == nil {
		return nil
	}
	return &PoolDecommissionInfo{
		StartTime:               pd.StartTime,
		StartSize:               pd.StartSize,
		TotalSize:               pd.TotalSize,
		CurrentSize:             pd.CurrentSize,
		Complete:                pd.Complete,
		Failed:                  pd.Failed,
		Canceled:                pd.Canceled,
		QueuedBuckets:           pd.QueuedBuckets,
		DecommissionedBuckets:   pd.DecommissionedBuckets,
		Bucket:                  pd.Bucket,
		Prefix:                  pd.Prefix,
		Object:                  pd.Object,
		ItemsDecommissioned:     pd.ItemsDecommissioned,
		ItemsDecommissionFailed: pd.ItemsDecommissionFailed,
		BytesDone:               pd.BytesDone,
		BytesFailed:             pd.BytesFailed,
	}
}

// bucketPop should be called when a bucket is done decommissioning.
// Adds the bucket to the list of decommissioned buckets and updates resume numbers.
func (pd *PoolDecommissionInfo) bucketPop(bucket string) bool {
	pd.DecommissionedBuckets = append(pd.DecommissionedBuckets, bucket)
	for i, b := range pd.QueuedBuckets {
		if b == bucket {
			// Bucket is done.
			pd.QueuedBuckets = append(pd.QueuedBuckets[:i], pd.QueuedBuckets[i+1:]...)
			// Clear tracker info.
			if pd.Bucket == bucket {
				pd.Bucket = "" // empty this out for next bucket
				pd.Prefix = "" // empty this out for the next bucket
				pd.Object = "" // empty this out for next object
			}
			return true
		}
	}
	return false
}

func (pd *PoolDecommissionInfo) isBucketDecommissioned(bucket string) bool {
	for _, b := range pd.DecommissionedBuckets {
		if b == bucket {
			return true
		}
	}
	return false
}

func (pd *PoolDecommissionInfo) bucketPush(bucket decomBucketInfo) {
	for _, b := range pd.QueuedBuckets {
		if pd.isBucketDecommissioned(b) {
			return
		}
		if b == bucket.String() {
			return
		}
	}
	pd.QueuedBuckets = append(pd.QueuedBuckets, bucket.String())
	pd.Bucket = bucket.Name
	pd.Prefix = bucket.Prefix
}

// PoolStatus captures current pool status
type PoolStatus struct {
	ID           int                   `json:"id" msg:"id"`
	CmdLine      string                `json:"cmdline" msg:"cl"`
	LastUpdate   time.Time             `json:"lastUpdate" msg:"lu"`
	Decommission *PoolDecommissionInfo `json:"decommissionInfo,omitempty" msg:"dec"`
}

// Clone returns a copy of PoolStatus
func (ps PoolStatus) Clone() PoolStatus {
	return PoolStatus{
		ID:           ps.ID,
		CmdLine:      ps.CmdLine,
		LastUpdate:   ps.LastUpdate,
		Decommission: ps.Decommission.Clone(),
	}
}

//go:generate msgp -file $GOFILE -unexported
type poolMeta struct {
	Version int          `msg:"v"`
	Pools   []PoolStatus `msg:"pls"`

	// Value should not be saved when we have not loaded anything yet.
	dontSave bool `msg:"-"`
}

// returnResumablePools tells us which pools have a decommission worth
// resuming upon restart of a cluster.
func (p *poolMeta) returnResumablePools() []PoolStatus {
	var newPools []PoolStatus
	for _, pool := range p.Pools {
		if pool.Decommission == nil {
			continue
		}
		if pool.Decommission.Complete || pool.Decommission.Canceled {
			// Do not resume decommission upon startup for
			// - decommission complete
			// - decommission canceled
			continue
		} // In all other situations we need to resume
		newPools = append(newPools, pool)
	}
	return newPools
}

func (p *poolMeta) DecommissionComplete(idx int) bool {
	if p.Pools[idx].Decommission != nil && !p.Pools[idx].Decommission.Complete {
		p.Pools[idx].LastUpdate = UTCNow()
		p.Pools[idx].Decommission.Complete = true
		p.Pools[idx].Decommission.Failed = false
		p.Pools[idx].Decommission.Canceled = false
		return true
	}
	return false
}

func (p *poolMeta) DecommissionFailed(idx int) bool {
	if p.Pools[idx].Decommission != nil && !p.Pools[idx].Decommission.Failed {
		p.Pools[idx].LastUpdate = UTCNow()
		p.Pools[idx].Decommission.StartTime = time.Time{}
		p.Pools[idx].Decommission.Complete = false
		p.Pools[idx].Decommission.Failed = true
		p.Pools[idx].Decommission.Canceled = false
		return true
	}
	return false
}

func (p *poolMeta) DecommissionCancel(idx int) bool {
	if p.Pools[idx].Decommission != nil && !p.Pools[idx].Decommission.Canceled {
		p.Pools[idx].LastUpdate = UTCNow()
		p.Pools[idx].Decommission.StartTime = time.Time{}
		p.Pools[idx].Decommission.Complete = false
		p.Pools[idx].Decommission.Failed = false
		p.Pools[idx].Decommission.Canceled = true
		return true
	}
	return false
}

func (p poolMeta) isBucketDecommissioned(idx int, bucket string) bool {
	return p.Pools[idx].Decommission.isBucketDecommissioned(bucket)
}

func (p *poolMeta) BucketDone(idx int, bucket decomBucketInfo) bool {
	if p.Pools[idx].Decommission == nil {
		// Decommission not in progress.
		return false
	}
	return p.Pools[idx].Decommission.bucketPop(bucket.String())
}

func (p poolMeta) ResumeBucketObject(idx int) (bucket, object string) {
	if p.Pools[idx].Decommission != nil {
		bucket = p.Pools[idx].Decommission.Bucket
		object = p.Pools[idx].Decommission.Object
	}
	return
}

func (p *poolMeta) TrackCurrentBucketObject(idx int, bucket string, object string) {
	if p.Pools[idx].Decommission == nil {
		// Decommission not in progress.
		return
	}
	p.Pools[idx].Decommission.Bucket = bucket
	p.Pools[idx].Decommission.Object = object
}

func (p *poolMeta) PendingBuckets(idx int) []decomBucketInfo {
	if p.Pools[idx].Decommission == nil {
		// Decommission not in progress.
		return nil
	}

	decomBuckets := make([]decomBucketInfo, len(p.Pools[idx].Decommission.QueuedBuckets))
	for i := range decomBuckets {
		bucket, prefix := path2BucketObject(p.Pools[idx].Decommission.QueuedBuckets[i])
		decomBuckets[i] = decomBucketInfo{
			Name:   bucket,
			Prefix: prefix,
		}
	}

	return decomBuckets
}

//msgp:ignore decomBucketInfo
type decomBucketInfo struct {
	Name   string
	Prefix string
}

func (db decomBucketInfo) String() string {
	return pathJoin(db.Name, db.Prefix)
}

func (p *poolMeta) QueueBuckets(idx int, buckets []decomBucketInfo) {
	// add new queued buckets
	for _, bucket := range buckets {
		p.Pools[idx].Decommission.bucketPush(bucket)
	}
}

var (
	errDecommissionAlreadyRunning = errors.New("decommission is already in progress")
	errDecommissionComplete       = errors.New("decommission is complete, please remove the servers from command-line")
	errDecommissionNotStarted     = errors.New("decommission is not in progress")
)

func (p *poolMeta) Decommission(idx int, pi poolSpaceInfo) error {
	// Return an error when there is a decommission ongoing - the user needs
	// to explicitly cancel it first in order to restart decommissioning again.
	if p.Pools[idx].Decommission != nil &&
		!p.Pools[idx].Decommission.Complete &&
		!p.Pools[idx].Decommission.Failed &&
		!p.Pools[idx].Decommission.Canceled {
		return errDecommissionAlreadyRunning
	}

	now := UTCNow()
	p.Pools[idx].LastUpdate = now
	p.Pools[idx].Decommission = &PoolDecommissionInfo{
		StartTime:   now,
		StartSize:   pi.Free,
		CurrentSize: pi.Free,
		TotalSize:   pi.Total,
	}
	return nil
}

func (p poolMeta) IsSuspended(idx int) bool {
	if idx >= len(p.Pools) {
		// We don't really know if the pool is suspended or not, since it doesn't exist.
		return false
	}
	return p.Pools[idx].Decommission != nil
}

func (p *poolMeta) validate(pools []*erasureSets) (bool, error) {
	type poolInfo struct {
		position     int
		completed    bool
		decomStarted bool // started but not finished yet
	}

	rememberedPools := make(map[string]poolInfo)
	for idx, pool := range p.Pools {
		complete := false
		decomStarted := false
		if pool.Decommission != nil {
			if pool.Decommission.Complete {
				complete = true
			}
			decomStarted = true
		}
		rememberedPools[pool.CmdLine] = poolInfo{
			position:     idx,
			completed:    complete,
			decomStarted: decomStarted,
		}
	}

	specifiedPools := make(map[string]int)
	for idx, pool := range pools {
		specifiedPools[pool.endpoints.CmdLine] = idx
	}

	var update bool
	// Check if specified pools need to be removed from decommissioned pool.
	for k := range specifiedPools {
		pi, ok := rememberedPools[k]
		if !ok {
			// we no longer have the pool that we previously remembered; since the
			// CLI checks out, we can allow updates as we are mostly adding a pool here.
			update = true
		}
		if ok && pi.completed {
			return false, fmt.Errorf("pool(%s) = %s is decommissioned, please remove from server command line", humanize.Ordinal(pi.position+1), k)
		}
	}

	if len(specifiedPools) == len(rememberedPools) {
		for k, pi := range rememberedPools {
			pos, ok := specifiedPools[k]
			if ok && pos != pi.position {
				update = true // pool order is changing, it's okay to allow it.
			}
		}
	}

	if !update {
		update = len(specifiedPools) != len(rememberedPools)
	}

	return update, nil
}

func (p *poolMeta) load(ctx context.Context, pool *erasureSets, pools []*erasureSets) error {
	data, err := readConfig(ctx, pool, poolMetaName)
	if err != nil {
		if errors.Is(err, errConfigNotFound) || isErrObjectNotFound(err) {
			return nil
		}
		return err
	}
	if len(data) == 0 {
		// Seems to be empty, create a new poolMeta object.
		return nil
	}
	if len(data) <= 4 {
		return fmt.Errorf("poolMeta: no data")
	}
	// Read header
	switch binary.LittleEndian.Uint16(data[0:2]) {
	case poolMetaFormat:
	default:
		return fmt.Errorf("poolMeta: unknown format: %d", binary.LittleEndian.Uint16(data[0:2]))
	}
	switch binary.LittleEndian.Uint16(data[2:4]) {
	case poolMetaVersion:
	default:
		return fmt.Errorf("poolMeta: unknown version: %d", binary.LittleEndian.Uint16(data[2:4]))
	}

	// OK, parse data.
	if _, err = p.UnmarshalMsg(data[4:]); err != nil {
		return err
	}

	switch p.Version {
	case poolMetaVersionV1:
	default:
		return fmt.Errorf("unexpected pool meta version: %d", p.Version)
	}

	return nil
}

func (p *poolMeta) CountItem(idx int, size int64, failed bool) {
	pd := p.Pools[idx].Decommission
	if pd == nil {
		return
	}
	if failed {
		pd.ItemsDecommissionFailed++
		pd.BytesFailed += size
	} else {
		pd.ItemsDecommissioned++
		pd.BytesDone += size
	}
	p.Pools[idx].Decommission = pd
}

func (p *poolMeta) updateAfter(ctx context.Context, idx int, pools []*erasureSets, duration time.Duration) (bool, error) {
	if p.Pools[idx].Decommission == nil {
		return false, errInvalidArgument
	}
	now := UTCNow()
	if now.Sub(p.Pools[idx].LastUpdate) >= duration {
		if serverDebugLog {
			console.Debugf("decommission: persisting poolMeta on drive: threshold:%s, poolMeta:%#v\n", now.Sub(p.Pools[idx].LastUpdate), p.Pools[idx])
		}
		p.Pools[idx].LastUpdate = now
		if err := p.save(ctx, pools); err != nil {
			return false, err
		}
		return true, nil
	}
	return false, nil
}

func (p poolMeta) save(ctx context.Context, pools []*erasureSets) error {
	if p.dontSave {
		return nil
	}
	data := make([]byte, 4, p.Msgsize()+4)

	// Initialize the header.
	binary.LittleEndian.PutUint16(data[0:2], poolMetaFormat)
	binary.LittleEndian.PutUint16(data[2:4], poolMetaVersion)

	buf, err := p.MarshalMsg(data)
	if err != nil {
		return err
	}

	// Save on all pools to make sure decommissioning of the first pool is allowed.
	for i, eset := range pools {
		if err = saveConfig(ctx, eset, poolMetaName, buf); err != nil {
			if !errors.Is(err, context.Canceled) {
				logger.LogIf(ctx, fmt.Errorf("saving pool.bin for pool index %d failed with: %v", i, err))
			}
			return err
		}
	}
	return nil
}

const (
	poolMetaName      = "pool.bin"
	poolMetaFormat    = 1
	poolMetaVersionV1 = 1
	poolMetaVersion   = poolMetaVersionV1
)
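
// On-disk layout of 'pool.bin' (as written by poolMeta.save and parsed by
// poolMeta.load above):
//
//	offset 0-1: poolMetaFormat  (uint16, little endian)
//	offset 2-3: poolMetaVersion (uint16, little endian)
//	offset 4+ : msgp-encoded poolMeta payload
//
// A minimal decode sketch, mirroring load (illustrative only):
//
//	format := binary.LittleEndian.Uint16(data[0:2])  // must equal poolMetaFormat
//	version := binary.LittleEndian.Uint16(data[2:4]) // must equal poolMetaVersion
//	_, err := p.UnmarshalMsg(data[4:])               // remainder is the poolMeta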

// Init() initializes pools and saves additional information about them
// in 'pool.bin'; this is eventually used for decommissioning the pool.
func (z *erasureServerPools) Init(ctx context.Context) error {
	// Load rebalance metadata if present
	err := z.loadRebalanceMeta(ctx)
	if err != nil {
		return fmt.Errorf("failed to load rebalance data: %w", err)
	}

	// Start rebalance routine
	z.StartRebalance()

	meta := poolMeta{}
	if err := meta.load(ctx, z.serverPools[0], z.serverPools); err != nil {
		return err
	}

	update, err := meta.validate(z.serverPools)
	if err != nil {
		return err
	}

	// if no update is needed, return right away.
	if !update {
		z.poolMetaMutex.Lock()
		z.poolMeta = meta
		z.poolMetaMutex.Unlock()
	} else {
		newMeta := newPoolMeta(z, meta)
		if err = newMeta.save(ctx, z.serverPools); err != nil {
			return err
		}
		z.poolMetaMutex.Lock()
		z.poolMeta = newMeta
		z.poolMetaMutex.Unlock()
	}

	pools := meta.returnResumablePools()
	poolIndices := make([]int, 0, len(pools))
	for _, pool := range pools {
		idx := globalEndpoints.GetPoolIdx(pool.CmdLine)
		if idx == -1 {
			return fmt.Errorf("unexpected state present for decommission status pool(%s) not found", pool.CmdLine)
		}
		poolIndices = append(poolIndices, idx)
	}

	if len(poolIndices) > 0 && globalEndpoints[poolIndices[0]].Endpoints[0].IsLocal {
		go func() {
			r := rand.New(rand.NewSource(time.Now().UnixNano()))
			for {
				if err := z.Decommission(ctx, poolIndices...); err != nil {
					if errors.Is(err, errDecommissionAlreadyRunning) {
						// A previous decommission was found running, restart it.
						for _, idx := range poolIndices {
							z.doDecommissionInRoutine(ctx, idx)
						}
						return
					}
					if configRetriableErrors(err) {
						logger.LogIf(ctx, fmt.Errorf("Unable to resume decommission of pools %v: %w: retrying..", pools, err))
						time.Sleep(time.Second + time.Duration(r.Float64()*float64(5*time.Second)))
						continue
					}
					logger.LogIf(ctx, fmt.Errorf("Unable to resume decommission of pool %v: %w", pools, err))
					return
				}
			}
		}()
	}

	return nil
}

func newPoolMeta(z *erasureServerPools, prevMeta poolMeta) poolMeta {
	newMeta := poolMeta{} // to update, write poolMeta fresh.
	// looks like a new pool was added and we need to update,
	// or this is a fresh installation (or an existing
	// installation with a pool removed)
	newMeta.Version = poolMetaVersion
	for idx, pool := range z.serverPools {
		var skip bool
		for _, currentPool := range prevMeta.Pools {
			// Preserve any current pool status.
			if currentPool.CmdLine == pool.endpoints.CmdLine {
				newMeta.Pools = append(newMeta.Pools, currentPool)
				skip = true
				break
			}
		}
		if skip {
			continue
		}
		newMeta.Pools = append(newMeta.Pools, PoolStatus{
			CmdLine:    pool.endpoints.CmdLine,
			ID:         idx,
			LastUpdate: UTCNow(),
		})
	}
	return newMeta
}

func (z *erasureServerPools) IsDecommissionRunning() bool {
	z.poolMetaMutex.RLock()
	defer z.poolMetaMutex.RUnlock()
	meta := z.poolMeta
	for _, pool := range meta.Pools {
		if pool.Decommission != nil &&
			!pool.Decommission.Complete &&
			!pool.Decommission.Failed &&
			!pool.Decommission.Canceled {
			return true
		}
	}

	return false
}

func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket string, gr *GetObjectReader) (err error) {
	objInfo := gr.ObjInfo

	defer func() {
		gr.Close()
		auditLogDecom(ctx, "DecomCopyData", objInfo.Bucket, objInfo.Name, objInfo.VersionID, err)
	}()

	actualSize, err := objInfo.GetActualSize()
	if err != nil {
		return err
	}

	if objInfo.isMultipart() {
		res, err := z.NewMultipartUpload(ctx, bucket, objInfo.Name, ObjectOptions{
			VersionID:   objInfo.VersionID,
			UserDefined: objInfo.UserDefined,
			NoAuditLog:  true,
		})
		if err != nil {
			return fmt.Errorf("decommissionObject: NewMultipartUpload() %w", err)
		}
		defer z.AbortMultipartUpload(ctx, bucket, objInfo.Name, res.UploadID, ObjectOptions{NoAuditLog: true})
		parts := make([]CompletePart, len(objInfo.Parts))
		for i, part := range objInfo.Parts {
			hr, err := hash.NewReader(ctx, io.LimitReader(gr, part.Size), part.Size, "", "", part.ActualSize)
			if err != nil {
				return fmt.Errorf("decommissionObject: hash.NewReader() %w", err)
			}
			pi, err := z.PutObjectPart(ctx, bucket, objInfo.Name, res.UploadID,
				part.Number,
				NewPutObjReader(hr),
				ObjectOptions{
					PreserveETag: part.ETag, // Preserve original ETag to ensure same metadata.
					IndexCB: func() []byte {
						return part.Index // Preserve part Index to ensure decompression works.
					},
					NoAuditLog: true,
				})
			if err != nil {
				return fmt.Errorf("decommissionObject: PutObjectPart() %w", err)
			}
			parts[i] = CompletePart{
				ETag:           pi.ETag,
				PartNumber:     pi.PartNumber,
				ChecksumCRC32:  pi.ChecksumCRC32,
				ChecksumCRC32C: pi.ChecksumCRC32C,
				ChecksumSHA256: pi.ChecksumSHA256,
				ChecksumSHA1:   pi.ChecksumSHA1,
			}
		}
		_, err = z.CompleteMultipartUpload(ctx, bucket, objInfo.Name, res.UploadID, parts, ObjectOptions{
			DataMovement: true,
			MTime:        objInfo.ModTime,
			NoAuditLog:   true,
		})
		if err != nil {
			err = fmt.Errorf("decommissionObject: CompleteMultipartUpload() %w", err)
		}
		return err
	}

	hr, err := hash.NewReader(ctx, io.LimitReader(gr, objInfo.Size), objInfo.Size, "", "", actualSize)
	if err != nil {
		return fmt.Errorf("decommissionObject: hash.NewReader() %w", err)
	}

	_, err = z.PutObject(ctx,
		bucket,
		objInfo.Name,
		NewPutObjReader(hr),
		ObjectOptions{
			DataMovement: true,
			VersionID:    objInfo.VersionID,
			MTime:        objInfo.ModTime,
			UserDefined:  objInfo.UserDefined,
			PreserveETag: objInfo.ETag, // Preserve original ETag to ensure same metadata.
			IndexCB: func() []byte {
				return objInfo.Parts[0].Index // Preserve part Index to ensure decompression works.
			},
			NoAuditLog: true,
		})
	if err != nil {
		err = fmt.Errorf("decommissionObject: PutObject() %w", err)
	}
	return err
}

// versionsSorter sorts FileInfo slices by version.
//
//msgp:ignore versionsSorter
type versionsSorter []FileInfo

func (v versionsSorter) reverse() {
	sort.Slice(v, func(i, j int) bool {
		return v[i].ModTime.Before(v[j].ModTime)
	})
}

func (set *erasureObjects) listObjectsToDecommission(ctx context.Context, bi decomBucketInfo, fn func(entry metaCacheEntry)) error {
	disks, _ := set.getOnlineDisksWithHealing(false)
	if len(disks) == 0 {
		return fmt.Errorf("no online drives found for set with endpoints %s", set.getEndpoints())
	}

	// However many we ask, versions must exist on ~50%
	listingQuorum := (set.setDriveCount + 1) / 2

	// How to resolve partial results.
	resolver := metadataResolutionParams{
		dirQuorum: listingQuorum, // make sure to capture all quorum ratios
		objQuorum: listingQuorum, // make sure to capture all quorum ratios
		bucket:    bi.Name,
	}

	err := listPathRaw(ctx, listPathRawOptions{
		disks:          disks,
		bucket:         bi.Name,
		path:           bi.Prefix,
		recursive:      true,
		forwardTo:      "",
		minDisks:       listingQuorum,
		reportNotFound: false,
		agreed:         fn,
		partial: func(entries metaCacheEntries, _ []error) {
			entry, ok := entries.resolve(&resolver)
			if ok {
				fn(*entry)
			}
		},
		finished: nil,
	})
	return err
}

func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool *erasureSets, bi decomBucketInfo) error {
	ctx = logger.SetReqInfo(ctx, &logger.ReqInfo{})

	const envDecomWorkers = "_MINIO_DECOMMISSION_WORKERS"
	workerSize, err := env.GetInt(envDecomWorkers, len(pool.sets))
	if err != nil {
		logger.LogIf(ctx, fmt.Errorf("invalid workers value err: %v, defaulting to %d", err, len(pool.sets)))
		workerSize = len(pool.sets)
	}

	// Each decom worker needs one List() goroutine/worker;
	// add that many extra workers.
	workerSize += len(pool.sets)

	wk, err := workers.New(workerSize)
	if err != nil {
		return err
	}

	vc, _ := globalBucketVersioningSys.Get(bi.Name)

	// Check if the current bucket has a configured lifecycle policy
	lc, _ := globalLifecycleSys.Get(bi.Name)

	// Check if bucket is object locked.
	lr, _ := globalBucketObjectLockSys.Get(bi.Name)
	rcfg, _ := getReplicationConfig(ctx, bi.Name)

	for setIdx, set := range pool.sets {
		set := set

		filterLifecycle := func(bucket, object string, fi FileInfo) bool {
			if lc == nil {
				return false
			}
			versioned := vc != nil && vc.Versioned(object)
			objInfo := fi.ToObjectInfo(bucket, object, versioned)

			evt := evalActionFromLifecycle(ctx, *lc, lr, rcfg, objInfo)
			switch {
			case evt.Action.DeleteRestored(): // if restored copy has expired, delete it synchronously
				applyExpiryOnTransitionedObject(ctx, z, objInfo, evt, lcEventSrc_Decom)
				return false
			case evt.Action.Delete():
				globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_Decom)
				return true
			default:
				return false
			}
		}

		decommissionEntry := func(entry metaCacheEntry) {
			defer wk.Give()

			if entry.isDir() {
				return
			}

			fivs, err := entry.fileInfoVersions(bi.Name)
			if err != nil {
				return
			}

			// We need a reversed order for decommissioning,
			// to create the appropriate stack.
			versionsSorter(fivs.Versions).reverse()

			var decommissioned, expired int
			for _, version := range fivs.Versions {
				stopFn := globalDecommissionMetrics.log(decomMetricDecommissionObject, idx, bi.Name, version.Name, version.VersionID)
				// Apply lifecycle rules on the objects that are expired.
				if filterLifecycle(bi.Name, version.Name, version) {
					expired++
					decommissioned++
					stopFn(errors.New("ILM expired object/version will be skipped"))
					continue
				}

				// any object with only a single DELETE marker we don't need
				// to decommission, just skip it; this also includes
				// any other versions that have already expired.
				remainingVersions := len(fivs.Versions) - expired
				if version.Deleted && remainingVersions == 1 {
					decommissioned++
					stopFn(errors.New("DELETE marked object with no other non-current versions will be skipped"))
					continue
				}

				versionID := version.VersionID
				if versionID == "" {
					versionID = nullVersionID
				}

				if version.Deleted {
					_, err := z.DeleteObject(ctx,
						bi.Name,
						version.Name,
						ObjectOptions{
							// Since we are preserving a delete marker, we have to make sure this is always true;
							// regardless of the current configuration of the bucket, we must preserve all versions
							// on the pool being decommissioned.
							Versioned:          true,
							VersionID:          versionID,
							MTime:              version.ModTime,
							DeleteReplication:  version.ReplicationState,
							DeleteMarker:       true, // make sure we create a delete marker
							SkipDecommissioned: true, // make sure we skip the decommissioned pool
							NoAuditLog:         true,
						})
					var failure bool
					if err != nil {
						if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
							err = nil
						}
					}
					stopFn(err)
					if err != nil {
						logger.LogIf(ctx, err)
						failure = true
					}
					z.poolMetaMutex.Lock()
					z.poolMeta.CountItem(idx, 0, failure)
					z.poolMetaMutex.Unlock()
					if !failure {
						// Success, keep a count.
						decommissioned++
					}
					auditLogDecom(ctx, "DecomCopyDeleteMarker", bi.Name, version.Name, versionID, err)
					continue
				}

				var failure, ignore bool
				// gr.Close() is ensured by decommissionObject().
				for try := 0; try < 3; try++ {
					if version.IsRemote() {
						if err := z.DecomTieredObject(ctx, bi.Name, version.Name, version, ObjectOptions{
							VersionID:   versionID,
							MTime:       version.ModTime,
							UserDefined: version.Metadata,
						}); err != nil {
							stopFn(err)
							failure = true
							logger.LogIf(ctx, err)
							continue
						}
						stopFn(nil)
						failure = false
						break
					}
					gr, err := set.GetObjectNInfo(ctx,
						bi.Name,
						encodeDirObject(version.Name),
						nil,
						http.Header{},
						ObjectOptions{
							VersionID:    versionID,
							NoDecryption: true,
							NoLock:       true,
							NoAuditLog:   true,
						})
					if isErrObjectNotFound(err) || isErrVersionNotFound(err) {
						// object deleted by the application, nothing to do here; we move on.
						ignore = true
						stopFn(nil)
						break
					}
					if err != nil && !ignore {
						// if usage-cache.bin is not readable, log and ignore it.
						if bi.Name == minioMetaBucket && strings.Contains(version.Name, dataUsageCacheName) {
							ignore = true
							stopFn(err)
							logger.LogIf(ctx, err)
							break
						}
					}
					if err != nil {
						failure = true
						logger.LogIf(ctx, err)
						stopFn(err)
						continue
					}
					if err = z.decommissionObject(ctx, bi.Name, gr); err != nil {
						stopFn(err)
						failure = true
						logger.LogIf(ctx, err)
						continue
					}
					stopFn(nil)
					failure = false
					break
				}
				if ignore {
					continue
				}
				z.poolMetaMutex.Lock()
				z.poolMeta.CountItem(idx, version.Size, failure)
				z.poolMetaMutex.Unlock()
				if failure {
					break // break out on first error
				}
				decommissioned++
			}

			// if all versions were decommissioned, then we can delete the object versions.
			if decommissioned == len(fivs.Versions) {
				stopFn := globalDecommissionMetrics.log(decomMetricDecommissionRemoveObject, idx, bi.Name, entry.name)
				_, err := set.DeleteObject(ctx,
					bi.Name,
					encodeDirObject(entry.name),
					ObjectOptions{
						DeletePrefix:       true, // use prefix delete to delete all versions at once.
						DeletePrefixObject: true, // use prefix delete on exact object (this is an optimization to avoid fan-out calls)
						NoAuditLog:         true,
					},
				)
				stopFn(err)
				auditLogDecom(ctx, "DecomDeleteObject", bi.Name, entry.name, "", err)
				if err != nil {
					logger.LogIf(ctx, err)
				}
			}
			z.poolMetaMutex.Lock()
			z.poolMeta.TrackCurrentBucketObject(idx, bi.Name, entry.name)
			ok, err := z.poolMeta.updateAfter(ctx, idx, z.serverPools, 30*time.Second)
			logger.LogIf(ctx, err)
			if ok {
				globalNotificationSys.ReloadPoolMeta(ctx)
			}
			z.poolMetaMutex.Unlock()
		}

		wk.Take()
		go func(setIdx int) {
			defer wk.Give()
			// We will perpetually retry listing if it fails, since we cannot
			// possibly give up in this matter
			for {
				if contextCanceled(ctx) {
					break
				}

				err := set.listObjectsToDecommission(ctx, bi,
					func(entry metaCacheEntry) {
						wk.Take()
						go decommissionEntry(entry)
					},
				)
				if err == nil || errors.Is(err, context.Canceled) {
					break
				}
				setN := humanize.Ordinal(setIdx + 1)
				retryDur := time.Duration(rand.Float64() * float64(5*time.Second))
				logger.LogOnceIf(ctx, fmt.Errorf("listing objects from %s set failed with %v, retrying in %v", setN, err, retryDur), "decom-listing-failed"+setN)
				time.Sleep(retryDur)
			}
		}(setIdx)
	}
	wk.Wait()
	return nil
}

//msgp:ignore decomMetrics
type decomMetrics struct{}

var globalDecommissionMetrics decomMetrics

//msgp:ignore decomMetric
//go:generate stringer -type=decomMetric -trimprefix=decomMetric $GOFILE
type decomMetric uint8

const (
	decomMetricDecommissionBucket decomMetric = iota
	decomMetricDecommissionObject
	decomMetricDecommissionRemoveObject
)

func decomTrace(d decomMetric, poolIdx int, startTime time.Time, duration time.Duration, path string, err error) madmin.TraceInfo {
	var errStr string
	if err != nil {
		errStr = err.Error()
	}
	return madmin.TraceInfo{
		TraceType: madmin.TraceDecommission,
		Time:      startTime,
		NodeName:  globalLocalNodeName,
		FuncName:  fmt.Sprintf("decommission.%s (pool-id=%d)", d.String(), poolIdx),
		Duration:  duration,
		Path:      path,
		Error:     errStr,
	}
}
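
// For reference, trace entries published via decomTrace carry function names
// of the form "decommission.<metric> (pool-id=<idx>)", where <metric> is the
// decomMetric stringer value with the "decomMetric" prefix trimmed, e.g.
// (hypothetical pool index 0):
//
//	decommission.DecommissionObject (pool-id=0)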

func (m *decomMetrics) log(d decomMetric, poolIdx int, paths ...string) func(err error) {
	startTime := time.Now()
	return func(err error) {
		duration := time.Since(startTime)
		if globalTrace.NumSubscribers(madmin.TraceDecommission) > 0 {
			globalTrace.Publish(decomTrace(d, poolIdx, startTime, duration, strings.Join(paths, " "), err))
		}
	}
}

func (z *erasureServerPools) decommissionInBackground(ctx context.Context, idx int) error {
	pool := z.serverPools[idx]
	z.poolMetaMutex.RLock()
	pending := z.poolMeta.PendingBuckets(idx)
	z.poolMetaMutex.RUnlock()

	for _, bucket := range pending {
		z.poolMetaMutex.RLock()
		isDecommissioned := z.poolMeta.isBucketDecommissioned(idx, bucket.String())
		z.poolMetaMutex.RUnlock()
		if isDecommissioned {
			if serverDebugLog {
				console.Debugln("decommission: already done, moving on", bucket)
			}

			z.poolMetaMutex.Lock()
			if z.poolMeta.BucketDone(idx, bucket) {
				// remove from pendingBuckets and persist.
				logger.LogIf(ctx, z.poolMeta.save(ctx, z.serverPools))
			}
			z.poolMetaMutex.Unlock()
			continue
		}
		if serverDebugLog {
			console.Debugln("decommission: currently on bucket", bucket.Name)
		}
		stopFn := globalDecommissionMetrics.log(decomMetricDecommissionBucket, idx, bucket.Name)
		if err := z.decommissionPool(ctx, idx, pool, bucket); err != nil {
			stopFn(err)
			return err
		}
		stopFn(nil)

		z.poolMetaMutex.Lock()
		if z.poolMeta.BucketDone(idx, bucket) {
			logger.LogIf(ctx, z.poolMeta.save(ctx, z.serverPools))
		}
		z.poolMetaMutex.Unlock()
	}
	return nil
}

func (z *erasureServerPools) checkAfterDecom(ctx context.Context, idx int) error {
	buckets, err := z.getBucketsToDecommission(ctx)
	if err != nil {
		return err
	}

	pool := z.serverPools[idx]
	for _, set := range pool.sets {
		for _, bi := range buckets {
			vc, _ := globalBucketVersioningSys.Get(bi.Name)

			// Check if the current bucket has a configured lifecycle policy
			lc, _ := globalLifecycleSys.Get(bi.Name)

			// Check if bucket is object locked.
			lr, _ := globalBucketObjectLockSys.Get(bi.Name)
			rcfg, _ := getReplicationConfig(ctx, bi.Name)

			filterLifecycle := func(bucket, object string, fi FileInfo) bool {
				if lc == nil {
					return false
				}
				versioned := vc != nil && vc.Versioned(object)
				objInfo := fi.ToObjectInfo(bucket, object, versioned)

				evt := evalActionFromLifecycle(ctx, *lc, lr, rcfg, objInfo)
				switch {
				case evt.Action.DeleteRestored(): // if restored copy has expired, delete it synchronously
					applyExpiryOnTransitionedObject(ctx, z, objInfo, evt, lcEventSrc_Decom)
					return false
				case evt.Action.Delete():
					globalExpiryState.enqueueByDays(objInfo, evt, lcEventSrc_Decom)
					return true
				default:
					return false
				}
			}

			var versionsFound int
			err := set.listObjectsToDecommission(ctx, bi, func(entry metaCacheEntry) {
				if !entry.isObject() {
					return
				}

				fivs, err := entry.fileInfoVersions(bi.Name)
				if err != nil {
					return
				}

				// We need a reversed order for decommissioning,
				// to create the appropriate stack.
				versionsSorter(fivs.Versions).reverse()

				for _, version := range fivs.Versions {
					// Apply lifecycle rules on the objects that are expired.
					if filterLifecycle(bi.Name, version.Name, version) {
						continue
					}

					// `.usage-cache.bin` still exists, must not be readable; ignore it.
					if bi.Name == minioMetaBucket && strings.Contains(version.Name, dataUsageCacheName) {
						// skipping bucket usage cache name, as it's autogenerated.
						continue
					}

					versionsFound++
				}
			})
			if err != nil {
				return err
			}

			if versionsFound > 0 {
				return fmt.Errorf("at least %d object(s)/version(s) were found in bucket `%s` after decommissioning", versionsFound, bi.Name)
			}
		}
	}

	return nil
}

func (z *erasureServerPools) doDecommissionInRoutine(ctx context.Context, idx int) {
	z.poolMetaMutex.Lock()
	var dctx context.Context
	dctx, z.decommissionCancelers[idx] = context.WithCancel(GlobalContext)
	z.poolMetaMutex.Unlock()

	// Generate an empty request info so it can be directly modified later by audit
	dctx = logger.SetReqInfo(dctx, &logger.ReqInfo{})

	if err := z.decommissionInBackground(dctx, idx); err != nil {
		logger.LogIf(GlobalContext, err)
		logger.LogIf(GlobalContext, z.DecommissionFailed(dctx, idx))
		return
	}

	z.poolMetaMutex.Lock()
	failed := z.poolMeta.Pools[idx].Decommission.ItemsDecommissionFailed > 0 || contextCanceled(dctx)
	poolCmdLine := z.poolMeta.Pools[idx].CmdLine
	z.poolMetaMutex.Unlock()

	if !failed {
		logger.Event(dctx, "Decommissioning complete for pool '%s', verifying for any pending objects", poolCmdLine)
		err := z.checkAfterDecom(dctx, idx)
		if err != nil {
			logger.LogIf(ctx, err)
			failed = true
		}
	}

	if failed {
		// Decommission failed, indicate as such.
		logger.LogIf(GlobalContext, z.DecommissionFailed(dctx, idx))
	} else {
		// Complete the decommission.
		logger.LogIf(GlobalContext, z.CompleteDecommission(dctx, idx))
	}
}

func (z *erasureServerPools) IsSuspended(idx int) bool {
	z.poolMetaMutex.RLock()
	defer z.poolMetaMutex.RUnlock()
	return z.poolMeta.IsSuspended(idx)
}

// Decommission - start decommission session.
func (z *erasureServerPools) Decommission(ctx context.Context, indices ...int) error {
	if len(indices) == 0 {
		return errInvalidArgument
	}

	if z.SinglePool() {
		return errInvalidArgument
	}

	// Make pool unwritable before decommissioning.
	if err := z.StartDecommission(ctx, indices...); err != nil {
		return err
	}

	go func() {
		for _, idx := range indices {
			// decommission all pools serially one after
			// the other.
			z.doDecommissionInRoutine(ctx, idx)
		}
	}()

	// Successfully started decommissioning.
	return nil
}

type decomError struct {
	Err string
}

func (d decomError) Error() string {
	return d.Err
}

type poolSpaceInfo struct {
	Free  int64
	Total int64
	Used  int64
}

func (z *erasureServerPools) getDecommissionPoolSpaceInfo(idx int) (pi poolSpaceInfo, err error) {
	if idx < 0 {
		return pi, errInvalidArgument
	}
	if idx+1 > len(z.serverPools) {
		return pi, errInvalidArgument
	}

	info := z.serverPools[idx].StorageInfo(context.Background())
	info.Backend = z.BackendInfo()

	usableTotal := int64(GetTotalUsableCapacity(info.Disks, info))
	usableFree := int64(GetTotalUsableCapacityFree(info.Disks, info))
	return poolSpaceInfo{
		Total: usableTotal,
		Free:  usableFree,
		Used:  usableTotal - usableFree,
	}, nil
}

func (z *erasureServerPools) Status(ctx context.Context, idx int) (PoolStatus, error) {
	if idx < 0 {
		return PoolStatus{}, errInvalidArgument
	}

	pi, err := z.getDecommissionPoolSpaceInfo(idx)
	if err != nil {
		return PoolStatus{}, err
	}

	z.poolMetaMutex.RLock()
	defer z.poolMetaMutex.RUnlock()

	poolInfo := z.poolMeta.Pools[idx].Clone()
	if poolInfo.Decommission != nil {
		poolInfo.Decommission.TotalSize = pi.Total
		if poolInfo.Decommission.Failed || poolInfo.Decommission.Canceled {
			poolInfo.Decommission.CurrentSize = pi.Free
		} else {
			poolInfo.Decommission.CurrentSize = poolInfo.Decommission.StartSize + poolInfo.Decommission.BytesDone
		}
	} else {
		poolInfo.Decommission = &PoolDecommissionInfo{
			TotalSize:   pi.Total,
			CurrentSize: pi.Free,
		}
	}
	return poolInfo, nil
}

func (z *erasureServerPools) ReloadPoolMeta(ctx context.Context) (err error) {
	meta := poolMeta{}

	if err = meta.load(ctx, z.serverPools[0], z.serverPools); err != nil {
		return err
	}

	z.poolMetaMutex.Lock()
	defer z.poolMetaMutex.Unlock()

	z.poolMeta = meta
	return nil
}

func (z *erasureServerPools) DecommissionCancel(ctx context.Context, idx int) (err error) {
	if idx < 0 {
		return errInvalidArgument
	}

	if z.SinglePool() {
		return errInvalidArgument
	}

	z.poolMetaMutex.Lock()
	defer z.poolMetaMutex.Unlock()

	fn := z.decommissionCancelers[idx]
	if fn == nil {
		// canceling a decommission before it started returns an error.
		return errDecommissionNotStarted
	}

	defer fn() // cancel any active thread.

	if z.poolMeta.DecommissionCancel(idx) {
		if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
			return err
		}
		globalNotificationSys.ReloadPoolMeta(ctx)
	}

	return nil
}

func (z *erasureServerPools) DecommissionFailed(ctx context.Context, idx int) (err error) {
	if idx < 0 {
		return errInvalidArgument
	}

	if z.SinglePool() {
		return errInvalidArgument
	}

	z.poolMetaMutex.Lock()
	defer z.poolMetaMutex.Unlock()

	if z.poolMeta.DecommissionFailed(idx) {
		if fn := z.decommissionCancelers[idx]; fn != nil {
			defer fn()
		} // cancel any active thread.

		if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
			return err
		}
		globalNotificationSys.ReloadPoolMeta(ctx)
	}
	return nil
}

func (z *erasureServerPools) CompleteDecommission(ctx context.Context, idx int) (err error) {
	if idx < 0 {
		return errInvalidArgument
	}

	if z.SinglePool() {
		return errInvalidArgument
	}

	z.poolMetaMutex.Lock()
	defer z.poolMetaMutex.Unlock()

	if z.poolMeta.DecommissionComplete(idx) {
		if fn := z.decommissionCancelers[idx]; fn != nil {
			defer fn()
		} // cancel any active thread.

		if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
			return err
		}
		globalNotificationSys.ReloadPoolMeta(ctx)
	}
	return nil
}

func (z *erasureServerPools) getBucketsToDecommission(ctx context.Context) ([]decomBucketInfo, error) {
	buckets, err := z.ListBuckets(ctx, BucketOptions{})
	if err != nil {
		return nil, err
	}

	decomBuckets := make([]decomBucketInfo, len(buckets))
	for i := range buckets {
		decomBuckets[i] = decomBucketInfo{
			Name: buckets[i].Name,
		}
	}

	// Bucket data is dispersed across multiple zones/sets, make
	// sure to decommission the necessary metadata.
	decomBuckets = append(decomBuckets, decomBucketInfo{
		Name:   minioMetaBucket,
		Prefix: minioConfigPrefix,
	})
	decomBuckets = append(decomBuckets, decomBucketInfo{
		Name:   minioMetaBucket,
		Prefix: bucketMetaPrefix,
	})

	return decomBuckets, nil
}

func (z *erasureServerPools) StartDecommission(ctx context.Context, indices ...int) (err error) {
	if len(indices) == 0 {
		return errInvalidArgument
	}

	if z.SinglePool() {
		return errInvalidArgument
	}

	decomBuckets, err := z.getBucketsToDecommission(ctx)
	if err != nil {
		return err
	}

	for _, bucket := range decomBuckets {
		z.HealBucket(ctx, bucket.Name, madmin.HealOpts{})
	}

	// Create .minio.sys/config, .minio.sys/buckets paths if missing;
	// this code is present to avoid any missing meta buckets on other
	// pools.
	for _, metaBucket := range []string{
		pathJoin(minioMetaBucket, minioConfigPrefix),
		pathJoin(minioMetaBucket, bucketMetaPrefix),
	} {
		var bucketExists BucketExists
		if err = z.MakeBucket(ctx, metaBucket, MakeBucketOptions{}); err != nil {
			if !errors.As(err, &bucketExists) {
				return err
			}
		}
	}

	z.poolMetaMutex.Lock()
	defer z.poolMetaMutex.Unlock()

	for _, idx := range indices {
		pi, err := z.getDecommissionPoolSpaceInfo(idx)
		if err != nil {
			return err
		}

		if err = z.poolMeta.Decommission(idx, pi); err != nil {
			return err
		}

		z.poolMeta.QueueBuckets(idx, decomBuckets)
	}

	if err = z.poolMeta.save(ctx, z.serverPools); err != nil {
		return err
	}

	globalNotificationSys.ReloadPoolMeta(ctx)

	return nil
}

func auditLogDecom(ctx context.Context, apiName, bucket, object, versionID string, err error) {
	errStr := ""
	if err != nil {
		errStr = err.Error()
	}
	auditLogInternal(ctx, AuditLogOptions{
		Event:     "decommission",
		APIName:   apiName,
		Bucket:    bucket,
		Object:    object,
		VersionID: versionID,
		Error:     errStr,
	})
}