github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/format-erasure.go (about) 1 // Copyright (c) 2015-2024 MinIO, Inc. 2 // 3 // This file is part of MinIO Object Storage stack 4 // 5 // This program is free software: you can redistribute it and/or modify 6 // it under the terms of the GNU Affero General Public License as published by 7 // the Free Software Foundation, either version 3 of the License, or 8 // (at your option) any later version. 9 // 10 // This program is distributed in the hope that it will be useful 11 // but WITHOUT ANY WARRANTY; without even the implied warranty of 12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 // GNU Affero General Public License for more details. 14 // 15 // You should have received a copy of the GNU Affero General Public License 16 // along with this program. If not, see <http://www.gnu.org/licenses/>. 17 18 package cmd 19 20 import ( 21 "context" 22 "encoding/json" 23 "errors" 24 "fmt" 25 "io/fs" 26 "os" 27 "reflect" 28 "sync" 29 30 "github.com/dustin/go-humanize" 31 jsoniter "github.com/json-iterator/go" 32 "github.com/minio/minio/internal/color" 33 "github.com/minio/minio/internal/config" 34 "github.com/minio/minio/internal/config/storageclass" 35 xioutil "github.com/minio/minio/internal/ioutil" 36 "github.com/minio/minio/internal/logger" 37 "github.com/minio/pkg/v2/sync/errgroup" 38 ) 39 40 const ( 41 // Represents Erasure backend. 42 formatBackendErasure = "xl" 43 44 // Represents Erasure backend - single drive 45 formatBackendErasureSingle = "xl-single" 46 47 // formatErasureV1.Erasure.Version - version '1'. 48 formatErasureVersionV1 = "1" 49 50 // formatErasureV2.Erasure.Version - version '2'. 51 formatErasureVersionV2 = "2" 52 53 // formatErasureV3.Erasure.Version - version '3'. 54 formatErasureVersionV3 = "3" 55 56 // Distribution algorithm used, legacy 57 formatErasureVersionV2DistributionAlgoV1 = "CRCMOD" 58 59 // Distributed algorithm used, with N/2 default parity 60 formatErasureVersionV3DistributionAlgoV2 = "SIPMOD" 61 62 // Distributed algorithm used, with EC:4 default parity 63 formatErasureVersionV3DistributionAlgoV3 = "SIPMOD+PARITY" 64 ) 65 66 // Offline disk UUID represents an offline disk. 67 const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff" 68 69 // Used to detect the version of "xl" format. 70 type formatErasureVersionDetect struct { 71 Erasure struct { 72 Version string `json:"version"` 73 } `json:"xl"` 74 } 75 76 // Represents the V1 backend disk structure version 77 // under `.minio.sys` and actual data namespace. 78 // formatErasureV1 - structure holds format config version '1'. 79 type formatErasureV1 struct { 80 formatMetaV1 81 Erasure struct { 82 Version string `json:"version"` // Version of 'xl' format. 83 Disk string `json:"drive"` // Disk field carries assigned disk uuid. 84 // JBOD field carries the input disk order generated the first 85 // time when fresh disks were supplied. 86 JBOD []string `json:"jbod"` 87 } `json:"xl"` // Erasure field holds xl format. 88 } 89 90 // Represents the V2 backend disk structure version 91 // under `.minio.sys` and actual data namespace. 92 // formatErasureV2 - structure holds format config version '2'. 93 // The V2 format to support "large bucket" support where a bucket 94 // can span multiple erasure sets. 95 type formatErasureV2 struct { 96 formatMetaV1 97 Erasure struct { 98 Version string `json:"version"` // Version of 'xl' format. 99 This string `json:"this"` // This field carries assigned disk uuid. 100 // Sets field carries the input disk order generated the first 101 // time when fresh disks were supplied, it is a two dimensional 102 // array second dimension represents list of disks used per set. 103 Sets [][]string `json:"sets"` 104 // Distribution algorithm represents the hashing algorithm 105 // to pick the right set index for an object. 106 DistributionAlgo string `json:"distributionAlgo"` 107 } `json:"xl"` 108 } 109 110 // formatErasureV3 struct is same as formatErasureV2 struct except that formatErasureV3.Erasure.Version is "3" indicating 111 // the simplified multipart backend which is a flat hierarchy now. 112 // In .minio.sys/multipart we have: 113 // sha256(bucket/object)/uploadID/[xl.meta, part.1, part.2 ....] 114 type formatErasureV3 struct { 115 formatMetaV1 116 Erasure struct { 117 Version string `json:"version"` // Version of 'xl' format. 118 This string `json:"this"` // This field carries assigned disk uuid. 119 // Sets field carries the input disk order generated the first 120 // time when fresh disks were supplied, it is a two dimensional 121 // array second dimension represents list of disks used per set. 122 Sets [][]string `json:"sets"` 123 // Distribution algorithm represents the hashing algorithm 124 // to pick the right set index for an object. 125 DistributionAlgo string `json:"distributionAlgo"` 126 } `json:"xl"` 127 Info DiskInfo `json:"-"` 128 } 129 130 func (f *formatErasureV3) Drives() (drives int) { 131 for _, set := range f.Erasure.Sets { 132 drives += len(set) 133 } 134 return drives 135 } 136 137 func (f *formatErasureV3) Clone() *formatErasureV3 { 138 b, err := json.Marshal(f) 139 if err != nil { 140 panic(err) 141 } 142 var dst formatErasureV3 143 if err = json.Unmarshal(b, &dst); err != nil { 144 panic(err) 145 } 146 return &dst 147 } 148 149 // Returns formatErasure.Erasure.Version 150 func newFormatErasureV3(numSets int, setLen int) *formatErasureV3 { 151 format := &formatErasureV3{} 152 format.Version = formatMetaVersionV1 153 format.Format = formatBackendErasure 154 if setLen == 1 { 155 format.Format = formatBackendErasureSingle 156 } 157 format.ID = mustGetUUID() 158 format.Erasure.Version = formatErasureVersionV3 159 format.Erasure.DistributionAlgo = formatErasureVersionV3DistributionAlgoV3 160 format.Erasure.Sets = make([][]string, numSets) 161 162 for i := 0; i < numSets; i++ { 163 format.Erasure.Sets[i] = make([]string, setLen) 164 for j := 0; j < setLen; j++ { 165 format.Erasure.Sets[i][j] = mustGetUUID() 166 } 167 } 168 return format 169 } 170 171 // Returns format Erasure version after reading `format.json`, returns 172 // successfully the version only if the backend is Erasure. 173 func formatGetBackendErasureVersion(b []byte) (string, error) { 174 meta := &formatMetaV1{} 175 if err := json.Unmarshal(b, meta); err != nil { 176 return "", err 177 } 178 if meta.Version != formatMetaVersionV1 { 179 return "", fmt.Errorf(`format.Version expected: %s, got: %s`, formatMetaVersionV1, meta.Version) 180 } 181 if meta.Format != formatBackendErasure && meta.Format != formatBackendErasureSingle { 182 return "", fmt.Errorf(`found backend type %s, expected %s or %s - to migrate to a supported backend visit https://min.io/docs/minio/linux/operations/install-deploy-manage/migrate-fs-gateway.html`, meta.Format, formatBackendErasure, formatBackendErasureSingle) 183 } 184 // Erasure backend found, proceed to detect version. 185 format := &formatErasureVersionDetect{} 186 if err := json.Unmarshal(b, format); err != nil { 187 return "", err 188 } 189 return format.Erasure.Version, nil 190 } 191 192 // Migrates all previous versions to latest version of `format.json`, 193 // this code calls migration in sequence, such as V1 is migrated to V2 194 // first before it V2 migrates to V3.n 195 func formatErasureMigrate(export string) ([]byte, fs.FileInfo, error) { 196 formatPath := pathJoin(export, minioMetaBucket, formatConfigFile) 197 formatData, formatFi, err := xioutil.ReadFileWithFileInfo(formatPath) 198 if err != nil { 199 return nil, nil, err 200 } 201 202 version, err := formatGetBackendErasureVersion(formatData) 203 if err != nil { 204 return nil, nil, fmt.Errorf("Drive %s: %w", export, err) 205 } 206 207 migrate := func(formatPath string, formatData []byte) ([]byte, fs.FileInfo, error) { 208 if err = os.WriteFile(formatPath, formatData, 0o666); err != nil { 209 return nil, nil, err 210 } 211 formatFi, err := Lstat(formatPath) 212 if err != nil { 213 return nil, nil, err 214 } 215 return formatData, formatFi, nil 216 } 217 218 switch version { 219 case formatErasureVersionV1: 220 formatData, err = formatErasureMigrateV1ToV2(formatData, version) 221 if err != nil { 222 return nil, nil, fmt.Errorf("Drive %s: %w", export, err) 223 } 224 // Migrate successful v1 => v2, proceed to v2 => v3 225 version = formatErasureVersionV2 226 fallthrough 227 case formatErasureVersionV2: 228 formatData, err = formatErasureMigrateV2ToV3(formatData, export, version) 229 if err != nil { 230 return nil, nil, fmt.Errorf("Drive %s: %w", export, err) 231 } 232 // Migrate successful v2 => v3, v3 is latest 233 // version = formatXLVersionV3 234 return migrate(formatPath, formatData) 235 case formatErasureVersionV3: 236 // v3 is the latest version, return. 237 return formatData, formatFi, nil 238 } 239 return nil, nil, fmt.Errorf(`Disk %s: unknown format version %s`, export, version) 240 } 241 242 // Migrates version V1 of format.json to version V2 of format.json, 243 // migration fails upon any error. 244 func formatErasureMigrateV1ToV2(data []byte, version string) ([]byte, error) { 245 if version != formatErasureVersionV1 { 246 return nil, fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV1, version) 247 } 248 249 formatV1 := &formatErasureV1{} 250 if err := json.Unmarshal(data, formatV1); err != nil { 251 return nil, err 252 } 253 254 formatV2 := &formatErasureV2{} 255 formatV2.Version = formatMetaVersionV1 256 formatV2.Format = formatBackendErasure 257 formatV2.Erasure.Version = formatErasureVersionV2 258 formatV2.Erasure.DistributionAlgo = formatErasureVersionV2DistributionAlgoV1 259 formatV2.Erasure.This = formatV1.Erasure.Disk 260 formatV2.Erasure.Sets = make([][]string, 1) 261 formatV2.Erasure.Sets[0] = make([]string, len(formatV1.Erasure.JBOD)) 262 copy(formatV2.Erasure.Sets[0], formatV1.Erasure.JBOD) 263 264 return json.Marshal(formatV2) 265 } 266 267 // Migrates V2 for format.json to V3 (Flat hierarchy for multipart) 268 func formatErasureMigrateV2ToV3(data []byte, export, version string) ([]byte, error) { 269 if version != formatErasureVersionV2 { 270 return nil, fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV2, version) 271 } 272 273 formatV2 := &formatErasureV2{} 274 if err := json.Unmarshal(data, formatV2); err != nil { 275 return nil, err 276 } 277 278 tmpOld := pathJoin(export, minioMetaTmpDeletedBucket, mustGetUUID()) 279 if err := renameAll(pathJoin(export, minioMetaMultipartBucket), 280 tmpOld, export); err != nil && err != errFileNotFound { 281 logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate", 282 pathJoin(export, minioMetaMultipartBucket), 283 tmpOld, 284 osErrToFileErr(err))) 285 } 286 287 // format-V2 struct is exactly same as format-V1 except that version is "3" 288 // which indicates the simplified multipart backend. 289 formatV3 := formatErasureV3{} 290 formatV3.Version = formatV2.Version 291 formatV3.Format = formatV2.Format 292 formatV3.Erasure = formatV2.Erasure 293 formatV3.Erasure.Version = formatErasureVersionV3 294 295 return json.Marshal(formatV3) 296 } 297 298 // countErrs - count a specific error. 299 func countErrs(errs []error, err error) int { 300 i := 0 301 for _, err1 := range errs { 302 if err1 == err || errors.Is(err1, err) { 303 i++ 304 } 305 } 306 return i 307 } 308 309 // Does all errors indicate we need to initialize all disks?. 310 func shouldInitErasureDisks(errs []error) bool { 311 return countErrs(errs, errUnformattedDisk) == len(errs) 312 } 313 314 // Check if unformatted disks are equal to 50%+1 of all the drives. 315 func quorumUnformattedDisks(errs []error) bool { 316 return countErrs(errs, errUnformattedDisk) >= (len(errs)/2)+1 317 } 318 319 // loadFormatErasureAll - load all format config from all input disks in parallel. 320 func loadFormatErasureAll(storageDisks []StorageAPI, heal bool) ([]*formatErasureV3, []error) { 321 // Initialize list of errors. 322 g := errgroup.WithNErrs(len(storageDisks)) 323 324 // Initialize format configs. 325 formats := make([]*formatErasureV3, len(storageDisks)) 326 327 // Load format from each disk in parallel 328 for index := range storageDisks { 329 index := index 330 g.Go(func() error { 331 if storageDisks[index] == nil { 332 return errDiskNotFound 333 } 334 format, formatData, err := loadFormatErasureWithData(storageDisks[index], heal) 335 if err != nil { 336 return err 337 } 338 formats[index] = format 339 if !heal { 340 // If no healing required, make the disks valid and 341 // online. 342 storageDisks[index].SetDiskID(format.Erasure.This) 343 storageDisks[index].SetFormatData(formatData) 344 } 345 return nil 346 }, index) 347 } 348 349 // Return all formats and errors if any. 350 return formats, g.Wait() 351 } 352 353 func saveFormatErasure(disk StorageAPI, format *formatErasureV3, healID string) error { 354 if disk == nil || format == nil { 355 return errDiskNotFound 356 } 357 358 // Marshal and write to disk. 359 formatData, err := json.Marshal(format) 360 if err != nil { 361 return err 362 } 363 364 tmpFormat := mustGetUUID() 365 366 // Purge any existing temporary file, okay to ignore errors here. 367 defer disk.Delete(context.TODO(), minioMetaBucket, tmpFormat, DeleteOptions{ 368 Recursive: false, 369 Immediate: false, 370 }) 371 372 // write to unique file. 373 if err = disk.WriteAll(context.TODO(), minioMetaBucket, tmpFormat, formatData); err != nil { 374 return err 375 } 376 377 // Rename file `uuid.json` --> `format.json`. 378 if err = disk.RenameFile(context.TODO(), minioMetaBucket, tmpFormat, minioMetaBucket, formatConfigFile); err != nil { 379 return err 380 } 381 382 disk.SetDiskID(format.Erasure.This) 383 disk.SetFormatData(formatData) 384 if healID != "" { 385 ctx := context.Background() 386 ht := initHealingTracker(disk, healID) 387 return ht.save(ctx) 388 } 389 return nil 390 } 391 392 // loadFormatErasureWithData - loads format.json from disk. 393 func loadFormatErasureWithData(disk StorageAPI, heal bool) (format *formatErasureV3, data []byte, err error) { 394 data, err = disk.ReadAll(context.TODO(), minioMetaBucket, formatConfigFile) 395 if err != nil { 396 // 'file not found' and 'volume not found' as 397 // same. 'volume not found' usually means its a fresh disk. 398 if errors.Is(err, errFileNotFound) || errors.Is(err, errVolumeNotFound) { 399 return nil, nil, errUnformattedDisk 400 } 401 return nil, nil, err 402 } 403 404 // Try to decode format json into formatConfigV1 struct. 405 format = &formatErasureV3{} 406 if err = json.Unmarshal(data, format); err != nil { 407 return nil, nil, err 408 } 409 410 if heal { 411 info, err := disk.DiskInfo(context.Background(), DiskInfoOptions{NoOp: heal}) 412 if err != nil { 413 return nil, nil, err 414 } 415 format.Info = info 416 } 417 418 // Success. 419 return format, data, nil 420 } 421 422 // loadFormatErasure - loads format.json from disk. 423 func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) { 424 buf, err := disk.ReadAll(context.TODO(), minioMetaBucket, formatConfigFile) 425 if err != nil { 426 // 'file not found' and 'volume not found' as 427 // same. 'volume not found' usually means its a fresh disk. 428 if err == errFileNotFound || err == errVolumeNotFound { 429 return nil, errUnformattedDisk 430 } 431 return nil, err 432 } 433 434 json := jsoniter.ConfigCompatibleWithStandardLibrary 435 436 // Try to decode format json into formatConfigV1 struct. 437 format = &formatErasureV3{} 438 if err = json.Unmarshal(buf, format); err != nil { 439 return nil, err 440 } 441 442 // Success. 443 return format, nil 444 } 445 446 // Valid formatErasure basic versions. 447 func checkFormatErasureValue(formatErasure *formatErasureV3, disk StorageAPI) error { 448 // Validate format version and format type. 449 if formatErasure.Version != formatMetaVersionV1 { 450 return fmt.Errorf("Unsupported version of backend format [%s] found on %s", formatErasure.Version, disk) 451 } 452 if formatErasure.Format != formatBackendErasure && formatErasure.Format != formatBackendErasureSingle { 453 return fmt.Errorf("Unsupported backend format [%s] found on %s", formatErasure.Format, disk) 454 } 455 if formatErasure.Erasure.Version != formatErasureVersionV3 { 456 return fmt.Errorf("Unsupported Erasure backend format found [%s] on %s", formatErasure.Erasure.Version, disk) 457 } 458 return nil 459 } 460 461 // Check all format values. 462 func checkFormatErasureValues(formats []*formatErasureV3, disks []StorageAPI, setDriveCount int) error { 463 for i, formatErasure := range formats { 464 if formatErasure == nil { 465 continue 466 } 467 if err := checkFormatErasureValue(formatErasure, disks[i]); err != nil { 468 return err 469 } 470 if len(formats) != len(formatErasure.Erasure.Sets)*len(formatErasure.Erasure.Sets[0]) { 471 return fmt.Errorf("%s drive is already being used in another erasure deployment. (Number of drives specified: %d but the number of drives found in the %s drive's format.json: %d)", 472 disks[i], len(formats), humanize.Ordinal(i+1), len(formatErasure.Erasure.Sets)*len(formatErasure.Erasure.Sets[0])) 473 } 474 // Only if custom erasure drive count is set, verify if the 475 // set_drive_count was manually set - we need to honor what is 476 // present on the drives. 477 if globalCustomErasureDriveCount && len(formatErasure.Erasure.Sets[0]) != setDriveCount { 478 return fmt.Errorf("%s drive is already formatted with %d drives per erasure set. This cannot be changed to %d, please revert your MINIO_ERASURE_SET_DRIVE_COUNT setting", disks[i], len(formatErasure.Erasure.Sets[0]), setDriveCount) 479 } 480 } 481 return nil 482 } 483 484 // Get Deployment ID for the Erasure sets from format.json. 485 // This need not be in quorum. Even if one of the format.json 486 // file has this value, we assume it is valid. 487 // If more than one format.json's have different id, it is considered a corrupt 488 // backend format. 489 func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatErasureV3) (string, error) { 490 var deploymentID string 491 for _, format := range formats { 492 if format == nil || format.ID == "" { 493 continue 494 } 495 if reflect.DeepEqual(format.Erasure.Sets, refFormat.Erasure.Sets) { 496 // Found an ID in one of the format.json file 497 // Set deploymentID for the first time. 498 if deploymentID == "" { 499 deploymentID = format.ID 500 } else if deploymentID != format.ID { 501 // DeploymentID found earlier doesn't match with the 502 // current format.json's ID. 503 return "", fmt.Errorf("Deployment IDs do not match expected %s, got %s: %w", 504 deploymentID, format.ID, errCorruptedFormat) 505 } 506 } 507 } 508 return deploymentID, nil 509 } 510 511 // formatErasureFixDeploymentID - Add deployment id if it is not present. 512 func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3, formats []*formatErasureV3) (err error) { 513 for index := range formats { 514 // If the Erasure sets do not match, set those formats to nil, 515 // We do not have to update the ID on those format.json file. 516 if formats[index] != nil && !reflect.DeepEqual(formats[index].Erasure.Sets, refFormat.Erasure.Sets) { 517 formats[index] = nil 518 } 519 } 520 521 refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats) 522 if err != nil { 523 return err 524 } 525 526 // If ID is set, then some other node got the lock 527 // before this node could and generated an ID 528 // for the deployment. No need to generate one. 529 if refFormat.ID != "" { 530 return nil 531 } 532 533 // ID is generated for the first time, 534 // We set the ID in all the formats and update. 535 refFormat.ID = mustGetUUID() 536 for _, format := range formats { 537 if format != nil { 538 format.ID = refFormat.ID 539 } 540 } 541 // Deployment ID needs to be set on all the disks. 542 // Save `format.json` across all disks. 543 return saveFormatErasureAll(GlobalContext, storageDisks, formats) 544 } 545 546 // Update only the valid local disks which have not been updated before. 547 func formatErasureFixLocalDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) error { 548 // If this server was down when the deploymentID was updated 549 // then we make sure that we update the local disks with the deploymentID. 550 551 // Initialize errs to collect errors inside go-routine. 552 g := errgroup.WithNErrs(len(storageDisks)) 553 554 for index := range storageDisks { 555 index := index 556 g.Go(func() error { 557 if endpoints[index].IsLocal && storageDisks[index] != nil && storageDisks[index].IsOnline() { 558 format, err := loadFormatErasure(storageDisks[index]) 559 if err != nil { 560 // Disk can be offline etc. 561 // ignore the errors seen here. 562 return nil 563 } 564 if format.ID != "" { 565 return nil 566 } 567 if !reflect.DeepEqual(format.Erasure.Sets, refFormat.Erasure.Sets) { 568 return nil 569 } 570 format.ID = refFormat.ID 571 // Heal the drive if we fixed its deployment ID. 572 if err := saveFormatErasure(storageDisks[index], format, mustGetUUID()); err != nil { 573 logger.LogIf(GlobalContext, err) 574 return fmt.Errorf("Unable to save format.json, %w", err) 575 } 576 } 577 return nil 578 }, index) 579 } 580 for _, err := range g.Wait() { 581 if err != nil { 582 return err 583 } 584 } 585 return nil 586 } 587 588 // Get backend Erasure format in quorum `format.json`. 589 func getFormatErasureInQuorum(formats []*formatErasureV3) (*formatErasureV3, error) { 590 formatCountMap := make(map[int]int, len(formats)) 591 for _, format := range formats { 592 if format == nil { 593 continue 594 } 595 formatCountMap[format.Drives()]++ 596 } 597 598 maxDrives := 0 599 maxCount := 0 600 for drives, count := range formatCountMap { 601 if count > maxCount { 602 maxCount = count 603 maxDrives = drives 604 } 605 } 606 607 if maxDrives == 0 { 608 return nil, errErasureReadQuorum 609 } 610 611 if maxCount < len(formats)/2 { 612 return nil, errErasureReadQuorum 613 } 614 615 for i, format := range formats { 616 if format == nil { 617 continue 618 } 619 if format.Drives() == maxDrives { 620 format := formats[i].Clone() 621 format.Erasure.This = "" 622 return format, nil 623 } 624 } 625 626 return nil, errErasureReadQuorum 627 } 628 629 func formatErasureV3Check(reference *formatErasureV3, format *formatErasureV3) error { 630 tmpFormat := format.Clone() 631 this := tmpFormat.Erasure.This 632 tmpFormat.Erasure.This = "" 633 if len(reference.Erasure.Sets) != len(format.Erasure.Sets) { 634 return fmt.Errorf("Expected number of sets %d, got %d", len(reference.Erasure.Sets), len(format.Erasure.Sets)) 635 } 636 637 // Make sure that the sets match. 638 for i := range reference.Erasure.Sets { 639 if len(reference.Erasure.Sets[i]) != len(format.Erasure.Sets[i]) { 640 return fmt.Errorf("Each set should be of same size, expected %d got %d", 641 len(reference.Erasure.Sets[i]), len(format.Erasure.Sets[i])) 642 } 643 for j := range reference.Erasure.Sets[i] { 644 if reference.Erasure.Sets[i][j] != format.Erasure.Sets[i][j] { 645 return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s: (%w)", 646 i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j], errInconsistentDisk) 647 } 648 } 649 } 650 651 // Make sure that the diskID is found in the set. 652 for i := 0; i < len(tmpFormat.Erasure.Sets); i++ { 653 for j := 0; j < len(tmpFormat.Erasure.Sets[i]); j++ { 654 if this == tmpFormat.Erasure.Sets[i][j] { 655 return nil 656 } 657 } 658 } 659 return fmt.Errorf("DriveID %s not found in any drive sets %s", this, format.Erasure.Sets) 660 } 661 662 // saveFormatErasureAll - populates `format.json` on disks in its order. 663 func saveFormatErasureAll(ctx context.Context, storageDisks []StorageAPI, formats []*formatErasureV3) error { 664 g := errgroup.WithNErrs(len(storageDisks)) 665 666 // Write `format.json` to all disks. 667 for index := range storageDisks { 668 index := index 669 g.Go(func() error { 670 if formats[index] == nil { 671 return errDiskNotFound 672 } 673 return saveFormatErasure(storageDisks[index], formats[index], "") 674 }, index) 675 } 676 677 // Wait for the routines to finish. 678 return reduceWriteQuorumErrs(ctx, g.Wait(), nil, len(storageDisks)) 679 } 680 681 // relinquishes the underlying connection for all storage disks. 682 func closeStorageDisks(storageDisks ...StorageAPI) { 683 var wg sync.WaitGroup 684 for _, disk := range storageDisks { 685 if disk == nil { 686 continue 687 } 688 wg.Add(1) 689 go func(disk StorageAPI) { 690 defer wg.Done() 691 disk.Close() 692 }(disk) 693 } 694 wg.Wait() 695 } 696 697 // Initialize storage disks for each endpoint. 698 // Errors are returned for each endpoint with matching index. 699 func initStorageDisksWithErrors(endpoints Endpoints, opts storageOpts) ([]StorageAPI, []error) { 700 // Bootstrap disks. 701 storageDisks := make([]StorageAPI, len(endpoints)) 702 g := errgroup.WithNErrs(len(endpoints)) 703 for index := range endpoints { 704 index := index 705 g.Go(func() (err error) { 706 storageDisks[index], err = newStorageAPI(endpoints[index], opts) 707 return err 708 }, index) 709 } 710 return storageDisks, g.Wait() 711 } 712 713 // formatErasureV3ThisEmpty - find out if '.This' field is empty 714 // in any of the input `formats`, if yes return true. 715 func formatErasureV3ThisEmpty(formats []*formatErasureV3) bool { 716 for _, format := range formats { 717 if format == nil { 718 continue 719 } 720 // NOTE: This code is specifically needed when migrating version 721 // V1 to V2 to V3, in a scenario such as this we only need to handle 722 // single sets since we never used to support multiple sets in releases 723 // with V1 format version. 724 if len(format.Erasure.Sets) > 1 { 725 continue 726 } 727 if format.Erasure.This == "" { 728 return true 729 } 730 } 731 return false 732 } 733 734 // fixFormatErasureV3 - fix format Erasure configuration on all disks. 735 func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats []*formatErasureV3) error { 736 g := errgroup.WithNErrs(len(formats)) 737 for i := range formats { 738 i := i 739 g.Go(func() error { 740 if formats[i] == nil || !endpoints[i].IsLocal { 741 return nil 742 } 743 // NOTE: This code is specifically needed when migrating version 744 // V1 to V2 to V3, in a scenario such as this we only need to handle 745 // single sets since we never used to support multiple sets in releases 746 // with V1 format version. 747 if len(formats[i].Erasure.Sets) > 1 { 748 return nil 749 } 750 if formats[i].Erasure.This == "" { 751 formats[i].Erasure.This = formats[i].Erasure.Sets[0][i] 752 // Heal the drive if drive has .This empty. 753 if err := saveFormatErasure(storageDisks[i], formats[i], mustGetUUID()); err != nil { 754 return err 755 } 756 } 757 return nil 758 }, i) 759 } 760 for _, err := range g.Wait() { 761 if err != nil { 762 return err 763 } 764 } 765 return nil 766 } 767 768 // initFormatErasure - save Erasure format configuration on all disks. 769 func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, deploymentID string, sErrs []error) (*formatErasureV3, error) { 770 format := newFormatErasureV3(setCount, setDriveCount) 771 formats := make([]*formatErasureV3, len(storageDisks)) 772 wantAtMost, err := ecDrivesNoConfig(setDriveCount) 773 if err != nil { 774 return nil, err 775 } 776 777 for i := 0; i < setCount; i++ { 778 hostCount := make(map[string]int, setDriveCount) 779 for j := 0; j < setDriveCount; j++ { 780 disk := storageDisks[i*setDriveCount+j] 781 newFormat := format.Clone() 782 newFormat.Erasure.This = format.Erasure.Sets[i][j] 783 if deploymentID != "" { 784 newFormat.ID = deploymentID 785 } 786 hostCount[disk.Hostname()]++ 787 formats[i*setDriveCount+j] = newFormat 788 } 789 var once sync.Once 790 for host, count := range hostCount { 791 if count > wantAtMost { 792 if host == "" { 793 host = "local" 794 } 795 once.Do(func() { 796 if len(hostCount) == 1 { 797 return 798 } 799 logger.Info(" * Set %v:", i+1) 800 for j := 0; j < setDriveCount; j++ { 801 disk := storageDisks[i*setDriveCount+j] 802 logger.Info(" - Drive: %s", disk.String()) 803 } 804 }) 805 logger.Info(color.Yellow("WARNING:")+" Host %v has more than %v drives of set. "+ 806 "A host failure will result in data becoming unavailable.", host, wantAtMost) 807 } 808 } 809 } 810 811 // Save formats `format.json` across all disks. 812 if err := saveFormatErasureAll(ctx, storageDisks, formats); err != nil { 813 return nil, err 814 } 815 816 return getFormatErasureInQuorum(formats) 817 } 818 819 // ecDrivesNoConfig returns the erasure coded drives in a set if no config has been set. 820 // It will attempt to read it from env variable and fall back to drives/2. 821 func ecDrivesNoConfig(setDriveCount int) (int, error) { 822 sc, err := storageclass.LookupConfig(config.KVS{}, setDriveCount) 823 if err != nil { 824 return 0, err 825 } 826 return sc.GetParityForSC(storageclass.STANDARD), nil 827 } 828 829 // Initialize a new set of set formats which will be written to all disks. 830 func newHealFormatSets(refFormat *formatErasureV3, setCount, setDriveCount int, formats []*formatErasureV3, errs []error) ([][]*formatErasureV3, [][]DiskInfo) { 831 newFormats := make([][]*formatErasureV3, setCount) 832 for i := range refFormat.Erasure.Sets { 833 newFormats[i] = make([]*formatErasureV3, setDriveCount) 834 } 835 currentDisksInfo := make([][]DiskInfo, setCount) 836 for i := range refFormat.Erasure.Sets { 837 currentDisksInfo[i] = make([]DiskInfo, setDriveCount) 838 } 839 for i := range refFormat.Erasure.Sets { 840 for j := range refFormat.Erasure.Sets[i] { 841 if errors.Is(errs[i*setDriveCount+j], errUnformattedDisk) { 842 newFormats[i][j] = &formatErasureV3{} 843 newFormats[i][j].ID = refFormat.ID 844 newFormats[i][j].Format = refFormat.Format 845 newFormats[i][j].Version = refFormat.Version 846 newFormats[i][j].Erasure.This = refFormat.Erasure.Sets[i][j] 847 newFormats[i][j].Erasure.Sets = refFormat.Erasure.Sets 848 newFormats[i][j].Erasure.Version = refFormat.Erasure.Version 849 newFormats[i][j].Erasure.DistributionAlgo = refFormat.Erasure.DistributionAlgo 850 } 851 if format := formats[i*setDriveCount+j]; format != nil && (errs[i*setDriveCount+j] == nil) { 852 if format.Info.Endpoint != "" { 853 currentDisksInfo[i][j] = format.Info 854 } 855 } 856 } 857 } 858 return newFormats, currentDisksInfo 859 }