storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/format-erasure.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2016, 2017, 2018 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cmd 18 19 import ( 20 "context" 21 "crypto/sha256" 22 "encoding/hex" 23 "encoding/json" 24 "errors" 25 "fmt" 26 "io/ioutil" 27 "reflect" 28 "sync" 29 30 humanize "github.com/dustin/go-humanize" 31 32 "storj.io/minio/cmd/config" 33 "storj.io/minio/cmd/config/storageclass" 34 "storj.io/minio/cmd/logger" 35 "storj.io/minio/pkg/color" 36 xioutil "storj.io/minio/pkg/ioutil" 37 "storj.io/minio/pkg/sync/errgroup" 38 ) 39 40 const ( 41 // Represents Erasure backend. 42 formatBackendErasure = "xl" 43 44 // formatErasureV1.Erasure.Version - version '1'. 45 formatErasureVersionV1 = "1" 46 47 // formatErasureV2.Erasure.Version - version '2'. 48 formatErasureVersionV2 = "2" 49 50 // formatErasureV3.Erasure.Version - version '3'. 51 formatErasureVersionV3 = "3" 52 53 // Distribution algorithm used, legacy 54 formatErasureVersionV2DistributionAlgoV1 = "CRCMOD" 55 56 // Distributed algorithm used, with N/2 default parity 57 formatErasureVersionV3DistributionAlgoV2 = "SIPMOD" 58 59 // Distributed algorithm used, with EC:4 default parity 60 formatErasureVersionV3DistributionAlgoV3 = "SIPMOD+PARITY" 61 ) 62 63 // Offline disk UUID represents an offline disk. 64 const offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff" 65 66 // Used to detect the version of "xl" format. 67 type formatErasureVersionDetect struct { 68 Erasure struct { 69 Version string `json:"version"` 70 } `json:"xl"` 71 } 72 73 // Represents the V1 backend disk structure version 74 // under `.minio.sys` and actual data namespace. 75 // formatErasureV1 - structure holds format config version '1'. 76 type formatErasureV1 struct { 77 formatMetaV1 78 Erasure struct { 79 Version string `json:"version"` // Version of 'xl' format. 80 Disk string `json:"disk"` // Disk field carries assigned disk uuid. 81 // JBOD field carries the input disk order generated the first 82 // time when fresh disks were supplied. 83 JBOD []string `json:"jbod"` 84 } `json:"xl"` // Erasure field holds xl format. 85 } 86 87 // Represents the V2 backend disk structure version 88 // under `.minio.sys` and actual data namespace. 89 // formatErasureV2 - structure holds format config version '2'. 90 // The V2 format to support "large bucket" support where a bucket 91 // can span multiple erasure sets. 92 type formatErasureV2 struct { 93 formatMetaV1 94 Erasure struct { 95 Version string `json:"version"` // Version of 'xl' format. 96 This string `json:"this"` // This field carries assigned disk uuid. 97 // Sets field carries the input disk order generated the first 98 // time when fresh disks were supplied, it is a two dimensional 99 // array second dimension represents list of disks used per set. 100 Sets [][]string `json:"sets"` 101 // Distribution algorithm represents the hashing algorithm 102 // to pick the right set index for an object. 103 DistributionAlgo string `json:"distributionAlgo"` 104 } `json:"xl"` 105 } 106 107 // formatErasureV3 struct is same as formatErasureV2 struct except that formatErasureV3.Erasure.Version is "3" indicating 108 // the simplified multipart backend which is a flat hierarchy now. 109 // In .minio.sys/multipart we have: 110 // sha256(bucket/object)/uploadID/[xl.meta, part.1, part.2 ....] 111 type formatErasureV3 struct { 112 formatMetaV1 113 Erasure struct { 114 Version string `json:"version"` // Version of 'xl' format. 115 This string `json:"this"` // This field carries assigned disk uuid. 116 // Sets field carries the input disk order generated the first 117 // time when fresh disks were supplied, it is a two dimensional 118 // array second dimension represents list of disks used per set. 119 Sets [][]string `json:"sets"` 120 // Distribution algorithm represents the hashing algorithm 121 // to pick the right set index for an object. 122 DistributionAlgo string `json:"distributionAlgo"` 123 } `json:"xl"` 124 } 125 126 func (f *formatErasureV3) Clone() *formatErasureV3 { 127 b, err := json.Marshal(f) 128 if err != nil { 129 panic(err) 130 } 131 var dst formatErasureV3 132 if err = json.Unmarshal(b, &dst); err != nil { 133 panic(err) 134 } 135 return &dst 136 } 137 138 // Returns formatErasure.Erasure.Version 139 func newFormatErasureV3(numSets int, setLen int) *formatErasureV3 { 140 format := &formatErasureV3{} 141 format.Version = formatMetaVersionV1 142 format.Format = formatBackendErasure 143 format.ID = mustGetUUID() 144 format.Erasure.Version = formatErasureVersionV3 145 format.Erasure.DistributionAlgo = formatErasureVersionV3DistributionAlgoV3 146 format.Erasure.Sets = make([][]string, numSets) 147 148 for i := 0; i < numSets; i++ { 149 format.Erasure.Sets[i] = make([]string, setLen) 150 for j := 0; j < setLen; j++ { 151 format.Erasure.Sets[i][j] = mustGetUUID() 152 } 153 } 154 return format 155 } 156 157 // Returns format Erasure version after reading `format.json`, returns 158 // successfully the version only if the backend is Erasure. 159 func formatGetBackendErasureVersion(formatPath string) (string, error) { 160 meta := &formatMetaV1{} 161 b, err := xioutil.ReadFile(formatPath) 162 if err != nil { 163 return "", err 164 } 165 if err = json.Unmarshal(b, meta); err != nil { 166 return "", err 167 } 168 if meta.Version != formatMetaVersionV1 { 169 return "", fmt.Errorf(`format.Version expected: %s, got: %s`, formatMetaVersionV1, meta.Version) 170 } 171 if meta.Format != formatBackendErasure { 172 return "", fmt.Errorf(`found backend type %s, expected %s`, meta.Format, formatBackendErasure) 173 } 174 // Erasure backend found, proceed to detect version. 175 format := &formatErasureVersionDetect{} 176 if err = json.Unmarshal(b, format); err != nil { 177 return "", err 178 } 179 return format.Erasure.Version, nil 180 } 181 182 // Migrates all previous versions to latest version of `format.json`, 183 // this code calls migration in sequence, such as V1 is migrated to V2 184 // first before it V2 migrates to V3.n 185 func formatErasureMigrate(export string) error { 186 formatPath := pathJoin(export, minioMetaBucket, formatConfigFile) 187 version, err := formatGetBackendErasureVersion(formatPath) 188 if err != nil { 189 return fmt.Errorf("Disk %s: %w", export, err) 190 } 191 switch version { 192 case formatErasureVersionV1: 193 if err = formatErasureMigrateV1ToV2(export, version); err != nil { 194 return fmt.Errorf("Disk %s: %w", export, err) 195 } 196 // Migrate successful v1 => v2, proceed to v2 => v3 197 version = formatErasureVersionV2 198 fallthrough 199 case formatErasureVersionV2: 200 if err = formatErasureMigrateV2ToV3(export, version); err != nil { 201 return fmt.Errorf("Disk %s: %w", export, err) 202 } 203 // Migrate successful v2 => v3, v3 is latest 204 // version = formatXLVersionV3 205 fallthrough 206 case formatErasureVersionV3: 207 // v3 is the latest version, return. 208 return nil 209 } 210 return fmt.Errorf(`Disk %s: unknown format version %s`, export, version) 211 } 212 213 // Migrates version V1 of format.json to version V2 of format.json, 214 // migration fails upon any error. 215 func formatErasureMigrateV1ToV2(export, version string) error { 216 if version != formatErasureVersionV1 { 217 return fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV1, version) 218 } 219 220 formatPath := pathJoin(export, minioMetaBucket, formatConfigFile) 221 222 formatV1 := &formatErasureV1{} 223 b, err := xioutil.ReadFile(formatPath) 224 if err != nil { 225 return err 226 } 227 if err = json.Unmarshal(b, formatV1); err != nil { 228 return err 229 } 230 231 formatV2 := &formatErasureV2{} 232 formatV2.Version = formatMetaVersionV1 233 formatV2.Format = formatBackendErasure 234 formatV2.Erasure.Version = formatErasureVersionV2 235 formatV2.Erasure.DistributionAlgo = formatErasureVersionV2DistributionAlgoV1 236 formatV2.Erasure.This = formatV1.Erasure.Disk 237 formatV2.Erasure.Sets = make([][]string, 1) 238 formatV2.Erasure.Sets[0] = make([]string, len(formatV1.Erasure.JBOD)) 239 copy(formatV2.Erasure.Sets[0], formatV1.Erasure.JBOD) 240 241 b, err = json.Marshal(formatV2) 242 if err != nil { 243 return err 244 } 245 return ioutil.WriteFile(formatPath, b, 0644) 246 } 247 248 // Migrates V2 for format.json to V3 (Flat hierarchy for multipart) 249 func formatErasureMigrateV2ToV3(export, version string) error { 250 if version != formatErasureVersionV2 { 251 return fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV2, version) 252 } 253 254 formatPath := pathJoin(export, minioMetaBucket, formatConfigFile) 255 formatV2 := &formatErasureV2{} 256 b, err := xioutil.ReadFile(formatPath) 257 if err != nil { 258 return err 259 } 260 err = json.Unmarshal(b, formatV2) 261 if err != nil { 262 return err 263 } 264 265 if err = removeAll(pathJoin(export, minioMetaMultipartBucket)); err != nil { 266 return err 267 } 268 269 if err = mkdirAll(pathJoin(export, minioMetaMultipartBucket), 0755); err != nil { 270 return err 271 } 272 273 // format-V2 struct is exactly same as format-V1 except that version is "3" 274 // which indicates the simplified multipart backend. 275 formatV3 := formatErasureV3{} 276 277 formatV3.Version = formatV2.Version 278 formatV3.Format = formatV2.Format 279 formatV3.Erasure = formatV2.Erasure 280 281 formatV3.Erasure.Version = formatErasureVersionV3 282 283 b, err = json.Marshal(formatV3) 284 if err != nil { 285 return err 286 } 287 return ioutil.WriteFile(formatPath, b, 0644) 288 } 289 290 // countErrs - count a specific error. 291 func countErrs(errs []error, err error) int { 292 var i = 0 293 for _, err1 := range errs { 294 if err1 == err { 295 i++ 296 } 297 } 298 return i 299 } 300 301 // Does all errors indicate we need to initialize all disks?. 302 func shouldInitErasureDisks(errs []error) bool { 303 return countErrs(errs, errUnformattedDisk) == len(errs) 304 } 305 306 // Check if unformatted disks are equal to write quorum. 307 func quorumUnformattedDisks(errs []error) bool { 308 return countErrs(errs, errUnformattedDisk) >= (len(errs)/2)+1 309 } 310 311 // loadFormatErasureAll - load all format config from all input disks in parallel. 312 func loadFormatErasureAll(storageDisks []StorageAPI, heal bool) ([]*formatErasureV3, []error) { 313 // Initialize list of errors. 314 g := errgroup.WithNErrs(len(storageDisks)) 315 316 // Initialize format configs. 317 var formats = make([]*formatErasureV3, len(storageDisks)) 318 319 // Load format from each disk in parallel 320 for index := range storageDisks { 321 index := index 322 g.Go(func() error { 323 if storageDisks[index] == nil { 324 return errDiskNotFound 325 } 326 format, err := loadFormatErasure(storageDisks[index]) 327 if err != nil { 328 return err 329 } 330 formats[index] = format 331 if !heal { 332 // If no healing required, make the disks valid and 333 // online. 334 storageDisks[index].SetDiskID(format.Erasure.This) 335 } 336 return nil 337 }, index) 338 } 339 340 // Return all formats and errors if any. 341 return formats, g.Wait() 342 } 343 344 func saveFormatErasure(disk StorageAPI, format *formatErasureV3, heal bool) error { 345 if disk == nil || format == nil { 346 return errDiskNotFound 347 } 348 349 diskID := format.Erasure.This 350 351 if err := makeFormatErasureMetaVolumes(disk); err != nil { 352 return err 353 } 354 355 // Marshal and write to disk. 356 formatBytes, err := json.Marshal(format) 357 if err != nil { 358 return err 359 } 360 361 tmpFormat := mustGetUUID() 362 363 // Purge any existing temporary file, okay to ignore errors here. 364 defer disk.Delete(context.TODO(), minioMetaBucket, tmpFormat, false) 365 366 // write to unique file. 367 if err = disk.WriteAll(context.TODO(), minioMetaBucket, tmpFormat, formatBytes); err != nil { 368 return err 369 } 370 371 // Rename file `uuid.json` --> `format.json`. 372 if err = disk.RenameFile(context.TODO(), minioMetaBucket, tmpFormat, minioMetaBucket, formatConfigFile); err != nil { 373 return err 374 } 375 376 disk.SetDiskID(diskID) 377 if heal { 378 ctx := context.Background() 379 ht := newHealingTracker(disk) 380 return ht.save(ctx) 381 } 382 return nil 383 } 384 385 var ignoredHiddenDirectories = map[string]struct{}{ 386 minioMetaBucket: {}, // metabucket '.minio.sys' 387 ".minio": {}, // users may choose to double down the backend as the config folder for certs 388 ".snapshot": {}, // .snapshot for ignoring NetApp based persistent volumes WAFL snapshot 389 "lost+found": {}, // 'lost+found' directory default on ext4 filesystems 390 "$RECYCLE.BIN": {}, // windows specific directory for each drive (hidden) 391 "System Volume Information": {}, // windows specific directory for each drive (hidden) 392 } 393 394 func isHiddenDirectories(vols ...VolInfo) bool { 395 for _, vol := range vols { 396 if _, ok := ignoredHiddenDirectories[vol.Name]; ok { 397 continue 398 } 399 return false 400 } 401 return true 402 } 403 404 // loadFormatErasure - loads format.json from disk. 405 func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) { 406 buf, err := disk.ReadAll(context.TODO(), minioMetaBucket, formatConfigFile) 407 if err != nil { 408 // 'file not found' and 'volume not found' as 409 // same. 'volume not found' usually means its a fresh disk. 410 if err == errFileNotFound || err == errVolumeNotFound { 411 var vols []VolInfo 412 vols, err = disk.ListVols(context.TODO()) 413 if err != nil { 414 return nil, err 415 } 416 if !isHiddenDirectories(vols...) { 417 // 'format.json' not found, but we found user data, reject such disks. 418 return nil, fmt.Errorf("some unexpected files '%v' found on %s: %w", 419 vols, disk, errCorruptedFormat) 420 } 421 // No other data found, its a fresh disk. 422 return nil, errUnformattedDisk 423 } 424 return nil, err 425 } 426 427 // Try to decode format json into formatConfigV1 struct. 428 format = &formatErasureV3{} 429 if err = json.Unmarshal(buf, format); err != nil { 430 return nil, err 431 } 432 433 // Success. 434 return format, nil 435 } 436 437 // Valid formatErasure basic versions. 438 func checkFormatErasureValue(formatErasure *formatErasureV3, disk StorageAPI) error { 439 // Validate format version and format type. 440 if formatErasure.Version != formatMetaVersionV1 { 441 return fmt.Errorf("Unsupported version of backend format [%s] found on %s", formatErasure.Version, disk) 442 } 443 if formatErasure.Format != formatBackendErasure { 444 return fmt.Errorf("Unsupported backend format [%s] found on %s", formatErasure.Format, disk) 445 } 446 if formatErasure.Erasure.Version != formatErasureVersionV3 { 447 return fmt.Errorf("Unsupported Erasure backend format found [%s] on %s", formatErasure.Erasure.Version, disk) 448 } 449 return nil 450 } 451 452 // Check all format values. 453 func checkFormatErasureValues(formats []*formatErasureV3, disks []StorageAPI, setDriveCount int) error { 454 for i, formatErasure := range formats { 455 if formatErasure == nil { 456 continue 457 } 458 if err := checkFormatErasureValue(formatErasure, disks[i]); err != nil { 459 return err 460 } 461 if len(formats) != len(formatErasure.Erasure.Sets)*len(formatErasure.Erasure.Sets[0]) { 462 return fmt.Errorf("%s disk is already being used in another erasure deployment. (Number of disks specified: %d but the number of disks found in the %s disk's format.json: %d)", 463 disks[i], len(formats), humanize.Ordinal(i+1), len(formatErasure.Erasure.Sets)*len(formatErasure.Erasure.Sets[0])) 464 } 465 // Only if custom erasure drive count is set, verify if the 466 // set_drive_count was manually set - we need to honor what is 467 // present on the drives. 468 if globalCustomErasureDriveCount && len(formatErasure.Erasure.Sets[0]) != setDriveCount { 469 return fmt.Errorf("%s disk is already formatted with %d drives per erasure set. This cannot be changed to %d, please revert your MINIO_ERASURE_SET_DRIVE_COUNT setting", disks[i], len(formatErasure.Erasure.Sets[0]), setDriveCount) 470 } 471 } 472 return nil 473 } 474 475 // Get Deployment ID for the Erasure sets from format.json. 476 // This need not be in quorum. Even if one of the format.json 477 // file has this value, we assume it is valid. 478 // If more than one format.json's have different id, it is considered a corrupt 479 // backend format. 480 func formatErasureGetDeploymentID(refFormat *formatErasureV3, formats []*formatErasureV3) (string, error) { 481 var deploymentID string 482 for _, format := range formats { 483 if format == nil || format.ID == "" { 484 continue 485 } 486 if reflect.DeepEqual(format.Erasure.Sets, refFormat.Erasure.Sets) { 487 // Found an ID in one of the format.json file 488 // Set deploymentID for the first time. 489 if deploymentID == "" { 490 deploymentID = format.ID 491 } else if deploymentID != format.ID { 492 // DeploymentID found earlier doesn't match with the 493 // current format.json's ID. 494 return "", fmt.Errorf("Deployment IDs do not match expected %s, got %s: %w", 495 deploymentID, format.ID, errCorruptedFormat) 496 } 497 } 498 } 499 return deploymentID, nil 500 } 501 502 // formatErasureFixDeploymentID - Add deployment id if it is not present. 503 func formatErasureFixDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) (err error) { 504 // Attempt to load all `format.json` from all disks. 505 formats, _ := loadFormatErasureAll(storageDisks, false) 506 for index := range formats { 507 // If the Erasure sets do not match, set those formats to nil, 508 // We do not have to update the ID on those format.json file. 509 if formats[index] != nil && !reflect.DeepEqual(formats[index].Erasure.Sets, refFormat.Erasure.Sets) { 510 formats[index] = nil 511 } 512 } 513 514 refFormat.ID, err = formatErasureGetDeploymentID(refFormat, formats) 515 if err != nil { 516 return err 517 } 518 519 // If ID is set, then some other node got the lock 520 // before this node could and generated an ID 521 // for the deployment. No need to generate one. 522 if refFormat.ID != "" { 523 return nil 524 } 525 526 // ID is generated for the first time, 527 // We set the ID in all the formats and update. 528 refFormat.ID = mustGetUUID() 529 for _, format := range formats { 530 if format != nil { 531 format.ID = refFormat.ID 532 } 533 } 534 // Deployment ID needs to be set on all the disks. 535 // Save `format.json` across all disks. 536 return saveFormatErasureAll(GlobalContext, storageDisks, formats) 537 538 } 539 540 // Update only the valid local disks which have not been updated before. 541 func formatErasureFixLocalDeploymentID(endpoints Endpoints, storageDisks []StorageAPI, refFormat *formatErasureV3) error { 542 // If this server was down when the deploymentID was updated 543 // then we make sure that we update the local disks with the deploymentID. 544 545 // Initialize errs to collect errors inside go-routine. 546 g := errgroup.WithNErrs(len(storageDisks)) 547 548 for index := range storageDisks { 549 index := index 550 g.Go(func() error { 551 if endpoints[index].IsLocal && storageDisks[index] != nil && storageDisks[index].IsOnline() { 552 format, err := loadFormatErasure(storageDisks[index]) 553 if err != nil { 554 // Disk can be offline etc. 555 // ignore the errors seen here. 556 return nil 557 } 558 if format.ID != "" { 559 return nil 560 } 561 if !reflect.DeepEqual(format.Erasure.Sets, refFormat.Erasure.Sets) { 562 return nil 563 } 564 format.ID = refFormat.ID 565 // Heal the drive if we fixed its deployment ID. 566 if err := saveFormatErasure(storageDisks[index], format, true); err != nil { 567 logger.LogIf(GlobalContext, err) 568 return fmt.Errorf("Unable to save format.json, %w", err) 569 } 570 } 571 return nil 572 }, index) 573 } 574 for _, err := range g.Wait() { 575 if err != nil { 576 return err 577 } 578 } 579 return nil 580 } 581 582 // Get backend Erasure format in quorum `format.json`. 583 func getFormatErasureInQuorum(formats []*formatErasureV3) (*formatErasureV3, error) { 584 formatHashes := make([]string, len(formats)) 585 for i, format := range formats { 586 if format == nil { 587 continue 588 } 589 h := sha256.New() 590 for _, set := range format.Erasure.Sets { 591 for _, diskID := range set { 592 h.Write([]byte(diskID)) 593 } 594 } 595 formatHashes[i] = hex.EncodeToString(h.Sum(nil)) 596 } 597 598 formatCountMap := make(map[string]int) 599 for _, hash := range formatHashes { 600 if hash == "" { 601 continue 602 } 603 formatCountMap[hash]++ 604 } 605 606 maxHash := "" 607 maxCount := 0 608 for hash, count := range formatCountMap { 609 if count > maxCount { 610 maxCount = count 611 maxHash = hash 612 } 613 } 614 615 if maxCount < len(formats)/2 { 616 return nil, errErasureReadQuorum 617 } 618 619 for i, hash := range formatHashes { 620 if hash == maxHash { 621 format := formats[i].Clone() 622 format.Erasure.This = "" 623 return format, nil 624 } 625 } 626 627 return nil, errErasureReadQuorum 628 } 629 630 func formatErasureV3Check(reference *formatErasureV3, format *formatErasureV3) error { 631 tmpFormat := format.Clone() 632 this := tmpFormat.Erasure.This 633 tmpFormat.Erasure.This = "" 634 if len(reference.Erasure.Sets) != len(format.Erasure.Sets) { 635 return fmt.Errorf("Expected number of sets %d, got %d", len(reference.Erasure.Sets), len(format.Erasure.Sets)) 636 } 637 638 // Make sure that the sets match. 639 for i := range reference.Erasure.Sets { 640 if len(reference.Erasure.Sets[i]) != len(format.Erasure.Sets[i]) { 641 return fmt.Errorf("Each set should be of same size, expected %d got %d", 642 len(reference.Erasure.Sets[i]), len(format.Erasure.Sets[i])) 643 } 644 for j := range reference.Erasure.Sets[i] { 645 if reference.Erasure.Sets[i][j] != format.Erasure.Sets[i][j] { 646 return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s: (%w)", 647 i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j], errInconsistentDisk) 648 } 649 } 650 } 651 652 // Make sure that the diskID is found in the set. 653 for i := 0; i < len(tmpFormat.Erasure.Sets); i++ { 654 for j := 0; j < len(tmpFormat.Erasure.Sets[i]); j++ { 655 if this == tmpFormat.Erasure.Sets[i][j] { 656 return nil 657 } 658 } 659 } 660 return fmt.Errorf("Disk ID %s not found in any disk sets %s", this, format.Erasure.Sets) 661 } 662 663 // Initializes meta volume only on local storage disks. 664 func initErasureMetaVolumesInLocalDisks(storageDisks []StorageAPI, formats []*formatErasureV3) error { 665 666 // Compute the local disks eligible for meta volumes (re)initialization 667 disksToInit := make([]StorageAPI, 0, len(storageDisks)) 668 for index := range storageDisks { 669 if formats[index] == nil || storageDisks[index] == nil || !storageDisks[index].IsLocal() { 670 // Ignore create meta volume on disks which are not found or not local. 671 continue 672 } 673 disksToInit = append(disksToInit, storageDisks[index]) 674 } 675 676 // Initialize errs to collect errors inside go-routine. 677 g := errgroup.WithNErrs(len(disksToInit)) 678 679 // Initialize all disks in parallel. 680 for index := range disksToInit { 681 // Initialize a new index variable in each loop so each 682 // goroutine will return its own instance of index variable. 683 index := index 684 g.Go(func() error { 685 return makeFormatErasureMetaVolumes(disksToInit[index]) 686 }, index) 687 } 688 689 // Return upon first error. 690 for _, err := range g.Wait() { 691 if err == nil { 692 continue 693 } 694 return toObjectErr(err, minioMetaBucket) 695 } 696 697 // Return success here. 698 return nil 699 } 700 701 // saveUnformattedFormat - populates `format.json` on unformatted disks. 702 // also adds `.healing.bin` on the disks which are being actively healed. 703 func saveUnformattedFormat(ctx context.Context, storageDisks []StorageAPI, formats []*formatErasureV3) error { 704 for index, format := range formats { 705 if format == nil { 706 continue 707 } 708 if err := saveFormatErasure(storageDisks[index], format, true); err != nil { 709 return err 710 } 711 } 712 return nil 713 } 714 715 // saveFormatErasureAll - populates `format.json` on disks in its order. 716 func saveFormatErasureAll(ctx context.Context, storageDisks []StorageAPI, formats []*formatErasureV3) error { 717 g := errgroup.WithNErrs(len(storageDisks)) 718 719 // Write `format.json` to all disks. 720 for index := range storageDisks { 721 index := index 722 g.Go(func() error { 723 if formats[index] == nil { 724 return errDiskNotFound 725 } 726 return saveFormatErasure(storageDisks[index], formats[index], false) 727 }, index) 728 } 729 730 writeQuorum := getWriteQuorum(len(storageDisks)) 731 // Wait for the routines to finish. 732 return reduceWriteQuorumErrs(ctx, g.Wait(), nil, writeQuorum) 733 } 734 735 // relinquishes the underlying connection for all storage disks. 736 func closeStorageDisks(storageDisks []StorageAPI) { 737 for _, disk := range storageDisks { 738 if disk == nil { 739 continue 740 } 741 disk.Close() 742 } 743 } 744 745 func initStorageDisksWithErrorsWithoutHealthCheck(endpoints Endpoints) ([]StorageAPI, []error) { 746 // Bootstrap disks. 747 storageDisks := make([]StorageAPI, len(endpoints)) 748 g := errgroup.WithNErrs(len(endpoints)) 749 for index := range endpoints { 750 index := index 751 g.Go(func() (err error) { 752 storageDisks[index], err = newStorageAPIWithoutHealthCheck(endpoints[index]) 753 return err 754 }, index) 755 } 756 return storageDisks, g.Wait() 757 } 758 759 // Initialize storage disks for each endpoint. 760 // Errors are returned for each endpoint with matching index. 761 func initStorageDisksWithErrors(endpoints Endpoints) ([]StorageAPI, []error) { 762 // Bootstrap disks. 763 storageDisks := make([]StorageAPI, len(endpoints)) 764 g := errgroup.WithNErrs(len(endpoints)) 765 for index := range endpoints { 766 index := index 767 g.Go(func() (err error) { 768 storageDisks[index], err = newStorageAPI(endpoints[index]) 769 return err 770 }, index) 771 } 772 return storageDisks, g.Wait() 773 } 774 775 // formatErasureV3ThisEmpty - find out if '.This' field is empty 776 // in any of the input `formats`, if yes return true. 777 func formatErasureV3ThisEmpty(formats []*formatErasureV3) bool { 778 for _, format := range formats { 779 if format == nil { 780 continue 781 } 782 // NOTE: This code is specifically needed when migrating version 783 // V1 to V2 to V3, in a scenario such as this we only need to handle 784 // single sets since we never used to support multiple sets in releases 785 // with V1 format version. 786 if len(format.Erasure.Sets) > 1 { 787 continue 788 } 789 if format.Erasure.This == "" { 790 return true 791 } 792 } 793 return false 794 } 795 796 // fixFormatErasureV3 - fix format Erasure configuration on all disks. 797 func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats []*formatErasureV3) error { 798 g := errgroup.WithNErrs(len(formats)) 799 for i := range formats { 800 i := i 801 g.Go(func() error { 802 if formats[i] == nil || !endpoints[i].IsLocal { 803 return nil 804 } 805 // NOTE: This code is specifically needed when migrating version 806 // V1 to V2 to V3, in a scenario such as this we only need to handle 807 // single sets since we never used to support multiple sets in releases 808 // with V1 format version. 809 if len(formats[i].Erasure.Sets) > 1 { 810 return nil 811 } 812 if formats[i].Erasure.This == "" { 813 formats[i].Erasure.This = formats[i].Erasure.Sets[0][i] 814 // Heal the drive if drive has .This empty. 815 if err := saveFormatErasure(storageDisks[i], formats[i], true); err != nil { 816 return err 817 } 818 } 819 return nil 820 }, i) 821 } 822 for _, err := range g.Wait() { 823 if err != nil { 824 return err 825 } 826 } 827 return nil 828 829 } 830 831 // initFormatErasure - save Erasure format configuration on all disks. 832 func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, deploymentID, distributionAlgo string, sErrs []error) (*formatErasureV3, error) { 833 format := newFormatErasureV3(setCount, setDriveCount) 834 formats := make([]*formatErasureV3, len(storageDisks)) 835 wantAtMost := ecDrivesNoConfig(setDriveCount) 836 837 for i := 0; i < setCount; i++ { 838 hostCount := make(map[string]int, setDriveCount) 839 for j := 0; j < setDriveCount; j++ { 840 disk := storageDisks[i*setDriveCount+j] 841 newFormat := format.Clone() 842 newFormat.Erasure.This = format.Erasure.Sets[i][j] 843 if distributionAlgo != "" { 844 newFormat.Erasure.DistributionAlgo = distributionAlgo 845 } 846 if deploymentID != "" { 847 newFormat.ID = deploymentID 848 } 849 hostCount[disk.Hostname()]++ 850 formats[i*setDriveCount+j] = newFormat 851 } 852 if len(hostCount) > 0 { 853 var once sync.Once 854 for host, count := range hostCount { 855 if count > wantAtMost { 856 if host == "" { 857 host = "local" 858 } 859 once.Do(func() { 860 if len(hostCount) == 1 { 861 return 862 } 863 logger.Info(" * Set %v:", i+1) 864 for j := 0; j < setDriveCount; j++ { 865 disk := storageDisks[i*setDriveCount+j] 866 logger.Info(" - Drive: %s", disk.String()) 867 } 868 }) 869 logger.Info(color.Yellow("WARNING:")+" Host %v has more than %v drives of set. "+ 870 "A host failure will result in data becoming unavailable.", host, wantAtMost) 871 } 872 } 873 } 874 } 875 876 // Mark all root disks down 877 markRootDisksAsDown(storageDisks, sErrs) 878 879 // Save formats `format.json` across all disks. 880 if err := saveFormatErasureAll(ctx, storageDisks, formats); err != nil { 881 return nil, err 882 } 883 884 return getFormatErasureInQuorum(formats) 885 } 886 887 func getDefaultParityBlocks(drive int) int { 888 switch drive { 889 case 3, 2: 890 return 1 891 case 4, 5: 892 return 2 893 case 6, 7: 894 return 3 895 default: 896 return 4 897 } 898 } 899 900 // ecDrivesNoConfig returns the erasure coded drives in a set if no config has been set. 901 // It will attempt to read it from env variable and fall back to drives/2. 902 func ecDrivesNoConfig(setDriveCount int) int { 903 sc, _ := storageclass.LookupConfig(config.KVS{}, setDriveCount) 904 ecDrives := sc.GetParityForSC(storageclass.STANDARD) 905 if ecDrives <= 0 { 906 ecDrives = getDefaultParityBlocks(setDriveCount) 907 } 908 return ecDrives 909 } 910 911 // Make Erasure backend meta volumes. 912 func makeFormatErasureMetaVolumes(disk StorageAPI) error { 913 if disk == nil { 914 return errDiskNotFound 915 } 916 // Attempt to create MinIO internal buckets. 917 return disk.MakeVolBulk(context.TODO(), minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, minioMetaTmpDeletedBucket, dataUsageBucket, minioMetaTmpBucket+"-old") 918 } 919 920 // Initialize a new set of set formats which will be written to all disks. 921 func newHealFormatSets(refFormat *formatErasureV3, setCount, setDriveCount int, formats []*formatErasureV3, errs []error) [][]*formatErasureV3 { 922 newFormats := make([][]*formatErasureV3, setCount) 923 for i := range refFormat.Erasure.Sets { 924 newFormats[i] = make([]*formatErasureV3, setDriveCount) 925 } 926 for i := range refFormat.Erasure.Sets { 927 for j := range refFormat.Erasure.Sets[i] { 928 if errors.Is(errs[i*setDriveCount+j], errUnformattedDisk) { 929 newFormats[i][j] = &formatErasureV3{} 930 newFormats[i][j].ID = refFormat.ID 931 newFormats[i][j].Format = refFormat.Format 932 newFormats[i][j].Version = refFormat.Version 933 newFormats[i][j].Erasure.This = refFormat.Erasure.Sets[i][j] 934 newFormats[i][j].Erasure.Sets = refFormat.Erasure.Sets 935 newFormats[i][j].Erasure.Version = refFormat.Erasure.Version 936 newFormats[i][j].Erasure.DistributionAlgo = refFormat.Erasure.DistributionAlgo 937 } 938 } 939 } 940 return newFormats 941 }