github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/metacache-set.go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

package cmd

import (
	"bytes"
	"context"
	"encoding/gob"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math/rand"
	"strconv"
	"strings"
	"sync"
	"time"

	jsoniter "github.com/json-iterator/go"
	"github.com/minio/minio/internal/bucket/lifecycle"
	"github.com/minio/minio/internal/bucket/object/lock"
	"github.com/minio/minio/internal/bucket/versioning"
	"github.com/minio/minio/internal/color"
	"github.com/minio/minio/internal/hash"
	xioutil "github.com/minio/minio/internal/ioutil"
	"github.com/minio/minio/internal/logger"
	"github.com/minio/pkg/v2/console"
)

//go:generate msgp -file $GOFILE -unexported

type listPathOptions struct {
	// ID of the listing.
	// This will be used to persist the list.
	ID string

	// Bucket of the listing.
	Bucket string

	// Directory inside the bucket.
	// When unset, listPath will set this based on Prefix.
	BaseDir string

	// Scan/return only content with prefix.
	Prefix string

	// FilterPrefix will return only results with this prefix when scanning.
	// Should never contain a slash.
	// Prefix should still be set.
	FilterPrefix string

	// Marker to resume listing.
	// The response will be the first entry >= this object name.
	Marker string

	// Limit the number of results.
	Limit int

	// The number of disks to ask.
	AskDisks string

	// InclDeleted will keep all entries where the latest version is a delete marker.
	InclDeleted bool

	// Scan recursively.
	// If false, only the main directory will be scanned.
	// Should always be true if Separator is not SlashSeparator.
	Recursive bool

	// Separator to use.
	Separator string

	// Create indicates that the lister should not attempt to load an existing cache.
	Create bool

	// Include pure directories.
	IncludeDirectories bool

	// Transient is set if the cache is transient due to an error or being a reserved bucket.
	// This means the cache metadata will not be persisted on disk.
	// A transient result will never be returned from the cache, so knowing the list id is required.
	Transient bool

	// Versioned is set if this is a ListObjectVersions call.
	Versioned bool
	// V1 listing type
	V1 bool

	// Versioning config is used to check if the path
	// has versioning enabled.
	Versioning *versioning.Versioning `msg:"-"`

	// Lifecycle performs filtering based on lifecycle.
	// This will filter out objects if the most recent version should be deleted by lifecycle.
	// Is not transferred across request calls.
	Lifecycle *lifecycle.Lifecycle `msg:"-"`

	// Retention configuration, needed to be passed along with lifecycle if set.
	Retention lock.Retention `msg:"-"`

	// Replication configuration
	Replication replicationConfig `msg:"-"`

	// StopDiskAtLimit will stop listing on each disk when the limit number of objects has been returned.
	StopDiskAtLimit bool

	// pool and set of where the cache is located.
	pool, set int
}

func init() {
	gob.Register(listPathOptions{})
}

// setBucketMeta loads the bucket's lifecycle, versioning, object lock, and replication configuration into the options.
func (o *listPathOptions) setBucketMeta(ctx context.Context) {
	lc, _ := globalLifecycleSys.Get(o.Bucket)
	vc, _ := globalBucketVersioningSys.Get(o.Bucket)

	// Check if bucket is object locked.
	rcfg, _ := globalBucketObjectLockSys.Get(o.Bucket)
	replCfg, _, _ := globalBucketMetadataSys.GetReplicationConfig(ctx, o.Bucket)
	tgts, _ := globalBucketTargetSys.ListBucketTargets(ctx, o.Bucket)
	o.Lifecycle = lc
	o.Versioning = vc
	o.Replication = replicationConfig{
		Config:  replCfg,
		remotes: tgts,
	}
	o.Retention = rcfg
}

// newMetacache constructs a new metacache from the options.
func (o listPathOptions) newMetacache() metacache {
	return metacache{
		id:          o.ID,
		bucket:      o.Bucket,
		root:        o.BaseDir,
		recursive:   o.Recursive,
		status:      scanStateStarted,
		error:       "",
		started:     UTCNow(),
		lastHandout: UTCNow(),
		lastUpdate:  UTCNow(),
		ended:       time.Time{},
		dataVersion: metacacheStreamVersion,
		filter:      o.FilterPrefix,
	}
}

func (o *listPathOptions) debugf(format string, data ...interface{}) {
	if serverDebugLog {
		console.Debugf(format+"\n", data...)
	}
}

func (o *listPathOptions) debugln(data ...interface{}) {
	if serverDebugLog {
		console.Debugln(data...)
	}
}

// gatherResults will collect all results on the input channel and filter results according
// to the options or to the current bucket ILM expiry rules.
// Caller should close the channel when done.
// The returned function will return the results once there are enough, the input is closed,
// or the context is canceled.
func (o *listPathOptions) gatherResults(ctx context.Context, in <-chan metaCacheEntry) func() (metaCacheEntriesSorted, error) {
	resultsDone := make(chan metaCacheEntriesSorted)
	// Copy so we can mutate
	resCh := resultsDone
	var done bool
	var mu sync.Mutex
	resErr := io.EOF

	go func() {
		var results metaCacheEntriesSorted
		var returned bool
		for entry := range in {
			if returned {
				// past limit
				continue
			}
			mu.Lock()
			returned = done
			mu.Unlock()
			if returned {
				resCh = nil
				continue
			}
			if !o.IncludeDirectories && (entry.isDir() || (!o.Versioned && entry.isObjectDir() && entry.isLatestDeletemarker())) {
				continue
			}
			if o.Marker != "" && entry.name < o.Marker {
				continue
			}
			if !strings.HasPrefix(entry.name, o.Prefix) {
				continue
			}
			if !o.Recursive && !entry.isInDir(o.Prefix, o.Separator) {
				continue
			}
			if !o.InclDeleted && entry.isObject() && entry.isLatestDeletemarker() && !entry.isObjectDir() {
				continue
			}
			if o.Lifecycle != nil || o.Replication.Config != nil {
				if skipped := triggerExpiryAndRepl(ctx, *o, entry); skipped {
					results.lastSkippedEntry = entry.name
					continue
				}
			}
			if o.Limit > 0 && results.len() >= o.Limit {
				// We have enough and we have more.
				// Do not return io.EOF
				if resCh != nil {
					resErr = nil
					select {
					case resCh <- results:
					case <-ctx.Done():
					}
					resCh = nil
					returned = true
				}
				continue
			}
			results.o = append(results.o, entry)
		}
		if resCh != nil {
			resErr = io.EOF
			select {
			case <-ctx.Done():
				// Nobody wants it.
			case resCh <- results:
			}
		}
	}()
	return func() (metaCacheEntriesSorted, error) {
		select {
		case <-ctx.Done():
			mu.Lock()
			done = true
			mu.Unlock()
			return metaCacheEntriesSorted{}, ctx.Err()
		case r := <-resultsDone:
			return r, resErr
		}
	}
}

// findFirstPart will find the part, with 0 being the first, that corresponds to the marker in the options.
// io.ErrUnexpectedEOF is returned if the place containing the marker hasn't been scanned yet.
// io.EOF indicates the marker is beyond the end of the stream and does not exist.
func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) {
	search := o.Marker
	if search == "" {
		search = o.Prefix
	}
	if search == "" {
		return 0, nil
	}
	o.debugln("searching for ", search)
	var tmp metacacheBlock
	json := jsoniter.ConfigCompatibleWithStandardLibrary
	i := 0
	for {
		partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, i)
		v, ok := fi.Metadata[partKey]
		if !ok {
			o.debugln("no match in metadata, waiting")
			return -1, io.ErrUnexpectedEOF
		}
		err := json.Unmarshal([]byte(v), &tmp)
		if err != nil {
			logger.LogIf(context.Background(), err)
			return -1, err
		}
		if tmp.First == "" && tmp.Last == "" && tmp.EOS {
			return 0, errFileNotFound
		}
		if tmp.First >= search {
			o.debugln("First >= search", v)
			return i, nil
		}
		if tmp.Last >= search {
			o.debugln("Last >= search", v)
			return i, nil
		}
		if tmp.EOS {
			o.debugln("no match, at EOS", v)
			return -3, io.EOF
		}
		o.debugln("First ", tmp.First, "<", search, " search", i)
		i++
	}
}

// updateMetacacheListing will update the metacache listing.
func (o *listPathOptions) updateMetacacheListing(m metacache, rpc *peerRESTClient) (metacache, error) {
	if rpc == nil {
		return localMetacacheMgr.updateCacheEntry(m)
	}
	return rpc.UpdateMetacacheListing(context.Background(), m)
}

func getMetacacheBlockInfo(fi FileInfo, block int) (*metacacheBlock, error) {
	var tmp metacacheBlock
	partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, block)
	v, ok := fi.Metadata[partKey]
	if !ok {
		return nil, io.ErrUnexpectedEOF
	}
	return &tmp, json.Unmarshal([]byte(v), &tmp)
}

const metacachePrefix = ".metacache"

func metacachePrefixForID(bucket, id string) string {
	return pathJoin(bucketMetaPrefix, bucket, metacachePrefix, id)
}

// objectPath returns the object path of the cache.
func (o *listPathOptions) objectPath(block int) string {
	return pathJoin(metacachePrefixForID(o.Bucket, o.ID), "block-"+strconv.Itoa(block)+".s2")
}

// SetFilter sets the FilterPrefix based on Prefix and BaseDir.
func (o *listPathOptions) SetFilter() {
	switch {
	case metacacheSharePrefix:
		return
	case o.Prefix == o.BaseDir:
		// No additional prefix
		return
	}
	// Remove basedir.
	o.FilterPrefix = strings.TrimPrefix(o.Prefix, o.BaseDir)
	// Remove leading and trailing slashes.
	o.FilterPrefix = strings.Trim(o.FilterPrefix, slashSeparator)

	if strings.Contains(o.FilterPrefix, slashSeparator) {
		// Sanity check, should not happen.
		o.FilterPrefix = ""
	}
}

// filter will apply the options and return the number of objects requested by the limit.
// Will return io.EOF if there are no more entries with the same filter.
// The last entry can be used as a marker to resume the listing.
func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSorted, err error) {
	// Forward to prefix, if any
	err = r.forwardTo(o.Prefix)
	if err != nil {
		return entries, err
	}
	if o.Marker != "" {
		err = r.forwardTo(o.Marker)
		if err != nil {
			return entries, err
		}
	}
	o.debugln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator)

	// Filter
	if !o.Recursive {
		entries.o = make(metaCacheEntries, 0, o.Limit)
		pastPrefix := false
		err := r.readFn(func(entry metaCacheEntry) bool {
			if o.Prefix != "" && !strings.HasPrefix(entry.name, o.Prefix) {
				// We are past the prefix, don't continue.
				pastPrefix = true
				return false
			}
			if !o.IncludeDirectories && (entry.isDir() || (!o.Versioned && entry.isObjectDir() && entry.isLatestDeletemarker())) {
				return true
			}
			if !entry.isInDir(o.Prefix, o.Separator) {
				return true
			}
			if !o.InclDeleted && entry.isObject() && entry.isLatestDeletemarker() && !entry.isObjectDir() {
				return true
			}
			if entry.isAllFreeVersions() {
				return true
			}
			entries.o = append(entries.o, entry)
			return entries.len() < o.Limit
		})
		if (err != nil && errors.Is(err, io.EOF)) || pastPrefix || r.nextEOF() {
			return entries, io.EOF
		}
		return entries, err
	}

	// We should not need to filter more.
	return r.readN(o.Limit, o.InclDeleted, o.IncludeDirectories, o.Versioned, o.Prefix)
}

func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) {
	retries := 0
	rpc := globalNotificationSys.restClientFromHash(pathJoin(o.Bucket, o.Prefix))

	const (
		retryDelay    = 50 * time.Millisecond
		retryDelay250 = 250 * time.Millisecond
	)

	for {
		if contextCanceled(ctx) {
			return entries, ctx.Err()
		}

		// If many failures, check the cache state.
		if retries > 10 {
			err := o.checkMetacacheState(ctx, rpc)
			if err != nil {
				return entries, fmt.Errorf("remote listing canceled: %w", err)
			}
			retries = 1
		}

		// All operations are performed without locks, so we must be careful and allow for failures.
		// Read metadata associated with the object from a disk.
		if retries > 0 {
			for _, disk := range er.getDisks() {
				if disk == nil {
					continue
				}
				if !disk.IsOnline() {
					continue
				}
				_, err := disk.ReadVersion(ctx, "", minioMetaBucket,
					o.objectPath(0), "", ReadOptions{})
				if err != nil {
					time.Sleep(retryDelay250)
					retries++
					continue
				}
				break
			}
		}
		retryWait := func() {
			retries++
			if retries == 1 {
				time.Sleep(retryDelay)
			} else {
				time.Sleep(retryDelay250)
			}
		}
		// Load first part metadata...
		// Read metadata associated with the object from all disks.
		fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(0), ObjectOptions{}, true)
		if err != nil {
			switch toObjectErr(err, minioMetaBucket, o.objectPath(0)).(type) {
			case ObjectNotFound, InsufficientReadQuorum:
				retryWait()
				continue
			}
			// Allow one fast retry for other errors.
			if retries > 0 {
				return entries, fmt.Errorf("reading first part metadata: %v", err)
			}
			retryWait()
			continue
		}

		partN, err := o.findFirstPart(fi)
		switch {
		case err == nil:
		case errors.Is(err, io.ErrUnexpectedEOF):
			if retries == 10 {
				err := o.checkMetacacheState(ctx, rpc)
				if err != nil {
					return entries, fmt.Errorf("remote listing canceled: %w", err)
				}
				retries = -1
			}
			retryWait()
			continue
		case errors.Is(err, io.EOF):
			return entries, io.EOF
		}

		// We got a stream to start at.
		loadedPart := 0
		for {
			if contextCanceled(ctx) {
				return entries, ctx.Err()
			}

			if partN != loadedPart {
				if retries > 10 {
					err := o.checkMetacacheState(ctx, rpc)
					if err != nil {
						return entries, fmt.Errorf("waiting for next part %d: %w", partN, err)
					}
					retries = 1
				}

				if retries > 0 {
					// Load from one disk only
					for _, disk := range er.getDisks() {
						if disk == nil {
							continue
						}
						if !disk.IsOnline() {
							continue
						}
						_, err := disk.ReadVersion(ctx, "", minioMetaBucket,
							o.objectPath(partN), "", ReadOptions{})
						if err != nil {
							time.Sleep(retryDelay250)
							retries++
							continue
						}
						break
					}
				}

				// Load partN metadata...
				fi, metaArr, onlineDisks, err = er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(partN), ObjectOptions{}, true)
				if err != nil {
					time.Sleep(retryDelay250)
					retries++
					continue
				}
				loadedPart = partN
				bi, err := getMetacacheBlockInfo(fi, partN)
				logger.LogIf(ctx, err)
				if err == nil {
					if bi.pastPrefix(o.Prefix) {
						return entries, io.EOF
					}
				}
			}

			pr, pw := io.Pipe()
			go func() {
				werr := er.getObjectWithFileInfo(ctx, minioMetaBucket, o.objectPath(partN), 0,
					fi.Size, pw, fi, metaArr, onlineDisks)
				pw.CloseWithError(werr)
			}()

			tmp := newMetacacheReader(pr)
			e, err := tmp.filter(o)
			pr.CloseWithError(err)
			tmp.Close()
			entries.o = append(entries.o, e.o...)
			if o.Limit > 0 && entries.len() > o.Limit {
				entries.truncate(o.Limit)
				return entries, nil
			}
			if err == nil {
				// We stopped within the listing; we are done for now...
				return entries, nil
			}
			if err != nil && !errors.Is(err, io.EOF) {
				switch toObjectErr(err, minioMetaBucket, o.objectPath(partN)).(type) {
				case ObjectNotFound:
					retries++
					time.Sleep(retryDelay250)
					continue
				case InsufficientReadQuorum:
					retries++
					time.Sleep(retryDelay250)
					continue
				default:
					logger.LogIf(ctx, err)
					return entries, err
				}
			}

			// We finished at the end of the block.
			// We should not expect any more results.
			bi, err := getMetacacheBlockInfo(fi, partN)
			logger.LogIf(ctx, err)
			if err != nil || bi.EOS {
				// We are done and there are no more parts.
				return entries, io.EOF
			}
			if bi.endedPrefix(o.Prefix) {
				// Nothing more for prefix.
				return entries, io.EOF
			}
			partN++
			retries = 0
		}
	}
}

// getListQuorum interprets list quorum values and returns the appropriate
// acceptable quorum expected for list operations.
func getListQuorum(quorum string, driveCount int) int {
	switch quorum {
	case "disk":
		return 1
	case "reduced":
		return 2
	case "optimal":
		return (driveCount + 1) / 2
	case "auto":
		return -1
	}
	// defaults to 'strict'
	return driveCount
}

func calcCommonWritesDeletes(infos []DiskInfo, readQuorum int) (commonWrite, commonDelete uint64) {
	deletes := make([]uint64, len(infos))
	writes := make([]uint64, len(infos))
	for index, di := range infos {
		deletes[index] = di.Metrics.TotalDeletes
		writes[index] = di.Metrics.TotalWrites
	}

	filter := func(list []uint64) (commonCount uint64) {
		max := 0
		signatureMap := map[uint64]int{}
		for _, v := range list {
			signatureMap[v]++
		}
		for ops, count := range signatureMap {
			if max < count && commonCount < ops {
				max = count
				commonCount = ops
			}
		}
		if max < readQuorum {
			return 0
		}
		return commonCount
	}

	commonWrite = filter(writes)
	commonDelete = filter(deletes)
	return
}

func calcCommonCounter(infos []DiskInfo, readQuorum int) (commonCount uint64) {
	filter := func() (commonCount uint64) {
		max := 0
		signatureMap := map[uint64]int{}
		for _, info := range infos {
			if info.Error != "" {
				continue
			}
			mutations := info.Metrics.TotalDeletes + info.Metrics.TotalWrites
			signatureMap[mutations]++
		}
		for ops, count := range signatureMap {
			if max < count && commonCount < ops {
				max = count
				commonCount = ops
			}
		}
		if max < readQuorum {
			return 0
		}
		return commonCount
	}

	return filter()
}

func getQuorumDiskInfos(disks []StorageAPI, infos []DiskInfo, readQuorum int) (newDisks []StorageAPI, newInfos []DiskInfo) {
	commonMutations := calcCommonCounter(infos, readQuorum)
	for i, info := range infos {
		mutations := info.Metrics.TotalDeletes + info.Metrics.TotalWrites
		if mutations >= commonMutations {
			newDisks = append(newDisks, disks[i])
			newInfos = append(newInfos, infos[i])
		}
	}

	return newDisks, newInfos
}

func getQuorumDisks(disks []StorageAPI, infos []DiskInfo, readQuorum int) (newDisks []StorageAPI) {
	newDisks, _ = getQuorumDiskInfos(disks, infos, readQuorum)
	return newDisks
}

// Will return io.EOF if continuing would not yield more results.
func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions, results chan<- metaCacheEntry) (err error) {
	defer xioutil.SafeClose(results)
	o.debugf(color.Green("listPath:")+" with options: %#v", o)

	// get prioritized non-healing disks for listing
	disks, infos, _ := er.getOnlineDisksWithHealingAndInfo(true)
	askDisks := getListQuorum(o.AskDisks, er.setDriveCount)
	if askDisks == -1 {
		newDisks := getQuorumDisks(disks, infos, (len(disks)+1)/2)
		if newDisks != nil {
			// If we found a common disk signature in quorum, we proceed to list
			// from a single drive; the drives are shuffled further below.
			disks = newDisks
			askDisks = 1
		} else {
			// If we did not find suitable disks, perform strict quorum listing
			// as no disk agrees on quorum anymore.
			askDisks = getListQuorum("strict", er.setDriveCount)
		}
	}

	var fallbackDisks []StorageAPI

	// Special case: ask all disks if the drive count is 4
	if er.setDriveCount == 4 || askDisks > len(disks) {
		askDisks = len(disks) // use all available drives
	}

	// However many we ask, versions must exist on ~50%
	listingQuorum := (askDisks + 1) / 2

	if askDisks > 0 && len(disks) > askDisks {
		rand.Shuffle(len(disks), func(i, j int) {
			disks[i], disks[j] = disks[j], disks[i]
		})
		fallbackDisks = disks[askDisks:]
		disks = disks[:askDisks]
	}

	// How to resolve results.
	resolver := metadataResolutionParams{
		dirQuorum: listingQuorum,
		objQuorum: listingQuorum,
		bucket:    o.Bucket,
	}

	// Maximum versions requested for "latest" object
	// resolution on versioned buckets; this is only to be
	// used when o.Versioned is false.
	if !o.Versioned {
		resolver.requestedVersions = 1
	}
	var limit int
	if o.Limit > 0 && o.StopDiskAtLimit {
		// Over-read by 4 plus 1 for every 16 in the limit to give the resolver some headroom,
		// allow truncating the list, and know whether we have more results.
		limit = o.Limit + 4 + (o.Limit / 16)
	}
	ctxDone := ctx.Done()
	return listPathRaw(ctx, listPathRawOptions{
		disks:         disks,
		fallbackDisks: fallbackDisks,
		bucket:        o.Bucket,
		path:          o.BaseDir,
		recursive:     o.Recursive,
		filterPrefix:  o.FilterPrefix,
		minDisks:      listingQuorum,
		forwardTo:     o.Marker,
		perDiskLimit:  limit,
		agreed: func(entry metaCacheEntry) {
			select {
			case <-ctxDone:
			case results <- entry:
			}
		},
		partial: func(entries metaCacheEntries, errs []error) {
			// Results Disagree :-(
			entry, ok := entries.resolve(&resolver)
			if ok {
				select {
				case <-ctxDone:
				case results <- *entry:
				}
			}
		},
	})
}

//msgp:ignore metaCacheRPC
type metaCacheRPC struct {
	o      listPathOptions
	mu     sync.Mutex
	meta   *metacache
	rpc    *peerRESTClient
	cancel context.CancelFunc
}

func (m *metaCacheRPC) setErr(err string) {
	m.mu.Lock()
	defer m.mu.Unlock()
	meta := *m.meta
	if meta.status != scanStateError {
		meta.error = err
		meta.status = scanStateError
	} else {
		// An error is already set.
		return
	}
	meta, _ = m.o.updateMetacacheListing(meta, m.rpc)
	*m.meta = meta
}

// saveMetaCacheStream persists the listing stream as metacache blocks on disk,
// while keeping the metacache listing entry updated.
func (er *erasureObjects) saveMetaCacheStream(ctx context.Context, mc *metaCacheRPC, entries <-chan metaCacheEntry) (err error) {
	o := mc.o
	o.debugf(color.Green("saveMetaCacheStream:")+" with options: %#v", o)

	metaMu := &mc.mu
	rpc := mc.rpc
	cancel := mc.cancel
	defer func() {
		o.debugln(color.Green("saveMetaCacheStream:")+"err:", err)
		if err != nil && !errors.Is(err, io.EOF) {
			go mc.setErr(err.Error())
			cancel()
		}
	}()

	defer cancel()
	// Save continuous updates
	go func() {
		var err error
		ticker := time.NewTicker(10 * time.Second)
		defer ticker.Stop()
		var exit bool
		for !exit {
			select {
			case <-ticker.C:
			case <-ctx.Done():
				exit = true
			}
			metaMu.Lock()
			meta := *mc.meta
			meta, err = o.updateMetacacheListing(meta, rpc)
			if err == nil && time.Since(meta.lastHandout) > metacacheMaxClientWait {
				cancel()
				exit = true
				meta.status = scanStateError
				meta.error = fmt.Sprintf("listing canceled since time since last handout was %v ago", time.Since(meta.lastHandout).Round(time.Second))
				o.debugln(color.Green("saveMetaCacheStream: ") + meta.error)
				meta, err = o.updateMetacacheListing(meta, rpc)
			}
			if err == nil {
				*mc.meta = meta
				if meta.status == scanStateError {
					cancel()
					exit = true
				}
			}
			metaMu.Unlock()
		}
	}()

	const retryDelay = 200 * time.Millisecond
	const maxTries = 5

	// Keep destination...
	// Write results to disk.
	bw := newMetacacheBlockWriter(entries, func(b *metacacheBlock) error {
		// If the block is 0 bytes and it's the first block, skip it.
		// Skip this only for transient caches.
		if len(b.data) == 0 && b.n == 0 && o.Transient {
			return nil
		}
		o.debugln(color.Green("saveMetaCacheStream:")+" saving block", b.n, "to", o.objectPath(b.n))
		r, err := hash.NewReader(ctx, bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data)))
		logger.LogIf(ctx, err)
		custom := b.headerKV()
		_, err = er.putMetacacheObject(ctx, o.objectPath(b.n), NewPutObjReader(r), ObjectOptions{
			UserDefined: custom,
		})
		if err != nil {
			mc.setErr(err.Error())
			cancel()
			return err
		}
		if b.n == 0 {
			return nil
		}
		// Update block 0 metadata.
		var retries int
		for {
			meta := b.headerKV()
			fi := FileInfo{
				Metadata: make(map[string]string, len(meta)),
			}
			for k, v := range meta {
				fi.Metadata[k] = v
			}
			err := er.updateObjectMetaWithOpts(ctx, minioMetaBucket, o.objectPath(0), fi, er.getDisks(), UpdateMetadataOpts{NoPersistence: true})
			if err == nil {
				break
			}
			switch err.(type) {
			case ObjectNotFound:
				return err
			case StorageErr:
				return err
			case InsufficientReadQuorum:
			default:
				logger.LogIf(ctx, err)
			}
			if retries >= maxTries {
				return err
			}
			retries++
			time.Sleep(retryDelay)
		}
		return nil
	})

	// Blocks until all entries have been consumed or an error occurs.
	err = bw.Close()
	if err != nil {
		mc.setErr(err.Error())
	}
	metaMu.Lock()
	defer metaMu.Unlock()
	if mc.meta.error != "" {
		return err
	}
	// Save success
	mc.meta.status = scanStateSuccess
	meta, err := o.updateMetacacheListing(*mc.meta, rpc)
	if err == nil {
		*mc.meta = meta
	}
	return nil
}

//msgp:ignore listPathRawOptions
type listPathRawOptions struct {
	disks         []StorageAPI
	fallbackDisks []StorageAPI
	bucket, path  string
	recursive     bool

	// Only return results with this prefix.
	filterPrefix string

	// Forward to this prefix before returning results.
	forwardTo string

	// Minimum number of good disks to continue.
	// An error will be returned if fewer than this many disks can return results.
	minDisks       int
	reportNotFound bool

	// perDiskLimit will limit each disk to return n objects.
	// If <= 0 all results will be returned until canceled.
	perDiskLimit int

	// Callbacks with results:
	// If a callback is set to nil, it will not be called.

	// agreed is called if all disks agreed.
	agreed func(entry metaCacheEntry)

	// partial will be called when there is disagreement between disks.
	// If a disk did not return any result, but also hasn't errored,
	// the entry will be empty and the corresponding error will be nil.
	partial func(entries metaCacheEntries, errs []error)

	// finished will be called when all streams have finished and
	// more than one disk returned an error.
	// Will not be called if everything operates as expected.
	finished func(errs []error)
}

// listPathRaw will list a path on the provided drives.
// See listPathRawOptions on how results are delivered.
// Directories are always returned.
// Cache will be bypassed.
// Context cancellation will be respected but may take a while to take effect.
func listPathRaw(ctx context.Context, opts listPathRawOptions) (err error) {
	disks := opts.disks
	if len(disks) == 0 {
		return fmt.Errorf("listPathRaw: 0 drives provided")
	}

	// Cancel upstream if we finish before we expect.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Keep track of fallback disks
	var fdMu sync.Mutex
	fds := opts.fallbackDisks
	fallback := func(err error) StorageAPI {
		if _, ok := err.(StorageErr); ok {
			// Attempt to grab a fallback disk
			fdMu.Lock()
			defer fdMu.Unlock()
			if len(fds) == 0 {
				return nil
			}
			fdsCopy := fds
			for _, fd := range fdsCopy {
				// Grab a fallback disk
				fds = fds[1:]
				if fd != nil && fd.IsOnline() {
					return fd
				}
			}
		}
		// Either no more disks for fallback or
		// not a storage error.
		return nil
	}
	askDisks := len(disks)
	readers := make([]*metacacheReader, askDisks)
	defer func() {
		for _, r := range readers {
			r.Close()
		}
	}()
	for i := range disks {
		r, w := io.Pipe()
		// Make sure we close the pipe so blocked writes don't stay around.
		defer r.CloseWithError(context.Canceled)

		readers[i] = newMetacacheReader(r)
		d := disks[i]

		// Send request to each disk.
		go func() {
			var werr error
			if d == nil {
				werr = errDiskNotFound
			} else {
				werr = d.WalkDir(ctx, WalkDirOptions{
					Limit:          opts.perDiskLimit,
					Bucket:         opts.bucket,
					BaseDir:        opts.path,
					Recursive:      opts.recursive,
					ReportNotFound: opts.reportNotFound,
					FilterPrefix:   opts.filterPrefix,
					ForwardTo:      opts.forwardTo,
				}, w)
			}

			// fallback only when set.
			for {
				fd := fallback(werr)
				if fd == nil {
					break
				}
				// This fallback is only set when
				// askDisks is less than the total
				// number of disks per set.
				werr = fd.WalkDir(ctx, WalkDirOptions{
					Limit:          opts.perDiskLimit,
					Bucket:         opts.bucket,
					BaseDir:        opts.path,
					Recursive:      opts.recursive,
					ReportNotFound: opts.reportNotFound,
					FilterPrefix:   opts.filterPrefix,
					ForwardTo:      opts.forwardTo,
				}, w)
				if werr == nil {
					break
				}
			}
			w.CloseWithError(werr)
		}()
	}

	topEntries := make(metaCacheEntries, len(readers))
	errs := make([]error, len(readers))
	for {
		// Get the top entry from each reader.
		var current metaCacheEntry
		var atEOF, fnf, vnf, hasErr, agree int
		for i := range topEntries {
			topEntries[i] = metaCacheEntry{}
		}
		if contextCanceled(ctx) {
			return ctx.Err()
		}
		for i, r := range readers {
			if errs[i] != nil {
				hasErr++
				continue
			}
			entry, err := r.peek()
			switch err {
			case io.EOF:
				atEOF++
				continue
			case nil:
			default:
				switch err.Error() {
				case errFileNotFound.Error(),
					errVolumeNotFound.Error(),
					errUnformattedDisk.Error(),
					errDiskNotFound.Error():
					atEOF++
					fnf++
					// This is a special case, to handle bucket-does-not-exist situations.
					if errors.Is(err, errVolumeNotFound) {
						vnf++
					}
					continue
				}
				hasErr++
				errs[i] = err
				continue
			}
			// If no current, add it.
			if current.name == "" {
				topEntries[i] = entry
				current = entry
				agree++
				continue
			}
			// If exact match, we agree.
			if _, ok := current.matches(&entry, true); ok {
				topEntries[i] = entry
				agree++
				continue
			}
			// If only the name matches we didn't agree, but add it for resolution.
			if entry.name == current.name {
				topEntries[i] = entry
				continue
			}
			// We got different entries
			if entry.name > current.name {
				continue
			}
			// We got a new, better current.
			// Clear existing entries.
			for i := range topEntries[:i] {
				topEntries[i] = metaCacheEntry{}
			}
			agree = 1
			current = entry
			topEntries[i] = entry
		}

		// Stop if we exceed the number of bad disks we can tolerate.
		if hasErr > len(disks)-opts.minDisks && hasErr > 0 {
			if opts.finished != nil {
				opts.finished(errs)
			}
			var combinedErr []string
			for i, err := range errs {
				if err != nil {
					if disks[i] != nil {
						combinedErr = append(combinedErr,
							fmt.Sprintf("drive %s returned: %s", disks[i], err))
					} else {
						combinedErr = append(combinedErr, err.Error())
					}
				}
			}
			return errors.New(strings.Join(combinedErr, ", "))
		}

		if vnf == len(readers) {
			return errVolumeNotFound
		}

		// Break if all readers are at EOF or errored.
		if atEOF+hasErr == len(readers) {
			if hasErr > 0 && opts.finished != nil {
				opts.finished(errs)
			}
			break
		}

		if fnf == len(readers) {
			return errFileNotFound
		}

		if agree == len(readers) {
			// Everybody agreed
			for _, r := range readers {
				r.skip(1)
			}
			if opts.agreed != nil {
				opts.agreed(current)
			}
			continue
		}
		if opts.partial != nil {
			opts.partial(topEntries, errs)
		}
		// Skip the inputs we used.
		for i, r := range readers {
			if topEntries[i].name != "" {
				r.skip(1)
			}
		}
	}
	return nil
}
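// Illustrative sketch (not part of the original file): a minimal, hypothetical
// caller wiring up listPathRaw with the agreed/partial callbacks described in
// listPathRawOptions above. The function name exampleListRaw, its parameters,
// and the chosen quorum values are assumptions for illustration only; the
// callbacks are invoked synchronously from listPathRaw's merging loop, so the
// plain slice append below needs no extra locking.
//
//	func exampleListRaw(ctx context.Context, disks []StorageAPI, bucket string) ([]metaCacheEntry, error) {
//		var collected []metaCacheEntry
//		resolver := metadataResolutionParams{dirQuorum: 1, objQuorum: 1, bucket: bucket}
//		err := listPathRaw(ctx, listPathRawOptions{
//			disks:     disks,
//			bucket:    bucket,
//			path:      "",
//			recursive: true,
//			minDisks:  (len(disks) + 1) / 2,
//			// agreed receives entries all disks agree on.
//			agreed: func(entry metaCacheEntry) { collected = append(collected, entry) },
//			// partial resolves disagreements between disks; no quorum means the entry is dropped.
//			partial: func(entries metaCacheEntries, errs []error) {
//				if entry, ok := entries.resolve(&resolver); ok {
//					collected = append(collected, *entry)
//				}
//			},
//		})
//		return collected, err
//	}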