storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/metacache-set.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package cmd 18 19 import ( 20 "bytes" 21 "context" 22 "encoding/gob" 23 "encoding/json" 24 "errors" 25 "fmt" 26 "io" 27 "strconv" 28 "strings" 29 "sync" 30 "time" 31 32 jsoniter "github.com/json-iterator/go" 33 34 "storj.io/minio/cmd/logger" 35 "storj.io/minio/pkg/color" 36 "storj.io/minio/pkg/console" 37 "storj.io/minio/pkg/hash" 38 ) 39 40 type listPathOptions struct { 41 // ID of the listing. 42 // This will be used to persist the list. 43 ID string 44 45 // Bucket of the listing. 46 Bucket string 47 48 // Directory inside the bucket. 49 BaseDir string 50 51 // Scan/return only content with prefix. 52 Prefix string 53 54 // FilterPrefix will return only results with this prefix when scanning. 55 // Should never contain a slash. 56 // Prefix should still be set. 57 FilterPrefix string 58 59 // Marker to resume listing. 60 // The response will be the first entry >= this object name. 61 Marker string 62 63 // Limit the number of results. 64 Limit int 65 66 // The number of disks to ask. Special values: 67 // 0 uses default number of disks. 68 // -1 use at least 50% of disks or at least the default number. 69 AskDisks int 70 71 // InclDeleted will keep all entries where latest version is a delete marker. 72 InclDeleted bool 73 74 // Scan recursively. 75 // If false only main directory will be scanned. 76 // Should always be true if Separator is n SlashSeparator. 77 Recursive bool 78 79 // Separator to use. 80 Separator string 81 82 // Create indicates that the lister should not attempt to load an existing cache. 83 Create bool 84 85 // CurrentCycle indicates the current bloom cycle. 86 // Will be used if a new scan is started. 87 CurrentCycle uint64 88 89 // OldestCycle indicates the oldest cycle acceptable. 90 OldestCycle uint64 91 92 // Include pure directories. 93 IncludeDirectories bool 94 95 // Transient is set if the cache is transient due to an error or being a reserved bucket. 96 // This means the cache metadata will not be persisted on disk. 97 // A transient result will never be returned from the cache so knowing the list id is required. 98 Transient bool 99 100 // discardResult will not persist the cache to storage. 101 // When the initial results are returned listing will be canceled. 102 discardResult bool 103 } 104 105 func init() { 106 gob.Register(listPathOptions{}) 107 } 108 109 // newMetacache constructs a new metacache from the options. 110 func (o listPathOptions) newMetacache() metacache { 111 return metacache{ 112 id: o.ID, 113 bucket: o.Bucket, 114 root: o.BaseDir, 115 recursive: o.Recursive, 116 status: scanStateStarted, 117 error: "", 118 started: UTCNow(), 119 lastHandout: UTCNow(), 120 lastUpdate: UTCNow(), 121 ended: time.Time{}, 122 startedCycle: o.CurrentCycle, 123 endedCycle: 0, 124 dataVersion: metacacheStreamVersion, 125 filter: o.FilterPrefix, 126 } 127 } 128 129 func (o *listPathOptions) debugf(format string, data ...interface{}) { 130 if serverDebugLog { 131 console.Debugf(format+"\n", data...) 132 } 133 } 134 135 func (o *listPathOptions) debugln(data ...interface{}) { 136 if serverDebugLog { 137 console.Debugln(data...) 138 } 139 } 140 141 // gatherResults will collect all results on the input channel and filter results according to the options. 142 // Caller should close the channel when done. 143 // The returned function will return the results once there is enough or input is closed. 144 func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCacheEntriesSorted, error) { 145 var resultsDone = make(chan metaCacheEntriesSorted) 146 // Copy so we can mutate 147 resCh := resultsDone 148 resErr := io.EOF 149 150 go func() { 151 var results metaCacheEntriesSorted 152 for entry := range in { 153 if resCh == nil { 154 // past limit 155 continue 156 } 157 if !o.IncludeDirectories && entry.isDir() { 158 continue 159 } 160 o.debugln("gather got:", entry.name) 161 if o.Marker != "" && entry.name < o.Marker { 162 o.debugln("pre marker") 163 continue 164 } 165 if !strings.HasPrefix(entry.name, o.Prefix) { 166 o.debugln("not in prefix") 167 continue 168 } 169 if !o.Recursive && !entry.isInDir(o.Prefix, o.Separator) { 170 o.debugln("not in dir", o.Prefix, o.Separator) 171 continue 172 } 173 if !o.InclDeleted && entry.isObject() && entry.isLatestDeletemarker() { 174 o.debugln("latest is delete marker") 175 continue 176 } 177 if o.Limit > 0 && results.len() >= o.Limit { 178 // We have enough and we have more. 179 // Do not return io.EOF 180 if resCh != nil { 181 resErr = nil 182 resCh <- results 183 resCh = nil 184 } 185 continue 186 } 187 o.debugln("adding...") 188 results.o = append(results.o, entry) 189 } 190 if resCh != nil { 191 resErr = io.EOF 192 resCh <- results 193 } 194 }() 195 return func() (metaCacheEntriesSorted, error) { 196 return <-resultsDone, resErr 197 } 198 } 199 200 // findFirstPart will find the part with 0 being the first that corresponds to the marker in the options. 201 // io.ErrUnexpectedEOF is returned if the place containing the marker hasn't been scanned yet. 202 // io.EOF indicates the marker is beyond the end of the stream and does not exist. 203 func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) { 204 search := o.Marker 205 if search == "" { 206 search = o.Prefix 207 } 208 if search == "" { 209 return 0, nil 210 } 211 o.debugln("searching for ", search) 212 var tmp metacacheBlock 213 var json = jsoniter.ConfigCompatibleWithStandardLibrary 214 i := 0 215 for { 216 partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, i) 217 v, ok := fi.Metadata[partKey] 218 if !ok { 219 o.debugln("no match in metadata, waiting") 220 return -1, io.ErrUnexpectedEOF 221 } 222 err := json.Unmarshal([]byte(v), &tmp) 223 if !ok { 224 logger.LogIf(context.Background(), err) 225 return -1, err 226 } 227 if tmp.First == "" && tmp.Last == "" && tmp.EOS { 228 return 0, errFileNotFound 229 } 230 if tmp.First >= search { 231 o.debugln("First >= search", v) 232 return i, nil 233 } 234 if tmp.Last >= search { 235 o.debugln("Last >= search", v) 236 return i, nil 237 } 238 if tmp.EOS { 239 o.debugln("no match, at EOS", v) 240 return -3, io.EOF 241 } 242 o.debugln("First ", tmp.First, "<", search, " search", i) 243 i++ 244 } 245 } 246 247 // updateMetacacheListing will update the metacache listing. 248 func (o *listPathOptions) updateMetacacheListing(m metacache, rpc *peerRESTClient) (metacache, error) { 249 if o.Transient { 250 return localMetacacheMgr.getTransient().updateCacheEntry(m) 251 } 252 if rpc == nil { 253 return localMetacacheMgr.updateCacheEntry(m) 254 } 255 return rpc.UpdateMetacacheListing(context.Background(), m) 256 } 257 258 func getMetacacheBlockInfo(fi FileInfo, block int) (*metacacheBlock, error) { 259 var tmp metacacheBlock 260 partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, block) 261 v, ok := fi.Metadata[partKey] 262 if !ok { 263 return nil, io.ErrUnexpectedEOF 264 } 265 return &tmp, json.Unmarshal([]byte(v), &tmp) 266 } 267 268 const metacachePrefix = ".metacache" 269 270 func metacachePrefixForID(bucket, id string) string { 271 return pathJoin(bucketMetaPrefix, bucket, metacachePrefix, id) 272 } 273 274 // objectPath returns the object path of the cache. 275 func (o *listPathOptions) objectPath(block int) string { 276 return pathJoin(metacachePrefixForID(o.Bucket, o.ID), "block-"+strconv.Itoa(block)+".s2") 277 } 278 279 func (o *listPathOptions) SetFilter() { 280 switch { 281 case metacacheSharePrefix: 282 return 283 case o.CurrentCycle != o.OldestCycle: 284 // We have a clean bloom filter 285 return 286 case o.Prefix == o.BaseDir: 287 // No additional prefix 288 return 289 } 290 // Remove basedir. 291 o.FilterPrefix = strings.TrimPrefix(o.Prefix, o.BaseDir) 292 // Remove leading and trailing slashes. 293 o.FilterPrefix = strings.Trim(o.FilterPrefix, slashSeparator) 294 295 if strings.Contains(o.FilterPrefix, slashSeparator) { 296 // Sanity check, should not happen. 297 o.FilterPrefix = "" 298 } 299 } 300 301 // filter will apply the options and return the number of objects requested by the limit. 302 // Will return io.EOF if there are no more entries with the same filter. 303 // The last entry can be used as a marker to resume the listing. 304 func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSorted, err error) { 305 // Forward to prefix, if any 306 err = r.forwardTo(o.Prefix) 307 if err != nil { 308 return entries, err 309 } 310 if o.Marker != "" { 311 err = r.forwardTo(o.Marker) 312 if err != nil { 313 return entries, err 314 } 315 } 316 o.debugln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator) 317 318 // Filter 319 if !o.Recursive { 320 entries.o = make(metaCacheEntries, 0, o.Limit) 321 pastPrefix := false 322 err := r.readFn(func(entry metaCacheEntry) bool { 323 if o.Prefix != "" && !strings.HasPrefix(entry.name, o.Prefix) { 324 // We are past the prefix, don't continue. 325 pastPrefix = true 326 return false 327 } 328 if !o.IncludeDirectories && entry.isDir() { 329 return true 330 } 331 if !entry.isInDir(o.Prefix, o.Separator) { 332 return true 333 } 334 if !o.InclDeleted && entry.isObject() && entry.isLatestDeletemarker() { 335 return entries.len() < o.Limit 336 } 337 entries.o = append(entries.o, entry) 338 return entries.len() < o.Limit 339 }) 340 if (err != nil && err.Error() == io.EOF.Error()) || pastPrefix || r.nextEOF() { 341 return entries, io.EOF 342 } 343 return entries, err 344 } 345 346 // We should not need to filter more. 347 return r.readN(o.Limit, o.InclDeleted, o.IncludeDirectories, o.Prefix) 348 } 349 350 func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) { 351 retries := 0 352 rpc := GlobalNotificationSys.restClientFromHash(o.Bucket) 353 354 for { 355 select { 356 case <-ctx.Done(): 357 return entries, ctx.Err() 358 default: 359 } 360 361 // If many failures, check the cache state. 362 if retries > 10 { 363 err := o.checkMetacacheState(ctx, rpc) 364 if err != nil { 365 return entries, fmt.Errorf("remote listing canceled: %w", err) 366 } 367 retries = 1 368 } 369 370 const retryDelay = 500 * time.Millisecond 371 // Load first part metadata... 372 // All operations are performed without locks, so we must be careful and allow for failures. 373 // Read metadata associated with the object from a disk. 374 if retries > 0 { 375 disks := er.getOnlineDisks() 376 if len(disks) == 0 { 377 time.Sleep(retryDelay) 378 retries++ 379 continue 380 } 381 382 _, err := disks[0].ReadVersion(ctx, minioMetaBucket, o.objectPath(0), "", false) 383 if err != nil { 384 time.Sleep(retryDelay) 385 retries++ 386 continue 387 } 388 } 389 390 // Read metadata associated with the object from all disks. 391 fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(0), ObjectOptions{}, true) 392 if err != nil { 393 switch toObjectErr(err, minioMetaBucket, o.objectPath(0)).(type) { 394 case ObjectNotFound: 395 retries++ 396 time.Sleep(retryDelay) 397 continue 398 case InsufficientReadQuorum: 399 retries++ 400 time.Sleep(retryDelay) 401 continue 402 default: 403 return entries, fmt.Errorf("reading first part metadata: %w", err) 404 } 405 } 406 407 partN, err := o.findFirstPart(fi) 408 switch { 409 case err == nil: 410 case errors.Is(err, io.ErrUnexpectedEOF): 411 if retries == 10 { 412 err := o.checkMetacacheState(ctx, rpc) 413 if err != nil { 414 return entries, fmt.Errorf("remote listing canceled: %w", err) 415 } 416 retries = -1 417 } 418 retries++ 419 time.Sleep(retryDelay) 420 continue 421 case errors.Is(err, io.EOF): 422 return entries, io.EOF 423 } 424 425 // We got a stream to start at. 426 loadedPart := 0 427 buf := bufferPool.Get().(*bytes.Buffer) 428 defer func() { 429 buf.Reset() 430 bufferPool.Put(buf) 431 }() 432 for { 433 select { 434 case <-ctx.Done(): 435 return entries, ctx.Err() 436 default: 437 } 438 439 if partN != loadedPart { 440 if retries > 10 { 441 err := o.checkMetacacheState(ctx, rpc) 442 if err != nil { 443 return entries, fmt.Errorf("waiting for next part %d: %w", partN, err) 444 } 445 retries = 1 446 } 447 448 if retries > 0 { 449 // Load from one disk only 450 disks := er.getOnlineDisks() 451 if len(disks) == 0 { 452 time.Sleep(retryDelay) 453 retries++ 454 continue 455 } 456 457 _, err := disks[0].ReadVersion(ctx, minioMetaBucket, o.objectPath(partN), "", false) 458 if err != nil { 459 time.Sleep(retryDelay) 460 retries++ 461 continue 462 } 463 } 464 // Load first part metadata... 465 fi, metaArr, onlineDisks, err = er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(partN), ObjectOptions{}, true) 466 if err != nil { 467 time.Sleep(retryDelay) 468 retries++ 469 continue 470 } 471 loadedPart = partN 472 bi, err := getMetacacheBlockInfo(fi, partN) 473 logger.LogIf(ctx, err) 474 if err == nil { 475 if bi.pastPrefix(o.Prefix) { 476 return entries, io.EOF 477 } 478 } 479 } 480 buf.Reset() 481 err := er.getObjectWithFileInfo(ctx, minioMetaBucket, o.objectPath(partN), 0, fi.Size, buf, fi, metaArr, onlineDisks) 482 if err != nil { 483 switch toObjectErr(err, minioMetaBucket, o.objectPath(partN)).(type) { 484 case ObjectNotFound: 485 retries++ 486 time.Sleep(retryDelay) 487 continue 488 case InsufficientReadQuorum: 489 retries++ 490 time.Sleep(retryDelay) 491 continue 492 default: 493 logger.LogIf(ctx, err) 494 return entries, err 495 } 496 } 497 tmp, err := newMetacacheReader(buf) 498 if err != nil { 499 return entries, err 500 } 501 e, err := tmp.filter(o) 502 entries.o = append(entries.o, e.o...) 503 if o.Limit > 0 && entries.len() > o.Limit { 504 entries.truncate(o.Limit) 505 return entries, nil 506 } 507 if err == nil { 508 // We stopped within the listing, we are done for now... 509 return entries, nil 510 } 511 if !errors.Is(err, io.EOF) { 512 logger.LogIf(ctx, err) 513 return entries, err 514 } 515 516 // We finished at the end of the block. 517 // And should not expect any more results. 518 bi, err := getMetacacheBlockInfo(fi, partN) 519 logger.LogIf(ctx, err) 520 if err != nil || bi.EOS { 521 // We are done and there are no more parts. 522 return entries, io.EOF 523 } 524 if bi.endedPrefix(o.Prefix) { 525 // Nothing more for prefix. 526 return entries, io.EOF 527 } 528 partN++ 529 retries = 0 530 } 531 } 532 } 533 534 // Will return io.EOF if continuing would not yield more results. 535 func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) { 536 o.debugf(color.Green("listPath:")+" with options: %#v", o) 537 538 // See if we have the listing stored. 539 if !o.Create && !o.discardResult { 540 entries, err := er.streamMetadataParts(ctx, o) 541 if IsErr(err, []error{ 542 nil, 543 context.Canceled, 544 context.DeadlineExceeded, 545 }...) { 546 // Expected good errors we don't need to return error. 547 return entries, nil 548 } 549 550 if !errors.Is(err, io.EOF) { // io.EOF is expected and should be returned but no need to log it. 551 // Log an return errors on unexpected errors. 552 logger.LogIf(ctx, err) 553 } 554 555 return entries, err 556 } 557 558 meta := o.newMetacache() 559 rpc := GlobalNotificationSys.restClientFromHash(o.Bucket) 560 var metaMu sync.Mutex 561 562 o.debugln(color.Green("listPath:")+" scanning bucket:", o.Bucket, "basedir:", o.BaseDir, "prefix:", o.Prefix, "marker:", o.Marker) 563 564 // Disconnect from call above, but cancel on exit. 565 ctx, cancel := context.WithCancel(GlobalContext) 566 // We need to ask disks. 567 disks := er.getOnlineDisks() 568 569 defer func() { 570 o.debugln(color.Green("listPath:")+" returning:", entries.len(), "err:", err) 571 if err != nil && !errors.Is(err, io.EOF) { 572 go func(err string) { 573 metaMu.Lock() 574 if meta.status != scanStateError { 575 meta.error = err 576 meta.status = scanStateError 577 } 578 meta, _ = o.updateMetacacheListing(meta, rpc) 579 metaMu.Unlock() 580 }(err.Error()) 581 cancel() 582 } 583 }() 584 585 askDisks := o.AskDisks 586 listingQuorum := askDisks - 1 587 // Special case: ask all disks if the drive count is 4 588 if askDisks == -1 || er.setDriveCount == 4 { 589 askDisks = len(disks) // with 'strict' quorum list on all online disks. 590 listingQuorum = getReadQuorum(er.setDriveCount) 591 } 592 593 if len(disks) < askDisks { 594 err = InsufficientReadQuorum{} 595 logger.LogIf(ctx, fmt.Errorf("listPath: Insufficient disks, %d of %d needed are available", len(disks), askDisks)) 596 cancel() 597 return 598 } 599 600 // Select askDisks random disks. 601 if len(disks) > askDisks { 602 disks = disks[:askDisks] 603 } 604 605 // Create output for our results. 606 var cacheCh chan metaCacheEntry 607 if !o.discardResult { 608 cacheCh = make(chan metaCacheEntry, metacacheBlockSize) 609 } 610 611 // Create filter for results. 612 filterCh := make(chan metaCacheEntry, 100) 613 filteredResults := o.gatherResults(filterCh) 614 closeChannels := func() { 615 if !o.discardResult { 616 close(cacheCh) 617 } 618 close(filterCh) 619 } 620 621 // Cancel listing on return if non-saved list. 622 if o.discardResult { 623 defer cancel() 624 } 625 626 go func() { 627 defer cancel() 628 // Save continuous updates 629 go func() { 630 var err error 631 ticker := time.NewTicker(10 * time.Second) 632 defer ticker.Stop() 633 var exit bool 634 for !exit { 635 select { 636 case <-ticker.C: 637 case <-ctx.Done(): 638 exit = true 639 } 640 metaMu.Lock() 641 meta.endedCycle = intDataUpdateTracker.current() 642 meta, err = o.updateMetacacheListing(meta, rpc) 643 if meta.status == scanStateError { 644 logger.LogIf(ctx, err) 645 cancel() 646 exit = true 647 } 648 metaMu.Unlock() 649 } 650 }() 651 652 const retryDelay = 200 * time.Millisecond 653 const maxTries = 5 654 655 var bw *metacacheBlockWriter 656 // Don't save single object listings. 657 if !o.discardResult { 658 // Write results to disk. 659 bw = newMetacacheBlockWriter(cacheCh, func(b *metacacheBlock) error { 660 // if the block is 0 bytes and its a first block skip it. 661 // skip only this for Transient caches. 662 if len(b.data) == 0 && b.n == 0 && o.Transient { 663 return nil 664 } 665 o.debugln(color.Green("listPath:")+" saving block", b.n, "to", o.objectPath(b.n)) 666 r, err := hash.NewReader(bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data))) 667 logger.LogIf(ctx, err) 668 custom := b.headerKV() 669 _, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r), ObjectOptions{ 670 UserDefined: custom, 671 NoLock: true, // No need to hold namespace lock, each prefix caches uniquely. 672 ParentIsObject: nil, 673 }) 674 if err != nil { 675 metaMu.Lock() 676 if meta.error != "" { 677 meta.status = scanStateError 678 meta.error = err.Error() 679 } 680 metaMu.Unlock() 681 cancel() 682 return err 683 } 684 if b.n == 0 { 685 return nil 686 } 687 // Update block 0 metadata. 688 var retries int 689 for { 690 meta := b.headerKV() 691 fi := FileInfo{ 692 Metadata: make(map[string]string, len(meta)), 693 } 694 for k, v := range meta { 695 fi.Metadata[k] = v 696 } 697 err := er.updateObjectMeta(ctx, minioMetaBucket, o.objectPath(0), fi) 698 if err == nil { 699 break 700 } 701 switch err.(type) { 702 case ObjectNotFound: 703 return err 704 case InsufficientReadQuorum: 705 default: 706 logger.LogIf(ctx, err) 707 } 708 if retries >= maxTries { 709 return err 710 } 711 retries++ 712 time.Sleep(retryDelay) 713 } 714 return nil 715 }) 716 } 717 718 // How to resolve results. 719 resolver := metadataResolutionParams{ 720 dirQuorum: listingQuorum, 721 objQuorum: listingQuorum, 722 bucket: o.Bucket, 723 } 724 725 err := listPathRaw(ctx, listPathRawOptions{ 726 disks: disks, 727 bucket: o.Bucket, 728 path: o.BaseDir, 729 recursive: o.Recursive, 730 filterPrefix: o.FilterPrefix, 731 minDisks: listingQuorum, 732 agreed: func(entry metaCacheEntry) { 733 if !o.discardResult { 734 cacheCh <- entry 735 } 736 filterCh <- entry 737 }, 738 partial: func(entries metaCacheEntries, nAgreed int, errs []error) { 739 // Results Disagree :-( 740 entry, ok := entries.resolve(&resolver) 741 if ok { 742 if !o.discardResult { 743 cacheCh <- *entry 744 } 745 filterCh <- *entry 746 } 747 }, 748 }) 749 750 metaMu.Lock() 751 if err != nil { 752 meta.status = scanStateError 753 meta.error = err.Error() 754 } 755 // Save success 756 if meta.error == "" { 757 meta.status = scanStateSuccess 758 meta.endedCycle = intDataUpdateTracker.current() 759 } 760 761 meta, _ = o.updateMetacacheListing(meta, rpc) 762 metaMu.Unlock() 763 764 closeChannels() 765 if !o.discardResult { 766 if err := bw.Close(); err != nil { 767 metaMu.Lock() 768 meta.error = err.Error() 769 meta.status = scanStateError 770 meta, _ = o.updateMetacacheListing(meta, rpc) 771 metaMu.Unlock() 772 } 773 } 774 }() 775 776 return filteredResults() 777 } 778 779 type listPathRawOptions struct { 780 disks []StorageAPI 781 bucket, path string 782 recursive bool 783 784 // Only return results with this prefix. 785 filterPrefix string 786 787 // Forward to this prefix before returning results. 788 forwardTo string 789 790 // Minimum number of good disks to continue. 791 // An error will be returned if this many disks returned an error. 792 minDisks int 793 reportNotFound bool 794 795 // Callbacks with results: 796 // If set to nil, it will not be called. 797 798 // agreed is called if all disks agreed. 799 agreed func(entry metaCacheEntry) 800 801 // partial will be returned when there is disagreement between disks. 802 // if disk did not return any result, but also haven't errored 803 // the entry will be empty and errs will 804 partial func(entries metaCacheEntries, nAgreed int, errs []error) 805 806 // finished will be called when all streams have finished and 807 // more than one disk returned an error. 808 // Will not be called if everything operates as expected. 809 finished func(errs []error) 810 } 811 812 // listPathRaw will list a path on the provided drives. 813 // See listPathRawOptions on how results are delivered. 814 // Directories are always returned. 815 // Cache will be bypassed. 816 // Context cancellation will be respected but may take a while to effectuate. 817 func listPathRaw(ctx context.Context, opts listPathRawOptions) (err error) { 818 disks := opts.disks 819 if len(disks) == 0 { 820 return fmt.Errorf("listPathRaw: 0 drives provided") 821 } 822 // Cancel upstream if we finish before we expect. 823 ctx, cancel := context.WithCancel(ctx) 824 defer cancel() 825 826 askDisks := len(disks) 827 readers := make([]*metacacheReader, askDisks) 828 for i := range disks { 829 r, w := io.Pipe() 830 d := disks[i] 831 readers[i], err = newMetacacheReader(r) 832 if err != nil { 833 return err 834 } 835 // Make sure we close the pipe so blocked writes doesn't stay around. 836 defer r.CloseWithError(context.Canceled) 837 // Send request to each disk. 838 go func() { 839 werr := d.WalkDir(ctx, WalkDirOptions{ 840 Bucket: opts.bucket, 841 BaseDir: opts.path, 842 Recursive: opts.recursive, 843 ReportNotFound: opts.reportNotFound, 844 FilterPrefix: opts.filterPrefix, 845 ForwardTo: opts.forwardTo, 846 }, w) 847 w.CloseWithError(werr) 848 if werr != io.EOF && werr != nil && 849 werr.Error() != errFileNotFound.Error() && 850 werr.Error() != errVolumeNotFound.Error() && 851 !errors.Is(werr, context.Canceled) { 852 logger.LogIf(ctx, werr) 853 } 854 }() 855 } 856 857 topEntries := make(metaCacheEntries, len(readers)) 858 errs := make([]error, len(readers)) 859 for { 860 // Get the top entry from each 861 var current metaCacheEntry 862 var atEOF, fnf, hasErr, agree int 863 for i := range topEntries { 864 topEntries[i] = metaCacheEntry{} 865 } 866 select { 867 case <-ctx.Done(): 868 return ctx.Err() 869 default: 870 } 871 for i, r := range readers { 872 if errs[i] != nil { 873 hasErr++ 874 continue 875 } 876 entry, err := r.peek() 877 switch err { 878 case io.EOF: 879 atEOF++ 880 continue 881 case nil: 882 default: 883 if err.Error() == errFileNotFound.Error() { 884 atEOF++ 885 fnf++ 886 continue 887 } 888 if err.Error() == errVolumeNotFound.Error() { 889 atEOF++ 890 fnf++ 891 continue 892 } 893 hasErr++ 894 errs[i] = err 895 continue 896 } 897 // If no current, add it. 898 if current.name == "" { 899 topEntries[i] = entry 900 current = entry 901 agree++ 902 continue 903 } 904 // If exact match, we agree. 905 if current.matches(&entry, opts.bucket) { 906 topEntries[i] = entry 907 agree++ 908 continue 909 } 910 // If only the name matches we didn't agree, but add it for resolution. 911 if entry.name == current.name { 912 topEntries[i] = entry 913 continue 914 } 915 // We got different entries 916 if entry.name > current.name { 917 continue 918 } 919 // We got a new, better current. 920 // Clear existing entries. 921 for i := range topEntries[:i] { 922 topEntries[i] = metaCacheEntry{} 923 } 924 agree = 1 925 current = entry 926 topEntries[i] = entry 927 } 928 929 // Stop if we exceed number of bad disks 930 if hasErr > len(disks)-opts.minDisks && hasErr > 0 { 931 if opts.finished != nil { 932 opts.finished(errs) 933 } 934 var combinedErr []string 935 for i, err := range errs { 936 if err != nil { 937 combinedErr = append(combinedErr, fmt.Sprintf("disk %d returned: %s", i, err)) 938 } 939 } 940 return errors.New(strings.Join(combinedErr, ", ")) 941 } 942 943 // Break if all at EOF or error. 944 if atEOF+hasErr == len(readers) { 945 if hasErr > 0 && opts.finished != nil { 946 opts.finished(errs) 947 } 948 break 949 } 950 if fnf == len(readers) { 951 return errFileNotFound 952 } 953 if agree == len(readers) { 954 // Everybody agreed 955 for _, r := range readers { 956 r.skip(1) 957 } 958 if opts.agreed != nil { 959 opts.agreed(current) 960 } 961 continue 962 } 963 if opts.partial != nil { 964 opts.partial(topEntries, agree, errs) 965 } 966 // Skip the inputs we used. 967 for i, r := range readers { 968 if topEntries[i].name != "" { 969 r.skip(1) 970 } 971 } 972 } 973 return nil 974 }