github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/fs/walk.go (about)

     1  // Package fs provides mountpath and FQN abstractions and methods to resolve/map stored content
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package fs
     6  
     7  import (
     8  	"context"
     9  	iofs "io/fs"
    10  	"os"
    11  	"path/filepath"
    12  	"sort"
    13  
    14  	"github.com/NVIDIA/aistore/cmn"
    15  	"github.com/NVIDIA/aistore/cmn/atomic"
    16  	"github.com/NVIDIA/aistore/cmn/cos"
    17  	"github.com/NVIDIA/aistore/cmn/debug"
    18  	"github.com/NVIDIA/aistore/cmn/feat"
    19  	"github.com/NVIDIA/aistore/cmn/nlog"
    20  	"github.com/NVIDIA/aistore/memsys"
    21  	"github.com/karrick/godirwalk"
    22  )
    23  
    24  const (
    25  	// Determines the threshold of error count which will result in halting
    26  	// the walking operation.
    27  	errThreshold = 1000
    28  
    29  	// Determines the buffer size of the mpath worker queue.
    30  	mpathQueueSize = 100
    31  )
    32  
    33  type (
    34  	DirEntry interface {
    35  		IsDir() bool
    36  	}
    37  
    38  	walkFunc func(fqn string, de DirEntry) error
    39  
    40  	WalkOpts struct {
    41  		Mi       *Mountpath
    42  		Callback walkFunc
    43  		Bck      cmn.Bck
    44  		Dir      string
    45  		Prefix   string
    46  		CTs      []string
    47  		Sorted   bool
    48  	}
    49  
    50  	errCallbackWrapper struct {
    51  		counter atomic.Int64
    52  	}
    53  
    54  	walkDirWrapper struct {
    55  		ucb func(string, DirEntry) error // user-provided callback
    56  		dir string                       // root pathname
    57  		errCallbackWrapper
    58  	}
    59  )
    60  
    61  // PathErrToAction is a default error callback for fast godirwalk.Walk.
    62  // The idea is that on any error that was produced during the walk we dispatch
    63  // this handler and act upon the error.
    64  //
    65  // By default it halts on bucket level errors because there is no option to
    66  // continue walking if there is a problem with a bucket. Also we count "soft"
    67  // errors and abort if we reach certain amount of them.
    68  func (ew *errCallbackWrapper) PathErrToAction(_ string, err error) godirwalk.ErrorAction {
    69  	if cmn.IsErrBucketLevel(err) {
    70  		return godirwalk.Halt
    71  	}
    72  	if ew.counter.Load() > errThreshold {
    73  		return godirwalk.Halt
    74  	}
    75  	if cmn.IsErrObjLevel(err) {
    76  		ew.counter.Inc()
    77  		return godirwalk.SkipNode
    78  	}
    79  	return godirwalk.Halt
    80  }
    81  
// godirwalk is used by default. If you want to switch to the standard filepath.Walk do:
// 1. Rewrite `callback` to:
//   func (opts *WalkOpts) callback(fqn string, de os.FileInfo, err error) error {
//     if err != nil {
//        if err := cmn.PathWalkErr(err); err != nil {
//          return err
//        }
//        return nil
//     }
//     return opts.Callback(fqn, de)
//   }
// 2. Replace `Walk` body with one-liner:
//   return filepath.Walk(fqn, opts.callback)
// No more changes required.
// NOTE: with the standard filepath.Walk the 'Sorted' option is ignored
    97  
    98  // interface guard
    99  var _ DirEntry = (*godirwalk.Dirent)(nil)
   100  
   101  func (opts *WalkOpts) callback(fqn string, de *godirwalk.Dirent) error {
   102  	return opts.Callback(fqn, de)
   103  }
   104  
   105  func Walk(opts *WalkOpts) error {
   106  	var (
   107  		fqns []string
   108  		err  error
   109  		ew   = &errCallbackWrapper{}
   110  	)
   111  	if opts.Dir != "" {
   112  		debug.Assert(opts.Prefix == "")
   113  		fqns = append(fqns, opts.Dir)
   114  	} else if opts.Bck.Name != "" {
   115  		debug.Assert(len(opts.CTs) > 0)
   116  		// one bucket
   117  		for _, ct := range opts.CTs {
   118  			bdir := opts.Mi.MakePathCT(&opts.Bck, ct)
   119  			if opts.Prefix != "" {
   120  				fqns = append(fqns, _join(bdir, opts.Prefix))
   121  			} else {
   122  				fqns = append(fqns, bdir)
   123  			}
   124  		}
   125  	} else {
   126  		// all buckets
   127  		debug.Assert(len(opts.CTs) > 0)
   128  		fqns, err = allMpathCTpaths(opts)
   129  		if len(fqns) == 0 || err != nil {
   130  			return err
   131  		}
   132  	}
   133  	scratch, slab := memsys.PageMM().AllocSize(memsys.DefaultBufSize)
   134  	gOpts := &godirwalk.Options{
   135  		ErrorCallback: ew.PathErrToAction, // "halts the walk" or "skips the node" (detailed comment above)
   136  		Callback:      opts.callback,
   137  		Unsorted:      !opts.Sorted,
   138  		ScratchBuffer: scratch,
   139  	}
   140  	for _, fqn := range fqns {
   141  		err1 := godirwalk.Walk(fqn, gOpts)
   142  		if err1 == nil || os.IsNotExist(err1) {
   143  			continue
   144  		}
   145  		// NOTE: mountpath is getting detached or disabled
   146  		if cmn.IsErrMountpathNotFound(err1) {
   147  			nlog.Errorln(err1)
   148  			continue
   149  		}
   150  		if cmn.IsErrAborted(err1) {
   151  			// Errors different from cmn.ErrAborted should not be overwritten
   152  			// by cmn.ErrAborted. Assign err = err1 only when there wasn't any other error
   153  			if err == nil {
   154  				err = err1
   155  			}
   156  			continue
   157  		}
   158  		if err1 != context.Canceled && !cos.IsNotExist(err1, 0) {
   159  			nlog.Errorln(err1)
   160  		}
   161  		err = err1
   162  	}
   163  	slab.Free(scratch)
   164  	return err
   165  }
   166  
   167  func _join(bdir, prefix string) string {
   168  	sub := bdir + cos.PathSeparator + prefix
   169  	if cos.IsLastB(prefix, filepath.Separator) {
   170  		return sub
   171  	}
   172  	if !cmn.Rom.Features().IsSet(feat.DontOptimizeVirtualDir) {
   173  		if finfo, err := os.Stat(sub); err == nil && finfo.IsDir() {
   174  			return sub
   175  		}
   176  	}
   177  	return bdir
   178  }
   179  
   180  func allMpathCTpaths(opts *WalkOpts) (fqns []string, err error) {
   181  	children, erc := mpathChildren(opts)
   182  	if erc != nil {
   183  		return nil, erc
   184  	}
   185  	if len(opts.CTs) > 1 {
   186  		fqns = make([]string, 0, len(children)*len(opts.CTs))
   187  	} else {
   188  		fqns = children[:0] // optimization to reuse previously allocated slice
   189  	}
   190  	bck := opts.Bck
   191  	for _, child := range children {
   192  		bck.Name = child
   193  		if err := bck.ValidateName(); err != nil {
   194  			continue
   195  		}
   196  		for _, ct := range opts.CTs {
   197  			bdir := opts.Mi.MakePathCT(&bck, ct)
   198  			if opts.Prefix != "" {
   199  				fqns = append(fqns, _join(bdir, opts.Prefix))
   200  			} else {
   201  				fqns = append(fqns, bdir)
   202  			}
   203  		}
   204  	}
   205  	return
   206  }
   207  
   208  func AllMpathBcks(opts *WalkOpts) (bcks []cmn.Bck, err error) {
   209  	children, erc := mpathChildren(opts)
   210  	if erc != nil {
   211  		return nil, erc
   212  	}
   213  	bck := opts.Bck
   214  	for _, child := range children {
   215  		bck.Name = child
   216  		if err := bck.ValidateName(); err != nil {
   217  			continue
   218  		}
   219  		bcks = append(bcks, bck)
   220  	}
   221  	return
   222  }
   223  
   224  func mpathChildren(opts *WalkOpts) (children []string, err error) {
   225  	var (
   226  		fqn           = opts.Mi.MakePathBck(&opts.Bck)
   227  		scratch, slab = memsys.PageMM().AllocSize(memsys.DefaultBufSize)
   228  	)
   229  	children, err = godirwalk.ReadDirnames(fqn, scratch)
   230  	slab.Free(scratch)
   231  	if err != nil {
   232  		if os.IsNotExist(err) {
   233  			err = nil
   234  		}
   235  		return
   236  	}
   237  	if opts.Sorted {
   238  		sort.Strings(children)
   239  	}
   240  	return
   241  }
   242  
   243  ////////////////////
   244  // WalkDir & walkDirWrapper - non-recursive walk
   245  ////////////////////
   246  
   247  // NOTE: using Go filepath.WalkDir
   248  // pros: lexical deterministic order; cons: reads the entire directory
   249  func WalkDir(dir string, ucb func(string, DirEntry) error) error {
   250  	wd := &walkDirWrapper{dir: dir, ucb: ucb}
   251  	return filepath.WalkDir(dir, wd.wcb)
   252  }
   253  
   254  // wraps around user callback to implement default error handling and skipping
   255  func (wd *walkDirWrapper) wcb(path string, de iofs.DirEntry, err error) error {
   256  	if err != nil {
   257  		// Walk and WalkDir share the same error-processing logic (hence, godirwalk enum)
   258  		if path != wd.dir && wd.PathErrToAction(path, err) != godirwalk.Halt {
   259  			err = nil
   260  		}
   261  		return err
   262  	}
   263  	if de.IsDir() && path != wd.dir {
   264  		return filepath.SkipDir
   265  	}
   266  	if !de.Type().IsRegular() {
   267  		return nil
   268  	}
   269  	// user callback
   270  	return wd.ucb(path, de)
   271  }