github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/fs/walk.go (about) 1 // Package fs provides mountpath and FQN abstractions and methods to resolve/map stored content 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package fs 6 7 import ( 8 "context" 9 iofs "io/fs" 10 "os" 11 "path/filepath" 12 "sort" 13 14 "github.com/NVIDIA/aistore/cmn" 15 "github.com/NVIDIA/aistore/cmn/atomic" 16 "github.com/NVIDIA/aistore/cmn/cos" 17 "github.com/NVIDIA/aistore/cmn/debug" 18 "github.com/NVIDIA/aistore/cmn/feat" 19 "github.com/NVIDIA/aistore/cmn/nlog" 20 "github.com/NVIDIA/aistore/memsys" 21 "github.com/karrick/godirwalk" 22 ) 23 24 const ( 25 // Determines the threshold of error count which will result in halting 26 // the walking operation. 27 errThreshold = 1000 28 29 // Determines the buffer size of the mpath worker queue. 30 mpathQueueSize = 100 31 ) 32 33 type ( 34 DirEntry interface { 35 IsDir() bool 36 } 37 38 walkFunc func(fqn string, de DirEntry) error 39 40 WalkOpts struct { 41 Mi *Mountpath 42 Callback walkFunc 43 Bck cmn.Bck 44 Dir string 45 Prefix string 46 CTs []string 47 Sorted bool 48 } 49 50 errCallbackWrapper struct { 51 counter atomic.Int64 52 } 53 54 walkDirWrapper struct { 55 ucb func(string, DirEntry) error // user-provided callback 56 dir string // root pathname 57 errCallbackWrapper 58 } 59 ) 60 61 // PathErrToAction is a default error callback for fast godirwalk.Walk. 62 // The idea is that on any error that was produced during the walk we dispatch 63 // this handler and act upon the error. 64 // 65 // By default it halts on bucket level errors because there is no option to 66 // continue walking if there is a problem with a bucket. Also we count "soft" 67 // errors and abort if we reach certain amount of them. 68 func (ew *errCallbackWrapper) PathErrToAction(_ string, err error) godirwalk.ErrorAction { 69 if cmn.IsErrBucketLevel(err) { 70 return godirwalk.Halt 71 } 72 if ew.counter.Load() > errThreshold { 73 return godirwalk.Halt 74 } 75 if cmn.IsErrObjLevel(err) { 76 ew.counter.Inc() 77 return godirwalk.SkipNode 78 } 79 return godirwalk.Halt 80 } 81 82 // godirwalk is used by default. If you want to switch to standard filepath.Walk do: 83 // 1. Rewrite `callback` to: 84 // func (opts *WalkOpts) callback(fqn string, de os.FileInfo, err error) error { 85 // if err != nil { 86 // if err := cmn.PathWalkErr(err); err != nil { 87 // return err 88 // } 89 // return nil 90 // } 91 // return opts.callback(fqn, de) 92 // } 93 // 2. Replace `Walk` body with one-liner: 94 // return filepath.Walk(fqn, opts.callback) 95 // No more changes required. 96 // NOTE: for standard filepath.Walk option 'Sorted' is ignored 97 98 // interface guard 99 var _ DirEntry = (*godirwalk.Dirent)(nil) 100 101 func (opts *WalkOpts) callback(fqn string, de *godirwalk.Dirent) error { 102 return opts.Callback(fqn, de) 103 } 104 105 func Walk(opts *WalkOpts) error { 106 var ( 107 fqns []string 108 err error 109 ew = &errCallbackWrapper{} 110 ) 111 if opts.Dir != "" { 112 debug.Assert(opts.Prefix == "") 113 fqns = append(fqns, opts.Dir) 114 } else if opts.Bck.Name != "" { 115 debug.Assert(len(opts.CTs) > 0) 116 // one bucket 117 for _, ct := range opts.CTs { 118 bdir := opts.Mi.MakePathCT(&opts.Bck, ct) 119 if opts.Prefix != "" { 120 fqns = append(fqns, _join(bdir, opts.Prefix)) 121 } else { 122 fqns = append(fqns, bdir) 123 } 124 } 125 } else { 126 // all buckets 127 debug.Assert(len(opts.CTs) > 0) 128 fqns, err = allMpathCTpaths(opts) 129 if len(fqns) == 0 || err != nil { 130 return err 131 } 132 } 133 scratch, slab := memsys.PageMM().AllocSize(memsys.DefaultBufSize) 134 gOpts := &godirwalk.Options{ 135 ErrorCallback: ew.PathErrToAction, // "halts the walk" or "skips the node" (detailed comment above) 136 Callback: opts.callback, 137 Unsorted: !opts.Sorted, 138 ScratchBuffer: scratch, 139 } 140 for _, fqn := range fqns { 141 err1 := godirwalk.Walk(fqn, gOpts) 142 if err1 == nil || os.IsNotExist(err1) { 143 continue 144 } 145 // NOTE: mountpath is getting detached or disabled 146 if cmn.IsErrMountpathNotFound(err1) { 147 nlog.Errorln(err1) 148 continue 149 } 150 if cmn.IsErrAborted(err1) { 151 // Errors different from cmn.ErrAborted should not be overwritten 152 // by cmn.ErrAborted. Assign err = err1 only when there wasn't any other error 153 if err == nil { 154 err = err1 155 } 156 continue 157 } 158 if err1 != context.Canceled && !cos.IsNotExist(err1, 0) { 159 nlog.Errorln(err1) 160 } 161 err = err1 162 } 163 slab.Free(scratch) 164 return err 165 } 166 167 func _join(bdir, prefix string) string { 168 sub := bdir + cos.PathSeparator + prefix 169 if cos.IsLastB(prefix, filepath.Separator) { 170 return sub 171 } 172 if !cmn.Rom.Features().IsSet(feat.DontOptimizeVirtualDir) { 173 if finfo, err := os.Stat(sub); err == nil && finfo.IsDir() { 174 return sub 175 } 176 } 177 return bdir 178 } 179 180 func allMpathCTpaths(opts *WalkOpts) (fqns []string, err error) { 181 children, erc := mpathChildren(opts) 182 if erc != nil { 183 return nil, erc 184 } 185 if len(opts.CTs) > 1 { 186 fqns = make([]string, 0, len(children)*len(opts.CTs)) 187 } else { 188 fqns = children[:0] // optimization to reuse previously allocated slice 189 } 190 bck := opts.Bck 191 for _, child := range children { 192 bck.Name = child 193 if err := bck.ValidateName(); err != nil { 194 continue 195 } 196 for _, ct := range opts.CTs { 197 bdir := opts.Mi.MakePathCT(&bck, ct) 198 if opts.Prefix != "" { 199 fqns = append(fqns, _join(bdir, opts.Prefix)) 200 } else { 201 fqns = append(fqns, bdir) 202 } 203 } 204 } 205 return 206 } 207 208 func AllMpathBcks(opts *WalkOpts) (bcks []cmn.Bck, err error) { 209 children, erc := mpathChildren(opts) 210 if erc != nil { 211 return nil, erc 212 } 213 bck := opts.Bck 214 for _, child := range children { 215 bck.Name = child 216 if err := bck.ValidateName(); err != nil { 217 continue 218 } 219 bcks = append(bcks, bck) 220 } 221 return 222 } 223 224 func mpathChildren(opts *WalkOpts) (children []string, err error) { 225 var ( 226 fqn = opts.Mi.MakePathBck(&opts.Bck) 227 scratch, slab = memsys.PageMM().AllocSize(memsys.DefaultBufSize) 228 ) 229 children, err = godirwalk.ReadDirnames(fqn, scratch) 230 slab.Free(scratch) 231 if err != nil { 232 if os.IsNotExist(err) { 233 err = nil 234 } 235 return 236 } 237 if opts.Sorted { 238 sort.Strings(children) 239 } 240 return 241 } 242 243 //////////////////// 244 // WalkDir & walkDirWrapper - non-recursive walk 245 //////////////////// 246 247 // NOTE: using Go filepath.WalkDir 248 // pros: lexical deterministic order; cons: reads the entire directory 249 func WalkDir(dir string, ucb func(string, DirEntry) error) error { 250 wd := &walkDirWrapper{dir: dir, ucb: ucb} 251 return filepath.WalkDir(dir, wd.wcb) 252 } 253 254 // wraps around user callback to implement default error handling and skipping 255 func (wd *walkDirWrapper) wcb(path string, de iofs.DirEntry, err error) error { 256 if err != nil { 257 // Walk and WalkDir share the same error-processing logic (hence, godirwalk enum) 258 if path != wd.dir && wd.PathErrToAction(path, err) != godirwalk.Halt { 259 err = nil 260 } 261 return err 262 } 263 if de.IsDir() && path != wd.dir { 264 return filepath.SkipDir 265 } 266 if !de.Type().IsRegular() { 267 return nil 268 } 269 // user callback 270 return wd.ucb(path, de) 271 }