github.com/v2fly/tools@v0.100.0/internal/fastwalk/fastwalk.go (about) 1 // Copyright 2016 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package fastwalk provides a faster version of filepath.Walk for file system 6 // scanning tools. 7 package fastwalk 8 9 import ( 10 "errors" 11 "os" 12 "path/filepath" 13 "runtime" 14 "sync" 15 ) 16 17 // ErrTraverseLink is used as a return value from WalkFuncs to indicate that the 18 // symlink named in the call may be traversed. 19 var ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory") 20 21 // ErrSkipFiles is a used as a return value from WalkFuncs to indicate that the 22 // callback should not be called for any other files in the current directory. 23 // Child directories will still be traversed. 24 var ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory") 25 26 // Walk is a faster implementation of filepath.Walk. 27 // 28 // filepath.Walk's design necessarily calls os.Lstat on each file, 29 // even if the caller needs less info. 30 // Many tools need only the type of each file. 31 // On some platforms, this information is provided directly by the readdir 32 // system call, avoiding the need to stat each file individually. 33 // fastwalk_unix.go contains a fork of the syscall routines. 34 // 35 // See golang.org/issue/16399 36 // 37 // Walk walks the file tree rooted at root, calling walkFn for 38 // each file or directory in the tree, including root. 39 // 40 // If fastWalk returns filepath.SkipDir, the directory is skipped. 41 // 42 // Unlike filepath.Walk: 43 // * file stat calls must be done by the user. 44 // The only provided metadata is the file type, which does not include 45 // any permission bits. 46 // * multiple goroutines stat the filesystem concurrently. The provided 47 // walkFn must be safe for concurrent use. 48 // * fastWalk can follow symlinks if walkFn returns the TraverseLink 49 // sentinel error. It is the walkFn's responsibility to prevent 50 // fastWalk from going into symlink cycles. 51 func Walk(root string, walkFn func(path string, typ os.FileMode) error) error { 52 // TODO(bradfitz): make numWorkers configurable? We used a 53 // minimum of 4 to give the kernel more info about multiple 54 // things we want, in hopes its I/O scheduling can take 55 // advantage of that. Hopefully most are in cache. Maybe 4 is 56 // even too low of a minimum. Profile more. 57 numWorkers := 4 58 if n := runtime.NumCPU(); n > numWorkers { 59 numWorkers = n 60 } 61 62 // Make sure to wait for all workers to finish, otherwise 63 // walkFn could still be called after returning. This Wait call 64 // runs after close(e.donec) below. 65 var wg sync.WaitGroup 66 defer wg.Wait() 67 68 w := &walker{ 69 fn: walkFn, 70 enqueuec: make(chan walkItem, numWorkers), // buffered for performance 71 workc: make(chan walkItem, numWorkers), // buffered for performance 72 donec: make(chan struct{}), 73 74 // buffered for correctness & not leaking goroutines: 75 resc: make(chan error, numWorkers), 76 } 77 defer close(w.donec) 78 79 for i := 0; i < numWorkers; i++ { 80 wg.Add(1) 81 go w.doWork(&wg) 82 } 83 todo := []walkItem{{dir: root}} 84 out := 0 85 for { 86 workc := w.workc 87 var workItem walkItem 88 if len(todo) == 0 { 89 workc = nil 90 } else { 91 workItem = todo[len(todo)-1] 92 } 93 select { 94 case workc <- workItem: 95 todo = todo[:len(todo)-1] 96 out++ 97 case it := <-w.enqueuec: 98 todo = append(todo, it) 99 case err := <-w.resc: 100 out-- 101 if err != nil { 102 return err 103 } 104 if out == 0 && len(todo) == 0 { 105 // It's safe to quit here, as long as the buffered 106 // enqueue channel isn't also readable, which might 107 // happen if the worker sends both another unit of 108 // work and its result before the other select was 109 // scheduled and both w.resc and w.enqueuec were 110 // readable. 111 select { 112 case it := <-w.enqueuec: 113 todo = append(todo, it) 114 default: 115 return nil 116 } 117 } 118 } 119 } 120 } 121 122 // doWork reads directories as instructed (via workc) and runs the 123 // user's callback function. 124 func (w *walker) doWork(wg *sync.WaitGroup) { 125 defer wg.Done() 126 for { 127 select { 128 case <-w.donec: 129 return 130 case it := <-w.workc: 131 select { 132 case <-w.donec: 133 return 134 case w.resc <- w.walk(it.dir, !it.callbackDone): 135 } 136 } 137 } 138 } 139 140 type walker struct { 141 fn func(path string, typ os.FileMode) error 142 143 donec chan struct{} // closed on fastWalk's return 144 workc chan walkItem // to workers 145 enqueuec chan walkItem // from workers 146 resc chan error // from workers 147 } 148 149 type walkItem struct { 150 dir string 151 callbackDone bool // callback already called; don't do it again 152 } 153 154 func (w *walker) enqueue(it walkItem) { 155 select { 156 case w.enqueuec <- it: 157 case <-w.donec: 158 } 159 } 160 161 func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { 162 joined := dirName + string(os.PathSeparator) + baseName 163 if typ == os.ModeDir { 164 w.enqueue(walkItem{dir: joined}) 165 return nil 166 } 167 168 err := w.fn(joined, typ) 169 if typ == os.ModeSymlink { 170 if err == ErrTraverseLink { 171 // Set callbackDone so we don't call it twice for both the 172 // symlink-as-symlink and the symlink-as-directory later: 173 w.enqueue(walkItem{dir: joined, callbackDone: true}) 174 return nil 175 } 176 if err == filepath.SkipDir { 177 // Permit SkipDir on symlinks too. 178 return nil 179 } 180 } 181 return err 182 } 183 184 func (w *walker) walk(root string, runUserCallback bool) error { 185 if runUserCallback { 186 err := w.fn(root, os.ModeDir) 187 if err == filepath.SkipDir { 188 return nil 189 } 190 if err != nil { 191 return err 192 } 193 } 194 195 return readDir(root, w.onDirEnt) 196 }