// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package gopathwalk is like filepath.Walk but specialized for finding Go
// packages, particularly in $GOPATH and $GOROOT.
package gopathwalk

import (
	"bufio"
	"bytes"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"time"
)

// Options controls the behavior of a Walk call.
type Options struct {
	// If Logf is non-nil, debug logging is enabled through this function.
	Logf func(format string, args ...interface{})

	// Search module caches. Also disables legacy goimports ignore rules.
	ModulesEnabled bool

	// Maximum number of concurrent calls to user-provided callbacks,
	// or 0 for GOMAXPROCS.
	Concurrency int
}

// RootType indicates the type of a Root.
//
// The walker treats some root types specially: the "cache" directory of a
// RootModuleCache root is ignored; for RootGOPATH roots walked without
// ModulesEnabled, the entries of .goimportsignore plus "v" and "mod" are
// ignored; "vendor" directories under a RootGOROOT root are not descended
// into when ModulesEnabled is set; and .go files located directly in the
// root path of a RootGOROOT or RootGOPATH root are not reported.
type RootType int

const (
	RootUnknown RootType = iota // zero value of RootType
	RootGOROOT
	RootGOPATH
	RootCurrentModule
	RootModuleCache
	RootOther
)

// A Root is a starting point for a Walk.
type Root struct {
	Path string   // directory at which the walk starts
	Type RootType // kind of root; selects the special cases listed on RootType
}

// Walk concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to find packages.
//
// For each package found, add will be called with the absolute
// paths of the containing source directory and the package directory.
//
// Unlike filepath.WalkDir, Walk follows symbolic links
// (while guarding against cycles).
//
// Walk is WalkSkip with a skip function that always returns false.
func Walk(roots []Root, add func(root Root, dir string), opts Options) {
	WalkSkip(roots, add, func(Root, string) bool { return false }, opts)
}

// WalkSkip concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to
// find packages.
66 // 67 // For each package found, add will be called with the absolute 68 // paths of the containing source directory and the package directory. 69 // For each directory that will be scanned, skip will be called 70 // with the absolute paths of the containing source directory and the directory. 71 // If skip returns false on a directory it will be processed. 72 // 73 // Unlike filepath.WalkDir, WalkSkip follows symbolic links 74 // (while guarding against cycles). 75 func WalkSkip(roots []Root, add func(root Root, dir string), skip func(root Root, dir string) bool, opts Options) { 76 for _, root := range roots { 77 walkDir(root, add, skip, opts) 78 } 79 } 80 81 // walkDir creates a walker and starts fastwalk with this walker. 82 func walkDir(root Root, add func(Root, string), skip func(root Root, dir string) bool, opts Options) { 83 if opts.Logf == nil { 84 opts.Logf = func(format string, args ...interface{}) {} 85 } 86 if _, err := os.Stat(root.Path); os.IsNotExist(err) { 87 opts.Logf("skipping nonexistent directory: %v", root.Path) 88 return 89 } 90 start := time.Now() 91 opts.Logf("scanning %s", root.Path) 92 93 concurrency := opts.Concurrency 94 if concurrency == 0 { 95 // The walk be either CPU-bound or I/O-bound, depending on what the 96 // caller-supplied add function does and the details of the user's platform 97 // and machine. Rather than trying to fine-tune the concurrency level for a 98 // specific environment, we default to GOMAXPROCS: it is likely to be a good 99 // choice for a CPU-bound add function, and if it is instead I/O-bound, then 100 // dealing with I/O saturation is arguably the job of the kernel and/or 101 // runtime. (Oversaturating I/O seems unlikely to harm performance as badly 102 // as failing to saturate would.) 
103 concurrency = runtime.GOMAXPROCS(0) 104 } 105 w := &walker{ 106 root: root, 107 add: add, 108 skip: skip, 109 opts: opts, 110 sem: make(chan struct{}, concurrency), 111 } 112 w.init() 113 114 w.sem <- struct{}{} 115 path := root.Path 116 if path == "" { 117 path = "." 118 } 119 if fi, err := os.Lstat(path); err == nil { 120 w.walk(path, nil, fs.FileInfoToDirEntry(fi)) 121 } else { 122 w.opts.Logf("scanning directory %v: %v", root.Path, err) 123 } 124 <-w.sem 125 w.walking.Wait() 126 127 opts.Logf("scanned %s in %v", root.Path, time.Since(start)) 128 } 129 130 // walker is the callback for fastwalk.Walk. 131 type walker struct { 132 root Root // The source directory to scan. 133 add func(Root, string) // The callback that will be invoked for every possible Go package dir. 134 skip func(Root, string) bool // The callback that will be invoked for every dir. dir is skipped if it returns true. 135 opts Options // Options passed to Walk by the user. 136 137 walking sync.WaitGroup 138 sem chan struct{} // Channel of semaphore tokens; send to acquire, receive to release. 139 ignoredDirs []string 140 141 added sync.Map // map[string]bool 142 } 143 144 // A symlinkList is a linked list of os.FileInfos for parent directories 145 // reached via symlinks. 
146 type symlinkList struct { 147 info os.FileInfo 148 prev *symlinkList 149 } 150 151 // init initializes the walker based on its Options 152 func (w *walker) init() { 153 var ignoredPaths []string 154 if w.root.Type == RootModuleCache { 155 ignoredPaths = []string{"cache"} 156 } 157 if !w.opts.ModulesEnabled && w.root.Type == RootGOPATH { 158 ignoredPaths = w.getIgnoredDirs(w.root.Path) 159 ignoredPaths = append(ignoredPaths, "v", "mod") 160 } 161 162 for _, p := range ignoredPaths { 163 full := filepath.Join(w.root.Path, p) 164 w.ignoredDirs = append(w.ignoredDirs, full) 165 w.opts.Logf("Directory added to ignore list: %s", full) 166 } 167 } 168 169 // getIgnoredDirs reads an optional config file at <path>/.goimportsignore 170 // of relative directories to ignore when scanning for go files. 171 // The provided path is one of the $GOPATH entries with "src" appended. 172 func (w *walker) getIgnoredDirs(path string) []string { 173 file := filepath.Join(path, ".goimportsignore") 174 slurp, err := os.ReadFile(file) 175 if err != nil { 176 w.opts.Logf("%v", err) 177 } else { 178 w.opts.Logf("Read %s", file) 179 } 180 if err != nil { 181 return nil 182 } 183 184 var ignoredDirs []string 185 bs := bufio.NewScanner(bytes.NewReader(slurp)) 186 for bs.Scan() { 187 line := strings.TrimSpace(bs.Text()) 188 if line == "" || strings.HasPrefix(line, "#") { 189 continue 190 } 191 ignoredDirs = append(ignoredDirs, line) 192 } 193 return ignoredDirs 194 } 195 196 // shouldSkipDir reports whether the file should be skipped or not. 197 func (w *walker) shouldSkipDir(dir string) bool { 198 for _, ignoredDir := range w.ignoredDirs { 199 if dir == ignoredDir { 200 return true 201 } 202 } 203 if w.skip != nil { 204 // Check with the user specified callback. 205 return w.skip(w.root, dir) 206 } 207 return false 208 } 209 210 // walk walks through the given path. 211 // 212 // Errors are logged if w.opts.Logf is non-nil, but otherwise ignored. 
213 func (w *walker) walk(path string, pathSymlinks *symlinkList, d fs.DirEntry) { 214 if d.Type()&os.ModeSymlink != 0 { 215 // Walk the symlink's target rather than the symlink itself. 216 // 217 // (Note that os.Stat, unlike the lower-lever os.Readlink, 218 // follows arbitrarily many layers of symlinks, so it will eventually 219 // reach either a non-symlink or a nonexistent target.) 220 // 221 // TODO(bcmills): 'go list all' itself ignores symlinks within GOROOT/src 222 // and GOPATH/src. Do we really need to traverse them here? If so, why? 223 224 fi, err := os.Stat(path) 225 if err != nil { 226 w.opts.Logf("%v", err) 227 return 228 } 229 230 // Avoid walking symlink cycles: if we have already followed a symlink to 231 // this directory as a parent of itself, don't follow it again. 232 // 233 // This doesn't catch the first time through a cycle, but it also minimizes 234 // the number of extra stat calls we make if we *don't* encounter a cycle. 235 // Since we don't actually expect to encounter symlink cycles in practice, 236 // this seems like the right tradeoff. 237 for parent := pathSymlinks; parent != nil; parent = parent.prev { 238 if os.SameFile(fi, parent.info) { 239 return 240 } 241 } 242 243 pathSymlinks = &symlinkList{ 244 info: fi, 245 prev: pathSymlinks, 246 } 247 d = fs.FileInfoToDirEntry(fi) 248 } 249 250 if d.Type().IsRegular() { 251 if !strings.HasSuffix(path, ".go") { 252 return 253 } 254 255 dir := filepath.Dir(path) 256 if dir == w.root.Path && (w.root.Type == RootGOROOT || w.root.Type == RootGOPATH) { 257 // Doesn't make sense to have regular files 258 // directly in your $GOPATH/src or $GOROOT/src. 259 // 260 // TODO(bcmills): there are many levels of directory within 261 // RootModuleCache where this also wouldn't make sense, 262 // Can we generalize this to any directory without a corresponding 263 // import path? 
264 return 265 } 266 267 if _, dup := w.added.LoadOrStore(dir, true); !dup { 268 w.add(w.root, dir) 269 } 270 } 271 272 if !d.IsDir() { 273 return 274 } 275 276 base := filepath.Base(path) 277 if base == "" || base[0] == '.' || base[0] == '_' || 278 base == "testdata" || 279 (w.root.Type == RootGOROOT && w.opts.ModulesEnabled && base == "vendor") || 280 (!w.opts.ModulesEnabled && base == "node_modules") || 281 w.shouldSkipDir(path) { 282 return 283 } 284 285 // Read the directory and walk its entries. 286 287 f, err := os.Open(path) 288 if err != nil { 289 w.opts.Logf("%v", err) 290 return 291 } 292 defer f.Close() 293 294 for { 295 // We impose an arbitrary limit on the number of ReadDir results per 296 // directory to limit the amount of memory consumed for stale or upcoming 297 // directory entries. The limit trades off CPU (number of syscalls to read 298 // the whole directory) against RAM (reachable directory entries other than 299 // the one currently being processed). 300 // 301 // Since we process the directories recursively, we will end up maintaining 302 // a slice of entries for each level of the directory tree. 303 // (Compare https://go.dev/issue/36197.) 304 ents, err := f.ReadDir(1024) 305 if err != nil { 306 if err != io.EOF { 307 w.opts.Logf("%v", err) 308 } 309 break 310 } 311 312 for _, d := range ents { 313 nextPath := filepath.Join(path, d.Name()) 314 if d.IsDir() { 315 select { 316 case w.sem <- struct{}{}: 317 // Got a new semaphore token, so we can traverse the directory concurrently. 318 d := d 319 w.walking.Add(1) 320 go func() { 321 defer func() { 322 <-w.sem 323 w.walking.Done() 324 }() 325 w.walk(nextPath, pathSymlinks, d) 326 }() 327 continue 328 329 default: 330 // No tokens available, so traverse serially. 331 } 332 } 333 334 w.walk(nextPath, pathSymlinks, d) 335 } 336 } 337 }