golang.org/x/tools@v0.21.0/internal/gopathwalk/walk.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package gopathwalk is like filepath.Walk but specialized for finding Go
     6  // packages, particularly in $GOPATH and $GOROOT.
     7  package gopathwalk
     8  
     9  import (
    10  	"bufio"
    11  	"bytes"
    12  	"io"
    13  	"io/fs"
    14  	"os"
    15  	"path/filepath"
    16  	"runtime"
    17  	"strings"
    18  	"sync"
    19  	"time"
    20  )
    21  
    22  // Options controls the behavior of a Walk call.
    23  type Options struct {
    24  	// If Logf is non-nil, debug logging is enabled through this function.
    25  	Logf func(format string, args ...interface{})
    26  
    27  	// Search module caches. Also disables legacy goimports ignore rules.
    28  	ModulesEnabled bool
    29  
    30  	// Maximum number of concurrent calls to user-provided callbacks,
    31  	// or 0 for GOMAXPROCS.
    32  	Concurrency int
    33  }
    34  
    35  // RootType indicates the type of a Root.
    36  type RootType int
    37  
    38  const (
    39  	RootUnknown RootType = iota
    40  	RootGOROOT
    41  	RootGOPATH
    42  	RootCurrentModule
    43  	RootModuleCache
    44  	RootOther
    45  )
    46  
    47  // A Root is a starting point for a Walk.
    48  type Root struct {
    49  	Path string
    50  	Type RootType
    51  }
    52  
    53  // Walk concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to find packages.
    54  //
    55  // For each package found, add will be called with the absolute
    56  // paths of the containing source directory and the package directory.
    57  //
    58  // Unlike filepath.WalkDir, Walk follows symbolic links
    59  // (while guarding against cycles).
    60  func Walk(roots []Root, add func(root Root, dir string), opts Options) {
    61  	WalkSkip(roots, add, func(Root, string) bool { return false }, opts)
    62  }
    63  
    64  // WalkSkip concurrently walks Go source directories ($GOROOT, $GOPATH, etc) to
    65  // find packages.
    66  //
    67  // For each package found, add will be called with the absolute
    68  // paths of the containing source directory and the package directory.
    69  // For each directory that will be scanned, skip will be called
    70  // with the absolute paths of the containing source directory and the directory.
    71  // If skip returns false on a directory it will be processed.
    72  //
    73  // Unlike filepath.WalkDir, WalkSkip follows symbolic links
    74  // (while guarding against cycles).
    75  func WalkSkip(roots []Root, add func(root Root, dir string), skip func(root Root, dir string) bool, opts Options) {
    76  	for _, root := range roots {
    77  		walkDir(root, add, skip, opts)
    78  	}
    79  }
    80  
    81  // walkDir creates a walker and starts fastwalk with this walker.
    82  func walkDir(root Root, add func(Root, string), skip func(root Root, dir string) bool, opts Options) {
    83  	if opts.Logf == nil {
    84  		opts.Logf = func(format string, args ...interface{}) {}
    85  	}
    86  	if _, err := os.Stat(root.Path); os.IsNotExist(err) {
    87  		opts.Logf("skipping nonexistent directory: %v", root.Path)
    88  		return
    89  	}
    90  	start := time.Now()
    91  	opts.Logf("scanning %s", root.Path)
    92  
    93  	concurrency := opts.Concurrency
    94  	if concurrency == 0 {
    95  		// The walk be either CPU-bound or I/O-bound, depending on what the
    96  		// caller-supplied add function does and the details of the user's platform
    97  		// and machine. Rather than trying to fine-tune the concurrency level for a
    98  		// specific environment, we default to GOMAXPROCS: it is likely to be a good
    99  		// choice for a CPU-bound add function, and if it is instead I/O-bound, then
   100  		// dealing with I/O saturation is arguably the job of the kernel and/or
   101  		// runtime. (Oversaturating I/O seems unlikely to harm performance as badly
   102  		// as failing to saturate would.)
   103  		concurrency = runtime.GOMAXPROCS(0)
   104  	}
   105  	w := &walker{
   106  		root: root,
   107  		add:  add,
   108  		skip: skip,
   109  		opts: opts,
   110  		sem:  make(chan struct{}, concurrency),
   111  	}
   112  	w.init()
   113  
   114  	w.sem <- struct{}{}
   115  	path := root.Path
   116  	if path == "" {
   117  		path = "."
   118  	}
   119  	if fi, err := os.Lstat(path); err == nil {
   120  		w.walk(path, nil, fs.FileInfoToDirEntry(fi))
   121  	} else {
   122  		w.opts.Logf("scanning directory %v: %v", root.Path, err)
   123  	}
   124  	<-w.sem
   125  	w.walking.Wait()
   126  
   127  	opts.Logf("scanned %s in %v", root.Path, time.Since(start))
   128  }
   129  
   130  // walker is the callback for fastwalk.Walk.
   131  type walker struct {
   132  	root Root                    // The source directory to scan.
   133  	add  func(Root, string)      // The callback that will be invoked for every possible Go package dir.
   134  	skip func(Root, string) bool // The callback that will be invoked for every dir. dir is skipped if it returns true.
   135  	opts Options                 // Options passed to Walk by the user.
   136  
   137  	walking     sync.WaitGroup
   138  	sem         chan struct{} // Channel of semaphore tokens; send to acquire, receive to release.
   139  	ignoredDirs []string
   140  
   141  	added sync.Map // map[string]bool
   142  }
   143  
   144  // A symlinkList is a linked list of os.FileInfos for parent directories
   145  // reached via symlinks.
   146  type symlinkList struct {
   147  	info os.FileInfo
   148  	prev *symlinkList
   149  }
   150  
   151  // init initializes the walker based on its Options
   152  func (w *walker) init() {
   153  	var ignoredPaths []string
   154  	if w.root.Type == RootModuleCache {
   155  		ignoredPaths = []string{"cache"}
   156  	}
   157  	if !w.opts.ModulesEnabled && w.root.Type == RootGOPATH {
   158  		ignoredPaths = w.getIgnoredDirs(w.root.Path)
   159  		ignoredPaths = append(ignoredPaths, "v", "mod")
   160  	}
   161  
   162  	for _, p := range ignoredPaths {
   163  		full := filepath.Join(w.root.Path, p)
   164  		w.ignoredDirs = append(w.ignoredDirs, full)
   165  		w.opts.Logf("Directory added to ignore list: %s", full)
   166  	}
   167  }
   168  
   169  // getIgnoredDirs reads an optional config file at <path>/.goimportsignore
   170  // of relative directories to ignore when scanning for go files.
   171  // The provided path is one of the $GOPATH entries with "src" appended.
   172  func (w *walker) getIgnoredDirs(path string) []string {
   173  	file := filepath.Join(path, ".goimportsignore")
   174  	slurp, err := os.ReadFile(file)
   175  	if err != nil {
   176  		w.opts.Logf("%v", err)
   177  	} else {
   178  		w.opts.Logf("Read %s", file)
   179  	}
   180  	if err != nil {
   181  		return nil
   182  	}
   183  
   184  	var ignoredDirs []string
   185  	bs := bufio.NewScanner(bytes.NewReader(slurp))
   186  	for bs.Scan() {
   187  		line := strings.TrimSpace(bs.Text())
   188  		if line == "" || strings.HasPrefix(line, "#") {
   189  			continue
   190  		}
   191  		ignoredDirs = append(ignoredDirs, line)
   192  	}
   193  	return ignoredDirs
   194  }
   195  
   196  // shouldSkipDir reports whether the file should be skipped or not.
   197  func (w *walker) shouldSkipDir(dir string) bool {
   198  	for _, ignoredDir := range w.ignoredDirs {
   199  		if dir == ignoredDir {
   200  			return true
   201  		}
   202  	}
   203  	if w.skip != nil {
   204  		// Check with the user specified callback.
   205  		return w.skip(w.root, dir)
   206  	}
   207  	return false
   208  }
   209  
   210  // walk walks through the given path.
   211  //
   212  // Errors are logged if w.opts.Logf is non-nil, but otherwise ignored.
   213  func (w *walker) walk(path string, pathSymlinks *symlinkList, d fs.DirEntry) {
   214  	if d.Type()&os.ModeSymlink != 0 {
   215  		// Walk the symlink's target rather than the symlink itself.
   216  		//
   217  		// (Note that os.Stat, unlike the lower-lever os.Readlink,
   218  		// follows arbitrarily many layers of symlinks, so it will eventually
   219  		// reach either a non-symlink or a nonexistent target.)
   220  		//
   221  		// TODO(bcmills): 'go list all' itself ignores symlinks within GOROOT/src
   222  		// and GOPATH/src. Do we really need to traverse them here? If so, why?
   223  
   224  		fi, err := os.Stat(path)
   225  		if err != nil {
   226  			w.opts.Logf("%v", err)
   227  			return
   228  		}
   229  
   230  		// Avoid walking symlink cycles: if we have already followed a symlink to
   231  		// this directory as a parent of itself, don't follow it again.
   232  		//
   233  		// This doesn't catch the first time through a cycle, but it also minimizes
   234  		// the number of extra stat calls we make if we *don't* encounter a cycle.
   235  		// Since we don't actually expect to encounter symlink cycles in practice,
   236  		// this seems like the right tradeoff.
   237  		for parent := pathSymlinks; parent != nil; parent = parent.prev {
   238  			if os.SameFile(fi, parent.info) {
   239  				return
   240  			}
   241  		}
   242  
   243  		pathSymlinks = &symlinkList{
   244  			info: fi,
   245  			prev: pathSymlinks,
   246  		}
   247  		d = fs.FileInfoToDirEntry(fi)
   248  	}
   249  
   250  	if d.Type().IsRegular() {
   251  		if !strings.HasSuffix(path, ".go") {
   252  			return
   253  		}
   254  
   255  		dir := filepath.Dir(path)
   256  		if dir == w.root.Path && (w.root.Type == RootGOROOT || w.root.Type == RootGOPATH) {
   257  			// Doesn't make sense to have regular files
   258  			// directly in your $GOPATH/src or $GOROOT/src.
   259  			//
   260  			// TODO(bcmills): there are many levels of directory within
   261  			// RootModuleCache where this also wouldn't make sense,
   262  			// Can we generalize this to any directory without a corresponding
   263  			// import path?
   264  			return
   265  		}
   266  
   267  		if _, dup := w.added.LoadOrStore(dir, true); !dup {
   268  			w.add(w.root, dir)
   269  		}
   270  	}
   271  
   272  	if !d.IsDir() {
   273  		return
   274  	}
   275  
   276  	base := filepath.Base(path)
   277  	if base == "" || base[0] == '.' || base[0] == '_' ||
   278  		base == "testdata" ||
   279  		(w.root.Type == RootGOROOT && w.opts.ModulesEnabled && base == "vendor") ||
   280  		(!w.opts.ModulesEnabled && base == "node_modules") ||
   281  		w.shouldSkipDir(path) {
   282  		return
   283  	}
   284  
   285  	// Read the directory and walk its entries.
   286  
   287  	f, err := os.Open(path)
   288  	if err != nil {
   289  		w.opts.Logf("%v", err)
   290  		return
   291  	}
   292  	defer f.Close()
   293  
   294  	for {
   295  		// We impose an arbitrary limit on the number of ReadDir results per
   296  		// directory to limit the amount of memory consumed for stale or upcoming
   297  		// directory entries. The limit trades off CPU (number of syscalls to read
   298  		// the whole directory) against RAM (reachable directory entries other than
   299  		// the one currently being processed).
   300  		//
   301  		// Since we process the directories recursively, we will end up maintaining
   302  		// a slice of entries for each level of the directory tree.
   303  		// (Compare https://go.dev/issue/36197.)
   304  		ents, err := f.ReadDir(1024)
   305  		if err != nil {
   306  			if err != io.EOF {
   307  				w.opts.Logf("%v", err)
   308  			}
   309  			break
   310  		}
   311  
   312  		for _, d := range ents {
   313  			nextPath := filepath.Join(path, d.Name())
   314  			if d.IsDir() {
   315  				select {
   316  				case w.sem <- struct{}{}:
   317  					// Got a new semaphore token, so we can traverse the directory concurrently.
   318  					d := d
   319  					w.walking.Add(1)
   320  					go func() {
   321  						defer func() {
   322  							<-w.sem
   323  							w.walking.Done()
   324  						}()
   325  						w.walk(nextPath, pathSymlinks, d)
   326  					}()
   327  					continue
   328  
   329  				default:
   330  					// No tokens available, so traverse serially.
   331  				}
   332  			}
   333  
   334  			w.walk(nextPath, pathSymlinks, d)
   335  		}
   336  	}
   337  }