github.com/v2fly/tools@v0.100.0/internal/fastwalk/fastwalk.go (about)

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package fastwalk provides a faster version of filepath.Walk for file system
     6  // scanning tools.
     7  package fastwalk
     8  
     9  import (
    10  	"errors"
    11  	"os"
    12  	"path/filepath"
    13  	"runtime"
    14  	"sync"
    15  )
    16  
    17  // ErrTraverseLink is used as a return value from WalkFuncs to indicate that the
    18  // symlink named in the call may be traversed.
    19  var ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory")
    20  
    21  // ErrSkipFiles is a used as a return value from WalkFuncs to indicate that the
    22  // callback should not be called for any other files in the current directory.
    23  // Child directories will still be traversed.
    24  var ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory")
    25  
    26  // Walk is a faster implementation of filepath.Walk.
    27  //
    28  // filepath.Walk's design necessarily calls os.Lstat on each file,
    29  // even if the caller needs less info.
    30  // Many tools need only the type of each file.
    31  // On some platforms, this information is provided directly by the readdir
    32  // system call, avoiding the need to stat each file individually.
    33  // fastwalk_unix.go contains a fork of the syscall routines.
    34  //
    35  // See golang.org/issue/16399
    36  //
    37  // Walk walks the file tree rooted at root, calling walkFn for
    38  // each file or directory in the tree, including root.
    39  //
    40  // If fastWalk returns filepath.SkipDir, the directory is skipped.
    41  //
    42  // Unlike filepath.Walk:
    43  //   * file stat calls must be done by the user.
    44  //     The only provided metadata is the file type, which does not include
    45  //     any permission bits.
    46  //   * multiple goroutines stat the filesystem concurrently. The provided
    47  //     walkFn must be safe for concurrent use.
    48  //   * fastWalk can follow symlinks if walkFn returns the TraverseLink
    49  //     sentinel error. It is the walkFn's responsibility to prevent
    50  //     fastWalk from going into symlink cycles.
    51  func Walk(root string, walkFn func(path string, typ os.FileMode) error) error {
    52  	// TODO(bradfitz): make numWorkers configurable? We used a
    53  	// minimum of 4 to give the kernel more info about multiple
    54  	// things we want, in hopes its I/O scheduling can take
    55  	// advantage of that. Hopefully most are in cache. Maybe 4 is
    56  	// even too low of a minimum. Profile more.
    57  	numWorkers := 4
    58  	if n := runtime.NumCPU(); n > numWorkers {
    59  		numWorkers = n
    60  	}
    61  
    62  	// Make sure to wait for all workers to finish, otherwise
    63  	// walkFn could still be called after returning. This Wait call
    64  	// runs after close(e.donec) below.
    65  	var wg sync.WaitGroup
    66  	defer wg.Wait()
    67  
    68  	w := &walker{
    69  		fn:       walkFn,
    70  		enqueuec: make(chan walkItem, numWorkers), // buffered for performance
    71  		workc:    make(chan walkItem, numWorkers), // buffered for performance
    72  		donec:    make(chan struct{}),
    73  
    74  		// buffered for correctness & not leaking goroutines:
    75  		resc: make(chan error, numWorkers),
    76  	}
    77  	defer close(w.donec)
    78  
    79  	for i := 0; i < numWorkers; i++ {
    80  		wg.Add(1)
    81  		go w.doWork(&wg)
    82  	}
    83  	todo := []walkItem{{dir: root}}
    84  	out := 0
    85  	for {
    86  		workc := w.workc
    87  		var workItem walkItem
    88  		if len(todo) == 0 {
    89  			workc = nil
    90  		} else {
    91  			workItem = todo[len(todo)-1]
    92  		}
    93  		select {
    94  		case workc <- workItem:
    95  			todo = todo[:len(todo)-1]
    96  			out++
    97  		case it := <-w.enqueuec:
    98  			todo = append(todo, it)
    99  		case err := <-w.resc:
   100  			out--
   101  			if err != nil {
   102  				return err
   103  			}
   104  			if out == 0 && len(todo) == 0 {
   105  				// It's safe to quit here, as long as the buffered
   106  				// enqueue channel isn't also readable, which might
   107  				// happen if the worker sends both another unit of
   108  				// work and its result before the other select was
   109  				// scheduled and both w.resc and w.enqueuec were
   110  				// readable.
   111  				select {
   112  				case it := <-w.enqueuec:
   113  					todo = append(todo, it)
   114  				default:
   115  					return nil
   116  				}
   117  			}
   118  		}
   119  	}
   120  }
   121  
   122  // doWork reads directories as instructed (via workc) and runs the
   123  // user's callback function.
   124  func (w *walker) doWork(wg *sync.WaitGroup) {
   125  	defer wg.Done()
   126  	for {
   127  		select {
   128  		case <-w.donec:
   129  			return
   130  		case it := <-w.workc:
   131  			select {
   132  			case <-w.donec:
   133  				return
   134  			case w.resc <- w.walk(it.dir, !it.callbackDone):
   135  			}
   136  		}
   137  	}
   138  }
   139  
   140  type walker struct {
   141  	fn func(path string, typ os.FileMode) error
   142  
   143  	donec    chan struct{} // closed on fastWalk's return
   144  	workc    chan walkItem // to workers
   145  	enqueuec chan walkItem // from workers
   146  	resc     chan error    // from workers
   147  }
   148  
   149  type walkItem struct {
   150  	dir          string
   151  	callbackDone bool // callback already called; don't do it again
   152  }
   153  
   154  func (w *walker) enqueue(it walkItem) {
   155  	select {
   156  	case w.enqueuec <- it:
   157  	case <-w.donec:
   158  	}
   159  }
   160  
   161  func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
   162  	joined := dirName + string(os.PathSeparator) + baseName
   163  	if typ == os.ModeDir {
   164  		w.enqueue(walkItem{dir: joined})
   165  		return nil
   166  	}
   167  
   168  	err := w.fn(joined, typ)
   169  	if typ == os.ModeSymlink {
   170  		if err == ErrTraverseLink {
   171  			// Set callbackDone so we don't call it twice for both the
   172  			// symlink-as-symlink and the symlink-as-directory later:
   173  			w.enqueue(walkItem{dir: joined, callbackDone: true})
   174  			return nil
   175  		}
   176  		if err == filepath.SkipDir {
   177  			// Permit SkipDir on symlinks too.
   178  			return nil
   179  		}
   180  	}
   181  	return err
   182  }
   183  
   184  func (w *walker) walk(root string, runUserCallback bool) error {
   185  	if runUserCallback {
   186  		err := w.fn(root, os.ModeDir)
   187  		if err == filepath.SkipDir {
   188  			return nil
   189  		}
   190  		if err != nil {
   191  			return err
   192  		}
   193  	}
   194  
   195  	return readDir(root, w.onDirEnt)
   196  }