github.com/westcoastroms/westcoastroms-build@v0.0.0-20190928114312-2350e5a73030/build/soong/finder/finder.go (about)

     1  // Copyright 2017 Google Inc. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package finder
    16  
    17  import (
    18  	"bufio"
    19  	"bytes"
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"path/filepath"
    26  	"runtime"
    27  	"sort"
    28  	"strings"
    29  	"sync"
    30  	"sync/atomic"
    31  	"time"
    32  
    33  	"android/soong/finder/fs"
    34  )
    35  
    36  // This file provides a Finder struct that can quickly search for files satisfying
    37  // certain criteria.
    38  // This Finder gets its speed partially from parallelism and partially from caching.
    39  // If a Stat call returns the same result as last time, then it means Finder
    40  // can skip the ReadDir call for that dir.
    41  
    42  // The primary data structure used by the finder is the field Finder.nodes ,
    43  // which is a tree of nodes of type *pathMap .
    44  // Each node represents a directory on disk, along with its stats, subdirectories,
    45  // and contained files.
    46  
    47  // The common use case for the Finder is that the caller creates a Finder and gives
    48  // it the same query that was given to it in the previous execution.
    49  // In this situation, the major events that take place are:
    50  // 1. The Finder begins to load its db
    51  // 2. The Finder begins to stat the directories mentioned in its db (using multiple threads)
    52  //    Calling Stat on each of these directories is generally a large fraction of the total time
    53  // 3. The Finder begins to construct a separate tree of nodes in each of its threads
    54  // 4. The Finder merges the individual node trees into the main node tree
    55  // 5. The Finder may call ReadDir a few times if there are a few directories that are out-of-date
    56  //    These ReadDir calls might prompt additional Stat calls, etc
    57  // 6. The Finder waits for all loading to complete
    58  // 7. The Finder searches the cache for files matching the user's query (using multiple threads)
    59  
    60  // These are the invariants regarding concurrency:
    61  // 1. The public methods of Finder are threadsafe.
    62  //      The public methods are only performance-optimized for one caller at a time, however.
    63  //      For the moment, multiple concurrent callers shouldn't expect any better performance than
    64  //      multiple serial callers.
    65  // 2. While building the node tree, only one thread may ever access the <children> collection of a
    66  //    *pathMap at once.
    67  //    a) The thread that accesses the <children> collection is the thread that discovers the
    68  //       children (by reading them from the cache or by having received a response to ReadDir).
    69  //       1) Consequently, the thread that discovers the children also spawns requests to stat
    70  //          subdirs.
    71  //    b) Consequently, while building the node tree, no thread may do a lookup of its
    72  //       *pathMap via filepath because another thread may be adding children to the
    73  //       <children> collection of an ancestor node. Additionally, in rare cases, another thread
    74  //       may be removing children from an ancestor node if the children were only discovered to
    75  //       be irrelevant after calling ReadDir (which happens if a prune-file was just added).
    76  // 3. No query will begin to be serviced until all loading (both reading the db
    77  //    and scanning the filesystem) is complete.
    78  //    Tests indicate that it only takes about 10% as long to search the in-memory cache as to
    79  //    generate it, making this not a huge loss in performance.
    80  // 4. The parsing of the db and the initial setup of the pathMap tree must complete before
    81  //      beginning to call listDirSync (because listDirSync can create new entries in the pathMap)
    82  
    83  // see cmd/finder.go or finder_test.go for usage examples
    84  
    85  // Update versionString whenever making a backwards-incompatible change to the cache file format
    86  const versionString = "Android finder version 1"
    87  
    88  // a CacheParams specifies which files and directories the user wishes be scanned and
    89  // potentially added to the cache
    90  type CacheParams struct {
    91  	// WorkingDirectory is used as a base for any relative file paths given to the Finder
    92  	WorkingDirectory string
    93  
    94  	// RootDirs are the root directories used to initiate the search
    95  	RootDirs []string
    96  
    97  	// ExcludeDirs are directory names that if encountered are removed from the search
    98  	ExcludeDirs []string
    99  
   100  	// PruneFiles are file names that if encountered prune their entire directory
   101  	// (including siblings)
   102  	PruneFiles []string
   103  
   104  	// IncludeFiles are file names to include as matches
   105  	IncludeFiles []string
   106  }
   107  
   108  // a cacheConfig stores the inputs that determine what should be included in the cache
   109  type cacheConfig struct {
   110  	CacheParams
   111  
   112  	// FilesystemView is a unique identifier telling which parts of which file systems
   113  	// are readable by the Finder. In practice its value is essentially username@hostname.
   114  	// FilesystemView is set to ensure that a cache file copied to another host or
   115  	// found by another user doesn't inadvertently get reused.
   116  	FilesystemView string
   117  }
   118  
   119  func (p *cacheConfig) Dump() ([]byte, error) {
   120  	bytes, err := json.Marshal(p)
   121  	return bytes, err
   122  }
   123  
   124  // a cacheMetadata stores version information about the cache
   125  type cacheMetadata struct {
   126  	// The Version enables the Finder to determine whether it can even parse the file
   127  	// If the version changes, the entire cache file must be regenerated
   128  	Version string
   129  
   130  	// The CacheParams enables the Finder to determine whether the parameters match
   131  	// If the CacheParams change, the Finder can choose how much of the cache file to reuse
   132  	// (although in practice, the Finder will probably choose to ignore the entire file anyway)
   133  	Config cacheConfig
   134  }
   135  
   136  type Logger interface {
   137  	Output(calldepth int, s string) error
   138  }
   139  
   140  // the Finder is the main struct that callers will want to use
   141  type Finder struct {
   142  	// configuration
   143  	DbPath              string
   144  	numDbLoadingThreads int
   145  	numSearchingThreads int
   146  	cacheMetadata       cacheMetadata
   147  	logger              Logger
   148  	filesystem          fs.FileSystem
   149  
   150  	// temporary state
   151  	threadPool        *threadPool
   152  	mutex             sync.Mutex
   153  	fsErrs            []fsErr
   154  	errlock           sync.Mutex
   155  	shutdownWaitgroup sync.WaitGroup
   156  
   157  	// non-temporary state
   158  	modifiedFlag int32
   159  	nodes        pathMap
   160  }
   161  
   162  var defaultNumThreads = runtime.NumCPU() * 2
   163  
   164  // New creates a new Finder for use
   165  func New(cacheParams CacheParams, filesystem fs.FileSystem,
   166  	logger Logger, dbPath string) (f *Finder, err error) {
   167  	return newImpl(cacheParams, filesystem, logger, dbPath, defaultNumThreads)
   168  }
   169  
   170  // newImpl is like New but accepts more params
   171  func newImpl(cacheParams CacheParams, filesystem fs.FileSystem,
   172  	logger Logger, dbPath string, numThreads int) (f *Finder, err error) {
   173  	numDbLoadingThreads := numThreads
   174  	numSearchingThreads := numThreads
   175  
   176  	metadata := cacheMetadata{
   177  		Version: versionString,
   178  		Config: cacheConfig{
   179  			CacheParams:    cacheParams,
   180  			FilesystemView: filesystem.ViewId(),
   181  		},
   182  	}
   183  
   184  	f = &Finder{
   185  		numDbLoadingThreads: numDbLoadingThreads,
   186  		numSearchingThreads: numSearchingThreads,
   187  		cacheMetadata:       metadata,
   188  		logger:              logger,
   189  		filesystem:          filesystem,
   190  
   191  		nodes:  *newPathMap("/"),
   192  		DbPath: dbPath,
   193  
   194  		shutdownWaitgroup: sync.WaitGroup{},
   195  	}
   196  
   197  	f.loadFromFilesystem()
   198  
   199  	// check for any filesystem errors
   200  	err = f.getErr()
   201  	if err != nil {
   202  		return nil, err
   203  	}
   204  
   205  	// confirm that every path mentioned in the CacheConfig exists
   206  	for _, path := range cacheParams.RootDirs {
   207  		if !filepath.IsAbs(path) {
   208  			path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
   209  		}
   210  		node := f.nodes.GetNode(filepath.Clean(path), false)
   211  		if node == nil || node.ModTime == 0 {
   212  			return nil, fmt.Errorf("path %v was specified to be included in the cache but does not exist\n", path)
   213  		}
   214  	}
   215  
   216  	return f, nil
   217  }
   218  
   219  // FindNamed searches for every cached file
   220  func (f *Finder) FindAll() []string {
   221  	return f.FindAt("/")
   222  }
   223  
   224  // FindNamed searches for every cached file under <rootDir>
   225  func (f *Finder) FindAt(rootDir string) []string {
   226  	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
   227  		return entries.DirNames, entries.FileNames
   228  	}
   229  	return f.FindMatching(rootDir, filter)
   230  }
   231  
   232  // FindNamed searches for every cached file named <fileName>
   233  func (f *Finder) FindNamed(fileName string) []string {
   234  	return f.FindNamedAt("/", fileName)
   235  }
   236  
   237  // FindNamedAt searches under <rootPath> for every file named <fileName>
   238  // The reason a caller might use FindNamedAt instead of FindNamed is if they want
   239  // to limit their search to a subset of the cache
   240  func (f *Finder) FindNamedAt(rootPath string, fileName string) []string {
   241  	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
   242  		matches := []string{}
   243  		for _, foundName := range entries.FileNames {
   244  			if foundName == fileName {
   245  				matches = append(matches, foundName)
   246  			}
   247  		}
   248  		return entries.DirNames, matches
   249  
   250  	}
   251  	return f.FindMatching(rootPath, filter)
   252  }
   253  
   254  // FindFirstNamed searches for every file named <fileName>
   255  // Whenever it finds a match, it stops search subdirectories
   256  func (f *Finder) FindFirstNamed(fileName string) []string {
   257  	return f.FindFirstNamedAt("/", fileName)
   258  }
   259  
   260  // FindFirstNamedAt searches for every file named <fileName>
   261  // Whenever it finds a match, it stops search subdirectories
   262  func (f *Finder) FindFirstNamedAt(rootPath string, fileName string) []string {
   263  	filter := func(entries DirEntries) (dirNames []string, fileNames []string) {
   264  		matches := []string{}
   265  		for _, foundName := range entries.FileNames {
   266  			if foundName == fileName {
   267  				matches = append(matches, foundName)
   268  			}
   269  		}
   270  
   271  		if len(matches) > 0 {
   272  			return []string{}, matches
   273  		}
   274  		return entries.DirNames, matches
   275  	}
   276  	return f.FindMatching(rootPath, filter)
   277  }
   278  
   279  // FindMatching is the most general exported function for searching for files in the cache
   280  // The WalkFunc will be invoked repeatedly and is expected to modify the provided DirEntries
   281  // in place, removing file paths and directories as desired.
   282  // WalkFunc will be invoked potentially many times in parallel, and must be threadsafe.
   283  func (f *Finder) FindMatching(rootPath string, filter WalkFunc) []string {
   284  	// set up some parameters
   285  	scanStart := time.Now()
   286  	var isRel bool
   287  	workingDir := f.cacheMetadata.Config.WorkingDirectory
   288  
   289  	isRel = !filepath.IsAbs(rootPath)
   290  	if isRel {
   291  		rootPath = filepath.Join(workingDir, rootPath)
   292  	}
   293  
   294  	rootPath = filepath.Clean(rootPath)
   295  
   296  	// ensure nothing else is using the Finder
   297  	f.verbosef("FindMatching waiting for finder to be idle\n")
   298  	f.lock()
   299  	defer f.unlock()
   300  
   301  	node := f.nodes.GetNode(rootPath, false)
   302  	if node == nil {
   303  		f.verbosef("No data for path %v ; apparently not included in cache params: %v\n",
   304  			rootPath, f.cacheMetadata.Config.CacheParams)
   305  		// path is not found; don't do a search
   306  		return []string{}
   307  	}
   308  
   309  	// search for matching files
   310  	f.verbosef("Finder finding %v using cache\n", rootPath)
   311  	results := f.findInCacheMultithreaded(node, filter, f.numSearchingThreads)
   312  
   313  	// format and return results
   314  	if isRel {
   315  		for i := 0; i < len(results); i++ {
   316  			results[i] = strings.Replace(results[i], workingDir+"/", "", 1)
   317  		}
   318  	}
   319  	sort.Strings(results)
   320  	f.verbosef("Found %v files under %v in %v using cache\n",
   321  		len(results), rootPath, time.Since(scanStart))
   322  	return results
   323  }
   324  
   325  // Shutdown declares that the finder is no longer needed and waits for its cleanup to complete
   326  // Currently, that only entails waiting for the database dump to complete.
   327  func (f *Finder) Shutdown() {
   328  	f.waitForDbDump()
   329  }
   330  
   331  // End of public api
   332  
   333  func (f *Finder) goDumpDb() {
   334  	if f.wasModified() {
   335  		f.shutdownWaitgroup.Add(1)
   336  		go func() {
   337  			err := f.dumpDb()
   338  			if err != nil {
   339  				f.verbosef("%v\n", err)
   340  			}
   341  			f.shutdownWaitgroup.Done()
   342  		}()
   343  	} else {
   344  		f.verbosef("Skipping dumping unmodified db\n")
   345  	}
   346  }
   347  
   348  func (f *Finder) waitForDbDump() {
   349  	f.shutdownWaitgroup.Wait()
   350  }
   351  
   352  // joinCleanPaths is like filepath.Join but is faster because
   353  // joinCleanPaths doesn't have to support paths ending in "/" or containing ".."
   354  func joinCleanPaths(base string, leaf string) string {
   355  	if base == "" {
   356  		return leaf
   357  	}
   358  	if base == "/" {
   359  		return base + leaf
   360  	}
   361  	if leaf == "" {
   362  		return base
   363  	}
   364  	return base + "/" + leaf
   365  }
   366  
   367  func (f *Finder) verbosef(format string, args ...interface{}) {
   368  	f.logger.Output(2, fmt.Sprintf(format, args...))
   369  }
   370  
   371  // loadFromFilesystem populates the in-memory cache based on the contents of the filesystem
   372  func (f *Finder) loadFromFilesystem() {
   373  	f.threadPool = newThreadPool(f.numDbLoadingThreads)
   374  
   375  	err := f.startFromExternalCache()
   376  	if err != nil {
   377  		f.startWithoutExternalCache()
   378  	}
   379  
   380  	f.goDumpDb()
   381  
   382  	f.threadPool = nil
   383  }
   384  
   385  func (f *Finder) startFind(path string) {
   386  	if !filepath.IsAbs(path) {
   387  		path = filepath.Join(f.cacheMetadata.Config.WorkingDirectory, path)
   388  	}
   389  	node := f.nodes.GetNode(path, true)
   390  	f.statDirAsync(node)
   391  }
   392  
   393  func (f *Finder) lock() {
   394  	f.mutex.Lock()
   395  }
   396  
   397  func (f *Finder) unlock() {
   398  	f.mutex.Unlock()
   399  }
   400  
   401  // a statResponse is the relevant portion of the response from the filesystem to a Stat call
   402  type statResponse struct {
   403  	ModTime int64
   404  	Inode   uint64
   405  	Device  uint64
   406  }
   407  
   408  // a pathAndStats stores a path and its stats
   409  type pathAndStats struct {
   410  	statResponse
   411  
   412  	Path string
   413  }
   414  
   415  // a dirFullInfo stores all of the relevant information we know about a directory
   416  type dirFullInfo struct {
   417  	pathAndStats
   418  
   419  	FileNames []string
   420  }
   421  
   422  // a PersistedDirInfo is the information about a dir that we save to our cache on disk
   423  type PersistedDirInfo struct {
   424  	// These field names are short because they are repeated many times in the output json file
   425  	P string   // path
   426  	T int64    // modification time
   427  	I uint64   // inode number
   428  	F []string // relevant filenames contained
   429  }
   430  
   431  // a PersistedDirs is the information that we persist for a group of dirs
   432  type PersistedDirs struct {
   433  	// the device on which each directory is stored
   434  	Device uint64
   435  	// the common root path to which all contained dirs are relative
   436  	Root string
   437  	// the directories themselves
   438  	Dirs []PersistedDirInfo
   439  }
   440  
   441  // a CacheEntry is the smallest unit that can be read and parsed from the cache (on disk) at a time
   442  type CacheEntry []PersistedDirs
   443  
   444  // a DirEntries lists the files and directories contained directly within a specific directory
   445  type DirEntries struct {
   446  	Path string
   447  
   448  	// elements of DirNames are just the dir names; they don't include any '/' character
   449  	DirNames []string
   450  	// elements of FileNames are just the file names; they don't include '/' character
   451  	FileNames []string
   452  }
   453  
   454  // a WalkFunc is the type that is passed into various Find functions for determining which
   455  // directories the caller wishes be walked. The WalkFunc is expected to decide which
   456  // directories to walk and which files to consider as matches to the original query.
   457  type WalkFunc func(DirEntries) (dirs []string, files []string)
   458  
   459  // a mapNode stores the relevant stats about a directory to be stored in a pathMap
   460  type mapNode struct {
   461  	statResponse
   462  	FileNames []string
   463  }
   464  
   465  // a pathMap implements the directory tree structure of nodes
   466  type pathMap struct {
   467  	mapNode
   468  
   469  	path string
   470  
   471  	children map[string]*pathMap
   472  
   473  	// number of descendent nodes, including self
   474  	approximateNumDescendents int
   475  }
   476  
   477  func newPathMap(path string) *pathMap {
   478  	result := &pathMap{path: path, children: make(map[string]*pathMap, 4),
   479  		approximateNumDescendents: 1}
   480  	return result
   481  }
   482  
   483  // GetNode returns the node at <path>
   484  func (m *pathMap) GetNode(path string, createIfNotFound bool) *pathMap {
   485  	if len(path) > 0 && path[0] == '/' {
   486  		path = path[1:]
   487  	}
   488  
   489  	node := m
   490  	for {
   491  		if path == "" {
   492  			return node
   493  		}
   494  
   495  		index := strings.Index(path, "/")
   496  		var firstComponent string
   497  		if index >= 0 {
   498  			firstComponent = path[:index]
   499  			path = path[index+1:]
   500  		} else {
   501  			firstComponent = path
   502  			path = ""
   503  		}
   504  
   505  		child, found := node.children[firstComponent]
   506  
   507  		if !found {
   508  			if createIfNotFound {
   509  				child = node.newChild(firstComponent)
   510  			} else {
   511  				return nil
   512  			}
   513  		}
   514  
   515  		node = child
   516  	}
   517  }
   518  
   519  func (m *pathMap) newChild(name string) (child *pathMap) {
   520  	path := joinCleanPaths(m.path, name)
   521  	newChild := newPathMap(path)
   522  	m.children[name] = newChild
   523  
   524  	return m.children[name]
   525  }
   526  
   527  func (m *pathMap) UpdateNumDescendents() int {
   528  	count := 1
   529  	for _, child := range m.children {
   530  		count += child.approximateNumDescendents
   531  	}
   532  	m.approximateNumDescendents = count
   533  	return count
   534  }
   535  
   536  func (m *pathMap) UpdateNumDescendentsRecursive() {
   537  	for _, child := range m.children {
   538  		child.UpdateNumDescendentsRecursive()
   539  	}
   540  	m.UpdateNumDescendents()
   541  }
   542  
   543  func (m *pathMap) MergeIn(other *pathMap) {
   544  	for key, theirs := range other.children {
   545  		ours, found := m.children[key]
   546  		if found {
   547  			ours.MergeIn(theirs)
   548  		} else {
   549  			m.children[key] = theirs
   550  		}
   551  	}
   552  	if other.ModTime != 0 {
   553  		m.mapNode = other.mapNode
   554  	}
   555  	m.UpdateNumDescendents()
   556  }
   557  
   558  func (m *pathMap) DumpAll() []dirFullInfo {
   559  	results := []dirFullInfo{}
   560  	m.dumpInto("", &results)
   561  	return results
   562  }
   563  
   564  func (m *pathMap) dumpInto(path string, results *[]dirFullInfo) {
   565  	*results = append(*results,
   566  		dirFullInfo{
   567  			pathAndStats{statResponse: m.statResponse, Path: path},
   568  			m.FileNames},
   569  	)
   570  	for key, child := range m.children {
   571  		childPath := joinCleanPaths(path, key)
   572  		if len(childPath) == 0 || childPath[0] != '/' {
   573  			childPath = "/" + childPath
   574  		}
   575  		child.dumpInto(childPath, results)
   576  	}
   577  }
   578  
   579  // a semaphore can be locked by up to <capacity> callers at once
   580  type semaphore struct {
   581  	pool chan bool
   582  }
   583  
   584  func newSemaphore(capacity int) *semaphore {
   585  	return &semaphore{pool: make(chan bool, capacity)}
   586  }
   587  
   588  func (l *semaphore) Lock() {
   589  	l.pool <- true
   590  }
   591  
   592  func (l *semaphore) Unlock() {
   593  	<-l.pool
   594  }
   595  
   596  // A threadPool runs goroutines and supports throttling and waiting.
   597  // Without throttling, Go may exhaust the maximum number of various resources, such as
   598  // threads or file descriptors, and crash the program.
   599  type threadPool struct {
   600  	receivedRequests sync.WaitGroup
   601  	activeRequests   semaphore
   602  }
   603  
   604  func newThreadPool(maxNumConcurrentThreads int) *threadPool {
   605  	return &threadPool{
   606  		receivedRequests: sync.WaitGroup{},
   607  		activeRequests:   *newSemaphore(maxNumConcurrentThreads),
   608  	}
   609  }
   610  
   611  // Run requests to run the given function in its own goroutine
   612  func (p *threadPool) Run(function func()) {
   613  	p.receivedRequests.Add(1)
   614  	// If Run() was called from within a goroutine spawned by this threadPool,
   615  	// then we may need to return from Run() before having capacity to actually
   616  	// run <function>.
   617  	//
   618  	// It's possible that the body of <function> contains a statement (such as a syscall)
   619  	// that will cause Go to pin it to a thread, or will contain a statement that uses
   620  	// another resource that is in short supply (such as a file descriptor), so we can't
   621  	// actually run <function> until we have capacity.
   622  	//
   623  	// However, the semaphore used for synchronization is implemented via a channel and
   624  	// shouldn't require a new thread for each access.
   625  	go func() {
   626  		p.activeRequests.Lock()
   627  		function()
   628  		p.activeRequests.Unlock()
   629  		p.receivedRequests.Done()
   630  	}()
   631  }
   632  
   633  // Wait waits until all goroutines are done, just like sync.WaitGroup's Wait
   634  func (p *threadPool) Wait() {
   635  	p.receivedRequests.Wait()
   636  }
   637  
   638  type fsErr struct {
   639  	path string
   640  	err  error
   641  }
   642  
   643  func (e fsErr) String() string {
   644  	return e.path + ": " + e.err.Error()
   645  }
   646  
   647  func (f *Finder) serializeCacheEntry(dirInfos []dirFullInfo) ([]byte, error) {
   648  	// group each dirFullInfo by its Device, to avoid having to repeat it in the output
   649  	dirsByDevice := map[uint64][]PersistedDirInfo{}
   650  	for _, entry := range dirInfos {
   651  		_, found := dirsByDevice[entry.Device]
   652  		if !found {
   653  			dirsByDevice[entry.Device] = []PersistedDirInfo{}
   654  		}
   655  		dirsByDevice[entry.Device] = append(dirsByDevice[entry.Device],
   656  			PersistedDirInfo{P: entry.Path, T: entry.ModTime, I: entry.Inode, F: entry.FileNames})
   657  	}
   658  
   659  	cacheEntry := CacheEntry{}
   660  
   661  	for device, infos := range dirsByDevice {
   662  		// find common prefix
   663  		prefix := ""
   664  		if len(infos) > 0 {
   665  			prefix = infos[0].P
   666  		}
   667  		for _, info := range infos {
   668  			for !strings.HasPrefix(info.P+"/", prefix+"/") {
   669  				prefix = filepath.Dir(prefix)
   670  				if prefix == "/" {
   671  					break
   672  				}
   673  			}
   674  		}
   675  		// remove common prefix
   676  		for i := range infos {
   677  			suffix := strings.Replace(infos[i].P, prefix, "", 1)
   678  			if len(suffix) > 0 && suffix[0] == '/' {
   679  				suffix = suffix[1:]
   680  			}
   681  			infos[i].P = suffix
   682  		}
   683  
   684  		// turn the map (keyed by device) into a list of structs with labeled fields
   685  		// this is to improve readability of the output
   686  		cacheEntry = append(cacheEntry, PersistedDirs{Device: device, Root: prefix, Dirs: infos})
   687  	}
   688  
   689  	// convert to json.
   690  	// it would save some space to use a different format than json for the db file,
   691  	// but the space and time savings are small, and json is easy for humans to read
   692  	bytes, err := json.Marshal(cacheEntry)
   693  	return bytes, err
   694  }
   695  
   696  func (f *Finder) parseCacheEntry(bytes []byte) ([]dirFullInfo, error) {
   697  	var cacheEntry CacheEntry
   698  	err := json.Unmarshal(bytes, &cacheEntry)
   699  	if err != nil {
   700  		return nil, err
   701  	}
   702  
   703  	// convert from a CacheEntry to a []dirFullInfo (by copying a few fields)
   704  	capacity := 0
   705  	for _, element := range cacheEntry {
   706  		capacity += len(element.Dirs)
   707  	}
   708  	nodes := make([]dirFullInfo, capacity)
   709  	count := 0
   710  	for _, element := range cacheEntry {
   711  		for _, dir := range element.Dirs {
   712  			path := joinCleanPaths(element.Root, dir.P)
   713  
   714  			nodes[count] = dirFullInfo{
   715  				pathAndStats: pathAndStats{
   716  					statResponse: statResponse{
   717  						ModTime: dir.T, Inode: dir.I, Device: element.Device,
   718  					},
   719  					Path: path},
   720  				FileNames: dir.F}
   721  			count++
   722  		}
   723  	}
   724  	return nodes, nil
   725  }
   726  
   727  // We use the following separator byte to distinguish individually parseable blocks of json
   728  // because we know this separator won't appear in the json that we're parsing.
   729  //
   730  // The newline byte can only appear in a UTF-8 stream if the newline character appears, because:
   731  // - The newline character is encoded as "0000 1010" in binary ("0a" in hex)
   732  // - UTF-8 dictates that bytes beginning with a "0" bit are never emitted as part of a multibyte
   733  //   character.
   734  //
   735  // We know that the newline character will never appear in our json string, because:
   736  // - If a newline character appears as part of a data string, then json encoding will
   737  //   emit two characters instead: '\' and 'n'.
   738  // - The json encoder that we use doesn't emit the optional newlines between any of its
   739  //   other outputs.
   740  const lineSeparator = byte('\n')
   741  
   742  func (f *Finder) readLine(reader *bufio.Reader) ([]byte, error) {
   743  	return reader.ReadBytes(lineSeparator)
   744  }
   745  
   746  // validateCacheHeader reads the cache header from cacheReader and tells whether the cache is compatible with this Finder
   747  func (f *Finder) validateCacheHeader(cacheReader *bufio.Reader) bool {
   748  	cacheVersionBytes, err := f.readLine(cacheReader)
   749  	if err != nil {
   750  		f.verbosef("Failed to read database header; database is invalid\n")
   751  		return false
   752  	}
   753  	if len(cacheVersionBytes) > 0 && cacheVersionBytes[len(cacheVersionBytes)-1] == lineSeparator {
   754  		cacheVersionBytes = cacheVersionBytes[:len(cacheVersionBytes)-1]
   755  	}
   756  	cacheVersionString := string(cacheVersionBytes)
   757  	currentVersion := f.cacheMetadata.Version
   758  	if cacheVersionString != currentVersion {
   759  		f.verbosef("Version changed from %q to %q, database is not applicable\n", cacheVersionString, currentVersion)
   760  		return false
   761  	}
   762  
   763  	cacheParamBytes, err := f.readLine(cacheReader)
   764  	if err != nil {
   765  		f.verbosef("Failed to read database search params; database is invalid\n")
   766  		return false
   767  	}
   768  
   769  	if len(cacheParamBytes) > 0 && cacheParamBytes[len(cacheParamBytes)-1] == lineSeparator {
   770  		cacheParamBytes = cacheParamBytes[:len(cacheParamBytes)-1]
   771  	}
   772  
   773  	currentParamBytes, err := f.cacheMetadata.Config.Dump()
   774  	if err != nil {
   775  		panic("Finder failed to serialize its parameters")
   776  	}
   777  	cacheParamString := string(cacheParamBytes)
   778  	currentParamString := string(currentParamBytes)
   779  	if cacheParamString != currentParamString {
   780  		f.verbosef("Params changed from %q to %q, database is not applicable\n", cacheParamString, currentParamString)
   781  		return false
   782  	}
   783  	return true
   784  }
   785  
   786  // loadBytes compares the cache info in <data> to the state of the filesystem
   787  // loadBytes returns a map representing <data> and also a slice of dirs that need to be re-walked
   788  func (f *Finder) loadBytes(id int, data []byte) (m *pathMap, dirsToWalk []string, err error) {
   789  
   790  	helperStartTime := time.Now()
   791  
   792  	cachedNodes, err := f.parseCacheEntry(data)
   793  	if err != nil {
   794  		return nil, nil, fmt.Errorf("Failed to parse block %v: %v\n", id, err.Error())
   795  	}
   796  
   797  	unmarshalDate := time.Now()
   798  	f.verbosef("Unmarshaled %v objects for %v in %v\n",
   799  		len(cachedNodes), id, unmarshalDate.Sub(helperStartTime))
   800  
   801  	tempMap := newPathMap("/")
   802  	stats := make([]statResponse, len(cachedNodes))
   803  
   804  	for i, node := range cachedNodes {
   805  		// check the file system for an updated timestamp
   806  		stats[i] = f.statDirSync(node.Path)
   807  	}
   808  
   809  	dirsToWalk = []string{}
   810  	for i, cachedNode := range cachedNodes {
   811  		updated := stats[i]
   812  		// save the cached value
   813  		container := tempMap.GetNode(cachedNode.Path, true)
   814  		container.mapNode = mapNode{statResponse: updated}
   815  
   816  		// if the metadata changed and the directory still exists, then
   817  		// make a note to walk it later
   818  		if !f.isInfoUpToDate(cachedNode.statResponse, updated) && updated.ModTime != 0 {
   819  			f.setModified()
   820  			// make a note that the directory needs to be walked
   821  			dirsToWalk = append(dirsToWalk, cachedNode.Path)
   822  		} else {
   823  			container.mapNode.FileNames = cachedNode.FileNames
   824  		}
   825  	}
   826  	// count the number of nodes to improve our understanding of the shape of the tree,
   827  	// thereby improving parallelism of subsequent searches
   828  	tempMap.UpdateNumDescendentsRecursive()
   829  
   830  	f.verbosef("Statted inodes of block %v in %v\n", id, time.Now().Sub(unmarshalDate))
   831  	return tempMap, dirsToWalk, nil
   832  }
   833  
   834  // startFromExternalCache loads the cache database from disk
   835  // startFromExternalCache waits to return until the load of the cache db is complete, but
   836  // startFromExternalCache does not wait for all every listDir() or statDir() request to complete
   837  func (f *Finder) startFromExternalCache() (err error) {
   838  	startTime := time.Now()
   839  	dbPath := f.DbPath
   840  
   841  	// open cache file and validate its header
   842  	reader, err := f.filesystem.Open(dbPath)
   843  	if err != nil {
   844  		return errors.New("No data to load from database\n")
   845  	}
   846  	bufferedReader := bufio.NewReader(reader)
   847  	if !f.validateCacheHeader(bufferedReader) {
   848  		return errors.New("Cache header does not match")
   849  	}
   850  	f.verbosef("Database header matches, will attempt to use database %v\n", f.DbPath)
   851  
   852  	// read the file and spawn threads to process it
   853  	nodesToWalk := [][]*pathMap{}
   854  	mainTree := newPathMap("/")
   855  
   856  	// read the blocks and stream them into <blockChannel>
   857  	type dataBlock struct {
   858  		id   int
   859  		err  error
   860  		data []byte
   861  	}
   862  	blockChannel := make(chan dataBlock, f.numDbLoadingThreads)
   863  	readBlocks := func() {
   864  		index := 0
   865  		for {
   866  			// It takes some time to unmarshal the input from json, so we want
   867  			// to unmarshal it in parallel. In order to find valid places to
   868  			// break the input, we scan for the line separators that we inserted
   869  			// (for this purpose) when we dumped the database.
   870  			data, err := f.readLine(bufferedReader)
   871  			var response dataBlock
   872  			done := false
   873  			if err != nil && err != io.EOF {
   874  				response = dataBlock{id: index, err: err, data: nil}
   875  				done = true
   876  			} else {
   877  				done = (err == io.EOF)
   878  				response = dataBlock{id: index, err: nil, data: data}
   879  			}
   880  			blockChannel <- response
   881  			index++
   882  			duration := time.Since(startTime)
   883  			f.verbosef("Read block %v after %v\n", index, duration)
   884  			if done {
   885  				f.verbosef("Read %v blocks in %v\n", index, duration)
   886  				close(blockChannel)
   887  				return
   888  			}
   889  		}
   890  	}
   891  	go readBlocks()
   892  
   893  	// Read from <blockChannel> and stream the responses into <resultChannel>.
   894  	type workResponse struct {
   895  		id          int
   896  		err         error
   897  		tree        *pathMap
   898  		updatedDirs []string
   899  	}
   900  	resultChannel := make(chan workResponse)
   901  	processBlocks := func() {
   902  		numProcessed := 0
   903  		threadPool := newThreadPool(f.numDbLoadingThreads)
   904  		for {
   905  			// get a block to process
   906  			block, received := <-blockChannel
   907  			if !received {
   908  				break
   909  			}
   910  
   911  			if block.err != nil {
   912  				resultChannel <- workResponse{err: block.err}
   913  				break
   914  			}
   915  			numProcessed++
   916  			// wait until there is CPU available to process it
   917  			threadPool.Run(
   918  				func() {
   919  					processStartTime := time.Now()
   920  					f.verbosef("Starting to process block %v after %v\n",
   921  						block.id, processStartTime.Sub(startTime))
   922  					tempMap, updatedDirs, err := f.loadBytes(block.id, block.data)
   923  					var response workResponse
   924  					if err != nil {
   925  						f.verbosef(
   926  							"Block %v failed to parse with error %v\n",
   927  							block.id, err)
   928  						response = workResponse{err: err}
   929  					} else {
   930  						response = workResponse{
   931  							id:          block.id,
   932  							err:         nil,
   933  							tree:        tempMap,
   934  							updatedDirs: updatedDirs,
   935  						}
   936  					}
   937  					f.verbosef("Processed block %v in %v\n",
   938  						block.id, time.Since(processStartTime),
   939  					)
   940  					resultChannel <- response
   941  				},
   942  			)
   943  		}
   944  		threadPool.Wait()
   945  		f.verbosef("Finished processing %v blocks in %v\n",
   946  			numProcessed, time.Since(startTime))
   947  		close(resultChannel)
   948  	}
   949  	go processBlocks()
   950  
   951  	// Read from <resultChannel> and use the results
   952  	combineResults := func() (err error) {
   953  		for {
   954  			result, received := <-resultChannel
   955  			if !received {
   956  				break
   957  			}
   958  			if err != nil {
   959  				// In case of an error, wait for work to complete before
   960  				// returning the error. This ensures that any subsequent
   961  				// work doesn't need to compete for resources (and possibly
   962  				// fail due to, for example, a filesystem limit on the number of
   963  				// concurrently open files) with past work.
   964  				continue
   965  			}
   966  			if result.err != nil {
   967  				err = result.err
   968  				continue
   969  			}
   970  			// update main tree
   971  			mainTree.MergeIn(result.tree)
   972  			// record any new directories that we will need to Stat()
   973  			updatedNodes := make([]*pathMap, len(result.updatedDirs))
   974  			for j, dir := range result.updatedDirs {
   975  				node := mainTree.GetNode(dir, false)
   976  				updatedNodes[j] = node
   977  			}
   978  			nodesToWalk = append(nodesToWalk, updatedNodes)
   979  		}
   980  		return err
   981  	}
   982  	err = combineResults()
   983  	if err != nil {
   984  		return err
   985  	}
   986  
   987  	f.nodes = *mainTree
   988  
   989  	// after having loaded the entire db and therefore created entries for
   990  	// the directories we know of, now it's safe to start calling ReadDir on
   991  	// any updated directories
   992  	for i := range nodesToWalk {
   993  		f.listDirsAsync(nodesToWalk[i])
   994  	}
   995  	f.verbosef("Loaded db and statted known dirs in %v\n", time.Since(startTime))
   996  	f.threadPool.Wait()
   997  	f.verbosef("Loaded db and statted all dirs in %v\n", time.Now().Sub(startTime))
   998  
   999  	return err
  1000  }
  1001  
  1002  // startWithoutExternalCache starts scanning the filesystem according to the cache config
  1003  // startWithoutExternalCache should be called if startFromExternalCache is not applicable
  1004  func (f *Finder) startWithoutExternalCache() {
  1005  	startTime := time.Now()
  1006  	configDirs := f.cacheMetadata.Config.RootDirs
  1007  
  1008  	// clean paths
  1009  	candidates := make([]string, len(configDirs))
  1010  	for i, dir := range configDirs {
  1011  		candidates[i] = filepath.Clean(dir)
  1012  	}
  1013  	// remove duplicates
  1014  	dirsToScan := make([]string, 0, len(configDirs))
  1015  	for _, candidate := range candidates {
  1016  		include := true
  1017  		for _, included := range dirsToScan {
  1018  			if included == "/" || strings.HasPrefix(candidate+"/", included+"/") {
  1019  				include = false
  1020  				break
  1021  			}
  1022  		}
  1023  		if include {
  1024  			dirsToScan = append(dirsToScan, candidate)
  1025  		}
  1026  	}
  1027  
  1028  	// start searching finally
  1029  	for _, path := range dirsToScan {
  1030  		f.verbosef("Starting find of %v\n", path)
  1031  		f.startFind(path)
  1032  	}
  1033  
  1034  	f.threadPool.Wait()
  1035  
  1036  	f.verbosef("Scanned filesystem (not using cache) in %v\n", time.Now().Sub(startTime))
  1037  }
  1038  
  1039  // isInfoUpToDate tells whether <new> can confirm that results computed at <old> are still valid
  1040  func (f *Finder) isInfoUpToDate(old statResponse, new statResponse) (equal bool) {
  1041  	if old.Inode != new.Inode {
  1042  		return false
  1043  	}
  1044  	if old.ModTime != new.ModTime {
  1045  		return false
  1046  	}
  1047  	if old.Device != new.Device {
  1048  		return false
  1049  	}
  1050  	return true
  1051  }
  1052  
  1053  func (f *Finder) wasModified() bool {
  1054  	return atomic.LoadInt32(&f.modifiedFlag) > 0
  1055  }
  1056  
  1057  func (f *Finder) setModified() {
  1058  	var newVal int32
  1059  	newVal = 1
  1060  	atomic.StoreInt32(&f.modifiedFlag, newVal)
  1061  }
  1062  
  1063  // sortedDirEntries exports directory entries to facilitate dumping them to the external cache
  1064  func (f *Finder) sortedDirEntries() []dirFullInfo {
  1065  	startTime := time.Now()
  1066  	nodes := make([]dirFullInfo, 0)
  1067  	for _, node := range f.nodes.DumpAll() {
  1068  		if node.ModTime != 0 {
  1069  			nodes = append(nodes, node)
  1070  		}
  1071  	}
  1072  	discoveryDate := time.Now()
  1073  	f.verbosef("Generated %v cache entries in %v\n", len(nodes), discoveryDate.Sub(startTime))
  1074  	less := func(i int, j int) bool {
  1075  		return nodes[i].Path < nodes[j].Path
  1076  	}
  1077  	sort.Slice(nodes, less)
  1078  	sortDate := time.Now()
  1079  	f.verbosef("Sorted %v cache entries in %v\n", len(nodes), sortDate.Sub(discoveryDate))
  1080  
  1081  	return nodes
  1082  }
  1083  
  1084  // serializeDb converts the cache database into a form to save to disk
  1085  func (f *Finder) serializeDb() ([]byte, error) {
  1086  	// sort dir entries
  1087  	var entryList = f.sortedDirEntries()
  1088  
  1089  	// Generate an output file that can be conveniently loaded using the same number of threads
  1090  	// as were used in this execution (because presumably that will be the number of threads
  1091  	// used in the next execution too)
  1092  
  1093  	// generate header
  1094  	header := []byte{}
  1095  	header = append(header, []byte(f.cacheMetadata.Version)...)
  1096  	header = append(header, lineSeparator)
  1097  	configDump, err := f.cacheMetadata.Config.Dump()
  1098  	if err != nil {
  1099  		return nil, err
  1100  	}
  1101  	header = append(header, configDump...)
  1102  
  1103  	// serialize individual blocks in parallel
  1104  	numBlocks := f.numDbLoadingThreads
  1105  	if numBlocks > len(entryList) {
  1106  		numBlocks = len(entryList)
  1107  	}
  1108  	blocks := make([][]byte, 1+numBlocks)
  1109  	blocks[0] = header
  1110  	blockMin := 0
  1111  	wg := sync.WaitGroup{}
  1112  	var errLock sync.Mutex
  1113  
  1114  	for i := 1; i <= numBlocks; i++ {
  1115  		// identify next block
  1116  		blockMax := len(entryList) * i / numBlocks
  1117  		block := entryList[blockMin:blockMax]
  1118  
  1119  		// process block
  1120  		wg.Add(1)
  1121  		go func(index int, block []dirFullInfo) {
  1122  			byteBlock, subErr := f.serializeCacheEntry(block)
  1123  			f.verbosef("Serialized block %v into %v bytes\n", index, len(byteBlock))
  1124  			if subErr != nil {
  1125  				f.verbosef("%v\n", subErr.Error())
  1126  				errLock.Lock()
  1127  				err = subErr
  1128  				errLock.Unlock()
  1129  			} else {
  1130  				blocks[index] = byteBlock
  1131  			}
  1132  			wg.Done()
  1133  		}(i, block)
  1134  
  1135  		blockMin = blockMax
  1136  	}
  1137  
  1138  	wg.Wait()
  1139  
  1140  	if err != nil {
  1141  		return nil, err
  1142  	}
  1143  
  1144  	content := bytes.Join(blocks, []byte{lineSeparator})
  1145  
  1146  	return content, nil
  1147  }
  1148  
  1149  // dumpDb saves the cache database to disk
  1150  func (f *Finder) dumpDb() error {
  1151  	startTime := time.Now()
  1152  	f.verbosef("Dumping db\n")
  1153  
  1154  	tempPath := f.DbPath + ".tmp"
  1155  
  1156  	bytes, err := f.serializeDb()
  1157  	if err != nil {
  1158  		return err
  1159  	}
  1160  	serializeDate := time.Now()
  1161  	f.verbosef("Serialized db in %v\n", serializeDate.Sub(startTime))
  1162  	// dump file and atomically move
  1163  	err = f.filesystem.WriteFile(tempPath, bytes, 0777)
  1164  	if err != nil {
  1165  		return err
  1166  	}
  1167  	err = f.filesystem.Rename(tempPath, f.DbPath)
  1168  	if err != nil {
  1169  		return err
  1170  	}
  1171  
  1172  	f.verbosef("Wrote db in %v\n", time.Now().Sub(serializeDate))
  1173  	return nil
  1174  
  1175  }
  1176  
  1177  // canIgnoreFsErr checks for certain classes of filesystem errors that are safe to ignore
  1178  func (f *Finder) canIgnoreFsErr(err error) bool {
  1179  	pathErr, isPathErr := err.(*os.PathError)
  1180  	if !isPathErr {
  1181  		// Don't recognize this error
  1182  		return false
  1183  	}
  1184  	if os.IsPermission(pathErr) {
  1185  		// Permission errors are ignored:
  1186  		// https://issuetracker.google.com/37553659
  1187  		// https://github.com/google/kati/pull/116
  1188  		return true
  1189  	}
  1190  	if pathErr.Err == os.ErrNotExist {
  1191  		// If a directory doesn't exist, that generally means the cache is out-of-date
  1192  		return true
  1193  	}
  1194  	// Don't recognize this error
  1195  	return false
  1196  }
  1197  
  1198  // onFsError should be called whenever a potentially fatal error is returned from a filesystem call
  1199  func (f *Finder) onFsError(path string, err error) {
  1200  	if !f.canIgnoreFsErr(err) {
  1201  		// We could send the errors through a channel instead, although that would cause this call
  1202  		// to block unless we preallocated a sufficient buffer or spawned a reader thread.
  1203  		// Although it wouldn't be too complicated to spawn a reader thread, it's still slightly
  1204  		// more convenient to use a lock. Only in an unusual situation should this code be
  1205  		// invoked anyway.
  1206  		f.errlock.Lock()
  1207  		f.fsErrs = append(f.fsErrs, fsErr{path: path, err: err})
  1208  		f.errlock.Unlock()
  1209  	}
  1210  }
  1211  
  1212  // discardErrsForPrunedPaths removes any errors for paths that are no longer included in the cache
  1213  func (f *Finder) discardErrsForPrunedPaths() {
  1214  	// This function could be somewhat inefficient due to being single-threaded,
  1215  	// but the length of f.fsErrs should be approximately 0, so it shouldn't take long anyway.
  1216  	relevantErrs := make([]fsErr, 0, len(f.fsErrs))
  1217  	for _, fsErr := range f.fsErrs {
  1218  		path := fsErr.path
  1219  		node := f.nodes.GetNode(path, false)
  1220  		if node != nil {
  1221  			// The path in question wasn't pruned due to a failure to process a parent directory.
  1222  			// So, the failure to process this path is important
  1223  			relevantErrs = append(relevantErrs, fsErr)
  1224  		}
  1225  	}
  1226  	f.fsErrs = relevantErrs
  1227  }
  1228  
  1229  // getErr returns an error based on previous calls to onFsErr, if any
  1230  func (f *Finder) getErr() error {
  1231  	f.discardErrsForPrunedPaths()
  1232  
  1233  	numErrs := len(f.fsErrs)
  1234  	if numErrs < 1 {
  1235  		return nil
  1236  	}
  1237  
  1238  	maxNumErrsToInclude := 10
  1239  	message := ""
  1240  	if numErrs > maxNumErrsToInclude {
  1241  		message = fmt.Sprintf("finder encountered %v errors: %v...", numErrs, f.fsErrs[:maxNumErrsToInclude])
  1242  	} else {
  1243  		message = fmt.Sprintf("finder encountered %v errors: %v", numErrs, f.fsErrs)
  1244  	}
  1245  
  1246  	return errors.New(message)
  1247  }
  1248  
  1249  func (f *Finder) statDirAsync(dir *pathMap) {
  1250  	node := dir
  1251  	path := dir.path
  1252  	f.threadPool.Run(
  1253  		func() {
  1254  			updatedStats := f.statDirSync(path)
  1255  
  1256  			if !f.isInfoUpToDate(node.statResponse, updatedStats) {
  1257  				node.mapNode = mapNode{
  1258  					statResponse: updatedStats,
  1259  					FileNames:    []string{},
  1260  				}
  1261  				f.setModified()
  1262  				if node.statResponse.ModTime != 0 {
  1263  					// modification time was updated, so re-scan for
  1264  					// child directories
  1265  					f.listDirAsync(dir)
  1266  				}
  1267  			}
  1268  		},
  1269  	)
  1270  }
  1271  
  1272  func (f *Finder) statDirSync(path string) statResponse {
  1273  
  1274  	fileInfo, err := f.filesystem.Lstat(path)
  1275  
  1276  	var stats statResponse
  1277  	if err != nil {
  1278  		// possibly record this error
  1279  		f.onFsError(path, err)
  1280  		// in case of a failure to stat the directory, treat the directory as missing (modTime = 0)
  1281  		return stats
  1282  	}
  1283  	modTime := fileInfo.ModTime()
  1284  	stats = statResponse{}
  1285  	inode, err := f.filesystem.InodeNumber(fileInfo)
  1286  	if err != nil {
  1287  		panic(fmt.Sprintf("Could not get inode number of %v: %v\n", path, err.Error()))
  1288  	}
  1289  	stats.Inode = inode
  1290  	device, err := f.filesystem.DeviceNumber(fileInfo)
  1291  	if err != nil {
  1292  		panic(fmt.Sprintf("Could not get device number of %v: %v\n", path, err.Error()))
  1293  	}
  1294  	stats.Device = device
  1295  	permissionsChangeTime, err := f.filesystem.PermTime(fileInfo)
  1296  
  1297  	if err != nil {
  1298  		panic(fmt.Sprintf("Could not get permissions modification time (CTime) of %v: %v\n", path, err.Error()))
  1299  	}
  1300  	// We're only interested in knowing whether anything about the directory
  1301  	// has changed since last check, so we use the latest of the two
  1302  	// modification times (content modification (mtime) and
  1303  	// permission modification (ctime))
  1304  	if permissionsChangeTime.After(modTime) {
  1305  		modTime = permissionsChangeTime
  1306  	}
  1307  	stats.ModTime = modTime.UnixNano()
  1308  
  1309  	return stats
  1310  }
  1311  
  1312  // pruneCacheCandidates removes the items that we don't want to include in our persistent cache
  1313  func (f *Finder) pruneCacheCandidates(items *DirEntries) {
  1314  
  1315  	for _, fileName := range items.FileNames {
  1316  		for _, abortedName := range f.cacheMetadata.Config.PruneFiles {
  1317  			if fileName == abortedName {
  1318  				items.FileNames = []string{}
  1319  				items.DirNames = []string{}
  1320  				return
  1321  			}
  1322  		}
  1323  	}
  1324  
  1325  	// remove any files that aren't the ones we want to include
  1326  	writeIndex := 0
  1327  	for _, fileName := range items.FileNames {
  1328  		// include only these files
  1329  		for _, includedName := range f.cacheMetadata.Config.IncludeFiles {
  1330  			if fileName == includedName {
  1331  				items.FileNames[writeIndex] = fileName
  1332  				writeIndex++
  1333  				break
  1334  			}
  1335  		}
  1336  	}
  1337  	// resize
  1338  	items.FileNames = items.FileNames[:writeIndex]
  1339  
  1340  	writeIndex = 0
  1341  	for _, dirName := range items.DirNames {
  1342  		items.DirNames[writeIndex] = dirName
  1343  		// ignore other dirs that are known to not be inputs to the build process
  1344  		include := true
  1345  		for _, excludedName := range f.cacheMetadata.Config.ExcludeDirs {
  1346  			if dirName == excludedName {
  1347  				// don't include
  1348  				include = false
  1349  				break
  1350  			}
  1351  		}
  1352  		if include {
  1353  			writeIndex++
  1354  		}
  1355  	}
  1356  	// resize
  1357  	items.DirNames = items.DirNames[:writeIndex]
  1358  }
  1359  
  1360  func (f *Finder) listDirsAsync(nodes []*pathMap) {
  1361  	f.threadPool.Run(
  1362  		func() {
  1363  			for i := range nodes {
  1364  				f.listDirSync(nodes[i])
  1365  			}
  1366  		},
  1367  	)
  1368  }
  1369  
  1370  func (f *Finder) listDirAsync(node *pathMap) {
  1371  	f.threadPool.Run(
  1372  		func() {
  1373  			f.listDirSync(node)
  1374  		},
  1375  	)
  1376  }
  1377  
  1378  func (f *Finder) listDirSync(dir *pathMap) {
  1379  	path := dir.path
  1380  	children, err := f.filesystem.ReadDir(path)
  1381  
  1382  	if err != nil {
  1383  		// possibly record this error
  1384  		f.onFsError(path, err)
  1385  		// if listing the contents of the directory fails (presumably due to
  1386  		// permission denied), then treat the directory as empty
  1387  		children = nil
  1388  	}
  1389  
  1390  	var subdirs []string
  1391  	var subfiles []string
  1392  
  1393  	for _, child := range children {
  1394  		linkBits := child.Mode() & os.ModeSymlink
  1395  		isLink := linkBits != 0
  1396  		if child.IsDir() {
  1397  			if !isLink {
  1398  				// Skip symlink dirs.
  1399  				// We don't have to support symlink dirs because
  1400  				// that would cause duplicates.
  1401  				subdirs = append(subdirs, child.Name())
  1402  			}
  1403  		} else {
  1404  			// We do have to support symlink files because the link name might be
  1405  			// different than the target name
  1406  			// (for example, Android.bp -> build/soong/root.bp)
  1407  			subfiles = append(subfiles, child.Name())
  1408  		}
  1409  
  1410  	}
  1411  	parentNode := dir
  1412  
  1413  	entry := &DirEntries{Path: path, DirNames: subdirs, FileNames: subfiles}
  1414  	f.pruneCacheCandidates(entry)
  1415  
  1416  	// create a pathMap node for each relevant subdirectory
  1417  	relevantChildren := map[string]*pathMap{}
  1418  	for _, subdirName := range entry.DirNames {
  1419  		childNode, found := parentNode.children[subdirName]
  1420  		// if we already knew of this directory, then we already have a request pending to Stat it
  1421  		// if we didn't already know of this directory, then we must Stat it now
  1422  		if !found {
  1423  			childNode = parentNode.newChild(subdirName)
  1424  			f.statDirAsync(childNode)
  1425  		}
  1426  		relevantChildren[subdirName] = childNode
  1427  	}
  1428  	// Note that in rare cases, it's possible that we're reducing the set of
  1429  	// children via this statement, if these are all true:
  1430  	// 1. we previously had a cache that knew about subdirectories of parentNode
  1431  	// 2. the user created a prune-file (described in pruneCacheCandidates)
  1432  	//    inside <parentNode>, which specifies that the contents of parentNode
  1433  	//    are to be ignored.
  1434  	// The fact that it's possible to remove children here means that *pathMap structs
  1435  	// must not be looked up from f.nodes by filepath (and instead must be accessed by
  1436  	// direct pointer) until after every listDirSync completes
  1437  	parentNode.FileNames = entry.FileNames
  1438  	parentNode.children = relevantChildren
  1439  
  1440  }
  1441  
  1442  // listMatches takes a node and a function that specifies which subdirectories and
  1443  // files to include, and listMatches returns the matches
  1444  func (f *Finder) listMatches(node *pathMap,
  1445  	filter WalkFunc) (subDirs []*pathMap, filePaths []string) {
  1446  	entries := DirEntries{
  1447  		FileNames: node.FileNames,
  1448  	}
  1449  	entries.DirNames = make([]string, 0, len(node.children))
  1450  	for childName := range node.children {
  1451  		entries.DirNames = append(entries.DirNames, childName)
  1452  	}
  1453  
  1454  	dirNames, fileNames := filter(entries)
  1455  
  1456  	subDirs = []*pathMap{}
  1457  	filePaths = make([]string, 0, len(fileNames))
  1458  	for _, fileName := range fileNames {
  1459  		filePaths = append(filePaths, joinCleanPaths(node.path, fileName))
  1460  	}
  1461  	subDirs = make([]*pathMap, 0, len(dirNames))
  1462  	for _, childName := range dirNames {
  1463  		child, ok := node.children[childName]
  1464  		if ok {
  1465  			subDirs = append(subDirs, child)
  1466  		}
  1467  	}
  1468  
  1469  	return subDirs, filePaths
  1470  }
  1471  
  1472  // findInCacheMultithreaded spawns potentially multiple goroutines with which to search the cache.
  1473  func (f *Finder) findInCacheMultithreaded(node *pathMap, filter WalkFunc,
  1474  	approxNumThreads int) []string {
  1475  
  1476  	if approxNumThreads < 2 {
  1477  		// Done spawning threads; process remaining directories
  1478  		return f.findInCacheSinglethreaded(node, filter)
  1479  	}
  1480  
  1481  	totalWork := 0
  1482  	for _, child := range node.children {
  1483  		totalWork += child.approximateNumDescendents
  1484  	}
  1485  	childrenResults := make(chan []string, len(node.children))
  1486  
  1487  	subDirs, filePaths := f.listMatches(node, filter)
  1488  
  1489  	// process child directories
  1490  	for _, child := range subDirs {
  1491  		numChildThreads := approxNumThreads * child.approximateNumDescendents / totalWork
  1492  		childProcessor := func(child *pathMap) {
  1493  			childResults := f.findInCacheMultithreaded(child, filter, numChildThreads)
  1494  			childrenResults <- childResults
  1495  		}
  1496  		// If we're allowed to use more than 1 thread to process this directory,
  1497  		// then instead we use 1 thread for each subdirectory.
  1498  		// It would be strange to spawn threads for only some subdirectories.
  1499  		go childProcessor(child)
  1500  	}
  1501  
  1502  	// collect results
  1503  	for i := 0; i < len(subDirs); i++ {
  1504  		childResults := <-childrenResults
  1505  		filePaths = append(filePaths, childResults...)
  1506  	}
  1507  	close(childrenResults)
  1508  
  1509  	return filePaths
  1510  }
  1511  
  1512  // findInCacheSinglethreaded synchronously searches the cache for all matching file paths
  1513  // note findInCacheSinglethreaded runs 2X to 4X as fast by being iterative rather than recursive
  1514  func (f *Finder) findInCacheSinglethreaded(node *pathMap, filter WalkFunc) []string {
  1515  	if node == nil {
  1516  		return []string{}
  1517  	}
  1518  
  1519  	nodes := []*pathMap{node}
  1520  	matches := []string{}
  1521  
  1522  	for len(nodes) > 0 {
  1523  		currentNode := nodes[0]
  1524  		nodes = nodes[1:]
  1525  
  1526  		subDirs, filePaths := f.listMatches(currentNode, filter)
  1527  
  1528  		nodes = append(nodes, subDirs...)
  1529  
  1530  		matches = append(matches, filePaths...)
  1531  	}
  1532  	return matches
  1533  }