golang.org/x/tools/gopls@v0.15.3/internal/cache/parse_cache.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cache
     6  
     7  import (
     8  	"bytes"
     9  	"container/heap"
    10  	"context"
    11  	"fmt"
    12  	"go/parser"
    13  	"go/token"
    14  	"math/bits"
    15  	"runtime"
    16  	"sync"
    17  	"time"
    18  
    19  	"golang.org/x/sync/errgroup"
    20  	"golang.org/x/tools/gopls/internal/file"
    21  	"golang.org/x/tools/gopls/internal/cache/parsego"
    22  	"golang.org/x/tools/gopls/internal/protocol"
    23  	"golang.org/x/tools/internal/memoize"
    24  	"golang.org/x/tools/internal/tokeninternal"
    25  )
    26  
    27  // This file contains an implementation of an LRU parse cache, that offsets the
    28  // base token.Pos value of each cached file so that they may be later described
    29  // by a single dedicated FileSet.
    30  //
// This is achieved by tracking a monotonic offset in the token.Pos space that
// is incremented before parsing to allow room for the resulting parsed file.
    33  
// reservedForParsing defines the room in the token.Pos space reserved for
// cached parsed files.
//
// Files parsed through the parseCache are guaranteed not to have overlapping
// spans: the parseCache tracks a monotonic base for newly parsed files.
//
// By offsetting the initial base of a FileSet, we can allow other operations
// accepting the FileSet (such as the gcimporter) to add new files using the
// normal FileSet APIs without overlapping with cached parsed files.
//
// Note that on 64-bit systems this is 1<<60, representing an exabyte of
// parsed data — more than any gopls process can ever parse.
//
// On 32-bit systems we don't cache parse results (see parseFiles).
const reservedForParsing = 1 << (bits.UintSize - 4)
    49  
    50  // fileSetWithBase returns a new token.FileSet with Base() equal to the
    51  // requested base.
    52  //
    53  // If base < 1, fileSetWithBase panics.
    54  // (1 is the smallest permitted FileSet base).
    55  func fileSetWithBase(base int) *token.FileSet {
    56  	fset := token.NewFileSet()
    57  	if base > 1 {
    58  		// Add a dummy file to set the base of fset. We won't ever use the
    59  		// resulting FileSet, so it doesn't matter how we achieve this.
    60  		//
    61  		// FileSets leave a 1-byte padding between files, so we set the base by
    62  		// adding a zero-length file at base-1.
    63  		fset.AddFile("", base-1, 0)
    64  	}
    65  	if fset.Base() != base {
    66  		panic("unexpected FileSet.Base")
    67  	}
    68  	return fset
    69  }
    70  
const (
	// parseCacheMinFiles is the minimum number of recent files to retain.
	// Always keep 100 recent files, independent of their wall-clock age, to
	// optimize the case where the user resumes editing after a delay.
	parseCacheMinFiles = 100
)
    76  
// parsePadding is additional padding allocated to allow for increases in
// length (such as appending missing braces) caused by fixAST.
//
// This is used to mitigate a chicken and egg problem: we must know the base
// offset of the file we're about to parse, before we start parsing, and yet
// src fixups may affect the actual size of the parsed content (and therefore
// the offsets of subsequent files).
//
// When we encounter a file that no longer fits in its allocated space in the
// fileset, we have no choice but to re-parse it. Leaving a generous padding
// reduces the likelihood of this "slow path".
//
// This value is mutable for testing, so that we can exercise the slow path.
var parsePadding = 1000 // mutable for testing
    91  
// A parseCache holds recently accessed parsed Go files. After new files are
// stored, older files may be evicted from the cache via garbage collection.
//
// The parseCache.parseFiles method exposes a batch API for parsing (and
// caching) multiple files. This is necessary for type-checking, where files
// must be parsed in a common fileset.
type parseCache struct {
	expireAfter time.Duration // interval at which to collect expired cache entries
	done        chan struct{} // closed when GC is stopped

	mu       sync.Mutex                    // guards all fields below
	m        map[parseKey]*parseCacheEntry // live entries, keyed by (uri, mode, purgeFuncBodies)
	lru      queue                         // min-atime priority queue of *parseCacheEntry
	clock    uint64                        // clock time, incremented when the cache is updated
	nextBase int                           // base offset for the next parsed file
}
   108  
   109  // newParseCache creates a new parse cache and starts a goroutine to garbage
   110  // collect entries whose age is at least expireAfter.
   111  //
   112  // Callers must call parseCache.stop when the parse cache is no longer in use.
   113  func newParseCache(expireAfter time.Duration) *parseCache {
   114  	c := &parseCache{
   115  		expireAfter: expireAfter,
   116  		m:           make(map[parseKey]*parseCacheEntry),
   117  		done:        make(chan struct{}),
   118  	}
   119  	go c.gc()
   120  	return c
   121  }
   122  
// stop causes the GC goroutine to exit.
//
// NOTE(review): stop closes c.done, so it presumably must be called at most
// once per cache — confirm with callers.
func (c *parseCache) stop() {
	close(c.done)
}
   127  
// parseKey uniquely identifies a parsed Go file.
type parseKey struct {
	uri             protocol.DocumentURI // location of the file
	mode            parser.Mode          // parser mode used to parse it
	purgeFuncBodies bool                 // whether function bodies were purged before parsing
}
   134  
// A parseCacheEntry is a single cached parse result, tracked both in the
// parseCache map (by key) and in its LRU priority queue (by atime).
type parseCacheEntry struct {
	key      parseKey
	hash     file.Hash        // content hash of the parsed file version
	promise  *memoize.Promise // memoize.Promise[*ParsedGoFile]
	atime    uint64           // clock time of last access, for use in LRU sorting
	walltime time.Time        // actual time of last access, for use in time-based eviction; too coarse for LRU on some systems
	lruIndex int              // owned by the queue implementation
}
   143  
// startParse prepares a parsing pass, creating new promises in the cache for
// any cache misses.
//
// The resulting slice has an entry for every given file handle, though some
// entries may be nil if there was an error reading the file (in which case the
// resulting error will be non-nil).
func (c *parseCache) startParse(mode parser.Mode, purgeFuncBodies bool, fhs ...file.Handle) ([]*memoize.Promise, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Any parsing pass increments the clock, as we'll update access times.
	// (technically, if fhs is empty this isn't necessary, but that's a degenerate case).
	//
	// All entries parsed from a single call get the same access time.
	c.clock++
	walltime := time.Now()

	// Read file data and collect cacheable files.
	var (
		data           = make([][]byte, len(fhs)) // file content for each readable file
		promises       = make([]*memoize.Promise, len(fhs))
		firstReadError error // first error from fh.Read, or nil
	)
	for i, fh := range fhs {
		content, err := fh.Content()
		if err != nil {
			// Record the first read failure but keep going: the
			// corresponding promises[i] stays nil.
			if firstReadError == nil {
				firstReadError = err
			}
			continue
		}
		data[i] = content

		key := parseKey{
			uri:             fh.URI(),
			mode:            mode,
			purgeFuncBodies: purgeFuncBodies,
		}

		if e, ok := c.m[key]; ok {
			if e.hash == fh.Identity().Hash { // cache hit
				// Refresh both clocks and restore the heap invariant,
				// since atime determines LRU order.
				e.atime = c.clock
				e.walltime = walltime
				heap.Fix(&c.lru, e.lruIndex)
				promises[i] = e.promise
				continue
			} else {
				// An entry exists for this key, but for a different version
				// of the file. Evict it; a fresh entry is added below.
				delete(c.m, e.key)
				heap.Remove(&c.lru, e.lruIndex)
			}
		}

		uri := fh.URI()
		promise := memoize.NewPromise("parseCache.parse", func(ctx context.Context, _ interface{}) interface{} {
			// Allocate 2*len(content)+parsePadding to allow for re-parsing once
			// inside of parseGoSrc without exceeding the allocated space.
			// (Pos space is allocated lazily, when the promise first runs.)
			base, nextBase := c.allocateSpace(2*len(content) + parsePadding)

			pgf, fixes1 := parsego.Parse(ctx, fileSetWithBase(base), uri, content, mode, purgeFuncBodies)
			file := pgf.Tok
			if file.Base()+file.Size()+1 > nextBase {
				// The parsed file exceeds its allocated space, likely due to multiple
				// passes of src fixing. In this case, we have no choice but to re-do
				// the operation with the correct size.
				//
				// Even though the final successful parse requires only file.Size()
				// bytes of Pos space, we need to accommodate all the missteps to get
				// there, as parseGoSrc will repeat them.
				actual := file.Base() + file.Size() - base // actual size consumed, after re-parsing
				base2, nextBase2 := c.allocateSpace(actual)
				pgf2, fixes2 := parsego.Parse(ctx, fileSetWithBase(base2), uri, content, mode, purgeFuncBodies)

				// In golang/go#59097 we observed that this panic condition was hit.
				// One bug was found and fixed, but record more information here in
				// case there is still a bug here.
				if end := pgf2.Tok.Base() + pgf2.Tok.Size(); end != nextBase2-1 {
					var errBuf bytes.Buffer
					fmt.Fprintf(&errBuf, "internal error: non-deterministic parsing result:\n")
					fmt.Fprintf(&errBuf, "\t%q (%d-%d) does not span %d-%d\n", uri, pgf2.Tok.Base(), base2, end, nextBase2-1)
					fmt.Fprintf(&errBuf, "\tfirst %q (%d-%d)\n", pgf.URI, pgf.Tok.Base(), pgf.Tok.Base()+pgf.Tok.Size())
					fmt.Fprintf(&errBuf, "\tfirst space: (%d-%d), second space: (%d-%d)\n", base, nextBase, base2, nextBase2)
					fmt.Fprintf(&errBuf, "\tfirst mode: %v, second mode: %v", pgf.Mode, pgf2.Mode)
					fmt.Fprintf(&errBuf, "\tfirst err: %v, second err: %v", pgf.ParseErr, pgf2.ParseErr)
					fmt.Fprintf(&errBuf, "\tfirst fixes: %v, second fixes: %v", fixes1, fixes2)
					panic(errBuf.String())
				}
				pgf = pgf2
			}
			return pgf
		})
		promises[i] = promise

		// add new entry; entries are gc'ed asynchronously
		e := &parseCacheEntry{
			key:      key,
			hash:     fh.Identity().Hash,
			promise:  promise,
			atime:    c.clock,
			walltime: walltime,
		}
		c.m[e.key] = e
		heap.Push(&c.lru, e)
	}

	// Invariant: the map and the LRU queue always hold the same entries.
	if len(c.m) != len(c.lru) {
		panic("map and LRU are inconsistent")
	}

	return promises, firstReadError
}
   255  
   256  func (c *parseCache) gc() {
   257  	const period = 10 * time.Second // gc period
   258  	timer := time.NewTicker(period)
   259  	defer timer.Stop()
   260  
   261  	for {
   262  		select {
   263  		case <-c.done:
   264  			return
   265  		case <-timer.C:
   266  		}
   267  
   268  		c.gcOnce()
   269  	}
   270  }
   271  
   272  func (c *parseCache) gcOnce() {
   273  	now := time.Now()
   274  	c.mu.Lock()
   275  	defer c.mu.Unlock()
   276  
   277  	for len(c.m) > parseCacheMinFiles {
   278  		e := heap.Pop(&c.lru).(*parseCacheEntry)
   279  		if now.Sub(e.walltime) >= c.expireAfter {
   280  			delete(c.m, e.key)
   281  		} else {
   282  			heap.Push(&c.lru, e)
   283  			break
   284  		}
   285  	}
   286  }
   287  
   288  // allocateSpace reserves the next n bytes of token.Pos space in the
   289  // cache.
   290  //
   291  // It returns the resulting file base, next base, and an offset FileSet to use
   292  // for parsing.
   293  func (c *parseCache) allocateSpace(size int) (int, int) {
   294  	c.mu.Lock()
   295  	defer c.mu.Unlock()
   296  
   297  	if c.nextBase == 0 {
   298  		// FileSet base values must be at least 1.
   299  		c.nextBase = 1
   300  	}
   301  	base := c.nextBase
   302  	c.nextBase += size + 1
   303  	return base, c.nextBase
   304  }
   305  
// parseFiles returns a ParsedGoFile for each file handle in fhs, in the
// requested parse mode.
//
// For parsed files that already exists in the cache, access time will be
// updated. For others, parseFiles will parse and store as many results in the
// cache as space allows.
//
// The token.File for each resulting parsed file will be added to the provided
// FileSet, using the tokeninternal.AddExistingFiles API. Consequently, the
// given fset should only be used in other APIs if its base is >=
// reservedForParsing.
//
// If parseFiles returns an error, it still returns a slice,
// but with a nil entry for each file that could not be parsed.
func (c *parseCache) parseFiles(ctx context.Context, fset *token.FileSet, mode parser.Mode, purgeFuncBodies bool, fhs ...file.Handle) ([]*ParsedGoFile, error) {
	pgfs := make([]*ParsedGoFile, len(fhs))

	// Temporary fall-back for 32-bit systems, where reservedForParsing is too
	// small to be viable. We don't actually support 32-bit systems, so this
	// workaround is only for tests and can be removed when we stop running
	// 32-bit TryBots for gopls.
	if bits.UintSize == 32 {
		// Parse directly into fset, bypassing the cache entirely.
		for i, fh := range fhs {
			var err error
			pgfs[i], err = parseGoImpl(ctx, fset, fh, mode, purgeFuncBodies)
			if err != nil {
				return pgfs, err
			}
		}
		return pgfs, nil
	}

	promises, firstErr := c.startParse(mode, purgeFuncBodies, fhs...)

	// Await all parsing.
	var g errgroup.Group
	g.SetLimit(runtime.GOMAXPROCS(-1)) // parsing is CPU-bound.
	for i, promise := range promises {
		if promise == nil {
			// File could not be read; firstErr already records the failure.
			continue
		}
		i := i
		promise := promise
		g.Go(func() error {
			result, err := promise.Get(ctx, nil)
			if err != nil {
				return err
			}
			pgfs[i] = result.(*ParsedGoFile)
			return nil
		})
	}

	// Preserve the first read error from startParse, if any; otherwise
	// report the first promise failure.
	if err := g.Wait(); err != nil && firstErr == nil {
		firstErr = err
	}

	// Augment the FileSet to map all parsed files.
	var tokenFiles []*token.File
	for _, pgf := range pgfs {
		if pgf == nil {
			continue
		}
		tokenFiles = append(tokenFiles, pgf.Tok)
	}
	tokeninternal.AddExistingFiles(fset, tokenFiles)

	// Sanity-check that fset resolves both endpoints of every added file
	// back to that file (see golang/go#59080).
	const debugIssue59080 = true
	if debugIssue59080 {
		for _, f := range tokenFiles {
			pos := token.Pos(f.Base())
			f2 := fset.File(pos)
			if f2 != f {
				panic(fmt.Sprintf("internal error: File(%d (start)) = %v, not %v", pos, f2, f))
			}
			pos = token.Pos(f.Base() + f.Size())
			f2 = fset.File(pos)
			if f2 != f {
				panic(fmt.Sprintf("internal error: File(%d (end)) = %v, not %v", pos, f2, f))
			}
		}
	}

	return pgfs, firstErr
}
   391  
   392  // -- priority queue boilerplate --
   393  
   394  // queue is a min-atime prority queue of cache entries.
   395  type queue []*parseCacheEntry
   396  
   397  func (q queue) Len() int { return len(q) }
   398  
   399  func (q queue) Less(i, j int) bool { return q[i].atime < q[j].atime }
   400  
   401  func (q queue) Swap(i, j int) {
   402  	q[i], q[j] = q[j], q[i]
   403  	q[i].lruIndex = i
   404  	q[j].lruIndex = j
   405  }
   406  
   407  func (q *queue) Push(x interface{}) {
   408  	e := x.(*parseCacheEntry)
   409  	e.lruIndex = len(*q)
   410  	*q = append(*q, e)
   411  }
   412  
   413  func (q *queue) Pop() interface{} {
   414  	last := len(*q) - 1
   415  	e := (*q)[last]
   416  	(*q)[last] = nil // aid GC
   417  	*q = (*q)[:last]
   418  	return e
   419  }