// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cache

import (
	"bytes"
	"container/heap"
	"context"
	"fmt"
	"go/parser"
	"go/token"
	"math/bits"
	"runtime"
	"sync"
	"time"

	"golang.org/x/sync/errgroup"
	"golang.org/x/tools/gopls/internal/file"
	"golang.org/x/tools/gopls/internal/cache/parsego"
	"golang.org/x/tools/gopls/internal/protocol"
	"golang.org/x/tools/internal/memoize"
	"golang.org/x/tools/internal/tokeninternal"
)

// This file contains an implementation of an LRU parse cache, that offsets the
// base token.Pos value of each cached file so that they may be later described
// by a single dedicated FileSet.
//
// This is achieved by tracking a monotonic offset in the token.Pos space, that
// is incremented before parsing to allow room for the resulting parsed file.

// reservedForParsing defines the room in the token.Pos space reserved for
// cached parsed files.
//
// Files parsed through the parseCache are guaranteed not to have overlapping
// spans: the parseCache tracks a monotonic base for newly parsed files.
//
// By offsetting the initial base of a FileSet, we can allow other operations
// accepting the FileSet (such as the gcimporter) to add new files using the
// normal FileSet APIs without overlapping with cached parsed files.
//
// Note that 1<<60 represents an exabyte of parsed data, more than any gopls
// process can ever parse.
//
// On 32-bit systems we don't cache parse results (see parseFiles).
const reservedForParsing = 1 << (bits.UintSize - 4)

// fileSetWithBase returns a new token.FileSet with Base() equal to the
// requested base.
//
// If base < 1, fileSetWithBase panics.
// (1 is the smallest permitted FileSet base).
func fileSetWithBase(base int) *token.FileSet {
	fset := token.NewFileSet()
	if base > 1 {
		// Seed the FileSet with a zero-length placeholder file so that its
		// Base() advances to the requested value. The placeholder is never
		// read back; it exists purely to move the base.
		//
		// A FileSet reserves one byte of padding after every file, so
		// registering an empty file at base-1 leaves the next available
		// base at exactly base.
		fset.AddFile("", base-1, 0)
	}
	if got := fset.Base(); got != base {
		panic("unexpected FileSet.Base")
	}
	return fset
}

const (
	// parseCacheMinFiles is the number of recent files that are always
	// retained, regardless of wall-clock age, so that a user who resumes
	// editing after a pause still gets warm-cache behavior.
	parseCacheMinFiles = 100
)

// parsePadding is extra token.Pos space reserved beyond the source length, to
// absorb growth (such as appended missing braces) introduced by fixAST.
//
// This mitigates a chicken-and-egg problem: the base offset for a file must
// be fixed before parsing begins, yet src fixups may enlarge the parsed
// content (and hence the span it occupies, and the offsets of later files).
//
// If a file still overflows its reservation, the only recourse is to
// re-parse it in a freshly sized reservation; generous padding makes that
// slow path rare.
//
// Mutable so tests can force the slow path.
var parsePadding = 1000 // mutable for testing

// A parseCache holds recently accessed parsed Go files. After new files are
// stored, older files may be evicted from the cache via garbage collection.
//
// The parseCache.parseFiles method exposes a batch API for parsing (and
// caching) multiple files. This is necessary for type-checking, where files
// must be parsed in a common fileset.
// parseCache is the LRU cache described at the top of this file: a bounded
// set of parsed Go files, keyed by (URI, parse mode, purgeFuncBodies), with
// time-based asynchronous eviction.
type parseCache struct {
	expireAfter time.Duration // interval at which to collect expired cache entries
	done        chan struct{} // closed when GC is stopped

	mu       sync.Mutex                    // guards all fields below
	m        map[parseKey]*parseCacheEntry // live entries; kept in sync with lru
	lru      queue                         // min-atime priority queue of *parseCacheEntry
	clock    uint64                        // clock time, incremented when the cache is updated
	nextBase int                           // base offset for the next parsed file
}

// newParseCache creates a new parse cache and starts a goroutine to garbage
// collect entries whose age is at least expireAfter.
//
// Callers must call parseCache.stop when the parse cache is no longer in use.
func newParseCache(expireAfter time.Duration) *parseCache {
	c := &parseCache{
		expireAfter: expireAfter,
		m:           make(map[parseKey]*parseCacheEntry),
		done:        make(chan struct{}),
	}
	go c.gc()
	return c
}

// stop causes the GC goroutine to exit.
//
// stop must be called at most once (a second call would panic on the closed
// channel).
func (c *parseCache) stop() {
	close(c.done)
}

// parseKey uniquely identifies a parsed Go file.
//
// Note that the file content hash is not part of the key: stale versions are
// evicted on lookup (see startParse).
type parseKey struct {
	uri             protocol.DocumentURI
	mode            parser.Mode
	purgeFuncBodies bool
}

// parseCacheEntry is a single cached parse result, addressable both by key
// (via parseCache.m) and by recency (via parseCache.lru).
type parseCacheEntry struct {
	key      parseKey
	hash     file.Hash        // content hash of the version that was parsed
	promise  *memoize.Promise // memoize.Promise[*ParsedGoFile]
	atime    uint64           // clock time of last access, for use in LRU sorting
	walltime time.Time        // actual time of last access, for use in time-based eviction; too coarse for LRU on some systems
	lruIndex int              // owned by the queue implementation
}

// startParse prepares a parsing pass, creating new promises in the cache for
// any cache misses.
//
// The resulting slice has an entry for every given file handle, though some
// entries may be nil if there was an error reading the file (in which case the
// resulting error will be non-nil).
func (c *parseCache) startParse(mode parser.Mode, purgeFuncBodies bool, fhs ...file.Handle) ([]*memoize.Promise, error) {
	c.mu.Lock()
	defer c.mu.Unlock()

	// Any parsing pass increments the clock, as we'll update access times.
	// (technically, if fhs is empty this isn't necessary, but that's a degenerate case).
	//
	// All entries parsed from a single call get the same access time.
	c.clock++
	walltime := time.Now()

	// Read file data and collect cacheable files.
	var (
		data           = make([][]byte, len(fhs)) // file content for each readable file
		promises       = make([]*memoize.Promise, len(fhs))
		firstReadError error // first error from fh.Read, or nil
	)
	for i, fh := range fhs {
		content, err := fh.Content()
		if err != nil {
			// Record only the first read error; later handles are still
			// processed so callers get promises for every readable file.
			if firstReadError == nil {
				firstReadError = err
			}
			continue
		}
		// NOTE(review): data is written here but not read below — presumably
		// it pins content for the duration of this pass; confirm intent.
		data[i] = content

		key := parseKey{
			uri:             fh.URI(),
			mode:            mode,
			purgeFuncBodies: purgeFuncBodies,
		}

		if e, ok := c.m[key]; ok {
			if e.hash == fh.Identity().Hash { // cache hit
				// Refresh both recency measures and restore the heap
				// invariant at the entry's position.
				e.atime = c.clock
				e.walltime = walltime
				heap.Fix(&c.lru, e.lruIndex)
				promises[i] = e.promise
				continue
			} else {
				// An entry exists for this key, but for a different version
				// of the file. Evict it; a fresh promise is created below.
				delete(c.m, e.key)
				heap.Remove(&c.lru, e.lruIndex)
			}
		}

		uri := fh.URI()
		promise := memoize.NewPromise("parseCache.parse", func(ctx context.Context, _ interface{}) interface{} {
			// Allocate 2*len(content)+parsePadding to allow for re-parsing once
			// inside of parseGoSrc without exceeding the allocated space.
			//
			// Note: allocateSpace runs when the promise executes (not now),
			// and takes c.mu itself.
			base, nextBase := c.allocateSpace(2*len(content) + parsePadding)

			pgf, fixes1 := parsego.Parse(ctx, fileSetWithBase(base), uri, content, mode, purgeFuncBodies)
			file := pgf.Tok
			// +1 accounts for the FileSet's one byte of padding after the file.
			if file.Base()+file.Size()+1 > nextBase {
				// The parsed file exceeds its allocated space, likely due to multiple
				// passes of src fixing. In this case, we have no choice but to re-do
				// the operation with the correct size.
				//
				// Even though the final successful parse requires only file.Size()
				// bytes of Pos space, we need to accommodate all the missteps to get
				// there, as parseGoSrc will repeat them.
				actual := file.Base() + file.Size() - base // actual size consumed, after re-parsing
				base2, nextBase2 := c.allocateSpace(actual)
				pgf2, fixes2 := parsego.Parse(ctx, fileSetWithBase(base2), uri, content, mode, purgeFuncBodies)

				// In golang/go#59097 we observed that this panic condition was hit.
				// One bug was found and fixed, but record more information here in
				// case there is still a bug here.
				if end := pgf2.Tok.Base() + pgf2.Tok.Size(); end != nextBase2-1 {
					var errBuf bytes.Buffer
					fmt.Fprintf(&errBuf, "internal error: non-deterministic parsing result:\n")
					fmt.Fprintf(&errBuf, "\t%q (%d-%d) does not span %d-%d\n", uri, pgf2.Tok.Base(), base2, end, nextBase2-1)
					fmt.Fprintf(&errBuf, "\tfirst %q (%d-%d)\n", pgf.URI, pgf.Tok.Base(), pgf.Tok.Base()+pgf.Tok.Size())
					fmt.Fprintf(&errBuf, "\tfirst space: (%d-%d), second space: (%d-%d)\n", base, nextBase, base2, nextBase2)
					fmt.Fprintf(&errBuf, "\tfirst mode: %v, second mode: %v", pgf.Mode, pgf2.Mode)
					fmt.Fprintf(&errBuf, "\tfirst err: %v, second err: %v", pgf.ParseErr, pgf2.ParseErr)
					fmt.Fprintf(&errBuf, "\tfirst fixes: %v, second fixes: %v", fixes1, fixes2)
					panic(errBuf.String())
				}
				pgf = pgf2
			}
			return pgf
		})
		promises[i] = promise

		// add new entry; entries are gc'ed asynchronously
		e := &parseCacheEntry{
			key:      key,
			hash:     fh.Identity().Hash,
			promise:  promise,
			atime:    c.clock,
			walltime: walltime,
		}
		c.m[e.key] = e
		heap.Push(&c.lru, e)
	}

	// Invariant: the map and the LRU heap always track the same entries.
	if len(c.m) != len(c.lru) {
		panic("map and LRU are inconsistent")
	}

	return promises, firstReadError
}

// gc runs gcOnce periodically until c.done is closed (see stop).
func (c *parseCache) gc() {
	const period = 10 * time.Second // gc period
	timer := time.NewTicker(period)
	defer timer.Stop()

	for {
		select {
		case <-c.done:
			return
		case <-timer.C:
		}

		c.gcOnce()
	}
}

// gcOnce evicts least-recently-used entries whose wall-clock age is at least
// c.expireAfter, but never shrinks the cache below parseCacheMinFiles
// entries.
func (c *parseCache) gcOnce() {
	now := time.Now()
	c.mu.Lock()
	defer c.mu.Unlock()

	for len(c.m) > parseCacheMinFiles {
		e := heap.Pop(&c.lru).(*parseCacheEntry)
		if now.Sub(e.walltime) >= c.expireAfter {
			delete(c.m, e.key)
		} else {
			// The oldest remaining entry is still fresh, so all others are
			// too; put it back and stop.
			heap.Push(&c.lru, e)
			break
		}
	}
}

// allocateSpace reserves the next size bytes of token.Pos space in the
// cache, returning the resulting file base and the next unreserved base.
//
// (The extra +1 accounts for the one byte of padding a FileSet keeps between
// files.)
func (c *parseCache) allocateSpace(size int) (int, int) {
	c.mu.Lock()
	defer c.mu.Unlock()

	if c.nextBase == 0 {
		// FileSet base values must be at least 1.
		c.nextBase = 1
	}
	base := c.nextBase
	c.nextBase += size + 1
	return base, c.nextBase
}

// parseFiles returns a ParsedGoFile for each file handle in fhs, in the
// requested parse mode.
//
// For parsed files that already exist in the cache, access time will be
// updated. For others, parseFiles will parse and store as many results in the
// cache as space allows.
//
// The token.File for each resulting parsed file will be added to the provided
// FileSet, using the tokeninternal.AddExistingFiles API. Consequently, the
// given fset should only be used in other APIs if its base is >=
// reservedForParsing.
//
// If parseFiles returns an error, it still returns a slice,
// but with a nil entry for each file that could not be parsed.
func (c *parseCache) parseFiles(ctx context.Context, fset *token.FileSet, mode parser.Mode, purgeFuncBodies bool, fhs ...file.Handle) ([]*ParsedGoFile, error) {
	pgfs := make([]*ParsedGoFile, len(fhs))

	// Temporary fall-back for 32-bit systems, where reservedForParsing is too
	// small to be viable. We don't actually support 32-bit systems, so this
	// workaround is only for tests and can be removed when we stop running
	// 32-bit TryBots for gopls.
	if bits.UintSize == 32 {
		// Parse directly into the caller's fset, bypassing the cache.
		for i, fh := range fhs {
			var err error
			pgfs[i], err = parseGoImpl(ctx, fset, fh, mode, purgeFuncBodies)
			if err != nil {
				return pgfs, err
			}
		}
		return pgfs, nil
	}

	// Create or refresh cache promises; promises[i] is nil iff fhs[i] was
	// unreadable.
	promises, firstErr := c.startParse(mode, purgeFuncBodies, fhs...)

	// Await all parsing.
	var g errgroup.Group
	g.SetLimit(runtime.GOMAXPROCS(-1)) // parsing is CPU-bound.
	for i, promise := range promises {
		if promise == nil {
			continue
		}
		// Shadow the loop variables for capture by the closure below
		// (required before Go 1.22 loop-variable semantics).
		i := i
		promise := promise
		g.Go(func() error {
			result, err := promise.Get(ctx, nil)
			if err != nil {
				return err
			}
			pgfs[i] = result.(*ParsedGoFile)
			return nil
		})
	}

	// Preserve the first read error from startParse, if any, over later
	// parse errors.
	if err := g.Wait(); err != nil && firstErr == nil {
		firstErr = err
	}

	// Augment the FileSet to map all parsed files.
	var tokenFiles []*token.File
	for _, pgf := range pgfs {
		if pgf == nil {
			continue
		}
		tokenFiles = append(tokenFiles, pgf.Tok)
	}
	tokeninternal.AddExistingFiles(fset, tokenFiles)

	// Sanity-check that every added file is resolvable from fset at both its
	// start and end positions (see golang/go#59080).
	const debugIssue59080 = true
	if debugIssue59080 {
		for _, f := range tokenFiles {
			pos := token.Pos(f.Base())
			f2 := fset.File(pos)
			if f2 != f {
				panic(fmt.Sprintf("internal error: File(%d (start)) = %v, not %v", pos, f2, f))
			}
			pos = token.Pos(f.Base() + f.Size())
			f2 = fset.File(pos)
			if f2 != f {
				panic(fmt.Sprintf("internal error: File(%d (end)) = %v, not %v", pos, f2, f))
			}
		}
	}

	return pgfs, firstErr
}

// -- priority queue boilerplate --

// queue is a min-atime prority queue of cache entries.
//
// It implements heap.Interface; each entry's lruIndex field records its
// current position so that Fix/Remove can address it in O(log n).
type queue []*parseCacheEntry

func (q queue) Len() int { return len(q) }

// Less orders entries by logical access time, oldest first.
func (q queue) Less(i, j int) bool { return q[i].atime < q[j].atime }

// Swap exchanges two entries and keeps their lruIndex fields in sync.
func (q queue) Swap(i, j int) {
	q[i], q[j] = q[j], q[i]
	q[i].lruIndex = i
	q[j].lruIndex = j
}

// Push appends x; called only via heap.Push.
func (q *queue) Push(x interface{}) {
	e := x.(*parseCacheEntry)
	e.lruIndex = len(*q)
	*q = append(*q, e)
}

// Pop removes and returns the last element; called only via heap.Pop (which
// has already swapped the minimum to the end).
func (q *queue) Pop() interface{} {
	last := len(*q) - 1
	e := (*q)[last]
	(*q)[last] = nil // aid GC
	*q = (*q)[:last]
	return e
}