golang.org/x/tools/gopls@v0.15.3/internal/cache/fs_memoized.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package cache 6 7 import ( 8 "context" 9 "os" 10 "sync" 11 "time" 12 13 "golang.org/x/tools/gopls/internal/file" 14 "golang.org/x/tools/gopls/internal/protocol" 15 "golang.org/x/tools/internal/event" 16 "golang.org/x/tools/internal/event/tag" 17 "golang.org/x/tools/internal/robustio" 18 ) 19 20 // A memoizedFS is a file source that memoizes reads, to reduce IO. 21 type memoizedFS struct { 22 mu sync.Mutex 23 24 // filesByID maps existing file inodes to the result of a read. 25 // (The read may have failed, e.g. due to EACCES or a delete between stat+read.) 26 // Each slice is a non-empty list of aliases: different URIs. 27 filesByID map[robustio.FileID][]*diskFile 28 } 29 30 func newMemoizedFS() *memoizedFS { 31 return &memoizedFS{filesByID: make(map[robustio.FileID][]*diskFile)} 32 } 33 34 // A diskFile is a file in the filesystem, or a failure to read one. 35 // It implements the file.Source interface. 36 type diskFile struct { 37 uri protocol.DocumentURI 38 modTime time.Time 39 content []byte 40 hash file.Hash 41 err error 42 } 43 44 func (h *diskFile) URI() protocol.DocumentURI { return h.uri } 45 46 func (h *diskFile) Identity() file.Identity { 47 return file.Identity{ 48 URI: h.uri, 49 Hash: h.hash, 50 } 51 } 52 53 func (h *diskFile) SameContentsOnDisk() bool { return true } 54 func (h *diskFile) Version() int32 { return 0 } 55 func (h *diskFile) Content() ([]byte, error) { return h.content, h.err } 56 57 // ReadFile stats and (maybe) reads the file, updates the cache, and returns it. 58 func (fs *memoizedFS) ReadFile(ctx context.Context, uri protocol.DocumentURI) (file.Handle, error) { 59 id, mtime, err := robustio.GetFileID(uri.Path()) 60 if err != nil { 61 // file does not exist 62 return &diskFile{ 63 err: err, 64 uri: uri, 65 }, nil 66 } 67 68 // We check if the file has changed by comparing modification times. Notably, 69 // this is an imperfect heuristic as various systems have low resolution 70 // mtimes (as much as 1s on WSL or s390x builders), so we only cache 71 // filehandles if mtime is old enough to be reliable, meaning that we don't 72 // expect a subsequent write to have the same mtime. 73 // 74 // The coarsest mtime precision we've seen in practice is 1s, so consider 75 // mtime to be unreliable if it is less than 2s old. Capture this before 76 // doing anything else. 77 recentlyModified := time.Since(mtime) < 2*time.Second 78 79 fs.mu.Lock() 80 fhs, ok := fs.filesByID[id] 81 if ok && fhs[0].modTime.Equal(mtime) { 82 var fh *diskFile 83 // We have already seen this file and it has not changed. 84 for _, h := range fhs { 85 if h.uri == uri { 86 fh = h 87 break 88 } 89 } 90 // No file handle for this exact URI. Create an alias, but share content. 91 if fh == nil { 92 newFH := *fhs[0] 93 newFH.uri = uri 94 fh = &newFH 95 fhs = append(fhs, fh) 96 fs.filesByID[id] = fhs 97 } 98 fs.mu.Unlock() 99 return fh, nil 100 } 101 fs.mu.Unlock() 102 103 // Unknown file, or file has changed. Read (or re-read) it. 104 fh, err := readFile(ctx, uri, mtime) // ~25us 105 if err != nil { 106 return nil, err // e.g. cancelled (not: read failed) 107 } 108 109 fs.mu.Lock() 110 if !recentlyModified { 111 fs.filesByID[id] = []*diskFile{fh} 112 } else { 113 delete(fs.filesByID, id) 114 } 115 fs.mu.Unlock() 116 return fh, nil 117 } 118 119 // fileStats returns information about the set of files stored in fs. It is 120 // intended for debugging only. 121 func (fs *memoizedFS) fileStats() (files, largest, errs int) { 122 fs.mu.Lock() 123 defer fs.mu.Unlock() 124 125 files = len(fs.filesByID) 126 largest = 0 127 errs = 0 128 129 for _, files := range fs.filesByID { 130 rep := files[0] 131 if len(rep.content) > largest { 132 largest = len(rep.content) 133 } 134 if rep.err != nil { 135 errs++ 136 } 137 } 138 return files, largest, errs 139 } 140 141 // ioLimit limits the number of parallel file reads per process. 142 var ioLimit = make(chan struct{}, 128) 143 144 func readFile(ctx context.Context, uri protocol.DocumentURI, mtime time.Time) (*diskFile, error) { 145 select { 146 case ioLimit <- struct{}{}: 147 case <-ctx.Done(): 148 return nil, ctx.Err() 149 } 150 defer func() { <-ioLimit }() 151 152 ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Path())) 153 _ = ctx 154 defer done() 155 156 // It is possible that a race causes us to read a file with different file 157 // ID, or whose mtime differs from the given mtime. However, in these cases 158 // we expect the client to notify of a subsequent file change, and the file 159 // content should be eventually consistent. 160 content, err := os.ReadFile(uri.Path()) // ~20us 161 if err != nil { 162 content = nil // just in case 163 } 164 return &diskFile{ 165 modTime: mtime, 166 uri: uri, 167 content: content, 168 hash: file.HashOf(content), 169 err: err, 170 }, nil 171 }