golang.org/x/tools/gopls@v0.15.3/internal/cache/fs_memoized.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cache
     6  
     7  import (
     8  	"context"
     9  	"os"
    10  	"sync"
    11  	"time"
    12  
    13  	"golang.org/x/tools/gopls/internal/file"
    14  	"golang.org/x/tools/gopls/internal/protocol"
    15  	"golang.org/x/tools/internal/event"
    16  	"golang.org/x/tools/internal/event/tag"
    17  	"golang.org/x/tools/internal/robustio"
    18  )
    19  
    20  // A memoizedFS is a file source that memoizes reads, to reduce IO.
    21  type memoizedFS struct {
    22  	mu sync.Mutex
    23  
    24  	// filesByID maps existing file inodes to the result of a read.
    25  	// (The read may have failed, e.g. due to EACCES or a delete between stat+read.)
    26  	// Each slice is a non-empty list of aliases: different URIs.
    27  	filesByID map[robustio.FileID][]*diskFile
    28  }
    29  
    30  func newMemoizedFS() *memoizedFS {
    31  	return &memoizedFS{filesByID: make(map[robustio.FileID][]*diskFile)}
    32  }
    33  
    34  // A diskFile is a file in the filesystem, or a failure to read one.
    35  // It implements the file.Source interface.
    36  type diskFile struct {
    37  	uri     protocol.DocumentURI
    38  	modTime time.Time
    39  	content []byte
    40  	hash    file.Hash
    41  	err     error
    42  }
    43  
    44  func (h *diskFile) URI() protocol.DocumentURI { return h.uri }
    45  
    46  func (h *diskFile) Identity() file.Identity {
    47  	return file.Identity{
    48  		URI:  h.uri,
    49  		Hash: h.hash,
    50  	}
    51  }
    52  
    53  func (h *diskFile) SameContentsOnDisk() bool { return true }
    54  func (h *diskFile) Version() int32           { return 0 }
    55  func (h *diskFile) Content() ([]byte, error) { return h.content, h.err }
    56  
    57  // ReadFile stats and (maybe) reads the file, updates the cache, and returns it.
    58  func (fs *memoizedFS) ReadFile(ctx context.Context, uri protocol.DocumentURI) (file.Handle, error) {
    59  	id, mtime, err := robustio.GetFileID(uri.Path())
    60  	if err != nil {
    61  		// file does not exist
    62  		return &diskFile{
    63  			err: err,
    64  			uri: uri,
    65  		}, nil
    66  	}
    67  
    68  	// We check if the file has changed by comparing modification times. Notably,
    69  	// this is an imperfect heuristic as various systems have low resolution
    70  	// mtimes (as much as 1s on WSL or s390x builders), so we only cache
    71  	// filehandles if mtime is old enough to be reliable, meaning that we don't
    72  	// expect a subsequent write to have the same mtime.
    73  	//
    74  	// The coarsest mtime precision we've seen in practice is 1s, so consider
    75  	// mtime to be unreliable if it is less than 2s old. Capture this before
    76  	// doing anything else.
    77  	recentlyModified := time.Since(mtime) < 2*time.Second
    78  
    79  	fs.mu.Lock()
    80  	fhs, ok := fs.filesByID[id]
    81  	if ok && fhs[0].modTime.Equal(mtime) {
    82  		var fh *diskFile
    83  		// We have already seen this file and it has not changed.
    84  		for _, h := range fhs {
    85  			if h.uri == uri {
    86  				fh = h
    87  				break
    88  			}
    89  		}
    90  		// No file handle for this exact URI. Create an alias, but share content.
    91  		if fh == nil {
    92  			newFH := *fhs[0]
    93  			newFH.uri = uri
    94  			fh = &newFH
    95  			fhs = append(fhs, fh)
    96  			fs.filesByID[id] = fhs
    97  		}
    98  		fs.mu.Unlock()
    99  		return fh, nil
   100  	}
   101  	fs.mu.Unlock()
   102  
   103  	// Unknown file, or file has changed. Read (or re-read) it.
   104  	fh, err := readFile(ctx, uri, mtime) // ~25us
   105  	if err != nil {
   106  		return nil, err // e.g. cancelled (not: read failed)
   107  	}
   108  
   109  	fs.mu.Lock()
   110  	if !recentlyModified {
   111  		fs.filesByID[id] = []*diskFile{fh}
   112  	} else {
   113  		delete(fs.filesByID, id)
   114  	}
   115  	fs.mu.Unlock()
   116  	return fh, nil
   117  }
   118  
   119  // fileStats returns information about the set of files stored in fs. It is
   120  // intended for debugging only.
   121  func (fs *memoizedFS) fileStats() (files, largest, errs int) {
   122  	fs.mu.Lock()
   123  	defer fs.mu.Unlock()
   124  
   125  	files = len(fs.filesByID)
   126  	largest = 0
   127  	errs = 0
   128  
   129  	for _, files := range fs.filesByID {
   130  		rep := files[0]
   131  		if len(rep.content) > largest {
   132  			largest = len(rep.content)
   133  		}
   134  		if rep.err != nil {
   135  			errs++
   136  		}
   137  	}
   138  	return files, largest, errs
   139  }
   140  
   141  // ioLimit limits the number of parallel file reads per process.
   142  var ioLimit = make(chan struct{}, 128)
   143  
   144  func readFile(ctx context.Context, uri protocol.DocumentURI, mtime time.Time) (*diskFile, error) {
   145  	select {
   146  	case ioLimit <- struct{}{}:
   147  	case <-ctx.Done():
   148  		return nil, ctx.Err()
   149  	}
   150  	defer func() { <-ioLimit }()
   151  
   152  	ctx, done := event.Start(ctx, "cache.readFile", tag.File.Of(uri.Path()))
   153  	_ = ctx
   154  	defer done()
   155  
   156  	// It is possible that a race causes us to read a file with different file
   157  	// ID, or whose mtime differs from the given mtime. However, in these cases
   158  	// we expect the client to notify of a subsequent file change, and the file
   159  	// content should be eventually consistent.
   160  	content, err := os.ReadFile(uri.Path()) // ~20us
   161  	if err != nil {
   162  		content = nil // just in case
   163  	}
   164  	return &diskFile{
   165  		modTime: mtime,
   166  		uri:     uri,
   167  		content: content,
   168  		hash:    file.HashOf(content),
   169  		err:     err,
   170  	}, nil
   171  }