github.com/elek/golangci-lint@v1.42.2-0.20211208090441-c05b7fcb3a9a/internal/cache/cache.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package cache implements a build artifact cache.
     6  //
     7  // This package is a slightly modified fork of Go's
     8  // cmd/go/internal/cache package.
     9  package cache
    10  
    11  import (
    12  	"bytes"
    13  	"crypto/sha256"
    14  	"encoding/hex"
    15  	"fmt"
    16  	"io"
    17  	"os"
    18  	"path/filepath"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/pkg/errors"
    24  
    25  	"github.com/elek/golangci-lint/internal/renameio"
    26  	"github.com/elek/golangci-lint/internal/robustio"
    27  )
    28  
// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
//
// It is a fixed-size array of HashSize bytes and is used as the key of the
// index ("-a") files in the cache directory.
type ActionID [HashSize]byte
    33  
// An OutputID is a cache output key, the hash of an output of a computation.
//
// It is a fixed-size array of HashSize bytes and is used as the key of the
// data ("-d") files in the cache directory.
type OutputID [HashSize]byte
    36  
// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
	dir string           // root directory holding the 256 two-hex-digit subdirectories
	now func() time.Time // clock used for mtime updates and trimming; Open sets it to time.Now
}
    42  
    43  // Open opens and returns the cache in the given directory.
    44  //
    45  // It is safe for multiple processes on a single machine to use the
    46  // same cache directory in a local file system simultaneously.
    47  // They will coordinate using operating system file locks and may
    48  // duplicate effort but will not corrupt the cache.
    49  //
    50  // However, it is NOT safe for multiple processes on different machines
    51  // to share a cache directory (for example, if the directory were stored
    52  // in a network file system). File locking is notoriously unreliable in
    53  // network file systems and may not suffice to protect the cache.
    54  //
    55  func Open(dir string) (*Cache, error) {
    56  	info, err := os.Stat(dir)
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  	if !info.IsDir() {
    61  		return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
    62  	}
    63  	for i := 0; i < 256; i++ {
    64  		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
    65  		if err := os.MkdirAll(name, 0744); err != nil {
    66  			return nil, err
    67  		}
    68  	}
    69  	c := &Cache{
    70  		dir: dir,
    71  		now: time.Now,
    72  	}
    73  	return c, nil
    74  }
    75  
    76  // fileName returns the name of the file corresponding to the given id.
    77  func (c *Cache) fileName(id [HashSize]byte, key string) string {
    78  	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
    79  }
    80  
// errMissing is the sentinel error reported when a cache entry (or the file
// backing it) does not exist. Callers outside the package detect it with
// IsErrMissing.
var errMissing = errors.New("cache entry not found")
    82  
    83  func IsErrMissing(err error) bool {
    84  	return errors.Cause(err) == errMissing
    85  }
    86  
const (
	// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
	//
	// hexSize is the number of hexadecimal digits needed to print one hash.
	// entrySize is the exact length in bytes of an action entry file: the
	// two-byte "v1" tag, the two hex hashes, the two 20-byte numeric fields,
	// the four separating spaces and the trailing newline.
	hexSize   = HashSize * 2
	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
)
    92  
// verify controls whether to run the cache in verify mode.
// In verify mode, the cache always returns errMissing from Get
// but then double-checks in Put that the data being written
// exactly matches any existing entry. This provides an easy
// way to detect program behavior that would have been different
// had the cache entry been returned from Get.
//
// verify is enabled by setting the environment variable
// GODEBUG=gocacheverify=1.
var verify = false

// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
// Unlike verify, it is never reset to false by initEnv.
var DebugTest = false

// init reads the debug settings from the environment once at package load.
func init() { initEnv() }
   108  
   109  func initEnv() {
   110  	verify = false
   111  	debugHash = false
   112  	debug := strings.Split(os.Getenv("GODEBUG"), ",")
   113  	for _, f := range debug {
   114  		if f == "gocacheverify=1" {
   115  			verify = true
   116  		}
   117  		if f == "gocachehash=1" {
   118  			debugHash = true
   119  		}
   120  		if f == "gocachetest=1" {
   121  			DebugTest = true
   122  		}
   123  	}
   124  }
   125  
   126  // Get looks up the action ID in the cache,
   127  // returning the corresponding output ID and file size, if any.
   128  // Note that finding an output ID does not guarantee that the
   129  // saved file for that output ID is still available.
   130  func (c *Cache) Get(id ActionID) (Entry, error) {
   131  	if verify {
   132  		return Entry{}, errMissing
   133  	}
   134  	return c.get(id)
   135  }
   136  
// Entry is the decoded content of an action entry in the cache index.
type Entry struct {
	OutputID OutputID  // hash of the output recorded for the action
	Size     int64     // recorded output size in bytes; never negative when returned by get
	Time     time.Time // recorded timestamp, stored in the entry file as Unix nanoseconds
}
   142  
   143  // get is Get but does not respect verify mode, so that Put can use it.
   144  func (c *Cache) get(id ActionID) (Entry, error) {
   145  	missing := func() (Entry, error) {
   146  		return Entry{}, errMissing
   147  	}
   148  	failed := func(err error) (Entry, error) {
   149  		return Entry{}, err
   150  	}
   151  	fileName := c.fileName(id, "a")
   152  	f, err := os.Open(fileName)
   153  	if err != nil {
   154  		if os.IsNotExist(err) {
   155  			return missing()
   156  		}
   157  		return failed(err)
   158  	}
   159  	defer f.Close()
   160  	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
   161  	if n, readErr := io.ReadFull(f, entry); n != entrySize || readErr != io.ErrUnexpectedEOF {
   162  		return failed(fmt.Errorf("read %d/%d bytes from %s with error %s", n, entrySize, fileName, readErr))
   163  	}
   164  	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
   165  		return failed(fmt.Errorf("bad data in %s", fileName))
   166  	}
   167  	eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
   168  	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
   169  	esize, entry := entry[1:1+20], entry[1+20:]
   170  	etime := entry[1 : 1+20]
   171  	var buf [HashSize]byte
   172  	if _, err = hex.Decode(buf[:], eid); err != nil || buf != id {
   173  		return failed(errors.Wrapf(err, "failed to hex decode eid data in %s", fileName))
   174  	}
   175  	if _, err = hex.Decode(buf[:], eout); err != nil {
   176  		return failed(errors.Wrapf(err, "failed to hex decode eout data in %s", fileName))
   177  	}
   178  	i := 0
   179  	for i < len(esize) && esize[i] == ' ' {
   180  		i++
   181  	}
   182  	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
   183  	if err != nil || size < 0 {
   184  		return failed(fmt.Errorf("failed to parse esize int from %s with error %s", fileName, err))
   185  	}
   186  	i = 0
   187  	for i < len(etime) && etime[i] == ' ' {
   188  		i++
   189  	}
   190  	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
   191  	if err != nil || tm < 0 {
   192  		return failed(fmt.Errorf("failed to parse etime int from %s with error %s", fileName, err))
   193  	}
   194  
   195  	if err = c.used(fileName); err != nil {
   196  		return failed(errors.Wrapf(err, "failed to mark %s as used", fileName))
   197  	}
   198  
   199  	return Entry{buf, size, time.Unix(0, tm)}, nil
   200  }
   201  
   202  // GetBytes looks up the action ID in the cache and returns
   203  // the corresponding output bytes.
   204  // GetBytes should only be used for data that can be expected to fit in memory.
   205  func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
   206  	entry, err := c.Get(id)
   207  	if err != nil {
   208  		return nil, entry, err
   209  	}
   210  	outputFile, err := c.OutputFile(entry.OutputID)
   211  	if err != nil {
   212  		return nil, entry, err
   213  	}
   214  
   215  	data, err := robustio.ReadFile(outputFile)
   216  	if err != nil {
   217  		return nil, entry, err
   218  	}
   219  
   220  	if sha256.Sum256(data) != entry.OutputID {
   221  		return nil, entry, errMissing
   222  	}
   223  	return data, entry, nil
   224  }
   225  
   226  // OutputFile returns the name of the cache file storing output with the given OutputID.
   227  func (c *Cache) OutputFile(out OutputID) (string, error) {
   228  	file := c.fileName(out, "d")
   229  	if err := c.used(file); err != nil {
   230  		return "", err
   231  	}
   232  	return file, nil
   233  }
   234  
// Time constants for cache expiration.
//
// We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
// to avoid causing many unnecessary inode updates. The mtimes therefore
// roughly reflect "time of last use" but may in fact be older by at most an hour.
//
// We scan the cache for entries to delete at most once per trimInterval (1 day).
//
// When we do scan the cache, we delete entries that have not been used for
// at least trimLimit (5 days). Statistics gathered from a month of usage by
// Go developers found that essentially all reuse of cached entries happened
// within 5 days of the previous reuse. See golang.org/issue/22990.
const (
	mtimeInterval = 1 * time.Hour      // minimum mtime age before used refreshes it
	trimInterval  = 24 * time.Hour     // minimum time between two full Trim scans
	trimLimit     = 5 * 24 * time.Hour // age (plus mtimeInterval slack) past which Trim deletes an entry
)
   252  
   253  // used makes a best-effort attempt to update mtime on file,
   254  // so that mtime reflects cache access time.
   255  //
   256  // Because the reflection only needs to be approximate,
   257  // and to reduce the amount of disk activity caused by using
   258  // cache entries, used only updates the mtime if the current
   259  // mtime is more than an hour old. This heuristic eliminates
   260  // nearly all of the mtime updates that would otherwise happen,
   261  // while still keeping the mtimes useful for cache trimming.
   262  func (c *Cache) used(file string) error {
   263  	info, err := os.Stat(file)
   264  	if err != nil {
   265  		if os.IsNotExist(err) {
   266  			return errMissing
   267  		}
   268  		return errors.Wrapf(err, "failed to stat file %s", file)
   269  	}
   270  
   271  	if c.now().Sub(info.ModTime()) < mtimeInterval {
   272  		return nil
   273  	}
   274  
   275  	if err := os.Chtimes(file, c.now(), c.now()); err != nil {
   276  		return errors.Wrapf(err, "failed to change time of file %s", file)
   277  	}
   278  
   279  	return nil
   280  }
   281  
   282  // Trim removes old cache entries that are likely not to be reused.
   283  func (c *Cache) Trim() {
   284  	now := c.now()
   285  
   286  	// We maintain in dir/trim.txt the time of the last completed cache trim.
   287  	// If the cache has been trimmed recently enough, do nothing.
   288  	// This is the common case.
   289  	data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt"))
   290  	t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
   291  	if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
   292  		return
   293  	}
   294  
   295  	// Trim each of the 256 subdirectories.
   296  	// We subtract an additional mtimeInterval
   297  	// to account for the imprecision of our "last used" mtimes.
   298  	cutoff := now.Add(-trimLimit - mtimeInterval)
   299  	for i := 0; i < 256; i++ {
   300  		subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
   301  		c.trimSubdir(subdir, cutoff)
   302  	}
   303  
   304  	// Ignore errors from here: if we don't write the complete timestamp, the
   305  	// cache will appear older than it is, and we'll trim it again next time.
   306  	_ = renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
   307  }
   308  
   309  // trimSubdir trims a single cache subdirectory.
   310  func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
   311  	// Read all directory entries from subdir before removing
   312  	// any files, in case removing files invalidates the file offset
   313  	// in the directory scan. Also, ignore error from f.Readdirnames,
   314  	// because we don't care about reporting the error and we still
   315  	// want to process any entries found before the error.
   316  	f, err := os.Open(subdir)
   317  	if err != nil {
   318  		return
   319  	}
   320  	names, _ := f.Readdirnames(-1)
   321  	f.Close()
   322  
   323  	for _, name := range names {
   324  		// Remove only cache entries (xxxx-a and xxxx-d).
   325  		if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
   326  			continue
   327  		}
   328  		entry := filepath.Join(subdir, name)
   329  		info, err := os.Stat(entry)
   330  		if err == nil && info.ModTime().Before(cutoff) {
   331  			os.Remove(entry)
   332  		}
   333  	}
   334  }
   335  
   336  // putIndexEntry adds an entry to the cache recording that executing the action
   337  // with the given id produces an output with the given output id (hash) and size.
   338  func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
   339  	// Note: We expect that for one reason or another it may happen
   340  	// that repeating an action produces a different output hash
   341  	// (for example, if the output contains a time stamp or temp dir name).
   342  	// While not ideal, this is also not a correctness problem, so we
   343  	// don't make a big deal about it. In particular, we leave the action
   344  	// cache entries writable specifically so that they can be overwritten.
   345  	//
   346  	// Setting GODEBUG=gocacheverify=1 does make a big deal:
   347  	// in verify mode we are double-checking that the cache entries
   348  	// are entirely reproducible. As just noted, this may be unrealistic
   349  	// in some cases but the check is also useful for shaking out real bugs.
   350  	entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
   351  
   352  	if verify && allowVerify {
   353  		old, err := c.get(id)
   354  		if err == nil && (old.OutputID != out || old.Size != size) {
   355  			// panic to show stack trace, so we can see what code is generating this cache entry.
   356  			msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
   357  			panic(msg)
   358  		}
   359  	}
   360  	file := c.fileName(id, "a")
   361  
   362  	// Copy file to cache directory.
   363  	mode := os.O_WRONLY | os.O_CREATE
   364  	f, err := os.OpenFile(file, mode, 0666)
   365  	if err != nil {
   366  		return err
   367  	}
   368  	_, err = f.WriteString(entry)
   369  	if err == nil {
   370  		// Truncate the file only *after* writing it.
   371  		// (This should be a no-op, but truncate just in case of previous corruption.)
   372  		//
   373  		// This differs from ioutil.WriteFile, which truncates to 0 *before* writing
   374  		// via os.O_TRUNC. Truncating only after writing ensures that a second write
   375  		// of the same content to the same file is idempotent, and does not — even
   376  		// temporarily! — undo the effect of the first write.
   377  		err = f.Truncate(int64(len(entry)))
   378  	}
   379  	if closeErr := f.Close(); err == nil {
   380  		err = closeErr
   381  	}
   382  	if err != nil {
   383  		// TODO(bcmills): This Remove potentially races with another go command writing to file.
   384  		// Can we eliminate it?
   385  		os.Remove(file)
   386  		return err
   387  	}
   388  	if err = os.Chtimes(file, c.now(), c.now()); err != nil { // mainly for tests
   389  		return errors.Wrapf(err, "failed to change time of file %s", file)
   390  	}
   391  
   392  	return nil
   393  }
   394  
   395  // Put stores the given output in the cache as the output for the action ID.
   396  // It may read file twice. The content of file must not change between the two passes.
   397  func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
   398  	return c.put(id, file, true)
   399  }
   400  
   401  // PutNoVerify is like Put but disables the verify check
   402  // when GODEBUG=goverifycache=1 is set.
   403  // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
   404  // like test output containing times and the like.
   405  func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
   406  	return c.put(id, file, false)
   407  }
   408  
   409  func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
   410  	// Compute output ID.
   411  	h := sha256.New()
   412  	if _, err := file.Seek(0, 0); err != nil {
   413  		return OutputID{}, 0, err
   414  	}
   415  	size, err := io.Copy(h, file)
   416  	if err != nil {
   417  		return OutputID{}, 0, err
   418  	}
   419  	var out OutputID
   420  	h.Sum(out[:0])
   421  
   422  	// Copy to cached output file (if not already present).
   423  	if err := c.copyFile(file, out, size); err != nil {
   424  		return out, size, err
   425  	}
   426  
   427  	// Add to cache index.
   428  	return out, size, c.putIndexEntry(id, out, size, allowVerify)
   429  }
   430  
   431  // PutBytes stores the given bytes in the cache as the output for the action ID.
   432  func (c *Cache) PutBytes(id ActionID, data []byte) error {
   433  	_, _, err := c.Put(id, bytes.NewReader(data))
   434  	return err
   435  }
   436  
   437  // copyFile copies file into the cache, expecting it to have the given
   438  // output ID and size, if that file is not present already.
   439  func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
   440  	name := c.fileName(out, "d")
   441  	info, err := os.Stat(name)
   442  	if err == nil && info.Size() == size {
   443  		// Check hash.
   444  		if f, openErr := os.Open(name); openErr == nil {
   445  			h := sha256.New()
   446  			if _, copyErr := io.Copy(h, f); copyErr != nil {
   447  				return errors.Wrap(copyErr, "failed to copy to sha256")
   448  			}
   449  
   450  			f.Close()
   451  			var out2 OutputID
   452  			h.Sum(out2[:0])
   453  			if out == out2 {
   454  				return nil
   455  			}
   456  		}
   457  		// Hash did not match. Fall through and rewrite file.
   458  	}
   459  
   460  	// Copy file to cache directory.
   461  	mode := os.O_RDWR | os.O_CREATE
   462  	if err == nil && info.Size() > size { // shouldn't happen but fix in case
   463  		mode |= os.O_TRUNC
   464  	}
   465  	f, err := os.OpenFile(name, mode, 0666)
   466  	if err != nil {
   467  		return err
   468  	}
   469  	defer f.Close()
   470  	if size == 0 {
   471  		// File now exists with correct size.
   472  		// Only one possible zero-length file, so contents are OK too.
   473  		// Early return here makes sure there's a "last byte" for code below.
   474  		return nil
   475  	}
   476  
   477  	// From here on, if any of the I/O writing the file fails,
   478  	// we make a best-effort attempt to truncate the file f
   479  	// before returning, to avoid leaving bad bytes in the file.
   480  
   481  	// Copy file to f, but also into h to double-check hash.
   482  	if _, err = file.Seek(0, 0); err != nil {
   483  		_ = f.Truncate(0)
   484  		return err
   485  	}
   486  	h := sha256.New()
   487  	w := io.MultiWriter(f, h)
   488  	if _, err = io.CopyN(w, file, size-1); err != nil {
   489  		_ = f.Truncate(0)
   490  		return err
   491  	}
   492  	// Check last byte before writing it; writing it will make the size match
   493  	// what other processes expect to find and might cause them to start
   494  	// using the file.
   495  	buf := make([]byte, 1)
   496  	if _, err = file.Read(buf); err != nil {
   497  		_ = f.Truncate(0)
   498  		return err
   499  	}
   500  	if n, wErr := h.Write(buf); n != len(buf) {
   501  		return fmt.Errorf("wrote to hash %d/%d bytes with error %s", n, len(buf), wErr)
   502  	}
   503  
   504  	sum := h.Sum(nil)
   505  	if !bytes.Equal(sum, out[:]) {
   506  		_ = f.Truncate(0)
   507  		return fmt.Errorf("file content changed underfoot")
   508  	}
   509  
   510  	// Commit cache file entry.
   511  	if _, err = f.Write(buf); err != nil {
   512  		_ = f.Truncate(0)
   513  		return err
   514  	}
   515  	if err = f.Close(); err != nil {
   516  		// Data might not have been written,
   517  		// but file may look like it is the right size.
   518  		// To be extra careful, remove cached file.
   519  		os.Remove(name)
   520  		return err
   521  	}
   522  	if err = os.Chtimes(name, c.now(), c.now()); err != nil { // mainly for tests
   523  		return errors.Wrapf(err, "failed to change time of file %s", name)
   524  	}
   525  
   526  	return nil
   527  }