github.com/sirkon/goproxy@v1.4.8/internal/cache/cache.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package cache implements a build artifact cache.
     6  package cache
     7  
     8  import (
     9  	"bytes"
    10  	"crypto/sha256"
    11  	"encoding/hex"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"io/ioutil"
    16  	"os"
    17  	"path/filepath"
    18  	"strconv"
    19  	"strings"
    20  	"time"
    21  )
    22  
// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
//
// Get and Put use an ActionID to look up and record the output of that
// computation.
type ActionID [HashSize]byte
    27  
// An OutputID is a cache output key, the hash of an output of a computation.
// The put method computes it as the SHA-256 of the stored content.
type OutputID [HashSize]byte
    30  
// A Cache is a package cache, backed by a file system directory tree.
// Use Open to create one.
type Cache struct {
	dir string           // root directory of the cache, as passed to Open
	log *os.File         // "log.txt" inside dir, opened for appending by Open
	now func() time.Time // clock used for log lines and mtimes; Open sets it to time.Now
}
    37  
    38  // Open opens and returns the cache in the given directory.
    39  //
    40  // It is safe for multiple processes on a single machine to use the
    41  // same cache directory in a local file system simultaneously.
    42  // They will coordinate using operating system file locks and may
    43  // duplicate effort but will not corrupt the cache.
    44  //
    45  // However, it is NOT safe for multiple processes on different machines
    46  // to share a cache directory (for example, if the directory were stored
    47  // in a network file system). File locking is notoriously unreliable in
    48  // network file systems and may not suffice to protect the cache.
    49  //
    50  func Open(dir string) (*Cache, error) {
    51  	info, err := os.Stat(dir)
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  	if !info.IsDir() {
    56  		return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
    57  	}
    58  	for i := 0; i < 256; i++ {
    59  		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
    60  		if err := os.MkdirAll(name, 0777); err != nil {
    61  			return nil, err
    62  		}
    63  	}
    64  	f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  	c := &Cache{
    69  		dir: dir,
    70  		log: f,
    71  		now: time.Now,
    72  	}
    73  	return c, nil
    74  }
    75  
    76  // fileName returns the name of the file corresponding to the given id.
    77  func (c *Cache) fileName(id [HashSize]byte, key string) string {
    78  	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
    79  }
    80  
// errMissing is the error returned by the lookup methods when no usable
// cache entry exists for the requested action ID.
var errMissing = errors.New("cache entry not found")
    82  
const (
	// An action entry file is a single line of exactly entrySize bytes:
	// "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
	//
	// hexSize is the length of one hex-encoded hash. entrySize adds up, in
	// order, the "v1" tag, the two hex hashes, the two 20-byte numeric
	// fields, the single spaces separating them, and the trailing newline.
	hexSize   = HashSize * 2
	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
)
    88  
// verify controls whether to run the cache in verify mode.
// In verify mode, the cache always returns errMissing from Get
// but then double-checks in Put that the data being written
// exactly matches any existing entry. This provides an easy
// way to detect program behavior that would have been different
// had the cache entry been returned from Get.
//
// verify is enabled by setting the environment variable
// GODEBUG=gocacheverify=1. It is assigned by initEnv, which runs
// at package initialization.
var verify = false
    99  
// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
// It is assigned by initEnv; the code visible in this file does not
// otherwise consult it.
var DebugTest = false
   102  
// init loads the GODEBUG-controlled settings at package initialization.
func init() { initEnv() }
   104  
   105  func initEnv() {
   106  	verify = false
   107  	debugHash = false
   108  	debug := strings.Split(os.Getenv("GODEBUG"), ",")
   109  	for _, f := range debug {
   110  		if f == "gocacheverify=1" {
   111  			verify = true
   112  		}
   113  		if f == "gocachehash=1" {
   114  			debugHash = true
   115  		}
   116  		if f == "gocachetest=1" {
   117  			DebugTest = true
   118  		}
   119  	}
   120  }
   121  
   122  // Get looks up the action ID in the cache,
   123  // returning the corresponding output ID and file size, if any.
   124  // Note that finding an output ID does not guarantee that the
   125  // saved file for that output ID is still available.
   126  func (c *Cache) Get(id ActionID) (Entry, error) {
   127  	if verify {
   128  		return Entry{}, errMissing
   129  	}
   130  	return c.get(id)
   131  }
   132  
// An Entry is the information the cache index records for an action ID,
// as returned by Get.
type Entry struct {
	OutputID OutputID  // hash identifying the cached output data
	Size     int64     // output size in bytes, as recorded in the index entry
	Time     time.Time // timestamp recorded when the index entry was written
}
   138  
   139  // get is Get but does not respect verify mode, so that Put can use it.
   140  func (c *Cache) get(id ActionID) (Entry, error) {
   141  	missing := func() (Entry, error) {
   142  		fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id)
   143  		return Entry{}, errMissing
   144  	}
   145  	f, err := os.Open(c.fileName(id, "a"))
   146  	if err != nil {
   147  		return missing()
   148  	}
   149  	defer f.Close()
   150  	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
   151  	if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
   152  		return missing()
   153  	}
   154  	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
   155  		return missing()
   156  	}
   157  	eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
   158  	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
   159  	esize, entry := entry[1:1+20], entry[1+20:]
   160  	etime, entry := entry[1:1+20], entry[1+20:]
   161  	var buf [HashSize]byte
   162  	if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
   163  		return missing()
   164  	}
   165  	if _, err := hex.Decode(buf[:], eout); err != nil {
   166  		return missing()
   167  	}
   168  	i := 0
   169  	for i < len(esize) && esize[i] == ' ' {
   170  		i++
   171  	}
   172  	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
   173  	if err != nil || size < 0 {
   174  		return missing()
   175  	}
   176  	i = 0
   177  	for i < len(etime) && etime[i] == ' ' {
   178  		i++
   179  	}
   180  	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
   181  	if err != nil || size < 0 {
   182  		return missing()
   183  	}
   184  
   185  	fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id)
   186  
   187  	c.used(c.fileName(id, "a"))
   188  
   189  	return Entry{buf, size, time.Unix(0, tm)}, nil
   190  }
   191  
   192  // GetFile looks up the action ID in the cache and returns
   193  // the name of the corresponding data file.
   194  func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
   195  	entry, err = c.Get(id)
   196  	if err != nil {
   197  		return "", Entry{}, err
   198  	}
   199  	file = c.OutputFile(entry.OutputID)
   200  	info, err := os.Stat(file)
   201  	if err != nil || info.Size() != entry.Size {
   202  		return "", Entry{}, errMissing
   203  	}
   204  	return file, entry, nil
   205  }
   206  
   207  // GetBytes looks up the action ID in the cache and returns
   208  // the corresponding output bytes.
   209  // GetBytes should only be used for data that can be expected to fit in memory.
   210  func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
   211  	entry, err := c.Get(id)
   212  	if err != nil {
   213  		return nil, entry, err
   214  	}
   215  	data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
   216  	if sha256.Sum256(data) != entry.OutputID {
   217  		return nil, entry, errMissing
   218  	}
   219  	return data, entry, nil
   220  }
   221  
// OutputFile returns the name of the cache file storing output with the given OutputID.
// It does not check that the file exists. As a side effect it marks the
// file as used, which may update the file's mtime (see used).
func (c *Cache) OutputFile(out OutputID) string {
	file := c.fileName(out, "d")
	c.used(file)
	return file
}
   228  
// Time constants for cache expiration.
//
// We set the mtime on a cache file on each use, but at most once per mtimeInterval (1 hour),
// to avoid causing many unnecessary inode updates. The mtimes therefore
// roughly reflect "time of last use" but may in fact be older by at most an hour.
//
// We scan the cache for entries to delete at most once per trimInterval (1 day).
//
// When we do scan the cache, we delete entries that have not been used for
// at least trimLimit (5 days). Statistics gathered from a month of usage by
// Go developers found that essentially all reuse of cached entries happened
// within 5 days of the previous reuse. See golang.org/issue/22990.
const (
	mtimeInterval = 1 * time.Hour
	trimInterval  = 24 * time.Hour
	trimLimit     = 5 * 24 * time.Hour
)
   246  
   247  // used makes a best-effort attempt to update mtime on file,
   248  // so that mtime reflects cache access time.
   249  //
   250  // Because the reflection only needs to be approximate,
   251  // and to reduce the amount of disk activity caused by using
   252  // cache entries, used only updates the mtime if the current
   253  // mtime is more than an hour old. This heuristic eliminates
   254  // nearly all of the mtime updates that would otherwise happen,
   255  // while still keeping the mtimes useful for cache trimming.
   256  func (c *Cache) used(file string) {
   257  	info, err := os.Stat(file)
   258  	if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
   259  		return
   260  	}
   261  	os.Chtimes(file, c.now(), c.now())
   262  }
   263  
   264  // Trim removes old cache entries that are likely not to be reused.
   265  func (c *Cache) Trim() {
   266  	now := c.now()
   267  
   268  	// We maintain in dir/trim.txt the time of the last completed cache trim.
   269  	// If the cache has been trimmed recently enough, do nothing.
   270  	// This is the common case.
   271  	data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt"))
   272  	t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
   273  	if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
   274  		return
   275  	}
   276  
   277  	// Trim each of the 256 subdirectories.
   278  	// We subtract an additional mtimeInterval
   279  	// to account for the imprecision of our "last used" mtimes.
   280  	cutoff := now.Add(-trimLimit - mtimeInterval)
   281  	for i := 0; i < 256; i++ {
   282  		subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
   283  		c.trimSubdir(subdir, cutoff)
   284  	}
   285  
   286  	ioutil.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666)
   287  }
   288  
   289  // trimSubdir trims a single cache subdirectory.
   290  func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
   291  	// Read all directory entries from subdir before removing
   292  	// any files, in case removing files invalidates the file offset
   293  	// in the directory scan. Also, ignore error from f.Readdirnames,
   294  	// because we don't care about reporting the error and we still
   295  	// want to process any entries found before the error.
   296  	f, err := os.Open(subdir)
   297  	if err != nil {
   298  		return
   299  	}
   300  	names, _ := f.Readdirnames(-1)
   301  	f.Close()
   302  
   303  	for _, name := range names {
   304  		// Remove only cache entries (xxxx-a and xxxx-d).
   305  		if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
   306  			continue
   307  		}
   308  		entry := filepath.Join(subdir, name)
   309  		info, err := os.Stat(entry)
   310  		if err == nil && info.ModTime().Before(cutoff) {
   311  			os.Remove(entry)
   312  		}
   313  	}
   314  }
   315  
   316  // putIndexEntry adds an entry to the cache recording that executing the action
   317  // with the given id produces an output with the given output id (hash) and size.
   318  func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
   319  	// Note: We expect that for one reason or another it may happen
   320  	// that repeating an action produces a different output hash
   321  	// (for example, if the output contains a time stamp or temp dir name).
   322  	// While not ideal, this is also not a correctness problem, so we
   323  	// don't make a big deal about it. In particular, we leave the action
   324  	// cache entries writable specifically so that they can be overwritten.
   325  	//
   326  	// Setting GODEBUG=gocacheverify=1 does make a big deal:
   327  	// in verify mode we are double-checking that the cache entries
   328  	// are entirely reproducible. As just noted, this may be unrealistic
   329  	// in some cases but the check is also useful for shaking out real bugs.
   330  	entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()))
   331  	if verify && allowVerify {
   332  		old, err := c.get(id)
   333  		if err == nil && (old.OutputID != out || old.Size != size) {
   334  			// panic to show stack trace, so we can see what code is generating this cache entry.
   335  			msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
   336  			panic(msg)
   337  		}
   338  	}
   339  	file := c.fileName(id, "a")
   340  	if err := ioutil.WriteFile(file, entry, 0666); err != nil {
   341  		os.Remove(file)
   342  		return err
   343  	}
   344  	os.Chtimes(file, c.now(), c.now()) // mainly for tests
   345  
   346  	fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size)
   347  	return nil
   348  }
   349  
// Put stores the given output in the cache as the output for the action ID.
// It may read file twice. The content of file must not change between the two passes.
// It returns the output ID (the SHA-256 of the content) and the number of bytes read.
// In verify mode, Put checks the new output against any existing entry for id.
func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	return c.put(id, file, true)
}
   355  
// PutNoVerify is like Put but disables the verify check
// when GODEBUG=gocacheverify=1 is set.
// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
// like test output containing times and the like.
func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	return c.put(id, file, false)
}
   363  
   364  func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
   365  	// Compute output ID.
   366  	h := sha256.New()
   367  	if _, err := file.Seek(0, 0); err != nil {
   368  		return OutputID{}, 0, err
   369  	}
   370  	size, err := io.Copy(h, file)
   371  	if err != nil {
   372  		return OutputID{}, 0, err
   373  	}
   374  	var out OutputID
   375  	h.Sum(out[:0])
   376  
   377  	// Copy to cached output file (if not already present).
   378  	if err := c.copyFile(file, out, size); err != nil {
   379  		return out, size, err
   380  	}
   381  
   382  	// Add to cache index.
   383  	return out, size, c.putIndexEntry(id, out, size, allowVerify)
   384  }
   385  
// PutBytes stores the given bytes in the cache as the output for the action ID.
// It is a convenience wrapper around Put that discards the returned
// output ID and size.
func (c *Cache) PutBytes(id ActionID, data []byte) error {
	_, _, err := c.Put(id, bytes.NewReader(data))
	return err
}
   391  
   392  // copyFile copies file into the cache, expecting it to have the given
   393  // output ID and size, if that file is not present already.
   394  func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
   395  	name := c.fileName(out, "d")
   396  	info, err := os.Stat(name)
   397  	if err == nil && info.Size() == size {
   398  		// Check hash.
   399  		if f, err := os.Open(name); err == nil {
   400  			h := sha256.New()
   401  			io.Copy(h, f)
   402  			f.Close()
   403  			var out2 OutputID
   404  			h.Sum(out2[:0])
   405  			if out == out2 {
   406  				return nil
   407  			}
   408  		}
   409  		// Hash did not match. Fall through and rewrite file.
   410  	}
   411  
   412  	// Copy file to cache directory.
   413  	mode := os.O_RDWR | os.O_CREATE
   414  	if err == nil && info.Size() > size { // shouldn't happen but fix in case
   415  		mode |= os.O_TRUNC
   416  	}
   417  	f, err := os.OpenFile(name, mode, 0666)
   418  	if err != nil {
   419  		return err
   420  	}
   421  	defer f.Close()
   422  	if size == 0 {
   423  		// File now exists with correct size.
   424  		// Only one possible zero-length file, so contents are OK too.
   425  		// Early return here makes sure there's a "last byte" for code below.
   426  		return nil
   427  	}
   428  
   429  	// From here on, if any of the I/O writing the file fails,
   430  	// we make a best-effort attempt to truncate the file f
   431  	// before returning, to avoid leaving bad bytes in the file.
   432  
   433  	// Copy file to f, but also into h to double-check hash.
   434  	if _, err := file.Seek(0, 0); err != nil {
   435  		f.Truncate(0)
   436  		return err
   437  	}
   438  	h := sha256.New()
   439  	w := io.MultiWriter(f, h)
   440  	if _, err := io.CopyN(w, file, size-1); err != nil {
   441  		f.Truncate(0)
   442  		return err
   443  	}
   444  	// Check last byte before writing it; writing it will make the size match
   445  	// what other processes expect to find and might cause them to start
   446  	// using the file.
   447  	buf := make([]byte, 1)
   448  	if _, err := file.Read(buf); err != nil {
   449  		f.Truncate(0)
   450  		return err
   451  	}
   452  	h.Write(buf)
   453  	sum := h.Sum(nil)
   454  	if !bytes.Equal(sum, out[:]) {
   455  		f.Truncate(0)
   456  		return fmt.Errorf("file content changed underfoot")
   457  	}
   458  
   459  	// Commit cache file entry.
   460  	if _, err := f.Write(buf); err != nil {
   461  		f.Truncate(0)
   462  		return err
   463  	}
   464  	if err := f.Close(); err != nil {
   465  		// Data might not have been written,
   466  		// but file may look like it is the right size.
   467  		// To be extra careful, remove cached file.
   468  		os.Remove(name)
   469  		return err
   470  	}
   471  	os.Chtimes(name, c.now(), c.now()) // mainly for tests
   472  
   473  	return nil
   474  }