github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/cmd/go/internal/cache/cache.go (about)

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package cache implements a build artifact cache.
     6  package cache
     7  
     8  import (
     9  	"bytes"
    10  	"crypto/sha256"
    11  	"encoding/hex"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"io/ioutil"
    16  	"os"
    17  	"path/filepath"
    18  	"strconv"
    19  	"strings"
    20  	"time"
    21  )
    22  
// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
// It is the key under which Put records an output and by which Get finds it.
type ActionID [HashSize]byte
    27  
// An OutputID is a cache output key, the hash of an output of a computation.
// Put computes it as the SHA-256 of the stored content, and OutputFile maps
// it to the name of the file holding that content.
type OutputID [HashSize]byte
    30  
// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
	// dir is the root directory of the cache.
	dir string
	// log is the activity log (log.txt in dir, opened for appending);
	// lookups and stores write "get", "miss", and "put" lines to it.
	log *os.File
	// now supplies the current time for log timestamps and file mtimes.
	// Open sets it to time.Now.
	now func() time.Time
}
    37  
    38  // Open opens and returns the cache in the given directory.
    39  //
    40  // It is safe for multiple processes on a single machine to use the
    41  // same cache directory in a local file system simultaneously.
    42  // They will coordinate using operating system file locks and may
    43  // duplicate effort but will not corrupt the cache.
    44  //
    45  // However, it is NOT safe for multiple processes on different machines
    46  // to share a cache directory (for example, if the directory were stored
    47  // in a network file system). File locking is notoriously unreliable in
    48  // network file systems and may not suffice to protect the cache.
    49  //
    50  func Open(dir string) (*Cache, error) {
    51  	info, err := os.Stat(dir)
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  	if !info.IsDir() {
    56  		return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
    57  	}
    58  	for i := 0; i < 256; i++ {
    59  		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
    60  		if err := os.MkdirAll(name, 0777); err != nil {
    61  			return nil, err
    62  		}
    63  	}
    64  	f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  	c := &Cache{
    69  		dir: dir,
    70  		log: f,
    71  		now: time.Now,
    72  	}
    73  	return c, nil
    74  }
    75  
    76  // fileName returns the name of the file corresponding to the given id.
    77  func (c *Cache) fileName(id [HashSize]byte, key string) string {
    78  	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
    79  }
    80  
// errMissing reports that the cache holds no usable entry for the
// requested action ID.
var errMissing = errors.New("cache entry not found")
    82  
const (
	// An action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes>\n".
	// hexSize is the length of one hex-encoded hash. entrySize is the exact
	// length of a whole entry: "v1", space, id, space, out, space,
	// 20-byte size, newline.
	hexSize   = HashSize * 2
	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1
)
    88  
// verify controls whether to run the cache in verify mode.
// In verify mode, the cache always returns errMissing from Get
// but then double-checks in Put that the data being written
// exactly matches any existing entry. This provides an easy
// way to detect program behavior that would have been different
// had the cache entry been returned from Get.
//
// verify is enabled by setting the environment variable
// GODEBUG=gocacheverify=1. The value is assigned by initEnv.
var verify = false
    99  
// init loads the package's debug settings from the environment by
// calling initEnv.
func init() { initEnv() }
   101  
   102  func initEnv() {
   103  	verify = false
   104  	debugHash = false
   105  	debug := strings.Split(os.Getenv("GODEBUG"), ",")
   106  	for _, f := range debug {
   107  		if f == "gocacheverify=1" {
   108  			verify = true
   109  		}
   110  		if f == "gocachehash=1" {
   111  			debugHash = true
   112  		}
   113  	}
   114  }
   115  
   116  // Get looks up the action ID in the cache,
   117  // returning the corresponding output ID and file size, if any.
   118  // Note that finding an output ID does not guarantee that the
   119  // saved file for that output ID is still available.
   120  func (c *Cache) Get(id ActionID) (OutputID, int64, error) {
   121  	if verify {
   122  		return OutputID{}, 0, errMissing
   123  	}
   124  	return c.get(id)
   125  }
   126  
   127  // get is Get but does not respect verify mode, so that Put can use it.
   128  func (c *Cache) get(id ActionID) (OutputID, int64, error) {
   129  	missing := func() (OutputID, int64, error) {
   130  		fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id)
   131  		return OutputID{}, 0, errMissing
   132  	}
   133  	f, err := os.Open(c.fileName(id, "a"))
   134  	if err != nil {
   135  		return missing()
   136  	}
   137  	defer f.Close()
   138  	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
   139  	if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
   140  		return missing()
   141  	}
   142  	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+64] != ' ' || entry[entrySize-1] != '\n' {
   143  		return missing()
   144  	}
   145  	eid, eout, esize := entry[3:3+hexSize], entry[3+hexSize+1:3+hexSize+1+hexSize], entry[3+hexSize+1+hexSize+1:entrySize-1]
   146  	var buf [HashSize]byte
   147  	if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
   148  		return missing()
   149  	}
   150  	if _, err := hex.Decode(buf[:], eout); err != nil {
   151  		return missing()
   152  	}
   153  	i := 0
   154  	for i < len(esize) && esize[i] == ' ' {
   155  		i++
   156  	}
   157  	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
   158  	if err != nil || size < 0 {
   159  		return missing()
   160  	}
   161  
   162  	fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id)
   163  
   164  	// Best-effort attempt to update mtime on file,
   165  	// so that mtime reflects cache access time.
   166  	os.Chtimes(c.fileName(id, "a"), c.now(), c.now())
   167  
   168  	return buf, size, nil
   169  }
   170  
   171  // GetBytes looks up the action ID in the cache and returns
   172  // the corresponding output bytes.
   173  // GetBytes should only be used for data that can be expected to fit in memory.
   174  func (c *Cache) GetBytes(id ActionID) ([]byte, error) {
   175  	out, _, err := c.Get(id)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  	data, _ := ioutil.ReadFile(c.OutputFile(out))
   180  	if sha256.Sum256(data) != out {
   181  		return nil, errMissing
   182  	}
   183  	return data, nil
   184  }
   185  
   186  // OutputFile returns the name of the cache file storing output with the given OutputID.
   187  func (c *Cache) OutputFile(out OutputID) string {
   188  	file := c.fileName(out, "d")
   189  
   190  	// Best-effort attempt to update mtime on file,
   191  	// so that mtime reflects cache access time.
   192  	os.Chtimes(file, c.now(), c.now())
   193  
   194  	return file
   195  }
   196  
   197  // putIndexEntry adds an entry to the cache recording that executing the action
   198  // with the given id produces an output with the given output id (hash) and size.
   199  func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
   200  	// Note: We expect that for one reason or another it may happen
   201  	// that repeating an action produces a different output hash
   202  	// (for example, if the output contains a time stamp or temp dir name).
   203  	// While not ideal, this is also not a correctness problem, so we
   204  	// don't make a big deal about it. In particular, we leave the action
   205  	// cache entries writable specifically so that they can be overwritten.
   206  	//
   207  	// Setting GODEBUG=gocacheverify=1 does make a big deal:
   208  	// in verify mode we are double-checking that the cache entries
   209  	// are entirely reproducible. As just noted, this may be unrealistic
   210  	// in some cases but the check is also useful for shaking out real bugs.
   211  	entry := []byte(fmt.Sprintf("v1 %x %x %20d\n", id, out, size))
   212  	if verify && allowVerify {
   213  		oldOut, oldSize, err := c.get(id)
   214  		if err == nil && (oldOut != out || oldSize != size) {
   215  			fmt.Fprintf(os.Stderr, "go: internal cache error: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d\n", id, reverseHash(id), out, size, oldOut, oldSize)
   216  			// panic to show stack trace, so we can see what code is generating this cache entry.
   217  			panic("cache verify failed")
   218  		}
   219  	}
   220  	file := c.fileName(id, "a")
   221  	if err := ioutil.WriteFile(file, entry, 0666); err != nil {
   222  		os.Remove(file)
   223  		return err
   224  	}
   225  
   226  	fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size)
   227  	return nil
   228  }
   229  
   230  // Put stores the given output in the cache as the output for the action ID.
   231  // It may read file twice. The content of file must not change between the two passes.
   232  func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
   233  	return c.put(id, file, true)
   234  }
   235  
   236  // PutNoVerify is like Put but disables the verify check
   237  // when GODEBUG=goverifycache=1 is set.
   238  // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
   239  // like test output containing times and the like.
   240  func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
   241  	return c.put(id, file, false)
   242  }
   243  
   244  func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
   245  	// Compute output ID.
   246  	h := sha256.New()
   247  	if _, err := file.Seek(0, 0); err != nil {
   248  		return OutputID{}, 0, err
   249  	}
   250  	size, err := io.Copy(h, file)
   251  	if err != nil {
   252  		return OutputID{}, 0, err
   253  	}
   254  	var out OutputID
   255  	h.Sum(out[:0])
   256  
   257  	// Copy to cached output file (if not already present).
   258  	if err := c.copyFile(file, out, size); err != nil {
   259  		return out, size, err
   260  	}
   261  
   262  	// Add to cache index.
   263  	return out, size, c.putIndexEntry(id, out, size, allowVerify)
   264  }
   265  
   266  // PutBytes stores the given bytes in the cache as the output for the action ID.
   267  func (c *Cache) PutBytes(id ActionID, data []byte) error {
   268  	_, _, err := c.Put(id, bytes.NewReader(data))
   269  	return err
   270  }
   271  
   272  // copyFile copies file into the cache, expecting it to have the given
   273  // output ID and size, if that file is not present already.
   274  func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
   275  	name := c.fileName(out, "d")
   276  	info, err := os.Stat(name)
   277  	if err == nil && info.Size() == size {
   278  		// Check hash.
   279  		if f, err := os.Open(name); err == nil {
   280  			h := sha256.New()
   281  			io.Copy(h, f)
   282  			f.Close()
   283  			var out2 OutputID
   284  			h.Sum(out2[:0])
   285  			if out == out2 {
   286  				return nil
   287  			}
   288  		}
   289  		// Hash did not match. Fall through and rewrite file.
   290  	}
   291  
   292  	// Copy file to cache directory.
   293  	mode := os.O_RDWR | os.O_CREATE
   294  	if err == nil && info.Size() > size { // shouldn't happen but fix in case
   295  		mode |= os.O_TRUNC
   296  	}
   297  	f, err := os.OpenFile(name, mode, 0666)
   298  	if err != nil {
   299  		return err
   300  	}
   301  	defer f.Close()
   302  	if size == 0 {
   303  		// File now exists with correct size.
   304  		// Only one possible zero-length file, so contents are OK too.
   305  		// Early return here makes sure there's a "last byte" for code below.
   306  		return nil
   307  	}
   308  
   309  	// From here on, if any of the I/O writing the file fails,
   310  	// we make a best-effort attempt to truncate the file f
   311  	// before returning, to avoid leaving bad bytes in the file.
   312  
   313  	// Copy file to f, but also into h to double-check hash.
   314  	if _, err := file.Seek(0, 0); err != nil {
   315  		f.Truncate(0)
   316  		return err
   317  	}
   318  	h := sha256.New()
   319  	w := io.MultiWriter(f, h)
   320  	if _, err := io.CopyN(w, file, size-1); err != nil {
   321  		f.Truncate(0)
   322  		return err
   323  	}
   324  	// Check last byte before writing it; writing it will make the size match
   325  	// what other processes expect to find and might cause them to start
   326  	// using the file.
   327  	buf := make([]byte, 1)
   328  	if _, err := file.Read(buf); err != nil {
   329  		f.Truncate(0)
   330  		return err
   331  	}
   332  	h.Write(buf)
   333  	sum := h.Sum(nil)
   334  	if !bytes.Equal(sum, out[:]) {
   335  		f.Truncate(0)
   336  		return fmt.Errorf("file content changed underfoot")
   337  	}
   338  
   339  	// Commit cache file entry.
   340  	if _, err := f.Write(buf); err != nil {
   341  		f.Truncate(0)
   342  		return err
   343  	}
   344  	if err := f.Close(); err != nil {
   345  		// Data might not have been written,
   346  		// but file may look like it is the right size.
   347  		// To be extra careful, remove cached file.
   348  		os.Remove(name)
   349  		return err
   350  	}
   351  
   352  	return nil
   353  }