github.com/tiagovtristao/plz@v13.4.0+incompatible/src/cache/dir_cache.go (about)

// Directory-based cache.
     2  
     3  package cache
     4  
     5  import (
     6  	"archive/tar"
     7  	"bufio"
     8  	"compress/gzip"
     9  	"encoding/base64"
    10  	"io"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"sort"
    15  	"strings"
    16  	"sync"
    17  	"time"
    18  
    19  	"github.com/djherbis/atime"
    20  	"github.com/dustin/go-humanize"
    21  
    22  	"github.com/thought-machine/please/src/core"
    23  	"github.com/thought-machine/please/src/fs"
    24  )
    25  
// dirCache is a build cache backed by a directory on the local filesystem.
// Artifacts are stored either as plain directory trees or, when Compress is
// set, as single gzipped tarballs.
type dirCache struct {
	// Dir is the root directory of the cache on disk.
	Dir      string
	// Compress stores artifacts as .tar.gz files rather than directory trees.
	Compress bool
	// Suffix is appended to entry paths (".tar.gz" when compressing, else empty).
	Suffix   string
	// mtime is a fixed timestamp written into tar headers so output is reproducible.
	mtime    time.Time
	// added records entries stored or retrieved during this run (path -> size in
	// bytes); these are protected from deletion by the background cleaner.
	added    map[string]uint64
	// mutex guards added.
	mutex    sync.Mutex
}
    34  
    35  func (cache *dirCache) Store(target *core.BuildTarget, key []byte, files ...string) {
    36  	cacheDir := cache.getPath(target, key, "")
    37  	tmpDir := cache.getFullPath(target, key, "", "=")
    38  	cache.markDir(cacheDir, 0)
    39  	if err := os.RemoveAll(cacheDir); err != nil {
    40  		log.Warning("Failed to remove existing cache directory %s: %s", cacheDir, err)
    41  		return
    42  	}
    43  	cache.storeFiles(target, key, "", cacheDir, tmpDir, cacheArtifacts(target, files...), true)
    44  	if err := os.Rename(tmpDir, cacheDir); err != nil && !os.IsNotExist(err) {
    45  		log.Warning("Failed to create cache directory %s: %s", cacheDir, err)
    46  	}
    47  }
    48  
    49  func (cache *dirCache) StoreExtra(target *core.BuildTarget, key []byte, out string) {
    50  	cacheDir := cache.getPath(target, key, out)
    51  	cache.storeFiles(target, key, out, cacheDir, cacheDir, []string{out}, false)
    52  }
    53  
    54  // storeFiles stores the given files in the cache, either compressed or not.
    55  func (cache *dirCache) storeFiles(target *core.BuildTarget, key []byte, suffix, cacheDir, tmpDir string, files []string, clean bool) {
    56  	var totalSize uint64
    57  	if cache.Compress {
    58  		totalSize = cache.storeCompressed(target, tmpDir, files)
    59  	} else {
    60  		for _, out := range files {
    61  			totalSize += cache.storeFile(target, out, tmpDir)
    62  		}
    63  	}
    64  	cache.markDir(cacheDir, totalSize)
    65  }
    66  
    67  // storeCompressed stores all the given files in the cache as a single compressed tarball.
    68  func (cache *dirCache) storeCompressed(target *core.BuildTarget, filename string, files []string) uint64 {
    69  	log.Debug("Storing %s: %s in dir cache...", target.Label, filename)
    70  	if err := cache.storeCompressed2(target, filename, files); err != nil {
    71  		log.Warning("Failed to store files in cache: %s", err)
    72  		os.RemoveAll(filename) // Just a best-effort removal at this point
    73  		return 0
    74  	}
    75  	// It's too hard to tell from a tar.Writer how big the resulting tarball is. Easier to just re-stat it here.
    76  	info, err := os.Stat(filename)
    77  	if err != nil {
    78  		log.Warning("Can't read stored file: %s", err)
    79  		return 0
    80  	}
    81  	return uint64(info.Size())
    82  }
    83  
    84  // storeCompressed2 stores all the given files in the cache as a single compressed tarball.
    85  func (cache *dirCache) storeCompressed2(target *core.BuildTarget, filename string, files []string) error {
    86  	if err := cache.ensureStoreReady(filename); err != nil {
    87  		return err
    88  	}
    89  	f, err := os.Create(filename)
    90  	if err != nil {
    91  		return err
    92  	}
    93  	defer f.Close()
    94  	bw := bufio.NewWriter(f)
    95  	defer bw.Flush()
    96  	gw := gzip.NewWriter(bw)
    97  	defer gw.Close()
    98  	tw := tar.NewWriter(gw)
    99  	defer tw.Close()
   100  	outDir := target.OutDir()
   101  	for _, file := range files {
   102  		// Any one of these might be a directory, so we have to walk them.
   103  		if err := fs.Walk(path.Join(outDir, file), func(name string, isDir bool) error {
   104  			hdr, err := cache.tarHeader(name, outDir)
   105  			if err != nil {
   106  				return err
   107  			} else if err := tw.WriteHeader(hdr); err != nil {
   108  				return err
   109  			} else if hdr.Typeflag != tar.TypeDir && hdr.Typeflag != tar.TypeSymlink {
   110  				f, err := os.Open(name)
   111  				if err != nil {
   112  					return err
   113  				} else if _, err := io.Copy(tw, f); err != nil {
   114  					return err
   115  				}
   116  				f.Close() // Do not defer this, otherwise we can open too many files at once.
   117  			}
   118  			return nil
   119  		}); err != nil {
   120  			return err
   121  		}
   122  	}
   123  	return nil
   124  }
   125  
   126  // tarHeader returns an appropriate tar header for the given file.
   127  func (cache *dirCache) tarHeader(file, prefix string) (*tar.Header, error) {
   128  	info, err := os.Lstat(file)
   129  	if err != nil {
   130  		return nil, err
   131  	}
   132  	link := ""
   133  	if info.Mode()&os.ModeSymlink != 0 {
   134  		// We have to read the link target separately.
   135  		link, err = os.Readlink(file)
   136  		if err != nil {
   137  			return nil, err
   138  		}
   139  	}
   140  	hdr, err := tar.FileInfoHeader(info, link)
   141  	if hdr != nil {
   142  		hdr.Name = strings.TrimLeft(strings.TrimPrefix(file, prefix), "/")
   143  		// Zero out all timestamps.
   144  		hdr.ModTime = cache.mtime
   145  		hdr.AccessTime = cache.mtime
   146  		hdr.ChangeTime = cache.mtime
   147  		// Strip user/group ids.
   148  		hdr.Uid = 0
   149  		hdr.Gid = 0
   150  		// Setting the user/group write bits helps consistency of output.
   151  		hdr.Mode |= 0220
   152  	}
   153  	return hdr, err
   154  }
   155  
   156  // ensureStoreReady ensures that the directory containing the given filename exists and any previous file has been removed.
   157  func (cache *dirCache) ensureStoreReady(filename string) error {
   158  	dir := path.Dir(filename)
   159  	if err := os.MkdirAll(dir, core.DirPermissions); err != nil {
   160  		return err
   161  	} else if err := os.RemoveAll(filename); err != nil {
   162  		return err
   163  	}
   164  	return nil
   165  }
   166  
   167  func (cache *dirCache) storeFile(target *core.BuildTarget, out, cacheDir string) uint64 {
   168  	log.Debug("Storing %s: %s in dir cache...", target.Label, out)
   169  	outFile := path.Join(core.RepoRoot, target.OutDir(), out)
   170  	cachedFile := path.Join(cacheDir, out)
   171  	if err := cache.ensureStoreReady(cachedFile); err != nil {
   172  		log.Warning("Failed to setup cache directory: %s", err)
   173  		return 0
   174  	}
   175  	if err := fs.RecursiveLink(outFile, cachedFile, target.OutMode()); err != nil {
   176  		// Cannot hardlink files into the cache, must copy them for reals.
   177  		log.Warning("Failed to store cache file %s: %s", cachedFile, err)
   178  	}
   179  	// TODO(peterebden): This is a little inefficient, it would be better to track the size in
   180  	//                   RecursiveCopy rather than walking again.
   181  	size, _ := findSize(cachedFile)
   182  	return size
   183  }
   184  
   185  func (cache *dirCache) Retrieve(target *core.BuildTarget, key []byte) bool {
   186  	return cache.retrieveFiles(target, key, "", cacheArtifacts(target))
   187  }
   188  
   189  func (cache *dirCache) RetrieveExtra(target *core.BuildTarget, key []byte, out string) bool {
   190  	return cache.retrieveFiles(target, key, out, []string{out})
   191  }
   192  
   193  // retrieveFiles retrieves the given set of files from the cache.
   194  func (cache *dirCache) retrieveFiles(target *core.BuildTarget, key []byte, suffix string, outs []string) bool {
   195  	found, err := cache.retrieveFiles2(target, cache.getPath(target, key, suffix), outs)
   196  	if err != nil && !os.IsNotExist(err) {
   197  		log.Warning("Failed to retrieve %s from dir cache: %s", target.Label, err)
   198  		return false
   199  	} else if found {
   200  		log.Debug("Retrieved %s: %s from dir cache", target.Label, suffix)
   201  	}
   202  	return found
   203  }
   204  
   205  func (cache *dirCache) retrieveFiles2(target *core.BuildTarget, cacheDir string, outs []string) (bool, error) {
   206  	if !core.PathExists(cacheDir) {
   207  		log.Debug("%s: %s doesn't exist in dir cache", target.Label, cacheDir)
   208  		return false, nil
   209  	}
   210  	cache.markDir(cacheDir, 0)
   211  	if cache.Compress {
   212  		log.Debug("Retrieving %s: %s from compressed cache", target.Label, cacheDir)
   213  		return true, cache.retrieveCompressed(target, cacheDir)
   214  	}
   215  	for _, out := range outs {
   216  		realOut, err := cache.ensureRetrieveReady(target, out)
   217  		if err != nil {
   218  			return false, err
   219  		}
   220  		cachedOut := path.Join(cacheDir, out)
   221  		log.Debug("Retrieving %s: %s from dir cache...", target.Label, cachedOut)
   222  		if err := fs.RecursiveLink(cachedOut, realOut, target.OutMode()); err != nil {
   223  			return false, err
   224  		}
   225  	}
   226  	return true, nil
   227  }
   228  
   229  // retrieveCompressed retrieves the given outs from a compressed tarball.
   230  // Right now it retrieves everything from the file which is sort of slightly incorrect but in practice
   231  // we should get away with it (because changing the set of outputs from what was stored would also change
   232  // the hash, so theoretically at least the two should line up).
   233  func (cache *dirCache) retrieveCompressed(target *core.BuildTarget, filename string) error {
   234  	f, err := os.Open(filename)
   235  	if err != nil {
   236  		return err
   237  	}
   238  	defer f.Close()
   239  	gr, err := gzip.NewReader(f)
   240  	if err != nil {
   241  		return err
   242  	}
   243  	defer gr.Close()
   244  	tr := tar.NewReader(gr)
   245  	for {
   246  		hdr, err := tr.Next()
   247  		if err != nil {
   248  			if err == io.EOF {
   249  				break // End of archive
   250  			}
   251  			return err
   252  		}
   253  		out, err := cache.ensureRetrieveReady(target, hdr.Name)
   254  		if err != nil {
   255  			return err
   256  		}
   257  		if hdr.Typeflag == tar.TypeDir {
   258  			// Just create the directory
   259  			if err := os.MkdirAll(out, core.DirPermissions); err != nil {
   260  				return err
   261  			}
   262  		} else if hdr.Typeflag == tar.TypeSymlink {
   263  			if err := os.Symlink(hdr.Linkname, out); err != nil {
   264  				return err
   265  			}
   266  		} else {
   267  			f, err := os.OpenFile(out, os.O_WRONLY|os.O_CREATE, os.FileMode(hdr.Mode))
   268  			if err != nil {
   269  				return err
   270  			}
   271  			_, err = io.Copy(f, tr)
   272  			// N.B. It is important not to defer this - since defers do not run until the function
   273  			//      exits, we can stack up many open files within this loop, and when retrieving multiple
   274  			//      large artifacts at once can easily run out of file handles.
   275  			f.Close()
   276  			if err != nil {
   277  				return err
   278  			}
   279  		}
   280  	}
   281  	return nil
   282  }
   283  
   284  // ensureRetrieveReady makes sure that appropriate directories are created and old outputs are removed.
   285  func (cache *dirCache) ensureRetrieveReady(target *core.BuildTarget, out string) (string, error) {
   286  	fullOut := path.Join(core.RepoRoot, target.OutDir(), out)
   287  	if strings.ContainsRune(out, '/') { // The root directory will be there, only need to worry about outs in subdirectories.
   288  		if err := os.MkdirAll(path.Dir(fullOut), core.DirPermissions); err != nil {
   289  			return "", err
   290  		}
   291  	}
   292  	// It seems to be quite important that we unlink the existing file first to avoid ETXTBSY errors
   293  	// in cases where we're running an existing binary (as Please does during bootstrap, for example).
   294  	if err := os.RemoveAll(fullOut); err != nil {
   295  		return "", err
   296  	}
   297  	return fullOut, nil
   298  }
   299  
   300  func (cache *dirCache) Clean(target *core.BuildTarget) {
   301  	// Remove for all possible keys, so can't get getPath here
   302  	if err := os.RemoveAll(path.Join(cache.Dir, target.Label.PackageName, target.Label.Name)); err != nil {
   303  		log.Warning("Failed to remove artifacts for %s from dir cache: %s", target.Label, err)
   304  	}
   305  }
   306  
   307  func (cache *dirCache) CleanAll() {
   308  	if err := core.AsyncDeleteDir(cache.Dir); err != nil {
   309  		log.Error("Failed to clean cache: %s", err)
   310  	}
   311  	// We used to store the cache in .plz-cache by default; we now use UserCacheDir but
   312  	// if the old one's there, we'll clean it out too.
   313  	if dir2 := path.Join(core.RepoRoot, ".plz-cache"); dir2 != cache.Dir && fs.PathExists(dir2) {
   314  		core.AsyncDeleteDir(dir2)
   315  	}
   316  }
   317  
   318  func (cache *dirCache) Shutdown() {}
   319  
   320  func (cache *dirCache) getPath(target *core.BuildTarget, key []byte, extra string) string {
   321  	return cache.getFullPath(target, key, extra, "")
   322  }
   323  
   324  func (cache *dirCache) getFullPath(target *core.BuildTarget, key []byte, extra, suffix string) string {
   325  	// The extra identifier is not needed for non-compressed caches.
   326  	if !cache.Compress {
   327  		extra = ""
   328  	} else {
   329  		extra = strings.Replace(extra, "/", "_", -1)
   330  	}
   331  	// NB. Is very important to use a padded encoding here so lengths are consistent when cleaning.
   332  	return path.Join(cache.Dir, target.Label.PackageName, target.Label.Name, base64.URLEncoding.EncodeToString(key)) + extra + suffix + cache.Suffix
   333  }
   334  
   335  // markDir marks a directory as added to the cache, which saves it from later deletion.
   336  func (cache *dirCache) markDir(path string, size uint64) {
   337  	cache.mutex.Lock()
   338  	defer cache.mutex.Unlock()
   339  	cache.added[path] = size
   340  	cache.added[path+"="] = size
   341  }
   342  
   343  // isMarked returns true if a directory has previously been passed to markDir.
   344  func (cache *dirCache) isMarked(path string) (uint64, bool) {
   345  	cache.mutex.Lock()
   346  	defer cache.mutex.Unlock()
   347  	size, present := cache.added[path]
   348  	return size, present
   349  }
   350  
   351  func newDirCache(config *core.Configuration) *dirCache {
   352  	cache := &dirCache{
   353  		Compress: config.Cache.DirCompress,
   354  		Dir:      config.Cache.Dir,
   355  		added:    map[string]uint64{},
   356  		mtime:    time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC),
   357  	}
   358  	if cache.Compress {
   359  		cache.Suffix = ".tar.gz"
   360  	}
   361  	// Absolute paths are allowed. Relative paths are interpreted relative to the repo root.
   362  	if config.Cache.Dir[0] != '/' {
   363  		cache.Dir = path.Join(core.RepoRoot, config.Cache.Dir)
   364  	}
   365  	// Make directory if it doesn't exist.
   366  	if err := os.MkdirAll(cache.Dir, core.DirPermissions); err != nil {
   367  		log.Fatalf("Failed to create root cache directory %s: %s", cache.Dir, err)
   368  	}
   369  	// Start the cache-cleaning goroutine.
   370  	if config.Cache.DirClean {
   371  		go cache.clean(uint64(config.Cache.DirCacheHighWaterMark), uint64(config.Cache.DirCacheLowWaterMark))
   372  	}
   373  	return cache
   374  }
   375  
// Period of time in seconds between which two artifacts are considered to have the same atime.
// Within this window the cleaner evicts the larger entry first rather than the older one.
const accessTimeGracePeriod = 600 // Ten minutes

// A cacheEntry represents a single file entry in the cache.
type cacheEntry struct {
	Path  string // Path of the entry on disk.
	Size  uint64 // Total size of the entry in bytes.
	Atime int64  // Last access time as a Unix timestamp; used for LRU eviction.
}
   385  
   386  func findSize(path string) (uint64, error) {
   387  	var totalSize uint64
   388  	if err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
   389  		if err != nil {
   390  			return err
   391  		}
   392  		totalSize += uint64(info.Size())
   393  		return nil
   394  	}); err != nil {
   395  		return 0, err
   396  	}
   397  	return totalSize, nil
   398  }
   399  
// clean performs a cleaning pass over this cache, evicting least-recently-used
// entries until the total size drops below lowWaterMark (it only starts
// evicting at all if the total exceeds highWaterMark). It is run on a
// background goroutine from newDirCache.
// Returns the total size of the cache after it's finished.
func (cache *dirCache) clean(highWaterMark, lowWaterMark uint64) uint64 {
	entries := []cacheEntry{}
	var totalSize uint64
	// First pass: walk the cache root collecting every cleanable entry with its
	// size and last access time. Whole entries are skipped (SkipDir) once
	// recorded so we never descend into their contents.
	if err := fs.Walk(cache.Dir, func(path string, isDir bool) error {
		name := filepath.Base(path)
		if cache.shouldClean(name, isDir) {
			// Entries marked via markDir were stored/used this run; count their
			// size but never consider them for eviction.
			if size, marked := cache.isMarked(path); marked {
				totalSize += size
				return filepath.SkipDir // Already handled
			}
			size, err := findSize(path)
			if err != nil {
				return err
			}
			info, err := os.Stat(path)
			if err != nil {
				return err
			}
			entries = append(entries, cacheEntry{
				Path:  path,
				Size:  size,
				Atime: atime.Get(info).Unix(),
			})
			totalSize += size
			return filepath.SkipDir
		}
		return nil // nothing particularly to do for other entries
	}); err != nil {
		log.Error("error walking cache directory: %s\n", err)
		return totalSize
	}
	log.Info("Total cache size: %s", humanize.Bytes(uint64(totalSize)))
	if totalSize < highWaterMark {
		return totalSize // Nothing to do, cache is small enough.
	}
	// OK, we need to slim it down a bit. We implement a simple LRU algorithm.
	// Entries whose atimes fall within the grace period of each other are
	// treated as equally old, and the larger one is evicted first.
	sort.Slice(entries, func(i, j int) bool {
		diff := entries[i].Atime - entries[j].Atime
		if diff > -accessTimeGracePeriod && diff < accessTimeGracePeriod {
			return entries[i].Size > entries[j].Size
		}
		return entries[i].Atime < entries[j].Atime
	})
	for _, entry := range entries {
		// Re-check the mark: the entry may have been stored or retrieved by a
		// concurrent build since the walk above.
		if _, marked := cache.isMarked(entry.Path); marked {
			continue
		}

		log.Debug("Cleaning %s, accessed %s, saves %s", entry.Path, humanize.Time(time.Unix(entry.Atime, 0)), humanize.Bytes(uint64(entry.Size)))
		// Try to rename the directory first so we don't delete bits while someone might access them.
		newPath := entry.Path + "="
		if err := os.Rename(entry.Path, newPath); err != nil {
			log.Errorf("Couldn't rename %s: %s", entry.Path, err)
			continue
		}
		if err := os.RemoveAll(newPath); err != nil {
			log.Errorf("Couldn't remove %s: %s", newPath, err)
			continue
		}
		totalSize -= entry.Size
		if totalSize < lowWaterMark {
			break
		}
	}
	return totalSize
}
   468  
   469  // shouldClean returns true if we should clean this file.
   470  // We track this in order to clean only entire entries in the cache, not just individual files from them.
   471  func (cache *dirCache) shouldClean(name string, isDir bool) bool {
   472  	if cache.Compress == isDir {
   473  		return false // If we're compressing, don't look for directories. If we're not, only look at directories.
   474  	} else if !strings.HasSuffix(name, cache.Suffix) {
   475  		return false // Suffix must match.
   476  	}
   477  	name = strings.TrimSuffix(name, cache.Suffix)
   478  	// 28 == length of 20-byte sha1 hash, encoded to base64, which always gets a trailing =
   479  	// as padding so we can check that to be "sure".
   480  	// Also 29 in case we appended an extra = (which we do for temporary files that are still being written to)
   481  	return (len(name) == 28 || len(name) == 29) && name[27] == '='
   482  }