// Directory-based cache.

package cache

import (
	"archive/tar"
	"bufio"
	"compress/gzip"
	"encoding/base64"
	"io"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/djherbis/atime"
	"github.com/dustin/go-humanize"

	"github.com/thought-machine/please/src/core"
	"github.com/thought-machine/please/src/fs"
)

// dirCache is a build cache backed by a local directory tree.
type dirCache struct {
	Dir      string // Root directory that artifacts are stored under.
	Compress bool   // If true, artifacts are stored as gzipped tarballs instead of linked file trees.
	Suffix   string // Filename suffix on stored artifacts (".tar.gz" when compressing, else empty).
	mtime    time.Time // Fixed timestamp written into tar headers so output is reproducible.
	added    map[string]uint64 // Paths stored during this run (mapped to size); protected from cleaning.
	mutex    sync.Mutex        // Guards added.
}

// Store stores the given output files for a target in the cache against the given key.
func (cache *dirCache) Store(target *core.BuildTarget, key []byte, files ...string) {
	cacheDir := cache.getPath(target, key, "")
	// Write into a temporary "=" -suffixed path first, then rename into place,
	// so readers never observe a partially-written cache entry.
	tmpDir := cache.getFullPath(target, key, "", "=")
	cache.markDir(cacheDir, 0)
	if err := os.RemoveAll(cacheDir); err != nil {
		log.Warning("Failed to remove existing cache directory %s: %s", cacheDir, err)
		return
	}
	cache.storeFiles(target, key, "", cacheDir, tmpDir, cacheArtifacts(target, files...), true)
	// IsNotExist is tolerated here: the temp dir may legitimately not exist if nothing was stored.
	if err := os.Rename(tmpDir, cacheDir); err != nil && !os.IsNotExist(err) {
		log.Warning("Failed to create cache directory %s: %s", cacheDir, err)
	}
}

// StoreExtra stores an additional output for a target that isn't covered by Store.
// Note that unlike Store it writes directly into the final location.
func (cache *dirCache) StoreExtra(target *core.BuildTarget, key []byte, out string) {
	cacheDir := cache.getPath(target, key, out)
	cache.storeFiles(target, key, out, cacheDir, cacheDir, []string{out}, false)
}

// storeFiles stores the given files in the cache, either compressed or not.
55 func (cache *dirCache) storeFiles(target *core.BuildTarget, key []byte, suffix, cacheDir, tmpDir string, files []string, clean bool) { 56 var totalSize uint64 57 if cache.Compress { 58 totalSize = cache.storeCompressed(target, tmpDir, files) 59 } else { 60 for _, out := range files { 61 totalSize += cache.storeFile(target, out, tmpDir) 62 } 63 } 64 cache.markDir(cacheDir, totalSize) 65 } 66 67 // storeCompressed stores all the given files in the cache as a single compressed tarball. 68 func (cache *dirCache) storeCompressed(target *core.BuildTarget, filename string, files []string) uint64 { 69 log.Debug("Storing %s: %s in dir cache...", target.Label, filename) 70 if err := cache.storeCompressed2(target, filename, files); err != nil { 71 log.Warning("Failed to store files in cache: %s", err) 72 os.RemoveAll(filename) // Just a best-effort removal at this point 73 return 0 74 } 75 // It's too hard to tell from a tar.Writer how big the resulting tarball is. Easier to just re-stat it here. 76 info, err := os.Stat(filename) 77 if err != nil { 78 log.Warning("Can't read stored file: %s", err) 79 return 0 80 } 81 return uint64(info.Size()) 82 } 83 84 // storeCompressed2 stores all the given files in the cache as a single compressed tarball. 85 func (cache *dirCache) storeCompressed2(target *core.BuildTarget, filename string, files []string) error { 86 if err := cache.ensureStoreReady(filename); err != nil { 87 return err 88 } 89 f, err := os.Create(filename) 90 if err != nil { 91 return err 92 } 93 defer f.Close() 94 bw := bufio.NewWriter(f) 95 defer bw.Flush() 96 gw := gzip.NewWriter(bw) 97 defer gw.Close() 98 tw := tar.NewWriter(gw) 99 defer tw.Close() 100 outDir := target.OutDir() 101 for _, file := range files { 102 // Any one of these might be a directory, so we have to walk them. 
103 if err := fs.Walk(path.Join(outDir, file), func(name string, isDir bool) error { 104 hdr, err := cache.tarHeader(name, outDir) 105 if err != nil { 106 return err 107 } else if err := tw.WriteHeader(hdr); err != nil { 108 return err 109 } else if hdr.Typeflag != tar.TypeDir && hdr.Typeflag != tar.TypeSymlink { 110 f, err := os.Open(name) 111 if err != nil { 112 return err 113 } else if _, err := io.Copy(tw, f); err != nil { 114 return err 115 } 116 f.Close() // Do not defer this, otherwise we can open too many files at once. 117 } 118 return nil 119 }); err != nil { 120 return err 121 } 122 } 123 return nil 124 } 125 126 // tarHeader returns an appropriate tar header for the given file. 127 func (cache *dirCache) tarHeader(file, prefix string) (*tar.Header, error) { 128 info, err := os.Lstat(file) 129 if err != nil { 130 return nil, err 131 } 132 link := "" 133 if info.Mode()&os.ModeSymlink != 0 { 134 // We have to read the link target separately. 135 link, err = os.Readlink(file) 136 if err != nil { 137 return nil, err 138 } 139 } 140 hdr, err := tar.FileInfoHeader(info, link) 141 if hdr != nil { 142 hdr.Name = strings.TrimLeft(strings.TrimPrefix(file, prefix), "/") 143 // Zero out all timestamps. 144 hdr.ModTime = cache.mtime 145 hdr.AccessTime = cache.mtime 146 hdr.ChangeTime = cache.mtime 147 // Strip user/group ids. 148 hdr.Uid = 0 149 hdr.Gid = 0 150 // Setting the user/group write bits helps consistency of output. 151 hdr.Mode |= 0220 152 } 153 return hdr, err 154 } 155 156 // ensureStoreReady ensures that the directory containing the given filename exists and any previous file has been removed. 
157 func (cache *dirCache) ensureStoreReady(filename string) error { 158 dir := path.Dir(filename) 159 if err := os.MkdirAll(dir, core.DirPermissions); err != nil { 160 return err 161 } else if err := os.RemoveAll(filename); err != nil { 162 return err 163 } 164 return nil 165 } 166 167 func (cache *dirCache) storeFile(target *core.BuildTarget, out, cacheDir string) uint64 { 168 log.Debug("Storing %s: %s in dir cache...", target.Label, out) 169 outFile := path.Join(core.RepoRoot, target.OutDir(), out) 170 cachedFile := path.Join(cacheDir, out) 171 if err := cache.ensureStoreReady(cachedFile); err != nil { 172 log.Warning("Failed to setup cache directory: %s", err) 173 return 0 174 } 175 if err := fs.RecursiveLink(outFile, cachedFile, target.OutMode()); err != nil { 176 // Cannot hardlink files into the cache, must copy them for reals. 177 log.Warning("Failed to store cache file %s: %s", cachedFile, err) 178 } 179 // TODO(peterebden): This is a little inefficient, it would be better to track the size in 180 // RecursiveCopy rather than walking again. 181 size, _ := findSize(cachedFile) 182 return size 183 } 184 185 func (cache *dirCache) Retrieve(target *core.BuildTarget, key []byte) bool { 186 return cache.retrieveFiles(target, key, "", cacheArtifacts(target)) 187 } 188 189 func (cache *dirCache) RetrieveExtra(target *core.BuildTarget, key []byte, out string) bool { 190 return cache.retrieveFiles(target, key, out, []string{out}) 191 } 192 193 // retrieveFiles retrieves the given set of files from the cache. 
194 func (cache *dirCache) retrieveFiles(target *core.BuildTarget, key []byte, suffix string, outs []string) bool { 195 found, err := cache.retrieveFiles2(target, cache.getPath(target, key, suffix), outs) 196 if err != nil && !os.IsNotExist(err) { 197 log.Warning("Failed to retrieve %s from dir cache: %s", target.Label, err) 198 return false 199 } else if found { 200 log.Debug("Retrieved %s: %s from dir cache", target.Label, suffix) 201 } 202 return found 203 } 204 205 func (cache *dirCache) retrieveFiles2(target *core.BuildTarget, cacheDir string, outs []string) (bool, error) { 206 if !core.PathExists(cacheDir) { 207 log.Debug("%s: %s doesn't exist in dir cache", target.Label, cacheDir) 208 return false, nil 209 } 210 cache.markDir(cacheDir, 0) 211 if cache.Compress { 212 log.Debug("Retrieving %s: %s from compressed cache", target.Label, cacheDir) 213 return true, cache.retrieveCompressed(target, cacheDir) 214 } 215 for _, out := range outs { 216 realOut, err := cache.ensureRetrieveReady(target, out) 217 if err != nil { 218 return false, err 219 } 220 cachedOut := path.Join(cacheDir, out) 221 log.Debug("Retrieving %s: %s from dir cache...", target.Label, cachedOut) 222 if err := fs.RecursiveLink(cachedOut, realOut, target.OutMode()); err != nil { 223 return false, err 224 } 225 } 226 return true, nil 227 } 228 229 // retrieveCompressed retrieves the given outs from a compressed tarball. 230 // Right now it retrieves everything from the file which is sort of slightly incorrect but in practice 231 // we should get away with it (because changing the set of outputs from what was stored would also change 232 // the hash, so theoretically at least the two should line up). 
233 func (cache *dirCache) retrieveCompressed(target *core.BuildTarget, filename string) error { 234 f, err := os.Open(filename) 235 if err != nil { 236 return err 237 } 238 defer f.Close() 239 gr, err := gzip.NewReader(f) 240 if err != nil { 241 return err 242 } 243 defer gr.Close() 244 tr := tar.NewReader(gr) 245 for { 246 hdr, err := tr.Next() 247 if err != nil { 248 if err == io.EOF { 249 break // End of archive 250 } 251 return err 252 } 253 out, err := cache.ensureRetrieveReady(target, hdr.Name) 254 if err != nil { 255 return err 256 } 257 if hdr.Typeflag == tar.TypeDir { 258 // Just create the directory 259 if err := os.MkdirAll(out, core.DirPermissions); err != nil { 260 return err 261 } 262 } else if hdr.Typeflag == tar.TypeSymlink { 263 if err := os.Symlink(hdr.Linkname, out); err != nil { 264 return err 265 } 266 } else { 267 f, err := os.OpenFile(out, os.O_WRONLY|os.O_CREATE, os.FileMode(hdr.Mode)) 268 if err != nil { 269 return err 270 } 271 _, err = io.Copy(f, tr) 272 // N.B. It is important not to defer this - since defers do not run until the function 273 // exits, we can stack up many open files within this loop, and when retrieving multiple 274 // large artifacts at once can easily run out of file handles. 275 f.Close() 276 if err != nil { 277 return err 278 } 279 } 280 } 281 return nil 282 } 283 284 // ensureRetrieveReady makes sure that appropriate directories are created and old outputs are removed. 285 func (cache *dirCache) ensureRetrieveReady(target *core.BuildTarget, out string) (string, error) { 286 fullOut := path.Join(core.RepoRoot, target.OutDir(), out) 287 if strings.ContainsRune(out, '/') { // The root directory will be there, only need to worry about outs in subdirectories. 
288 if err := os.MkdirAll(path.Dir(fullOut), core.DirPermissions); err != nil { 289 return "", err 290 } 291 } 292 // It seems to be quite important that we unlink the existing file first to avoid ETXTBSY errors 293 // in cases where we're running an existing binary (as Please does during bootstrap, for example). 294 if err := os.RemoveAll(fullOut); err != nil { 295 return "", err 296 } 297 return fullOut, nil 298 } 299 300 func (cache *dirCache) Clean(target *core.BuildTarget) { 301 // Remove for all possible keys, so can't get getPath here 302 if err := os.RemoveAll(path.Join(cache.Dir, target.Label.PackageName, target.Label.Name)); err != nil { 303 log.Warning("Failed to remove artifacts for %s from dir cache: %s", target.Label, err) 304 } 305 } 306 307 func (cache *dirCache) CleanAll() { 308 if err := core.AsyncDeleteDir(cache.Dir); err != nil { 309 log.Error("Failed to clean cache: %s", err) 310 } 311 // We used to store the cache in .plz-cache by default; we now use UserCacheDir but 312 // if the old one's there, we'll clean it out too. 313 if dir2 := path.Join(core.RepoRoot, ".plz-cache"); dir2 != cache.Dir && fs.PathExists(dir2) { 314 core.AsyncDeleteDir(dir2) 315 } 316 } 317 318 func (cache *dirCache) Shutdown() {} 319 320 func (cache *dirCache) getPath(target *core.BuildTarget, key []byte, extra string) string { 321 return cache.getFullPath(target, key, extra, "") 322 } 323 324 func (cache *dirCache) getFullPath(target *core.BuildTarget, key []byte, extra, suffix string) string { 325 // The extra identifier is not needed for non-compressed caches. 326 if !cache.Compress { 327 extra = "" 328 } else { 329 extra = strings.Replace(extra, "/", "_", -1) 330 } 331 // NB. Is very important to use a padded encoding here so lengths are consistent when cleaning. 
332 return path.Join(cache.Dir, target.Label.PackageName, target.Label.Name, base64.URLEncoding.EncodeToString(key)) + extra + suffix + cache.Suffix 333 } 334 335 // markDir marks a directory as added to the cache, which saves it from later deletion. 336 func (cache *dirCache) markDir(path string, size uint64) { 337 cache.mutex.Lock() 338 defer cache.mutex.Unlock() 339 cache.added[path] = size 340 cache.added[path+"="] = size 341 } 342 343 // isMarked returns true if a directory has previously been passed to markDir. 344 func (cache *dirCache) isMarked(path string) (uint64, bool) { 345 cache.mutex.Lock() 346 defer cache.mutex.Unlock() 347 size, present := cache.added[path] 348 return size, present 349 } 350 351 func newDirCache(config *core.Configuration) *dirCache { 352 cache := &dirCache{ 353 Compress: config.Cache.DirCompress, 354 Dir: config.Cache.Dir, 355 added: map[string]uint64{}, 356 mtime: time.Date(2000, time.January, 1, 0, 0, 0, 0, time.UTC), 357 } 358 if cache.Compress { 359 cache.Suffix = ".tar.gz" 360 } 361 // Absolute paths are allowed. Relative paths are interpreted relative to the repo root. 362 if config.Cache.Dir[0] != '/' { 363 cache.Dir = path.Join(core.RepoRoot, config.Cache.Dir) 364 } 365 // Make directory if it doesn't exist. 366 if err := os.MkdirAll(cache.Dir, core.DirPermissions); err != nil { 367 log.Fatalf("Failed to create root cache directory %s: %s", cache.Dir, err) 368 } 369 // Start the cache-cleaning goroutine. 370 if config.Cache.DirClean { 371 go cache.clean(uint64(config.Cache.DirCacheHighWaterMark), uint64(config.Cache.DirCacheLowWaterMark)) 372 } 373 return cache 374 } 375 376 // Period of time in seconds between which two artifacts are considered to have the same atime. 377 const accessTimeGracePeriod = 600 // Ten minutes 378 379 // A cacheEntry represents a single file entry in the cache. 
type cacheEntry struct {
	Path  string // Path of the entry (a directory, or a tarball when compressing).
	Size  uint64 // Total size of the entry in bytes.
	Atime int64  // Last access time as a Unix timestamp.
}

// findSize returns the total size in bytes of all files under the given path.
func findSize(path string) (uint64, error) {
	var totalSize uint64
	if err := filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		totalSize += uint64(info.Size())
		return nil
	}); err != nil {
		return 0, err
	}
	return totalSize, nil
}

// clean runs background cleaning of this cache until the process exits.
// Returns the total size of the cache after it's finished.
func (cache *dirCache) clean(highWaterMark, lowWaterMark uint64) uint64 {
	entries := []cacheEntry{}
	var totalSize uint64
	if err := fs.Walk(cache.Dir, func(path string, isDir bool) error {
		name := filepath.Base(path)
		if cache.shouldClean(name, isDir) {
			// Entries stored during this run were marked with their size; count
			// them but never consider them for eviction.
			if size, marked := cache.isMarked(path); marked {
				totalSize += size
				return filepath.SkipDir // Already handled
			}
			size, err := findSize(path)
			if err != nil {
				return err
			}
			info, err := os.Stat(path)
			if err != nil {
				return err
			}
			entries = append(entries, cacheEntry{
				Path:  path,
				Size:  size,
				Atime: atime.Get(info).Unix(),
			})
			totalSize += size
			// Don't descend into the entry itself; it's handled as a unit.
			return filepath.SkipDir
		}
		return nil // nothing particularly to do for other entries
	}); err != nil {
		log.Error("error walking cache directory: %s\n", err)
		return totalSize
	}
	log.Info("Total cache size: %s", humanize.Bytes(uint64(totalSize)))
	if totalSize < highWaterMark {
		return totalSize // Nothing to do, cache is small enough.
	}
	// OK, we need to slim it down a bit. We implement a simple LRU algorithm.
	sort.Slice(entries, func(i, j int) bool {
		// Entries accessed within the same grace period are considered equally old;
		// among those, prefer evicting the largest first to free the most space.
		diff := entries[i].Atime - entries[j].Atime
		if diff > -accessTimeGracePeriod && diff < accessTimeGracePeriod {
			return entries[i].Size > entries[j].Size
		}
		return entries[i].Atime < entries[j].Atime
	})
	for _, entry := range entries {
		// Re-check the mark: the entry may have been (re)stored since we walked.
		if _, marked := cache.isMarked(entry.Path); marked {
			continue
		}

		log.Debug("Cleaning %s, accessed %s, saves %s", entry.Path, humanize.Time(time.Unix(entry.Atime, 0)), humanize.Bytes(uint64(entry.Size)))
		// Try to rename the directory first so we don't delete bits while someone might access them.
		newPath := entry.Path + "="
		if err := os.Rename(entry.Path, newPath); err != nil {
			log.Errorf("Couldn't rename %s: %s", entry.Path, err)
			continue
		}
		if err := os.RemoveAll(newPath); err != nil {
			log.Errorf("Couldn't remove %s: %s", newPath, err)
			continue
		}
		totalSize -= entry.Size
		if totalSize < lowWaterMark {
			break
		}
	}
	return totalSize
}

// shouldClean returns true if we should clean this file.
// We track this in order to clean only entire entries in the cache, not just individual files from them.
func (cache *dirCache) shouldClean(name string, isDir bool) bool {
	if cache.Compress == isDir {
		return false // If we're compressing, don't look for directories. If we're not, only look at directories.
	} else if !strings.HasSuffix(name, cache.Suffix) {
		return false // Suffix must match.
	}
	name = strings.TrimSuffix(name, cache.Suffix)
	// 28 == length of 20-byte sha1 hash, encoded to base64, which always gets a trailing =
	// as padding so we can check that to be "sure".
	// Also 29 in case we appended an extra = (which we do for temporary files that are still being written to)
	return (len(name) == 28 || len(name) == 29) && name[27] == '='
}