github.com/octohelm/cuemod@v0.9.4/internal/cmd/go/internals/cache/cache.go

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package cache implements a build artifact cache.
package cache

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/octohelm/cuemod/internal/cmd/go/internals/lockedfile"
	"github.com/octohelm/cuemod/internal/cmd/go/internals/mmap"
	"github.com/octohelm/cuemod/internal/internals/godebug"
)

// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
type ActionID [HashSize]byte

// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// Cache is the interface used by cmd/go.
type Cache interface {
	// Get returns the cache entry for the provided ActionID.
	// On a miss, the returned error should be of type *entryNotFoundError.
	//
	// After a successful call to Get, OutputFile(Entry.OutputID) must
	// exist on disk until Close is called (at the end of the process).
	Get(ActionID) (Entry, error)

	// Put adds an item to the cache.
	//
	// The seeker is only used to seek to the beginning. After a call to Put,
	// the seek position is not guaranteed to be in any particular state.
	//
	// As a special case, if the ReadSeeker is of type noVerifyReadSeeker,
	// the verification from GODEBUG=gocacheverify=1 is skipped.
	//
	// After a successful call to Put, OutputFile(OutputID) must
	// exist on disk until Close is called (at the end of the process).
	Put(ActionID, io.ReadSeeker) (_ OutputID, size int64, _ error)

	// Close is called at the end of the go process. Implementations can do
	// cache cleanup work at this phase, or wait for and report any errors from
	// background cleanup work started earlier. Any cache trimming done in one
	// process must not cause the invariants of this interface to be violated
	// in another process. Namely, a cache trim from one process should
	// not delete an OutputID from disk that was recently returned by a Get or
	// Put in another process. As a rule of thumb, don't trim things used in
	// the last day.
	Close() error

	// OutputFile returns the path on disk where OutputID is stored.
	//
	// It's only called after a successful Get or Put call, so it doesn't need
	// to return an error; it's assumed that if the previous Get or Put succeeded,
	// the file is already on disk.
	OutputFile(OutputID) string

	// FuzzDir returns where fuzz files are stored.
	FuzzDir() string
}

// A DiskCache is a package cache, backed by a file system directory tree.
type DiskCache struct {
	dir string
	now func() time.Time
}

// Open opens and returns the cache in the given directory.
//
// It is safe for multiple processes on a single machine to use the
// same cache directory in a local file system simultaneously.
// They will coordinate using operating system file locks and may
// duplicate effort but will not corrupt the cache.
//
// However, it is NOT safe for multiple processes on different machines
// to share a cache directory (for example, if the directory were stored
// in a network file system). File locking is notoriously unreliable in
// network file systems and may not suffice to protect the cache.
func Open(dir string) (*DiskCache, error) {
	info, err := os.Stat(dir)
	if err != nil {
		return nil, err
	}
	if !info.IsDir() {
		return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
	}
	for i := 0; i < 256; i++ {
		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
		if err := os.MkdirAll(name, 0777); err != nil {
			return nil, err
		}
	}
	c := &DiskCache{
		dir: dir,
		now: time.Now,
	}
	return c, nil
}

// fileName returns the name of the file corresponding to the given id.
func (c *DiskCache) fileName(id [HashSize]byte, key string) string {
	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
}

// An entryNotFoundError indicates that a cache entry was not found, with an
// optional underlying reason.
type entryNotFoundError struct {
	Err error
}

func (e *entryNotFoundError) Error() string {
	if e.Err == nil {
		return "cache entry not found"
	}
	return fmt.Sprintf("cache entry not found: %v", e.Err)
}

func (e *entryNotFoundError) Unwrap() error {
	return e.Err
}

const (
	// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
	hexSize   = HashSize * 2
	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
)

// verify controls whether to run the cache in verify mode.
// In verify mode, Get always reports a miss (an *entryNotFoundError),
// but Put then double-checks that the data being written
// exactly matches any existing entry. This provides an easy
// way to detect program behavior that would have been different
// had the cache entry been returned from Get.
//
// verify is enabled by setting the environment variable
// GODEBUG=gocacheverify=1.
var verify = false

var errVerifyMode = errors.New("gocacheverify=1")

// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
var DebugTest = false

func init() { initEnv() }

var (
	goCacheVerify = godebug.New("gocacheverify")
	goDebugHash   = godebug.New("gocachehash")
	goCacheTest   = godebug.New("gocachetest")
)

func initEnv() {
	if goCacheVerify.Value() == "1" {
		goCacheVerify.IncNonDefault()
		verify = true
	}
	if goDebugHash.Value() == "1" {
		goDebugHash.IncNonDefault()
		debugHash = true
	}
	if goCacheTest.Value() == "1" {
		goCacheTest.IncNonDefault()
		DebugTest = true
	}
}

// Get looks up the action ID in the cache,
// returning the corresponding output ID and file size, if any.
// Note that finding an output ID does not guarantee that the
// saved file for that output ID is still available.
func (c *DiskCache) Get(id ActionID) (Entry, error) {
	if verify {
		return Entry{}, &entryNotFoundError{Err: errVerifyMode}
	}
	return c.get(id)
}

type Entry struct {
	OutputID OutputID
	Size     int64
	Time     time.Time // when added to cache
}

// get is Get but does not respect verify mode, so that Put can use it.
func (c *DiskCache) get(id ActionID) (Entry, error) {
	missing := func(reason error) (Entry, error) {
		return Entry{}, &entryNotFoundError{Err: reason}
	}
	f, err := os.Open(c.fileName(id, "a"))
	if err != nil {
		return missing(err)
	}
	defer f.Close()
	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
	if n, err := io.ReadFull(f, entry); n > entrySize {
		return missing(errors.New("too long"))
	} else if err != io.ErrUnexpectedEOF {
		if err == io.EOF {
			return missing(errors.New("file is empty"))
		}
		return missing(err)
	} else if n < entrySize {
		return missing(errors.New("entry file incomplete"))
	}
	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
		return missing(errors.New("invalid header"))
	}
	eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
	esize, entry := entry[1:1+20], entry[1+20:]
	etime, entry := entry[1:1+20], entry[1+20:]
	var buf [HashSize]byte
	if _, err := hex.Decode(buf[:], eid); err != nil {
		return missing(fmt.Errorf("decoding ID: %v", err))
	} else if buf != id {
		return missing(errors.New("mismatched ID"))
	}
	if _, err := hex.Decode(buf[:], eout); err != nil {
		return missing(fmt.Errorf("decoding output ID: %v", err))
	}
	i := 0
	for i < len(esize) && esize[i] == ' ' {
		i++
	}
	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
	if err != nil {
		return missing(fmt.Errorf("parsing size: %v", err))
	} else if size < 0 {
		return missing(errors.New("negative size"))
	}
	i = 0
	for i < len(etime) && etime[i] == ' ' {
		i++
	}
	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
	if err != nil {
		return missing(fmt.Errorf("parsing timestamp: %v", err))
	} else if tm < 0 {
		return missing(errors.New("negative timestamp"))
	}

	c.used(c.fileName(id, "a"))

	return Entry{buf, size, time.Unix(0, tm)}, nil
}

// GetFile looks up the action ID in the cache and returns
// the name of the corresponding data file.
func GetFile(c Cache, id ActionID) (file string, entry Entry, err error) {
	entry, err = c.Get(id)
	if err != nil {
		return "", Entry{}, err
	}
	file = c.OutputFile(entry.OutputID)
	info, err := os.Stat(file)
	if err != nil {
		return "", Entry{}, &entryNotFoundError{Err: err}
	}
	if info.Size() != entry.Size {
		return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
	}
	return file, entry, nil
}

// GetBytes looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetBytes should only be used for data that can be expected to fit in memory.
func GetBytes(c Cache, id ActionID) ([]byte, Entry, error) {
	entry, err := c.Get(id)
	if err != nil {
		return nil, entry, err
	}
	data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
	if sha256.Sum256(data) != entry.OutputID {
		return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
	}
	return data, entry, nil
}

// GetMmap looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetMmap should only be used for data that can be expected to fit in memory.
func GetMmap(c Cache, id ActionID) ([]byte, Entry, error) {
	entry, err := c.Get(id)
	if err != nil {
		return nil, entry, err
	}
	md, err := mmap.Mmap(c.OutputFile(entry.OutputID))
	if err != nil {
		return nil, Entry{}, err
	}
	if int64(len(md.Data)) != entry.Size {
		return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
	}
	return md.Data, entry, nil
}

// OutputFile returns the name of the cache file storing output with the given OutputID.
func (c *DiskCache) OutputFile(out OutputID) string {
	file := c.fileName(out, "d")
	c.used(file)
	return file
}

// Time constants for cache expiration.
//
// We set the mtime on a cache file on each use, but at most once per mtimeInterval (1 hour),
// to avoid causing many unnecessary inode updates. The mtimes therefore
// roughly reflect "time of last use" but may in fact be older by at most an hour.
//
// We scan the cache for entries to delete at most once per trimInterval (1 day).
//
// When we do scan the cache, we delete entries that have not been used for
// at least trimLimit (5 days). Statistics gathered from a month of usage by
// Go developers found that essentially all reuse of cached entries happened
// within 5 days of the previous reuse. See golang.org/issue/22990.
const (
	mtimeInterval = 1 * time.Hour
	trimInterval  = 24 * time.Hour
	trimLimit     = 5 * 24 * time.Hour
)

// used makes a best-effort attempt to update mtime on file,
// so that mtime reflects cache access time.
//
// Because the reflection only needs to be approximate,
// and to reduce the amount of disk activity caused by using
// cache entries, used only updates the mtime if the current
// mtime is more than an hour old. This heuristic eliminates
// nearly all of the mtime updates that would otherwise happen,
// while still keeping the mtimes useful for cache trimming.
func (c *DiskCache) used(file string) {
	info, err := os.Stat(file)
	if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
		return
	}
	os.Chtimes(file, c.now(), c.now())
}

func (c *DiskCache) Close() error { return c.Trim() }

// Trim removes old cache entries that are likely not to be reused.
func (c *DiskCache) Trim() error {
	now := c.now()

	// We maintain in dir/trim.txt the time of the last completed cache trim.
	// If the cache has been trimmed recently enough, do nothing.
	// This is the common case.
	// If the trim file is corrupt (detected when the file can't be parsed) or the
	// trim time is too far in the future, attempt the trim anyway. It's possible that
	// the cache was full when the corruption happened. Attempting a trim on
	// an empty cache is cheap, so there wouldn't be a big performance hit in that case.
	if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil {
		if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil {
			lastTrim := time.Unix(t, 0)
			if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval {
				return nil
			}
		}
	}

	// Trim each of the 256 subdirectories.
	// We subtract an additional mtimeInterval
	// to account for the imprecision of our "last used" mtimes.
	cutoff := now.Add(-trimLimit - mtimeInterval)
	for i := 0; i < 256; i++ {
		subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
		c.trimSubdir(subdir, cutoff)
	}

	// Ignore errors from here: if we don't write the complete timestamp, the
	// cache will appear older than it is, and we'll trim it again next time.
	var b bytes.Buffer
	fmt.Fprintf(&b, "%d", now.Unix())
	if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil {
		return err
	}

	return nil
}

// trimSubdir trims a single cache subdirectory.
func (c *DiskCache) trimSubdir(subdir string, cutoff time.Time) {
	// Read all directory entries from subdir before removing
	// any files, in case removing files invalidates the file offset
	// in the directory scan. Also, ignore error from f.Readdirnames,
	// because we don't care about reporting the error and we still
	// want to process any entries found before the error.
	f, err := os.Open(subdir)
	if err != nil {
		return
	}
	names, _ := f.Readdirnames(-1)
	f.Close()

	for _, name := range names {
		// Remove only cache entries (xxxx-a and xxxx-d).
		if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
			continue
		}
		entry := filepath.Join(subdir, name)
		info, err := os.Stat(entry)
		if err == nil && info.ModTime().Before(cutoff) {
			os.Remove(entry)
		}
	}
}

// putIndexEntry adds an entry to the cache recording that executing the action
// with the given id produces an output with the given output id (hash) and size.
func (c *DiskCache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
	// Note: We expect that for one reason or another it may happen
	// that repeating an action produces a different output hash
	// (for example, if the output contains a time stamp or temp dir name).
	// While not ideal, this is also not a correctness problem, so we
	// don't make a big deal about it. In particular, we leave the action
	// cache entries writable specifically so that they can be overwritten.
	//
	// Setting GODEBUG=gocacheverify=1 does make a big deal:
	// in verify mode we are double-checking that the cache entries
	// are entirely reproducible. As just noted, this may be unrealistic
	// in some cases but the check is also useful for shaking out real bugs.
	entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
	if verify && allowVerify {
		old, err := c.get(id)
		if err == nil && (old.OutputID != out || old.Size != size) {
			// panic to show stack trace, so we can see what code is generating this cache entry.
			msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), old.OutputID, old.Size, out, size)
			panic(msg)
		}
	}
	file := c.fileName(id, "a")

	// Copy file to cache directory.
	mode := os.O_WRONLY | os.O_CREATE
	f, err := os.OpenFile(file, mode, 0666)
	if err != nil {
		return err
	}
	_, err = f.WriteString(entry)
	if err == nil {
		// Truncate the file only *after* writing it.
		// (This should be a no-op, but truncate just in case of previous corruption.)
		//
		// This differs from os.WriteFile, which truncates to 0 *before* writing
		// via os.O_TRUNC.
		// Truncating only after writing ensures that a second write
		// of the same content to the same file is idempotent, and does not,
		// even temporarily, undo the effect of the first write.
		err = f.Truncate(int64(len(entry)))
	}
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		// TODO(bcmills): This Remove potentially races with another go command writing to file.
		// Can we eliminate it?
		os.Remove(file)
		return err
	}
	os.Chtimes(file, c.now(), c.now()) // mainly for tests

	return nil
}

// noVerifyReadSeeker is an io.ReadSeeker wrapper sentinel type
// that says that Cache.Put should skip the verify check
// (from GODEBUG=gocacheverify=1).
type noVerifyReadSeeker struct {
	io.ReadSeeker
}

// Put stores the given output in the cache as the output for the action ID.
// It may read file twice. The content of file must not change between the two passes.
func (c *DiskCache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	wrapper, isNoVerify := file.(noVerifyReadSeeker)
	if isNoVerify {
		file = wrapper.ReadSeeker
	}
	return c.put(id, file, !isNoVerify)
}

// PutNoVerify is like Put but disables the verify check
// when GODEBUG=gocacheverify=1 is set.
// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
// like test output containing times and the like.
func PutNoVerify(c Cache, id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	return c.Put(id, noVerifyReadSeeker{file})
}

func (c *DiskCache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
	// Compute output ID.
	h := sha256.New()
	if _, err := file.Seek(0, 0); err != nil {
		return OutputID{}, 0, err
	}
	size, err := io.Copy(h, file)
	if err != nil {
		return OutputID{}, 0, err
	}
	var out OutputID
	h.Sum(out[:0])

	// Copy to cached output file (if not already present).
	if err := c.copyFile(file, out, size); err != nil {
		return out, size, err
	}

	// Add to cache index.
	return out, size, c.putIndexEntry(id, out, size, allowVerify)
}

// PutBytes stores the given bytes in the cache as the output for the action ID.
func PutBytes(c Cache, id ActionID, data []byte) error {
	_, _, err := c.Put(id, bytes.NewReader(data))
	return err
}

// copyFile copies file into the cache, expecting it to have the given
// output ID and size, if that file is not present already.
func (c *DiskCache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
	name := c.fileName(out, "d")
	info, err := os.Stat(name)
	if err == nil && info.Size() == size {
		// Check hash.
		if f, err := os.Open(name); err == nil {
			h := sha256.New()
			io.Copy(h, f)
			f.Close()
			var out2 OutputID
			h.Sum(out2[:0])
			if out == out2 {
				return nil
			}
		}
		// Hash did not match. Fall through and rewrite file.
	}

	// Copy file to cache directory.
	mode := os.O_RDWR | os.O_CREATE
	if err == nil && info.Size() > size { // shouldn't happen but fix in case
		mode |= os.O_TRUNC
	}
	f, err := os.OpenFile(name, mode, 0666)
	if err != nil {
		return err
	}
	defer f.Close()
	if size == 0 {
		// File now exists with correct size.
		// Only one possible zero-length file, so contents are OK too.
		// Early return here makes sure there's a "last byte" for code below.
		return nil
	}

	// From here on, if any of the I/O writing the file fails,
	// we make a best-effort attempt to truncate the file f
	// before returning, to avoid leaving bad bytes in the file.

	// Copy file to f, but also into h to double-check hash.
	if _, err := file.Seek(0, 0); err != nil {
		f.Truncate(0)
		return err
	}
	h := sha256.New()
	w := io.MultiWriter(f, h)
	if _, err := io.CopyN(w, file, size-1); err != nil {
		f.Truncate(0)
		return err
	}
	// Check last byte before writing it; writing it will make the size match
	// what other processes expect to find and might cause them to start
	// using the file.
	buf := make([]byte, 1)
	if _, err := file.Read(buf); err != nil {
		f.Truncate(0)
		return err
	}
	h.Write(buf)
	sum := h.Sum(nil)
	if !bytes.Equal(sum, out[:]) {
		f.Truncate(0)
		return fmt.Errorf("file content changed underfoot")
	}

	// Commit cache file entry.
	if _, err := f.Write(buf); err != nil {
		f.Truncate(0)
		return err
	}
	if err := f.Close(); err != nil {
		// Data might not have been written,
		// but file may look like it is the right size.
		// To be extra careful, remove cached file.
		os.Remove(name)
		return err
	}
	os.Chtimes(name, c.now(), c.now()) // mainly for tests

	return nil
}

// FuzzDir returns a subdirectory within the cache for storing fuzzing data.
// The subdirectory may not exist.
//
// This directory is managed by the internal/fuzz package. Files in this
// directory aren't removed by the 'go clean -cache' command or by Trim.
// They may be removed with 'go clean -fuzzcache'.
//
// TODO(#48526): make Trim remove unused files from this directory.
func (c *DiskCache) FuzzDir() string {
	return filepath.Join(c.dir, "fuzz")
}
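
// The function below is an illustrative sketch only, not part of the upstream
// cmd/go sources: it shows one way the DiskCache API above might be exercised
// end to end (Open, PutBytes, GetBytes, Close). The function name, the
// temporary directory, and the example payload are assumptions made for this
// sketch; it relies on HashSize being 32, which the sha256-based comparisons
// above already require.
func exampleDiskCacheRoundTrip() error {
	// Create a throwaway cache directory; Open requires the directory to exist.
	dir, err := os.MkdirTemp("", "cache-example")
	if err != nil {
		return err
	}
	defer os.RemoveAll(dir)

	c, err := Open(dir)
	if err != nil {
		return err
	}
	defer c.Close() // Close runs Trim, as documented above.

	// An ActionID is the hash of a complete description of the computation.
	id := ActionID(sha256.Sum256([]byte("example: compile pkg with flags X")))

	// Store the output bytes under the action ID, then look them up again.
	payload := []byte("example output")
	if err := PutBytes(c, id, payload); err != nil {
		return err
	}
	data, entry, err := GetBytes(c, id)
	if err != nil {
		return err
	}
	if !bytes.Equal(data, payload) || entry.Size != int64(len(payload)) {
		return fmt.Errorf("unexpected cache round-trip result")
	}
	return nil
}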