github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/go/cache/cache.go

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package cache implements a build artifact cache.
package cache

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/go-asm/go/godebug"

	"github.com/go-asm/go/cmd/go/lockedfile"
	"github.com/go-asm/go/cmd/go/mmap"
)

// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
type ActionID [HashSize]byte

// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// Cache is the interface as used by cmd/go.
type Cache interface {
	// Get returns the cache entry for the provided ActionID.
	// On miss, the error type should be of type *entryNotFoundError.
	//
	// After a successful call to Get, OutputFile(Entry.OutputID) must
	// exist on disk until Close is called (at the end of the process).
	Get(ActionID) (Entry, error)

	// Put adds an item to the cache.
	//
	// The seeker is only used to seek to the beginning. After a call to Put,
	// the seek position is not guaranteed to be in any particular state.
	//
	// As a special case, if the ReadSeeker is of type noVerifyReadSeeker,
	// the verification from GODEBUG=gocacheverify=1 is skipped.
	//
	// After a successful call to Put, OutputFile(Entry.OutputID) must
	// exist on disk until Close is called (at the end of the process).
	Put(ActionID, io.ReadSeeker) (_ OutputID, size int64, _ error)

	// Close is called at the end of the go process. Implementations can do
	// cache cleanup work at this phase, or wait for and report any errors from
	// background cleanup work started earlier. Any cache trimming done in one
	// process should not cause the invariants of this interface to be violated
	// in another process. Namely, a cache trim from one process should not
	// delete an OutputID from disk that was recently Get or Put by another
	// process. As a rule of thumb, don't trim things used in the last day.
	Close() error

	// OutputFile returns the path on disk where OutputID is stored.
	//
	// It's only called after a successful Get or Put call, so it doesn't need
	// to return an error; it's assumed that if the previous Get or Put succeeded,
	// the file is already on disk.
	OutputFile(OutputID) string

	// FuzzDir returns where fuzz files are stored.
	FuzzDir() string
}

// A DiskCache is a package cache, backed by a file system directory tree.
type DiskCache struct {
	dir string
	now func() time.Time
}
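// exampleRoundTrip is an illustrative sketch added in editing; it is not part
// of the upstream package. It shows how a caller typically pairs Put and Get
// against the Cache interface: derive an ActionID from a description of the
// work, store the output bytes, and later look them up by the same ActionID.
// The action description string is made up, and hashing it directly with
// SHA-256 is a simplification for illustration.
func exampleRoundTrip(c Cache, output []byte) error {
	// HashSize is 32 in this package (SHA-256), so the conversion is valid.
	id := ActionID(sha256.Sum256([]byte("example action: compile package with flags ...")))

	if err := PutBytes(c, id, output); err != nil {
		return err
	}

	data, entry, err := GetBytes(c, id)
	if err != nil {
		return err // cache miss (including verify mode) or corrupted output
	}
	if int64(len(data)) != entry.Size {
		return fmt.Errorf("unexpected size: got %d, want %d", len(data), entry.Size)
	}
	// The stored output also has a stable on-disk path until Close is called.
	_ = c.OutputFile(entry.OutputID)
	return nil
}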
// Open opens and returns the cache in the given directory.
//
// It is safe for multiple processes on a single machine to use the
// same cache directory in a local file system simultaneously.
// They will coordinate using operating system file locks and may
// duplicate effort but will not corrupt the cache.
//
// However, it is NOT safe for multiple processes on different machines
// to share a cache directory (for example, if the directory were stored
// in a network file system). File locking is notoriously unreliable in
// network file systems and may not suffice to protect the cache.
func Open(dir string) (*DiskCache, error) {
	info, err := os.Stat(dir)
	if err != nil {
		return nil, err
	}
	if !info.IsDir() {
		return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
	}
	for i := 0; i < 256; i++ {
		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
		if err := os.MkdirAll(name, 0777); err != nil {
			return nil, err
		}
	}
	c := &DiskCache{
		dir: dir,
		now: time.Now,
	}
	return c, nil
}

// fileName returns the name of the file corresponding to the given id.
func (c *DiskCache) fileName(id [HashSize]byte, key string) string {
	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
}

// An entryNotFoundError indicates that a cache entry was not found, with an
// optional underlying reason.
type entryNotFoundError struct {
	Err error
}

func (e *entryNotFoundError) Error() string {
	if e.Err == nil {
		return "cache entry not found"
	}
	return fmt.Sprintf("cache entry not found: %v", e.Err)
}

func (e *entryNotFoundError) Unwrap() error {
	return e.Err
}

const (
	// An action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n".
	hexSize   = HashSize * 2
	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
)

// verify controls whether to run the cache in verify mode.
// In verify mode, the cache always reports a miss from Get
// but then double-checks in Put that the data being written
// exactly matches any existing entry. This provides an easy
// way to detect program behavior that would have been different
// had the cache entry been returned from Get.
//
// verify is enabled by setting the environment variable
// GODEBUG=gocacheverify=1.
var verify = false

var errVerifyMode = errors.New("gocacheverify=1")

// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
var DebugTest = false

func init() { initEnv() }

var (
	goCacheVerify = godebug.New("gocacheverify")
	goDebugHash   = godebug.New("gocachehash")
	goCacheTest   = godebug.New("gocachetest")
)

func initEnv() {
	if goCacheVerify.Value() == "1" {
		goCacheVerify.IncNonDefault()
		verify = true
	}
	if goDebugHash.Value() == "1" {
		goDebugHash.IncNonDefault()
		debugHash = true
	}
	if goCacheTest.Value() == "1" {
		goCacheTest.IncNonDefault()
		DebugTest = true
	}
}

// Get looks up the action ID in the cache,
// returning the corresponding output ID and file size, if any.
// Note that finding an output ID does not guarantee that the
// saved file for that output ID is still available.
func (c *DiskCache) Get(id ActionID) (Entry, error) {
	if verify {
		return Entry{}, &entryNotFoundError{Err: errVerifyMode}
	}
	return c.get(id)
}

// An Entry describes an action cache index entry: the output it refers to,
// that output's size, and the time the entry was added to the cache.
type Entry struct {
	OutputID OutputID
	Size     int64
	Time     time.Time // when added to cache
}
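// Editorial note (not part of the upstream file): the fixed-width layout that
// entrySize encodes, and that get (below) and putIndexEntry write and parse,
// looks like this with HashSize = 32 (so hexSize = 64):
//
//	v1 <64 hex digits of ActionID> <64 hex digits of OutputID> <size, right-aligned in 20 bytes> <unixnano, right-aligned in 20 bytes>\n
//
// That is 2+1+64+1+64+1+20+1+20+1 = 175 bytes. Every well-formed entry file
// has exactly this length, which is why get can read a fixed entrySize+1 bytes
// and reject anything longer or shorter.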
// get is Get but does not respect verify mode, so that Put can use it.
func (c *DiskCache) get(id ActionID) (Entry, error) {
	missing := func(reason error) (Entry, error) {
		return Entry{}, &entryNotFoundError{Err: reason}
	}
	f, err := os.Open(c.fileName(id, "a"))
	if err != nil {
		return missing(err)
	}
	defer f.Close()
	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
	if n, err := io.ReadFull(f, entry); n > entrySize {
		return missing(errors.New("too long"))
	} else if err != io.ErrUnexpectedEOF {
		if err == io.EOF {
			return missing(errors.New("file is empty"))
		}
		return missing(err)
	} else if n < entrySize {
		return missing(errors.New("entry file incomplete"))
	}
	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
		return missing(errors.New("invalid header"))
	}
	eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
	esize, entry := entry[1:1+20], entry[1+20:]
	etime, entry := entry[1:1+20], entry[1+20:]
	var buf [HashSize]byte
	if _, err := hex.Decode(buf[:], eid); err != nil {
		return missing(fmt.Errorf("decoding ID: %v", err))
	} else if buf != id {
		return missing(errors.New("mismatched ID"))
	}
	if _, err := hex.Decode(buf[:], eout); err != nil {
		return missing(fmt.Errorf("decoding output ID: %v", err))
	}
	i := 0
	for i < len(esize) && esize[i] == ' ' {
		i++
	}
	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
	if err != nil {
		return missing(fmt.Errorf("parsing size: %v", err))
	} else if size < 0 {
		return missing(errors.New("negative size"))
	}
	i = 0
	for i < len(etime) && etime[i] == ' ' {
		i++
	}
	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
	if err != nil {
		return missing(fmt.Errorf("parsing timestamp: %v", err))
	} else if tm < 0 {
		return missing(errors.New("negative timestamp"))
	}

	c.used(c.fileName(id, "a"))

	return Entry{buf, size, time.Unix(0, tm)}, nil
}

// GetFile looks up the action ID in the cache and returns
// the name of the corresponding data file.
func GetFile(c Cache, id ActionID) (file string, entry Entry, err error) {
	entry, err = c.Get(id)
	if err != nil {
		return "", Entry{}, err
	}
	file = c.OutputFile(entry.OutputID)
	info, err := os.Stat(file)
	if err != nil {
		return "", Entry{}, &entryNotFoundError{Err: err}
	}
	if info.Size() != entry.Size {
		return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
	}
	return file, entry, nil
}

// GetBytes looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetBytes should only be used for data that can be expected to fit in memory.
func GetBytes(c Cache, id ActionID) ([]byte, Entry, error) {
	entry, err := c.Get(id)
	if err != nil {
		return nil, entry, err
	}
	data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
	if sha256.Sum256(data) != entry.OutputID {
		return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
	}
	return data, entry, nil
}
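// exampleLookupPath is an illustrative sketch added in editing, not upstream
// code. It shows the intended use of GetFile: resolve an ActionID to the
// on-disk output path, treating any error as a cache miss that the caller
// regenerates from scratch. The regenerate callback is hypothetical.
func exampleLookupPath(c Cache, id ActionID, regenerate func() ([]byte, error)) (string, error) {
	if file, _, err := GetFile(c, id); err == nil {
		return file, nil // cache hit: the file exists and has the recorded size
	}

	// Cache miss: rebuild the output and store it for next time.
	data, err := regenerate()
	if err != nil {
		return "", err
	}
	if err := PutBytes(c, id, data); err != nil {
		return "", err
	}
	file, _, err := GetFile(c, id)
	return file, err
}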
// GetMmap looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetMmap should only be used for data that can be expected to fit in memory.
func GetMmap(c Cache, id ActionID) ([]byte, Entry, error) {
	entry, err := c.Get(id)
	if err != nil {
		return nil, entry, err
	}
	md, err := mmap.Mmap(c.OutputFile(entry.OutputID))
	if err != nil {
		return nil, Entry{}, err
	}
	if int64(len(md.Data)) != entry.Size {
		return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
	}
	return md.Data, entry, nil
}

// OutputFile returns the name of the cache file storing output with the given OutputID.
func (c *DiskCache) OutputFile(out OutputID) string {
	file := c.fileName(out, "d")
	c.used(file)
	return file
}

// Time constants for cache expiration.
//
// We set the mtime on a cache file on each use, but at most once per mtimeInterval (1 hour),
// to avoid causing many unnecessary inode updates. The mtimes therefore
// roughly reflect "time of last use" but may in fact be older by at most an hour.
//
// We scan the cache for entries to delete at most once per trimInterval (1 day).
//
// When we do scan the cache, we delete entries that have not been used for
// at least trimLimit (5 days). Statistics gathered from a month of usage by
// Go developers found that essentially all reuse of cached entries happened
// within 5 days of the previous reuse. See golang.org/issue/22990.
const (
	mtimeInterval = 1 * time.Hour
	trimInterval  = 24 * time.Hour
	trimLimit     = 5 * 24 * time.Hour
)

// used makes a best-effort attempt to update mtime on file,
// so that mtime reflects cache access time.
//
// Because the reflection only needs to be approximate,
// and to reduce the amount of disk activity caused by using
// cache entries, used only updates the mtime if the current
// mtime is more than an hour old. This heuristic eliminates
// nearly all of the mtime updates that would otherwise happen,
// while still keeping the mtimes useful for cache trimming.
func (c *DiskCache) used(file string) {
	info, err := os.Stat(file)
	if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
		return
	}
	os.Chtimes(file, c.now(), c.now())
}

func (c *DiskCache) Close() error { return c.Trim() }
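// wouldSurviveTrim is an editorial sketch, not part of the upstream package.
// It restates the policy that Trim (below) applies: an entry is deleted only
// if its mtime is older than trimLimit plus one mtimeInterval of slack, the
// slack accounting for used refreshing mtimes at most once per hour.
func wouldSurviveTrim(now, mtime time.Time) bool {
	cutoff := now.Add(-trimLimit - mtimeInterval)
	return !mtime.Before(cutoff)
}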
// Trim removes old cache entries that are likely not to be reused.
func (c *DiskCache) Trim() error {
	now := c.now()

	// We maintain in dir/trim.txt the time of the last completed cache trim.
	// If the cache has been trimmed recently enough, do nothing.
	// This is the common case.
	// If the trim file is corrupt (detected when the file can't be parsed) or the
	// trim time is too far in the future, attempt the trim anyway. It's possible that
	// the cache was full when the corruption happened. Attempting a trim on
	// an empty cache is cheap, so there wouldn't be a big performance hit in that case.
	if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil {
		if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil {
			lastTrim := time.Unix(t, 0)
			if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval {
				return nil
			}
		}
	}

	// Trim each of the 256 subdirectories.
	// We subtract an additional mtimeInterval
	// to account for the imprecision of our "last used" mtimes.
	cutoff := now.Add(-trimLimit - mtimeInterval)
	for i := 0; i < 256; i++ {
		subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
		c.trimSubdir(subdir, cutoff)
	}

	// Ignore errors from here: if we don't write the complete timestamp, the
	// cache will appear older than it is, and we'll trim it again next time.
	var b bytes.Buffer
	fmt.Fprintf(&b, "%d", now.Unix())
	if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil {
		return err
	}

	return nil
}

// trimSubdir trims a single cache subdirectory.
func (c *DiskCache) trimSubdir(subdir string, cutoff time.Time) {
	// Read all directory entries from subdir before removing
	// any files, in case removing files invalidates the file offset
	// in the directory scan. Also, ignore the error from f.Readdirnames,
	// because we don't care about reporting it and we still
	// want to process any entries found before the error occurred.
	f, err := os.Open(subdir)
	if err != nil {
		return
	}
	names, _ := f.Readdirnames(-1)
	f.Close()

	for _, name := range names {
		// Remove only cache entries (xxxx-a and xxxx-d).
		if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
			continue
		}
		entry := filepath.Join(subdir, name)
		info, err := os.Stat(entry)
		if err == nil && info.ModTime().Before(cutoff) {
			os.Remove(entry)
		}
	}
}
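// Editorial note (not part of the upstream file): the on-disk layout that the
// functions above and below assume, shown with a hypothetical cache directory
// and truncated IDs:
//
//	<cachedir>/
//		1d/
//			1d2f<...>-a	action index entry ("v1 <id> <out> <size> <time>\n"), written by putIndexEntry
//			1d9c<...>-d	output contents, named by OutputID, written by copyFile
//		trim.txt	Unix time of the last completed Trim
//
// The two-hex-digit subdirectory is the first byte of the ID in question (see
// fileName), so an action entry and the output it refers to usually live in
// different subdirectories.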
// putIndexEntry adds an entry to the cache recording that executing the action
// with the given id produces an output with the given output id (hash) and size.
func (c *DiskCache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
	// Note: We expect that for one reason or another it may happen
	// that repeating an action produces a different output hash
	// (for example, if the output contains a time stamp or temp dir name).
	// While not ideal, this is also not a correctness problem, so we
	// don't make a big deal about it. In particular, we leave the action
	// cache entries writable specifically so that they can be overwritten.
	//
	// Setting GODEBUG=gocacheverify=1 does make a big deal:
	// in verify mode we are double-checking that the cache entries
	// are entirely reproducible. As just noted, this may be unrealistic
	// in some cases but the check is also useful for shaking out real bugs.
	entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
	if verify && allowVerify {
		old, err := c.get(id)
		if err == nil && (old.OutputID != out || old.Size != size) {
			// Panic to show a stack trace, so we can see what code is generating this cache entry.
			msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), old.OutputID, old.Size, out, size)
			panic(msg)
		}
	}
	file := c.fileName(id, "a")

	// Copy file to cache directory.
	mode := os.O_WRONLY | os.O_CREATE
	f, err := os.OpenFile(file, mode, 0666)
	if err != nil {
		return err
	}
	_, err = f.WriteString(entry)
	if err == nil {
		// Truncate the file only *after* writing it.
		// (This should be a no-op, but truncate just in case of previous corruption.)
		//
		// This differs from os.WriteFile, which truncates to 0 *before* writing
		// via os.O_TRUNC. Truncating only after writing ensures that a second write
		// of the same content to the same file is idempotent, and does not, even
		// temporarily, undo the effect of the first write.
		err = f.Truncate(int64(len(entry)))
	}
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		// TODO(bcmills): This Remove potentially races with another go command writing to file.
		// Can we eliminate it?
		os.Remove(file)
		return err
	}
	os.Chtimes(file, c.now(), c.now()) // mainly for tests

	return nil
}

// noVerifyReadSeeker is an io.ReadSeeker wrapper sentinel type
// that says that Cache.Put should skip the verify check
// (from GODEBUG=gocacheverify=1).
type noVerifyReadSeeker struct {
	io.ReadSeeker
}

// Put stores the given output in the cache as the output for the action ID.
// It may read file twice. The content of file must not change between the two passes.
func (c *DiskCache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	wrapper, isNoVerify := file.(noVerifyReadSeeker)
	if isNoVerify {
		file = wrapper.ReadSeeker
	}
	return c.put(id, file, !isNoVerify)
}

// PutNoVerify is like Put but disables the verify check
// when GODEBUG=gocacheverify=1 is set.
// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
// like test output containing times and the like.
func PutNoVerify(c Cache, id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	return c.Put(id, noVerifyReadSeeker{file})
}

func (c *DiskCache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
	// Compute output ID.
	h := sha256.New()
	if _, err := file.Seek(0, 0); err != nil {
		return OutputID{}, 0, err
	}
	size, err := io.Copy(h, file)
	if err != nil {
		return OutputID{}, 0, err
	}
	var out OutputID
	h.Sum(out[:0])

	// Copy to cached output file (if not already present).
	if err := c.copyFile(file, out, size); err != nil {
		return out, size, err
	}

	// Add to cache index.
	return out, size, c.putIndexEntry(id, out, size, allowVerify)
}

// PutBytes stores the given bytes in the cache as the output for the action ID.
func PutBytes(c Cache, id ActionID, data []byte) error {
	_, _, err := c.Put(id, bytes.NewReader(data))
	return err
}
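// examplePutTestOutput is an illustrative sketch added in editing, not
// upstream code. It shows where PutNoVerify fits: output such as a test log
// may embed timestamps, so storing it with Put would trip
// GODEBUG=gocacheverify=1 even though caching it is fine.
func examplePutTestOutput(c Cache, id ActionID, testLog []byte) (OutputID, int64, error) {
	return PutNoVerify(c, id, bytes.NewReader(testLog))
}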
// copyFile copies file into the cache, expecting it to have the given
// output ID and size, if that file is not present already.
func (c *DiskCache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
	name := c.fileName(out, "d")
	info, err := os.Stat(name)
	if err == nil && info.Size() == size {
		// Check hash.
		if f, err := os.Open(name); err == nil {
			h := sha256.New()
			io.Copy(h, f)
			f.Close()
			var out2 OutputID
			h.Sum(out2[:0])
			if out == out2 {
				return nil
			}
		}
		// Hash did not match. Fall through and rewrite file.
	}

	// Copy file to cache directory.
	mode := os.O_RDWR | os.O_CREATE
	if err == nil && info.Size() > size { // shouldn't happen but fix in case
		mode |= os.O_TRUNC
	}
	f, err := os.OpenFile(name, mode, 0666)
	if err != nil {
		return err
	}
	defer f.Close()
	if size == 0 {
		// File now exists with correct size.
		// Only one possible zero-length file, so contents are OK too.
		// Early return here makes sure there's a "last byte" for code below.
		return nil
	}

	// From here on, if any of the I/O writing the file fails,
	// we make a best-effort attempt to truncate the file f
	// before returning, to avoid leaving bad bytes in the file.

	// Copy file to f, but also into h to double-check hash.
	if _, err := file.Seek(0, 0); err != nil {
		f.Truncate(0)
		return err
	}
	h := sha256.New()
	w := io.MultiWriter(f, h)
	if _, err := io.CopyN(w, file, size-1); err != nil {
		f.Truncate(0)
		return err
	}
	// Check last byte before writing it; writing it will make the size match
	// what other processes expect to find and might cause them to start
	// using the file.
	buf := make([]byte, 1)
	if _, err := file.Read(buf); err != nil {
		f.Truncate(0)
		return err
	}
	h.Write(buf)
	sum := h.Sum(nil)
	if !bytes.Equal(sum, out[:]) {
		f.Truncate(0)
		return fmt.Errorf("file content changed underfoot")
	}

	// Commit cache file entry.
	if _, err := f.Write(buf); err != nil {
		f.Truncate(0)
		return err
	}
	if err := f.Close(); err != nil {
		// Data might not have been written,
		// but file may look like it is the right size.
		// To be extra careful, remove cached file.
		os.Remove(name)
		return err
	}
	os.Chtimes(name, c.now(), c.now()) // mainly for tests

	return nil
}

// FuzzDir returns a subdirectory within the cache for storing fuzzing data.
// The subdirectory may not exist.
//
// This directory is managed by the github.com/go-asm/go/fuzz package. Files in this
// directory aren't removed by the 'go clean -cache' command or by Trim.
// They may be removed with 'go clean -fuzzcache'.
//
// TODO(#48526): make Trim remove unused files from this directory.
func (c *DiskCache) FuzzDir() string {
	return filepath.Join(c.dir, "fuzz")
}
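// exampleDiskCacheSession is an editorial sketch appended to this listing; it
// is not part of the upstream package. It ties the pieces together for the
// concrete DiskCache implementation: open an existing directory, store an
// artifact from an open file, and let the deferred Close run the periodic
// Trim. The directory, file, and ActionID are supplied by the hypothetical
// caller; Close's error is ignored here for brevity.
func exampleDiskCacheSession(dir string, id ActionID, artifact *os.File) error {
	c, err := Open(dir) // dir must already exist; Open creates the 256 subdirectories
	if err != nil {
		return err
	}
	defer c.Close() // at most once a day, trims entries unused for roughly 5 days

	// Put reads artifact twice (once to hash, once to copy), so it must be
	// seekable and must not change between the two passes.
	if _, _, err := c.Put(id, artifact); err != nil {
		return err
	}
	return nil
}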