github.com/bir3/gocompiler@v0.3.205/src/cmd/gocmd/internal/cache/cache.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package cache implements a build artifact cache. 6 package cache 7 8 import ( 9 "bytes" 10 "crypto/sha256" 11 "encoding/hex" 12 "errors" 13 "fmt" 14 "io" 15 "io/fs" 16 "os" 17 "path/filepath" 18 "strconv" 19 "strings" 20 "time" 21 22 "github.com/bir3/gocompiler/src/cmd/gocmd/internal/lockedfile" 23 "github.com/bir3/gocompiler/src/cmd/gocmd/internal/mmap" 24 ) 25 26 // An ActionID is a cache action key, the hash of a complete description of a 27 // repeatable computation (command line, environment variables, 28 // input file contents, executable contents). 29 type ActionID [HashSize]byte 30 31 // An OutputID is a cache output key, the hash of an output of a computation. 32 type OutputID [HashSize]byte 33 34 // A Cache is a package cache, backed by a file system directory tree. 35 type Cache struct { 36 dir string 37 now func() time.Time 38 } 39 40 // Open opens and returns the cache in the given directory. 41 // 42 // It is safe for multiple processes on a single machine to use the 43 // same cache directory in a local file system simultaneously. 44 // They will coordinate using operating system file locks and may 45 // duplicate effort but will not corrupt the cache. 46 // 47 // However, it is NOT safe for multiple processes on different machines 48 // to share a cache directory (for example, if the directory were stored 49 // in a network file system). File locking is notoriously unreliable in 50 // network file systems and may not suffice to protect the cache. 51 func Open(dir string) (*Cache, error) { 52 info, err := os.Stat(dir) 53 if err != nil { 54 return nil, err 55 } 56 if !info.IsDir() { 57 return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 58 } 59 for i := 0; i < 256; i++ { 60 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 61 if err := os.MkdirAll(name, 0777); err != nil { 62 return nil, err 63 } 64 } 65 c := &Cache{ 66 dir: dir, 67 now: time.Now, 68 } 69 return c, nil 70 } 71 72 // fileName returns the name of the file corresponding to the given id. 73 func (c *Cache) fileName(id [HashSize]byte, key string) string { 74 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 75 } 76 77 // An entryNotFoundError indicates that a cache entry was not found, with an 78 // optional underlying reason. 79 type entryNotFoundError struct { 80 Err error 81 } 82 83 func (e *entryNotFoundError) Error() string { 84 if e.Err == nil { 85 return "cache entry not found" 86 } 87 return fmt.Sprintf("cache entry not found: %v", e.Err) 88 } 89 90 func (e *entryNotFoundError) Unwrap() error { 91 return e.Err 92 } 93 94 const ( 95 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 96 hexSize = HashSize * 2 97 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 98 ) 99 100 // verify controls whether to run the cache in verify mode. 101 // In verify mode, the cache always returns errMissing from Get 102 // but then double-checks in Put that the data being written 103 // exactly matches any existing entry. This provides an easy 104 // way to detect program behavior that would have been different 105 // had the cache entry been returned from Get. 106 // 107 // verify is enabled by setting the environment variable 108 // GODEBUG=gocacheverify=1. 109 var verify = false 110 111 var errVerifyMode = errors.New("gocacheverify=1") 112 113 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 114 var DebugTest = false 115 116 func init() { initEnv() } 117 118 func initEnv() { 119 verify = false 120 debugHash = false 121 debug := strings.Split(os.Getenv("GODEBUG"), ",") 122 for _, f := range debug { 123 if f == "gocacheverify=1" { 124 verify = true 125 } 126 if f == "gocachehash=1" { 127 debugHash = true 128 } 129 if f == "gocachetest=1" { 130 DebugTest = true 131 } 132 } 133 } 134 135 // Get looks up the action ID in the cache, 136 // returning the corresponding output ID and file size, if any. 137 // Note that finding an output ID does not guarantee that the 138 // saved file for that output ID is still available. 139 func (c *Cache) Get(id ActionID) (Entry, error) { 140 if verify { 141 return Entry{}, &entryNotFoundError{Err: errVerifyMode} 142 } 143 return c.get(id) 144 } 145 146 type Entry struct { 147 OutputID OutputID 148 Size int64 149 Time time.Time 150 } 151 152 // get is Get but does not respect verify mode, so that Put can use it. 153 func (c *Cache) get(id ActionID) (Entry, error) { 154 missing := func(reason error) (Entry, error) { 155 return Entry{}, &entryNotFoundError{Err: reason} 156 } 157 f, err := os.Open(c.fileName(id, "a")) 158 if err != nil { 159 return missing(err) 160 } 161 defer f.Close() 162 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 163 if n, err := io.ReadFull(f, entry); n > entrySize { 164 return missing(errors.New("too long")) 165 } else if err != io.ErrUnexpectedEOF { 166 if err == io.EOF { 167 return missing(errors.New("file is empty")) 168 } 169 return missing(err) 170 } else if n < entrySize { 171 return missing(errors.New("entry file incomplete")) 172 } 173 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 174 return missing(errors.New("invalid header")) 175 } 176 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 177 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 178 esize, entry := entry[1:1+20], entry[1+20:] 179 etime, entry := entry[1:1+20], entry[1+20:] 180 var buf [HashSize]byte 181 if _, err := hex.Decode(buf[:], eid); err != nil { 182 return missing(fmt.Errorf("decoding ID: %v", err)) 183 } else if buf != id { 184 return missing(errors.New("mismatched ID")) 185 } 186 if _, err := hex.Decode(buf[:], eout); err != nil { 187 return missing(fmt.Errorf("decoding output ID: %v", err)) 188 } 189 i := 0 190 for i < len(esize) && esize[i] == ' ' { 191 i++ 192 } 193 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 194 if err != nil { 195 return missing(fmt.Errorf("parsing size: %v", err)) 196 } else if size < 0 { 197 return missing(errors.New("negative size")) 198 } 199 i = 0 200 for i < len(etime) && etime[i] == ' ' { 201 i++ 202 } 203 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 204 if err != nil { 205 return missing(fmt.Errorf("parsing timestamp: %v", err)) 206 } else if tm < 0 { 207 return missing(errors.New("negative timestamp")) 208 } 209 210 c.used(c.fileName(id, "a")) 211 212 return Entry{buf, size, time.Unix(0, tm)}, nil 213 } 214 215 // GetFile looks up the action ID in the cache and returns 216 // the name of the corresponding data file. 217 func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) { 218 entry, err = c.Get(id) 219 if err != nil { 220 return "", Entry{}, err 221 } 222 file = c.OutputFile(entry.OutputID) 223 info, err := os.Stat(file) 224 if err != nil { 225 return "", Entry{}, &entryNotFoundError{Err: err} 226 } 227 if info.Size() != entry.Size { 228 return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} 229 } 230 return file, entry, nil 231 } 232 233 // GetBytes looks up the action ID in the cache and returns 234 // the corresponding output bytes. 235 // GetBytes should only be used for data that can be expected to fit in memory. 236 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 237 entry, err := c.Get(id) 238 if err != nil { 239 return nil, entry, err 240 } 241 data, _ := os.ReadFile(c.OutputFile(entry.OutputID)) 242 if sha256.Sum256(data) != entry.OutputID { 243 return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")} 244 } 245 return data, entry, nil 246 } 247 248 // GetMmap looks up the action ID in the cache and returns 249 // the corresponding output bytes. 250 // GetMmap should only be used for data that can be expected to fit in memory. 251 func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) { 252 entry, err := c.Get(id) 253 if err != nil { 254 return nil, entry, err 255 } 256 md, err := mmap.Mmap(c.OutputFile(entry.OutputID)) 257 if err != nil { 258 return nil, Entry{}, err 259 } 260 if int64(len(md.Data)) != entry.Size { 261 return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} 262 } 263 return md.Data, entry, nil 264 } 265 266 // OutputFile returns the name of the cache file storing output with the given OutputID. 267 func (c *Cache) OutputFile(out OutputID) string { 268 file := c.fileName(out, "d") 269 c.used(file) 270 return file 271 } 272 273 // Time constants for cache expiration. 274 // 275 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 276 // to avoid causing many unnecessary inode updates. The mtimes therefore 277 // roughly reflect "time of last use" but may in fact be older by at most an hour. 278 // 279 // We scan the cache for entries to delete at most once per trimInterval (1 day). 280 // 281 // When we do scan the cache, we delete entries that have not been used for 282 // at least trimLimit (5 days). Statistics gathered from a month of usage by 283 // Go developers found that essentially all reuse of cached entries happened 284 // within 5 days of the previous reuse. See golang.org/issue/22990. 285 const ( 286 mtimeInterval = 1 * time.Hour 287 trimInterval = 24 * time.Hour 288 trimLimit = 5 * 24 * time.Hour 289 ) 290 291 // used makes a best-effort attempt to update mtime on file, 292 // so that mtime reflects cache access time. 293 // 294 // Because the reflection only needs to be approximate, 295 // and to reduce the amount of disk activity caused by using 296 // cache entries, used only updates the mtime if the current 297 // mtime is more than an hour old. This heuristic eliminates 298 // nearly all of the mtime updates that would otherwise happen, 299 // while still keeping the mtimes useful for cache trimming. 300 func (c *Cache) used(file string) { 301 info, err := os.Stat(file) 302 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval { 303 return 304 } 305 os.Chtimes(file, c.now(), c.now()) 306 } 307 308 // Trim removes old cache entries that are likely not to be reused. 309 func (c *Cache) Trim() { 310 now := c.now() 311 312 // We maintain in dir/trim.txt the time of the last completed cache trim. 313 // If the cache has been trimmed recently enough, do nothing. 314 // This is the common case. 315 // If the trim file is corrupt, detected if the file can't be parsed, or the 316 // trim time is too far in the future, attempt the trim anyway. It's possible that 317 // the cache was full when the corruption happened. Attempting a trim on 318 // an empty cache is cheap, so there wouldn't be a big performance hit in that case. 319 if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil { 320 if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil { 321 lastTrim := time.Unix(t, 0) 322 if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval { 323 return 324 } 325 } 326 } 327 328 // Trim each of the 256 subdirectories. 329 // We subtract an additional mtimeInterval 330 // to account for the imprecision of our "last used" mtimes. 331 cutoff := now.Add(-trimLimit - mtimeInterval) 332 for i := 0; i < 256; i++ { 333 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 334 c.trimSubdir(subdir, cutoff) 335 } 336 337 // Ignore errors from here: if we don't write the complete timestamp, the 338 // cache will appear older than it is, and we'll trim it again next time. 339 var b bytes.Buffer 340 fmt.Fprintf(&b, "%d", now.Unix()) 341 if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil { 342 return 343 } 344 } 345 346 // trimSubdir trims a single cache subdirectory. 347 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 348 // Read all directory entries from subdir before removing 349 // any files, in case removing files invalidates the file offset 350 // in the directory scan. Also, ignore error from f.Readdirnames, 351 // because we don't care about reporting the error and we still 352 // want to process any entries found before the error. 353 f, err := os.Open(subdir) 354 if err != nil { 355 return 356 } 357 names, _ := f.Readdirnames(-1) 358 f.Close() 359 360 for _, name := range names { 361 // Remove only cache entries (xxxx-a and xxxx-d). 362 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 363 continue 364 } 365 entry := filepath.Join(subdir, name) 366 info, err := os.Stat(entry) 367 if err == nil && info.ModTime().Before(cutoff) { 368 os.Remove(entry) 369 } 370 } 371 } 372 373 // putIndexEntry adds an entry to the cache recording that executing the action 374 // with the given id produces an output with the given output id (hash) and size. 375 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 376 // Note: We expect that for one reason or another it may happen 377 // that repeating an action produces a different output hash 378 // (for example, if the output contains a time stamp or temp dir name). 379 // While not ideal, this is also not a correctness problem, so we 380 // don't make a big deal about it. In particular, we leave the action 381 // cache entries writable specifically so that they can be overwritten. 382 // 383 // Setting GODEBUG=gocacheverify=1 does make a big deal: 384 // in verify mode we are double-checking that the cache entries 385 // are entirely reproducible. As just noted, this may be unrealistic 386 // in some cases but the check is also useful for shaking out real bugs. 387 entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) 388 if verify && allowVerify { 389 old, err := c.get(id) 390 if err == nil && (old.OutputID != out || old.Size != size) { 391 // panic to show stack trace, so we can see what code is generating this cache entry. 392 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 393 panic(msg) 394 } 395 } 396 file := c.fileName(id, "a") 397 398 // Copy file to cache directory. 399 mode := os.O_WRONLY | os.O_CREATE 400 f, err := os.OpenFile(file, mode, 0666) 401 if err != nil { 402 return err 403 } 404 _, err = f.WriteString(entry) 405 if err == nil { 406 // Truncate the file only *after* writing it. 407 // (This should be a no-op, but truncate just in case of previous corruption.) 408 // 409 // This differs from os.WriteFile, which truncates to 0 *before* writing 410 // via os.O_TRUNC. Truncating only after writing ensures that a second write 411 // of the same content to the same file is idempotent, and does not — even 412 // temporarily! — undo the effect of the first write. 413 err = f.Truncate(int64(len(entry))) 414 } 415 if closeErr := f.Close(); err == nil { 416 err = closeErr 417 } 418 if err != nil { 419 // TODO(bcmills): This Remove potentially races with another go command writing to file. 420 // Can we eliminate it? 421 os.Remove(file) 422 return err 423 } 424 os.Chtimes(file, c.now(), c.now()) // mainly for tests 425 426 return nil 427 } 428 429 // Put stores the given output in the cache as the output for the action ID. 430 // It may read file twice. The content of file must not change between the two passes. 431 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 432 return c.put(id, file, true) 433 } 434 435 // PutNoVerify is like Put but disables the verify check 436 // when GODEBUG=goverifycache=1 is set. 437 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 438 // like test output containing times and the like. 439 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 440 return c.put(id, file, false) 441 } 442 443 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 444 // Compute output ID. 445 h := sha256.New() 446 if _, err := file.Seek(0, 0); err != nil { 447 return OutputID{}, 0, err 448 } 449 size, err := io.Copy(h, file) 450 if err != nil { 451 return OutputID{}, 0, err 452 } 453 var out OutputID 454 h.Sum(out[:0]) 455 456 // Copy to cached output file (if not already present). 457 if err := c.copyFile(file, out, size); err != nil { 458 return out, size, err 459 } 460 461 // Add to cache index. 462 return out, size, c.putIndexEntry(id, out, size, allowVerify) 463 } 464 465 // PutBytes stores the given bytes in the cache as the output for the action ID. 466 func (c *Cache) PutBytes(id ActionID, data []byte) error { 467 _, _, err := c.Put(id, bytes.NewReader(data)) 468 return err 469 } 470 471 // copyFile copies file into the cache, expecting it to have the given 472 // output ID and size, if that file is not present already. 473 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 474 name := c.fileName(out, "d") 475 info, err := os.Stat(name) 476 if err == nil && info.Size() == size { 477 // Check hash. 478 if f, err := os.Open(name); err == nil { 479 h := sha256.New() 480 io.Copy(h, f) 481 f.Close() 482 var out2 OutputID 483 h.Sum(out2[:0]) 484 if out == out2 { 485 return nil 486 } 487 } 488 // Hash did not match. Fall through and rewrite file. 489 } 490 491 // Copy file to cache directory. 492 mode := os.O_RDWR | os.O_CREATE 493 if err == nil && info.Size() > size { // shouldn't happen but fix in case 494 mode |= os.O_TRUNC 495 } 496 f, err := os.OpenFile(name, mode, 0666) 497 if err != nil { 498 return err 499 } 500 defer f.Close() 501 if size == 0 { 502 // File now exists with correct size. 503 // Only one possible zero-length file, so contents are OK too. 504 // Early return here makes sure there's a "last byte" for code below. 505 return nil 506 } 507 508 // From here on, if any of the I/O writing the file fails, 509 // we make a best-effort attempt to truncate the file f 510 // before returning, to avoid leaving bad bytes in the file. 511 512 // Copy file to f, but also into h to double-check hash. 513 if _, err := file.Seek(0, 0); err != nil { 514 f.Truncate(0) 515 return err 516 } 517 h := sha256.New() 518 w := io.MultiWriter(f, h) 519 if _, err := io.CopyN(w, file, size-1); err != nil { 520 f.Truncate(0) 521 return err 522 } 523 // Check last byte before writing it; writing it will make the size match 524 // what other processes expect to find and might cause them to start 525 // using the file. 526 buf := make([]byte, 1) 527 if _, err := file.Read(buf); err != nil { 528 f.Truncate(0) 529 return err 530 } 531 h.Write(buf) 532 sum := h.Sum(nil) 533 if !bytes.Equal(sum, out[:]) { 534 f.Truncate(0) 535 return fmt.Errorf("file content changed underfoot") 536 } 537 538 // Commit cache file entry. 539 if _, err := f.Write(buf); err != nil { 540 f.Truncate(0) 541 return err 542 } 543 if err := f.Close(); err != nil { 544 // Data might not have been written, 545 // but file may look like it is the right size. 546 // To be extra careful, remove cached file. 547 os.Remove(name) 548 return err 549 } 550 os.Chtimes(name, c.now(), c.now()) // mainly for tests 551 552 return nil 553 } 554 555 // FuzzDir returns a subdirectory within the cache for storing fuzzing data. 556 // The subdirectory may not exist. 557 // 558 // This directory is managed by the internal/fuzz package. Files in this 559 // directory aren't removed by the 'go clean -cache' command or by Trim. 560 // They may be removed with 'go clean -fuzzcache'. 561 // 562 // TODO(#48526): make Trim remove unused files from this directory. 563 func (c *Cache) FuzzDir() string { 564 return filepath.Join(c.dir, "fuzz") 565 }