github.com/amarpal/go-tools@v0.0.0-20240422043104-40142f59f616/lintcmd/cache/cache.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package cache implements a build artifact cache. 6 // 7 // This package is a slightly modified fork of Go's 8 // cmd/go/internal/cache package. 9 package cache 10 11 import ( 12 "bytes" 13 "crypto/sha256" 14 "encoding/hex" 15 "errors" 16 "fmt" 17 "io" 18 "os" 19 "path/filepath" 20 "strconv" 21 "strings" 22 "time" 23 24 "github.com/amarpal/go-tools/internal/renameio" 25 ) 26 27 // An ActionID is a cache action key, the hash of a complete description of a 28 // repeatable computation (command line, environment variables, 29 // input file contents, executable contents). 30 type ActionID [HashSize]byte 31 32 // An OutputID is a cache output key, the hash of an output of a computation. 33 type OutputID [HashSize]byte 34 35 // A Cache is a package cache, backed by a file system directory tree. 36 type Cache struct { 37 dir string 38 now func() time.Time 39 salt []byte 40 } 41 42 // Open opens and returns the cache in the given directory. 43 // 44 // It is safe for multiple processes on a single machine to use the 45 // same cache directory in a local file system simultaneously. 46 // They will coordinate using operating system file locks and may 47 // duplicate effort but will not corrupt the cache. 48 // 49 // However, it is NOT safe for multiple processes on different machines 50 // to share a cache directory (for example, if the directory were stored 51 // in a network file system). File locking is notoriously unreliable in 52 // network file systems and may not suffice to protect the cache. 53 func Open(dir string) (*Cache, error) { 54 info, err := os.Stat(dir) 55 if err != nil { 56 return nil, err 57 } 58 if !info.IsDir() { 59 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 60 } 61 for i := 0; i < 256; i++ { 62 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 63 if err := os.MkdirAll(name, 0777); err != nil { 64 return nil, err 65 } 66 } 67 c := &Cache{ 68 dir: dir, 69 now: time.Now, 70 } 71 return c, nil 72 } 73 74 func (c *Cache) SetSalt(b []byte) { 75 c.salt = b 76 } 77 78 // fileName returns the name of the file corresponding to the given id. 79 func (c *Cache) fileName(id [HashSize]byte, key string) string { 80 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 81 } 82 83 // An entryNotFoundError indicates that a cache entry was not found, with an 84 // optional underlying reason. 85 type entryNotFoundError struct { 86 Err error 87 } 88 89 func (e *entryNotFoundError) Error() string { 90 if e.Err == nil { 91 return "cache entry not found" 92 } 93 return fmt.Sprintf("cache entry not found: %v", e.Err) 94 } 95 96 func (e *entryNotFoundError) Unwrap() error { 97 return e.Err 98 } 99 100 const ( 101 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 102 hexSize = HashSize * 2 103 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 104 ) 105 106 // verify controls whether to run the cache in verify mode. 107 // In verify mode, the cache always returns errMissing from Get 108 // but then double-checks in Put that the data being written 109 // exactly matches any existing entry. This provides an easy 110 // way to detect program behavior that would have been different 111 // had the cache entry been returned from Get. 112 // 113 // verify is enabled by setting the environment variable 114 // GODEBUG=gocacheverify=1. 115 var verify = false 116 117 var errVerifyMode = errors.New("gocacheverify=1") 118 119 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 120 var DebugTest = false 121 122 func init() { initEnv() } 123 124 func initEnv() { 125 verify = false 126 debugHash = false 127 debug := strings.Split(os.Getenv("GODEBUG"), ",") 128 for _, f := range debug { 129 if f == "gocacheverify=1" { 130 verify = true 131 } 132 if f == "gocachehash=1" { 133 debugHash = true 134 } 135 if f == "gocachetest=1" { 136 DebugTest = true 137 } 138 } 139 } 140 141 // Get looks up the action ID in the cache, 142 // returning the corresponding output ID and file size, if any. 143 // Note that finding an output ID does not guarantee that the 144 // saved file for that output ID is still available. 145 func (c *Cache) Get(id ActionID) (Entry, error) { 146 if verify { 147 return Entry{}, &entryNotFoundError{Err: errVerifyMode} 148 } 149 return c.get(id) 150 } 151 152 type Entry struct { 153 OutputID OutputID 154 Size int64 155 Time time.Time 156 } 157 158 // get is Get but does not respect verify mode, so that Put can use it. 159 func (c *Cache) get(id ActionID) (Entry, error) { 160 missing := func(reason error) (Entry, error) { 161 return Entry{}, &entryNotFoundError{Err: reason} 162 } 163 f, err := os.Open(c.fileName(id, "a")) 164 if err != nil { 165 return missing(err) 166 } 167 defer f.Close() 168 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 169 if n, err := io.ReadFull(f, entry); n > entrySize { 170 return missing(errors.New("too long")) 171 } else if err != io.ErrUnexpectedEOF { 172 if err == io.EOF { 173 return missing(errors.New("file is empty")) 174 } 175 return missing(err) 176 } else if n < entrySize { 177 return missing(errors.New("entry file incomplete")) 178 } 179 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 180 return missing(errors.New("invalid header")) 181 } 182 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 183 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 184 esize, entry := entry[1:1+20], entry[1+20:] 185 //lint:ignore SA4006 See https://github.com/dominikh/go-tools/issues/465 186 etime, entry := entry[1:1+20], entry[1+20:] 187 var buf [HashSize]byte 188 if _, err := hex.Decode(buf[:], eid); err != nil { 189 return missing(fmt.Errorf("decoding ID: %v", err)) 190 } else if buf != id { 191 return missing(errors.New("mismatched ID")) 192 } 193 if _, err := hex.Decode(buf[:], eout); err != nil { 194 return missing(fmt.Errorf("decoding output ID: %v", err)) 195 } 196 i := 0 197 for i < len(esize) && esize[i] == ' ' { 198 i++ 199 } 200 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 201 if err != nil { 202 return missing(fmt.Errorf("parsing size: %v", err)) 203 } else if size < 0 { 204 return missing(errors.New("negative size")) 205 } 206 i = 0 207 for i < len(etime) && etime[i] == ' ' { 208 i++ 209 } 210 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 211 if err != nil { 212 return missing(fmt.Errorf("parsing timestamp: %v", err)) 213 } else if tm < 0 { 214 return missing(errors.New("negative timestamp")) 215 } 216 217 c.used(c.fileName(id, "a")) 218 219 return Entry{buf, size, time.Unix(0, tm)}, nil 220 } 221 222 // GetFile looks up the action ID in the cache and returns 223 // the name of the corresponding data file. 224 func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) { 225 entry, err = c.Get(id) 226 if err != nil { 227 return "", Entry{}, err 228 } 229 file = c.OutputFile(entry.OutputID) 230 info, err := os.Stat(file) 231 if err != nil { 232 return "", Entry{}, &entryNotFoundError{Err: err} 233 } 234 if info.Size() != entry.Size { 235 return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} 236 } 237 return file, entry, nil 238 } 239 240 // GetBytes looks up the action ID in the cache and returns 241 // the corresponding output bytes. 242 // GetBytes should only be used for data that can be expected to fit in memory. 243 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 244 entry, err := c.Get(id) 245 if err != nil { 246 return nil, entry, err 247 } 248 data, _ := os.ReadFile(c.OutputFile(entry.OutputID)) 249 if sha256.Sum256(data) != entry.OutputID { 250 return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")} 251 } 252 return data, entry, nil 253 } 254 255 // OutputFile returns the name of the cache file storing output with the given OutputID. 256 func (c *Cache) OutputFile(out OutputID) string { 257 file := c.fileName(out, "d") 258 c.used(file) 259 return file 260 } 261 262 // Time constants for cache expiration. 263 // 264 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 265 // to avoid causing many unnecessary inode updates. The mtimes therefore 266 // roughly reflect "time of last use" but may in fact be older by at most an hour. 267 // 268 // We scan the cache for entries to delete at most once per trimInterval (1 day). 269 // 270 // When we do scan the cache, we delete entries that have not been used for 271 // at least trimLimit (5 days). Statistics gathered from a month of usage by 272 // Go developers found that essentially all reuse of cached entries happened 273 // within 5 days of the previous reuse. See golang.org/issue/22990. 274 const ( 275 mtimeInterval = 1 * time.Hour 276 trimInterval = 24 * time.Hour 277 trimLimit = 5 * 24 * time.Hour 278 ) 279 280 // used makes a best-effort attempt to update mtime on file, 281 // so that mtime reflects cache access time. 282 // 283 // Because the reflection only needs to be approximate, 284 // and to reduce the amount of disk activity caused by using 285 // cache entries, used only updates the mtime if the current 286 // mtime is more than an hour old. This heuristic eliminates 287 // nearly all of the mtime updates that would otherwise happen, 288 // while still keeping the mtimes useful for cache trimming. 289 func (c *Cache) used(file string) { 290 info, err := os.Stat(file) 291 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval { 292 return 293 } 294 os.Chtimes(file, c.now(), c.now()) 295 } 296 297 // Trim removes old cache entries that are likely not to be reused. 298 func (c *Cache) Trim() { 299 now := c.now() 300 301 // We maintain in dir/trim.txt the time of the last completed cache trim. 302 // If the cache has been trimmed recently enough, do nothing. 303 // This is the common case. 304 data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt")) 305 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) 306 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { 307 return 308 } 309 310 // Trim each of the 256 subdirectories. 311 // We subtract an additional mtimeInterval 312 // to account for the imprecision of our "last used" mtimes. 313 cutoff := now.Add(-trimLimit - mtimeInterval) 314 for i := 0; i < 256; i++ { 315 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 316 c.trimSubdir(subdir, cutoff) 317 } 318 319 // Ignore errors from here: if we don't write the complete timestamp, the 320 // cache will appear older than it is, and we'll trim it again next time. 321 renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) 322 } 323 324 // trimSubdir trims a single cache subdirectory. 325 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 326 // Read all directory entries from subdir before removing 327 // any files, in case removing files invalidates the file offset 328 // in the directory scan. Also, ignore error from f.Readdirnames, 329 // because we don't care about reporting the error and we still 330 // want to process any entries found before the error. 331 f, err := os.Open(subdir) 332 if err != nil { 333 return 334 } 335 names, _ := f.Readdirnames(-1) 336 f.Close() 337 338 for _, name := range names { 339 // Remove only cache entries (xxxx-a and xxxx-d). 340 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 341 continue 342 } 343 entry := filepath.Join(subdir, name) 344 info, err := os.Stat(entry) 345 if err == nil && info.ModTime().Before(cutoff) { 346 os.Remove(entry) 347 } 348 } 349 } 350 351 // putIndexEntry adds an entry to the cache recording that executing the action 352 // with the given id produces an output with the given output id (hash) and size. 353 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 354 // Note: We expect that for one reason or another it may happen 355 // that repeating an action produces a different output hash 356 // (for example, if the output contains a time stamp or temp dir name). 357 // While not ideal, this is also not a correctness problem, so we 358 // don't make a big deal about it. In particular, we leave the action 359 // cache entries writable specifically so that they can be overwritten. 360 // 361 // Setting GODEBUG=gocacheverify=1 does make a big deal: 362 // in verify mode we are double-checking that the cache entries 363 // are entirely reproducible. As just noted, this may be unrealistic 364 // in some cases but the check is also useful for shaking out real bugs. 365 entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) 366 if verify && allowVerify { 367 old, err := c.get(id) 368 if err == nil && (old.OutputID != out || old.Size != size) { 369 // panic to show stack trace, so we can see what code is generating this cache entry. 370 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 371 panic(msg) 372 } 373 } 374 file := c.fileName(id, "a") 375 376 // Copy file to cache directory. 377 mode := os.O_WRONLY | os.O_CREATE 378 f, err := os.OpenFile(file, mode, 0666) 379 if err != nil { 380 return err 381 } 382 _, err = f.WriteString(entry) 383 if err == nil { 384 // Truncate the file only *after* writing it. 385 // (This should be a no-op, but truncate just in case of previous corruption.) 386 // 387 // This differs from ioutil.WriteFile, which truncates to 0 *before* writing 388 // via os.O_TRUNC. Truncating only after writing ensures that a second write 389 // of the same content to the same file is idempotent, and does not — even 390 // temporarily! — undo the effect of the first write. 391 err = f.Truncate(int64(len(entry))) 392 } 393 if closeErr := f.Close(); err == nil { 394 err = closeErr 395 } 396 if err != nil { 397 // TODO(bcmills): This Remove potentially races with another go command writing to file. 398 // Can we eliminate it? 399 os.Remove(file) 400 return err 401 } 402 os.Chtimes(file, c.now(), c.now()) // mainly for tests 403 404 return nil 405 } 406 407 // Put stores the given output in the cache as the output for the action ID. 408 // It may read file twice. The content of file must not change between the two passes. 409 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 410 return c.put(id, file, true) 411 } 412 413 // PutNoVerify is like Put but disables the verify check 414 // when GODEBUG=goverifycache=1 is set. 415 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 416 // like test output containing times and the like. 417 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 418 return c.put(id, file, false) 419 } 420 421 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 422 // Compute output ID. 423 h := sha256.New() 424 if _, err := file.Seek(0, 0); err != nil { 425 return OutputID{}, 0, err 426 } 427 size, err := io.Copy(h, file) 428 if err != nil { 429 return OutputID{}, 0, err 430 } 431 var out OutputID 432 h.Sum(out[:0]) 433 434 // Copy to cached output file (if not already present). 435 if err := c.copyFile(file, out, size); err != nil { 436 return out, size, err 437 } 438 439 // Add to cache index. 440 return out, size, c.putIndexEntry(id, out, size, allowVerify) 441 } 442 443 // PutBytes stores the given bytes in the cache as the output for the action ID. 444 func (c *Cache) PutBytes(id ActionID, data []byte) error { 445 _, _, err := c.Put(id, bytes.NewReader(data)) 446 return err 447 } 448 449 // copyFile copies file into the cache, expecting it to have the given 450 // output ID and size, if that file is not present already. 451 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 452 name := c.fileName(out, "d") 453 info, err := os.Stat(name) 454 if err == nil && info.Size() == size { 455 // Check hash. 456 if f, err := os.Open(name); err == nil { 457 h := sha256.New() 458 io.Copy(h, f) 459 f.Close() 460 var out2 OutputID 461 h.Sum(out2[:0]) 462 if out == out2 { 463 return nil 464 } 465 } 466 // Hash did not match. Fall through and rewrite file. 467 } 468 469 // Copy file to cache directory. 470 mode := os.O_RDWR | os.O_CREATE 471 if err == nil && info.Size() > size { // shouldn't happen but fix in case 472 mode |= os.O_TRUNC 473 } 474 f, err := os.OpenFile(name, mode, 0666) 475 if err != nil { 476 return err 477 } 478 defer f.Close() 479 if size == 0 { 480 // File now exists with correct size. 481 // Only one possible zero-length file, so contents are OK too. 482 // Early return here makes sure there's a "last byte" for code below. 483 return nil 484 } 485 486 // From here on, if any of the I/O writing the file fails, 487 // we make a best-effort attempt to truncate the file f 488 // before returning, to avoid leaving bad bytes in the file. 489 490 // Copy file to f, but also into h to double-check hash. 491 if _, err := file.Seek(0, 0); err != nil { 492 f.Truncate(0) 493 return err 494 } 495 h := sha256.New() 496 w := io.MultiWriter(f, h) 497 if _, err := io.CopyN(w, file, size-1); err != nil { 498 f.Truncate(0) 499 return err 500 } 501 // Check last byte before writing it; writing it will make the size match 502 // what other processes expect to find and might cause them to start 503 // using the file. 504 buf := make([]byte, 1) 505 if _, err := file.Read(buf); err != nil { 506 f.Truncate(0) 507 return err 508 } 509 h.Write(buf) 510 sum := h.Sum(nil) 511 if !bytes.Equal(sum, out[:]) { 512 f.Truncate(0) 513 return fmt.Errorf("file content changed underfoot") 514 } 515 516 // Commit cache file entry. 517 if _, err := f.Write(buf); err != nil { 518 f.Truncate(0) 519 return err 520 } 521 if err := f.Close(); err != nil { 522 // Data might not have been written, 523 // but file may look like it is the right size. 524 // To be extra careful, remove cached file. 525 os.Remove(name) 526 return err 527 } 528 os.Chtimes(name, c.now(), c.now()) // mainly for tests 529 530 return nil 531 }