github.com/elek/golangci-lint@v1.42.2-0.20211208090441-c05b7fcb3a9a/internal/cache/cache.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package cache implements a build artifact cache. 6 // 7 // This package is a slightly modified fork of Go's 8 // cmd/go/internal/cache package. 9 package cache 10 11 import ( 12 "bytes" 13 "crypto/sha256" 14 "encoding/hex" 15 "fmt" 16 "io" 17 "os" 18 "path/filepath" 19 "strconv" 20 "strings" 21 "time" 22 23 "github.com/pkg/errors" 24 25 "github.com/elek/golangci-lint/internal/renameio" 26 "github.com/elek/golangci-lint/internal/robustio" 27 ) 28 29 // An ActionID is a cache action key, the hash of a complete description of a 30 // repeatable computation (command line, environment variables, 31 // input file contents, executable contents). 32 type ActionID [HashSize]byte 33 34 // An OutputID is a cache output key, the hash of an output of a computation. 35 type OutputID [HashSize]byte 36 37 // A Cache is a package cache, backed by a file system directory tree. 38 type Cache struct { 39 dir string 40 now func() time.Time 41 } 42 43 // Open opens and returns the cache in the given directory. 44 // 45 // It is safe for multiple processes on a single machine to use the 46 // same cache directory in a local file system simultaneously. 47 // They will coordinate using operating system file locks and may 48 // duplicate effort but will not corrupt the cache. 49 // 50 // However, it is NOT safe for multiple processes on different machines 51 // to share a cache directory (for example, if the directory were stored 52 // in a network file system). File locking is notoriously unreliable in 53 // network file systems and may not suffice to protect the cache. 54 // 55 func Open(dir string) (*Cache, error) { 56 info, err := os.Stat(dir) 57 if err != nil { 58 return nil, err 59 } 60 if !info.IsDir() { 61 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 62 } 63 for i := 0; i < 256; i++ { 64 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 65 if err := os.MkdirAll(name, 0744); err != nil { 66 return nil, err 67 } 68 } 69 c := &Cache{ 70 dir: dir, 71 now: time.Now, 72 } 73 return c, nil 74 } 75 76 // fileName returns the name of the file corresponding to the given id. 77 func (c *Cache) fileName(id [HashSize]byte, key string) string { 78 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 79 } 80 81 var errMissing = errors.New("cache entry not found") 82 83 func IsErrMissing(err error) bool { 84 return errors.Cause(err) == errMissing 85 } 86 87 const ( 88 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 89 hexSize = HashSize * 2 90 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 91 ) 92 93 // verify controls whether to run the cache in verify mode. 94 // In verify mode, the cache always returns errMissing from Get 95 // but then double-checks in Put that the data being written 96 // exactly matches any existing entry. This provides an easy 97 // way to detect program behavior that would have been different 98 // had the cache entry been returned from Get. 99 // 100 // verify is enabled by setting the environment variable 101 // GODEBUG=gocacheverify=1. 102 var verify = false 103 104 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 105 var DebugTest = false 106 107 func init() { initEnv() } 108 109 func initEnv() { 110 verify = false 111 debugHash = false 112 debug := strings.Split(os.Getenv("GODEBUG"), ",") 113 for _, f := range debug { 114 if f == "gocacheverify=1" { 115 verify = true 116 } 117 if f == "gocachehash=1" { 118 debugHash = true 119 } 120 if f == "gocachetest=1" { 121 DebugTest = true 122 } 123 } 124 } 125 126 // Get looks up the action ID in the cache, 127 // returning the corresponding output ID and file size, if any. 128 // Note that finding an output ID does not guarantee that the 129 // saved file for that output ID is still available. 130 func (c *Cache) Get(id ActionID) (Entry, error) { 131 if verify { 132 return Entry{}, errMissing 133 } 134 return c.get(id) 135 } 136 137 type Entry struct { 138 OutputID OutputID 139 Size int64 140 Time time.Time 141 } 142 143 // get is Get but does not respect verify mode, so that Put can use it. 144 func (c *Cache) get(id ActionID) (Entry, error) { 145 missing := func() (Entry, error) { 146 return Entry{}, errMissing 147 } 148 failed := func(err error) (Entry, error) { 149 return Entry{}, err 150 } 151 fileName := c.fileName(id, "a") 152 f, err := os.Open(fileName) 153 if err != nil { 154 if os.IsNotExist(err) { 155 return missing() 156 } 157 return failed(err) 158 } 159 defer f.Close() 160 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 161 if n, readErr := io.ReadFull(f, entry); n != entrySize || readErr != io.ErrUnexpectedEOF { 162 return failed(fmt.Errorf("read %d/%d bytes from %s with error %s", n, entrySize, fileName, readErr)) 163 } 164 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 165 return failed(fmt.Errorf("bad data in %s", fileName)) 166 } 167 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 168 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 169 esize, entry := entry[1:1+20], entry[1+20:] 170 etime := entry[1 : 1+20] 171 var buf [HashSize]byte 172 if _, err = hex.Decode(buf[:], eid); err != nil || buf != id { 173 return failed(errors.Wrapf(err, "failed to hex decode eid data in %s", fileName)) 174 } 175 if _, err = hex.Decode(buf[:], eout); err != nil { 176 return failed(errors.Wrapf(err, "failed to hex decode eout data in %s", fileName)) 177 } 178 i := 0 179 for i < len(esize) && esize[i] == ' ' { 180 i++ 181 } 182 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 183 if err != nil || size < 0 { 184 return failed(fmt.Errorf("failed to parse esize int from %s with error %s", fileName, err)) 185 } 186 i = 0 187 for i < len(etime) && etime[i] == ' ' { 188 i++ 189 } 190 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 191 if err != nil || tm < 0 { 192 return failed(fmt.Errorf("failed to parse etime int from %s with error %s", fileName, err)) 193 } 194 195 if err = c.used(fileName); err != nil { 196 return failed(errors.Wrapf(err, "failed to mark %s as used", fileName)) 197 } 198 199 return Entry{buf, size, time.Unix(0, tm)}, nil 200 } 201 202 // GetBytes looks up the action ID in the cache and returns 203 // the corresponding output bytes. 204 // GetBytes should only be used for data that can be expected to fit in memory. 205 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 206 entry, err := c.Get(id) 207 if err != nil { 208 return nil, entry, err 209 } 210 outputFile, err := c.OutputFile(entry.OutputID) 211 if err != nil { 212 return nil, entry, err 213 } 214 215 data, err := robustio.ReadFile(outputFile) 216 if err != nil { 217 return nil, entry, err 218 } 219 220 if sha256.Sum256(data) != entry.OutputID { 221 return nil, entry, errMissing 222 } 223 return data, entry, nil 224 } 225 226 // OutputFile returns the name of the cache file storing output with the given OutputID. 227 func (c *Cache) OutputFile(out OutputID) (string, error) { 228 file := c.fileName(out, "d") 229 if err := c.used(file); err != nil { 230 return "", err 231 } 232 return file, nil 233 } 234 235 // Time constants for cache expiration. 236 // 237 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 238 // to avoid causing many unnecessary inode updates. The mtimes therefore 239 // roughly reflect "time of last use" but may in fact be older by at most an hour. 240 // 241 // We scan the cache for entries to delete at most once per trimInterval (1 day). 242 // 243 // When we do scan the cache, we delete entries that have not been used for 244 // at least trimLimit (5 days). Statistics gathered from a month of usage by 245 // Go developers found that essentially all reuse of cached entries happened 246 // within 5 days of the previous reuse. See golang.org/issue/22990. 247 const ( 248 mtimeInterval = 1 * time.Hour 249 trimInterval = 24 * time.Hour 250 trimLimit = 5 * 24 * time.Hour 251 ) 252 253 // used makes a best-effort attempt to update mtime on file, 254 // so that mtime reflects cache access time. 255 // 256 // Because the reflection only needs to be approximate, 257 // and to reduce the amount of disk activity caused by using 258 // cache entries, used only updates the mtime if the current 259 // mtime is more than an hour old. This heuristic eliminates 260 // nearly all of the mtime updates that would otherwise happen, 261 // while still keeping the mtimes useful for cache trimming. 262 func (c *Cache) used(file string) error { 263 info, err := os.Stat(file) 264 if err != nil { 265 if os.IsNotExist(err) { 266 return errMissing 267 } 268 return errors.Wrapf(err, "failed to stat file %s", file) 269 } 270 271 if c.now().Sub(info.ModTime()) < mtimeInterval { 272 return nil 273 } 274 275 if err := os.Chtimes(file, c.now(), c.now()); err != nil { 276 return errors.Wrapf(err, "failed to change time of file %s", file) 277 } 278 279 return nil 280 } 281 282 // Trim removes old cache entries that are likely not to be reused. 283 func (c *Cache) Trim() { 284 now := c.now() 285 286 // We maintain in dir/trim.txt the time of the last completed cache trim. 287 // If the cache has been trimmed recently enough, do nothing. 288 // This is the common case. 289 data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt")) 290 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) 291 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { 292 return 293 } 294 295 // Trim each of the 256 subdirectories. 296 // We subtract an additional mtimeInterval 297 // to account for the imprecision of our "last used" mtimes. 298 cutoff := now.Add(-trimLimit - mtimeInterval) 299 for i := 0; i < 256; i++ { 300 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 301 c.trimSubdir(subdir, cutoff) 302 } 303 304 // Ignore errors from here: if we don't write the complete timestamp, the 305 // cache will appear older than it is, and we'll trim it again next time. 306 _ = renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) 307 } 308 309 // trimSubdir trims a single cache subdirectory. 310 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 311 // Read all directory entries from subdir before removing 312 // any files, in case removing files invalidates the file offset 313 // in the directory scan. Also, ignore error from f.Readdirnames, 314 // because we don't care about reporting the error and we still 315 // want to process any entries found before the error. 316 f, err := os.Open(subdir) 317 if err != nil { 318 return 319 } 320 names, _ := f.Readdirnames(-1) 321 f.Close() 322 323 for _, name := range names { 324 // Remove only cache entries (xxxx-a and xxxx-d). 325 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 326 continue 327 } 328 entry := filepath.Join(subdir, name) 329 info, err := os.Stat(entry) 330 if err == nil && info.ModTime().Before(cutoff) { 331 os.Remove(entry) 332 } 333 } 334 } 335 336 // putIndexEntry adds an entry to the cache recording that executing the action 337 // with the given id produces an output with the given output id (hash) and size. 338 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 339 // Note: We expect that for one reason or another it may happen 340 // that repeating an action produces a different output hash 341 // (for example, if the output contains a time stamp or temp dir name). 342 // While not ideal, this is also not a correctness problem, so we 343 // don't make a big deal about it. In particular, we leave the action 344 // cache entries writable specifically so that they can be overwritten. 345 // 346 // Setting GODEBUG=gocacheverify=1 does make a big deal: 347 // in verify mode we are double-checking that the cache entries 348 // are entirely reproducible. As just noted, this may be unrealistic 349 // in some cases but the check is also useful for shaking out real bugs. 350 entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) 351 352 if verify && allowVerify { 353 old, err := c.get(id) 354 if err == nil && (old.OutputID != out || old.Size != size) { 355 // panic to show stack trace, so we can see what code is generating this cache entry. 356 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 357 panic(msg) 358 } 359 } 360 file := c.fileName(id, "a") 361 362 // Copy file to cache directory. 363 mode := os.O_WRONLY | os.O_CREATE 364 f, err := os.OpenFile(file, mode, 0666) 365 if err != nil { 366 return err 367 } 368 _, err = f.WriteString(entry) 369 if err == nil { 370 // Truncate the file only *after* writing it. 371 // (This should be a no-op, but truncate just in case of previous corruption.) 372 // 373 // This differs from ioutil.WriteFile, which truncates to 0 *before* writing 374 // via os.O_TRUNC. Truncating only after writing ensures that a second write 375 // of the same content to the same file is idempotent, and does not — even 376 // temporarily! — undo the effect of the first write. 377 err = f.Truncate(int64(len(entry))) 378 } 379 if closeErr := f.Close(); err == nil { 380 err = closeErr 381 } 382 if err != nil { 383 // TODO(bcmills): This Remove potentially races with another go command writing to file. 384 // Can we eliminate it? 385 os.Remove(file) 386 return err 387 } 388 if err = os.Chtimes(file, c.now(), c.now()); err != nil { // mainly for tests 389 return errors.Wrapf(err, "failed to change time of file %s", file) 390 } 391 392 return nil 393 } 394 395 // Put stores the given output in the cache as the output for the action ID. 396 // It may read file twice. The content of file must not change between the two passes. 397 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 398 return c.put(id, file, true) 399 } 400 401 // PutNoVerify is like Put but disables the verify check 402 // when GODEBUG=goverifycache=1 is set. 403 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 404 // like test output containing times and the like. 405 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 406 return c.put(id, file, false) 407 } 408 409 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 410 // Compute output ID. 411 h := sha256.New() 412 if _, err := file.Seek(0, 0); err != nil { 413 return OutputID{}, 0, err 414 } 415 size, err := io.Copy(h, file) 416 if err != nil { 417 return OutputID{}, 0, err 418 } 419 var out OutputID 420 h.Sum(out[:0]) 421 422 // Copy to cached output file (if not already present). 423 if err := c.copyFile(file, out, size); err != nil { 424 return out, size, err 425 } 426 427 // Add to cache index. 428 return out, size, c.putIndexEntry(id, out, size, allowVerify) 429 } 430 431 // PutBytes stores the given bytes in the cache as the output for the action ID. 432 func (c *Cache) PutBytes(id ActionID, data []byte) error { 433 _, _, err := c.Put(id, bytes.NewReader(data)) 434 return err 435 } 436 437 // copyFile copies file into the cache, expecting it to have the given 438 // output ID and size, if that file is not present already. 439 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 440 name := c.fileName(out, "d") 441 info, err := os.Stat(name) 442 if err == nil && info.Size() == size { 443 // Check hash. 444 if f, openErr := os.Open(name); openErr == nil { 445 h := sha256.New() 446 if _, copyErr := io.Copy(h, f); copyErr != nil { 447 return errors.Wrap(copyErr, "failed to copy to sha256") 448 } 449 450 f.Close() 451 var out2 OutputID 452 h.Sum(out2[:0]) 453 if out == out2 { 454 return nil 455 } 456 } 457 // Hash did not match. Fall through and rewrite file. 458 } 459 460 // Copy file to cache directory. 461 mode := os.O_RDWR | os.O_CREATE 462 if err == nil && info.Size() > size { // shouldn't happen but fix in case 463 mode |= os.O_TRUNC 464 } 465 f, err := os.OpenFile(name, mode, 0666) 466 if err != nil { 467 return err 468 } 469 defer f.Close() 470 if size == 0 { 471 // File now exists with correct size. 472 // Only one possible zero-length file, so contents are OK too. 473 // Early return here makes sure there's a "last byte" for code below. 474 return nil 475 } 476 477 // From here on, if any of the I/O writing the file fails, 478 // we make a best-effort attempt to truncate the file f 479 // before returning, to avoid leaving bad bytes in the file. 480 481 // Copy file to f, but also into h to double-check hash. 482 if _, err = file.Seek(0, 0); err != nil { 483 _ = f.Truncate(0) 484 return err 485 } 486 h := sha256.New() 487 w := io.MultiWriter(f, h) 488 if _, err = io.CopyN(w, file, size-1); err != nil { 489 _ = f.Truncate(0) 490 return err 491 } 492 // Check last byte before writing it; writing it will make the size match 493 // what other processes expect to find and might cause them to start 494 // using the file. 495 buf := make([]byte, 1) 496 if _, err = file.Read(buf); err != nil { 497 _ = f.Truncate(0) 498 return err 499 } 500 if n, wErr := h.Write(buf); n != len(buf) { 501 return fmt.Errorf("wrote to hash %d/%d bytes with error %s", n, len(buf), wErr) 502 } 503 504 sum := h.Sum(nil) 505 if !bytes.Equal(sum, out[:]) { 506 _ = f.Truncate(0) 507 return fmt.Errorf("file content changed underfoot") 508 } 509 510 // Commit cache file entry. 511 if _, err = f.Write(buf); err != nil { 512 _ = f.Truncate(0) 513 return err 514 } 515 if err = f.Close(); err != nil { 516 // Data might not have been written, 517 // but file may look like it is the right size. 518 // To be extra careful, remove cached file. 519 os.Remove(name) 520 return err 521 } 522 if err = os.Chtimes(name, c.now(), c.now()); err != nil { // mainly for tests 523 return errors.Wrapf(err, "failed to change time of file %s", name) 524 } 525 526 return nil 527 }