// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package cache implements a build artifact cache.
//
// This package is a slightly modified fork of Go's
// cmd/go/internal/cache package.
package cache

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/nalekseevs/itns-golangci-lint/internal/renameio"
	"github.com/nalekseevs/itns-golangci-lint/internal/robustio"
)

// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
type ActionID [HashSize]byte

// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
	// dir is the root directory of the cache; entry files live in
	// two-hex-digit subdirectories beneath it.
	dir string
	// now returns the current time. Open sets it to time.Now; it is the
	// clock used for file mtimes and for trimming.
	now func() time.Time
}

// Open opens and returns the cache in the given directory.
//
// It is safe for multiple processes on a single machine to use the
// same cache directory in a local file system simultaneously.
// They will coordinate using operating system file locks and may
// duplicate effort but will not corrupt the cache.
//
// However, it is NOT safe for multiple processes on different machines
// to share a cache directory (for example, if the directory were stored
// in a network file system). File locking is notoriously unreliable in
// network file systems and may not suffice to protect the cache.
53 func Open(dir string) (*Cache, error) { 54 info, err := os.Stat(dir) 55 if err != nil { 56 return nil, err 57 } 58 if !info.IsDir() { 59 return nil, &os.PathError{Op: "open", Path: dir, Err: errors.New("not a directory")} 60 } 61 for i := 0; i < 256; i++ { 62 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 63 if err := os.MkdirAll(name, 0744); err != nil { 64 return nil, err 65 } 66 } 67 c := &Cache{ 68 dir: dir, 69 now: time.Now, 70 } 71 return c, nil 72 } 73 74 // fileName returns the name of the file corresponding to the given id. 75 func (c *Cache) fileName(id [HashSize]byte, key string) string { 76 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 77 } 78 79 var errMissing = errors.New("cache entry not found") 80 81 func IsErrMissing(err error) bool { 82 return errors.Is(err, errMissing) 83 } 84 85 const ( 86 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 87 hexSize = HashSize * 2 88 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 89 ) 90 91 // verify controls whether to run the cache in verify mode. 92 // In verify mode, the cache always returns errMissing from Get 93 // but then double-checks in Put that the data being written 94 // exactly matches any existing entry. This provides an easy 95 // way to detect program behavior that would have been different 96 // had the cache entry been returned from Get. 97 // 98 // verify is enabled by setting the environment variable 99 // GODEBUG=gocacheverify=1. 100 var verify = false 101 102 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 
103 var DebugTest = false 104 105 func init() { initEnv() } 106 107 func initEnv() { 108 verify = false 109 debugHash = false 110 debug := strings.Split(os.Getenv("GODEBUG"), ",") 111 for _, f := range debug { 112 if f == "gocacheverify=1" { 113 verify = true 114 } 115 if f == "gocachehash=1" { 116 debugHash = true 117 } 118 if f == "gocachetest=1" { 119 DebugTest = true 120 } 121 } 122 } 123 124 // Get looks up the action ID in the cache, 125 // returning the corresponding output ID and file size, if any. 126 // Note that finding an output ID does not guarantee that the 127 // saved file for that output ID is still available. 128 func (c *Cache) Get(id ActionID) (Entry, error) { 129 if verify { 130 return Entry{}, errMissing 131 } 132 return c.get(id) 133 } 134 135 type Entry struct { 136 OutputID OutputID 137 Size int64 138 Time time.Time 139 } 140 141 // get is Get but does not respect verify mode, so that Put can use it. 142 func (c *Cache) get(id ActionID) (Entry, error) { 143 missing := func() (Entry, error) { 144 return Entry{}, errMissing 145 } 146 failed := func(err error) (Entry, error) { 147 return Entry{}, err 148 } 149 fileName := c.fileName(id, "a") 150 f, err := os.Open(fileName) 151 if err != nil { 152 if os.IsNotExist(err) { 153 return missing() 154 } 155 return failed(err) 156 } 157 defer f.Close() 158 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 159 if n, readErr := io.ReadFull(f, entry); n != entrySize || readErr != io.ErrUnexpectedEOF { 160 return failed(fmt.Errorf("read %d/%d bytes from %s with error %w", n, entrySize, fileName, readErr)) 161 } 162 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 163 return failed(fmt.Errorf("bad data in %s", fileName)) 164 } 165 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 166 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 
167 esize, entry := entry[1:1+20], entry[1+20:] 168 etime := entry[1 : 1+20] 169 var buf [HashSize]byte 170 if _, err = hex.Decode(buf[:], eid); err != nil || buf != id { 171 return failed(fmt.Errorf("failed to hex decode eid data in %s: %w", fileName, err)) 172 } 173 if _, err = hex.Decode(buf[:], eout); err != nil { 174 return failed(fmt.Errorf("failed to hex decode eout data in %s: %w", fileName, err)) 175 } 176 i := 0 177 for i < len(esize) && esize[i] == ' ' { 178 i++ 179 } 180 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 181 if err != nil || size < 0 { 182 return failed(fmt.Errorf("failed to parse esize int from %s with error %w", fileName, err)) 183 } 184 i = 0 185 for i < len(etime) && etime[i] == ' ' { 186 i++ 187 } 188 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 189 if err != nil || tm < 0 { 190 return failed(fmt.Errorf("failed to parse etime int from %s with error %w", fileName, err)) 191 } 192 193 if err = c.used(fileName); err != nil { 194 return failed(fmt.Errorf("failed to mark %s as used: %w", fileName, err)) 195 } 196 197 return Entry{buf, size, time.Unix(0, tm)}, nil 198 } 199 200 // GetBytes looks up the action ID in the cache and returns 201 // the corresponding output bytes. 202 // GetBytes should only be used for data that can be expected to fit in memory. 203 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 204 entry, err := c.Get(id) 205 if err != nil { 206 return nil, entry, err 207 } 208 outputFile, err := c.OutputFile(entry.OutputID) 209 if err != nil { 210 return nil, entry, err 211 } 212 213 data, err := robustio.ReadFile(outputFile) 214 if err != nil { 215 return nil, entry, err 216 } 217 218 if sha256.Sum256(data) != entry.OutputID { 219 return nil, entry, errMissing 220 } 221 return data, entry, nil 222 } 223 224 // OutputFile returns the name of the cache file storing output with the given OutputID. 
225 func (c *Cache) OutputFile(out OutputID) (string, error) { 226 file := c.fileName(out, "d") 227 if err := c.used(file); err != nil { 228 return "", err 229 } 230 return file, nil 231 } 232 233 // Time constants for cache expiration. 234 // 235 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 236 // to avoid causing many unnecessary inode updates. The mtimes therefore 237 // roughly reflect "time of last use" but may in fact be older by at most an hour. 238 // 239 // We scan the cache for entries to delete at most once per trimInterval (1 day). 240 // 241 // When we do scan the cache, we delete entries that have not been used for 242 // at least trimLimit (5 days). Statistics gathered from a month of usage by 243 // Go developers found that essentially all reuse of cached entries happened 244 // within 5 days of the previous reuse. See golang.org/issue/22990. 245 const ( 246 mtimeInterval = 1 * time.Hour 247 trimInterval = 24 * time.Hour 248 trimLimit = 5 * 24 * time.Hour 249 ) 250 251 // used makes a best-effort attempt to update mtime on file, 252 // so that mtime reflects cache access time. 253 // 254 // Because the reflection only needs to be approximate, 255 // and to reduce the amount of disk activity caused by using 256 // cache entries, used only updates the mtime if the current 257 // mtime is more than an hour old. This heuristic eliminates 258 // nearly all the mtime updates that would otherwise happen, 259 // while still keeping the mtimes useful for cache trimming. 
260 func (c *Cache) used(file string) error { 261 info, err := os.Stat(file) 262 if err != nil { 263 if os.IsNotExist(err) { 264 return errMissing 265 } 266 return fmt.Errorf("failed to stat file %s: %w", file, err) 267 } 268 269 if c.now().Sub(info.ModTime()) < mtimeInterval { 270 return nil 271 } 272 273 if err := os.Chtimes(file, c.now(), c.now()); err != nil { 274 return fmt.Errorf("failed to change time of file %s: %w", file, err) 275 } 276 277 return nil 278 } 279 280 // Trim removes old cache entries that are likely not to be reused. 281 func (c *Cache) Trim() { 282 now := c.now() 283 284 // We maintain in dir/trim.txt the time of the last completed cache trim. 285 // If the cache has been trimmed recently enough, do nothing. 286 // This is the common case. 287 data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt")) 288 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) 289 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { 290 return 291 } 292 293 // Trim each of the 256 subdirectories. 294 // We subtract an additional mtimeInterval 295 // to account for the imprecision of our "last used" mtimes. 296 cutoff := now.Add(-trimLimit - mtimeInterval) 297 for i := 0; i < 256; i++ { 298 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 299 c.trimSubdir(subdir, cutoff) 300 } 301 302 // Ignore errors from here: if we don't write the complete timestamp, the 303 // cache will appear older than it is, and we'll trim it again next time. 304 _ = renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) 305 } 306 307 // trimSubdir trims a single cache subdirectory. 308 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 309 // Read all directory entries from subdir before removing 310 // any files, in case removing files invalidates the file offset 311 // in the directory scan. 
Also, ignore error from f.Readdirnames, 312 // because we don't care about reporting the error, and we still 313 // want to process any entries found before the error. 314 f, err := os.Open(subdir) 315 if err != nil { 316 return 317 } 318 names, _ := f.Readdirnames(-1) 319 f.Close() 320 321 for _, name := range names { 322 // Remove only cache entries (xxxx-a and xxxx-d). 323 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 324 continue 325 } 326 entry := filepath.Join(subdir, name) 327 info, err := os.Stat(entry) 328 if err == nil && info.ModTime().Before(cutoff) { 329 os.Remove(entry) 330 } 331 } 332 } 333 334 // putIndexEntry adds an entry to the cache recording that executing the action 335 // with the given id produces an output with the given output id (hash) and size. 336 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 337 // Note: We expect that for one reason or another it may happen 338 // that repeating an action produces a different output hash 339 // (for example, if the output contains a time stamp or temp dir name). 340 // While not ideal, this is also not a correctness problem, so we 341 // don't make a big deal about it. In particular, we leave the action 342 // cache entries writable specifically so that they can be overwritten. 343 // 344 // Setting GODEBUG=gocacheverify=1 does make a big deal: 345 // in verify mode we are double-checking that the cache entries 346 // are entirely reproducible. As just noted, this may be unrealistic 347 // in some cases but the check is also useful for shaking out real bugs. 348 entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) 349 350 if verify && allowVerify { 351 old, err := c.get(id) 352 if err == nil && (old.OutputID != out || old.Size != size) { 353 // panic to show stack trace, so we can see what code is generating this cache entry. 
354 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 355 panic(msg) 356 } 357 } 358 file := c.fileName(id, "a") 359 360 // Copy file to cache directory. 361 mode := os.O_WRONLY | os.O_CREATE 362 f, err := os.OpenFile(file, mode, 0666) 363 if err != nil { 364 return err 365 } 366 _, err = f.WriteString(entry) 367 if err == nil { 368 // Truncate the file only *after* writing it. 369 // (This should be a no-op, but truncate just in case of previous corruption.) 370 // 371 // This differs from os.WriteFile, which truncates to 0 *before* writing 372 // via os.O_TRUNC. Truncating only after writing ensures that a second write 373 // of the same content to the same file is idempotent, and does not — even 374 // temporarily! — undo the effect of the first write. 375 err = f.Truncate(int64(len(entry))) 376 } 377 if closeErr := f.Close(); err == nil { 378 err = closeErr 379 } 380 if err != nil { 381 // TODO(bcmills): This Remove potentially races with another go command writing to file. 382 // Can we eliminate it? 383 os.Remove(file) 384 return err 385 } 386 if err = os.Chtimes(file, c.now(), c.now()); err != nil { // mainly for tests 387 return fmt.Errorf("failed to change time of file %s: %w", file, err) 388 } 389 390 return nil 391 } 392 393 // Put stores the given output in the cache as the output for the action ID. 394 // It may read file twice. The content of file must not change between the two passes. 395 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 396 return c.put(id, file, true) 397 } 398 399 // PutNoVerify is like Put but disables the verify check 400 // when GODEBUG=goverifycache=1 is set. 401 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 402 // like test output containing times and the like. 
403 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 404 return c.put(id, file, false) 405 } 406 407 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 408 // Compute output ID. 409 h := sha256.New() 410 if _, err := file.Seek(0, 0); err != nil { 411 return OutputID{}, 0, err 412 } 413 size, err := io.Copy(h, file) 414 if err != nil { 415 return OutputID{}, 0, err 416 } 417 var out OutputID 418 h.Sum(out[:0]) 419 420 // Copy to cached output file (if not already present). 421 if err := c.copyFile(file, out, size); err != nil { 422 return out, size, err 423 } 424 425 // Add to cache index. 426 return out, size, c.putIndexEntry(id, out, size, allowVerify) 427 } 428 429 // PutBytes stores the given bytes in the cache as the output for the action ID. 430 func (c *Cache) PutBytes(id ActionID, data []byte) error { 431 _, _, err := c.Put(id, bytes.NewReader(data)) 432 return err 433 } 434 435 // copyFile copies file into the cache, expecting it to have the given 436 // output ID and size, if that file is not present already. 437 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 438 name := c.fileName(out, "d") 439 info, err := os.Stat(name) 440 if err == nil && info.Size() == size { 441 // Check hash. 442 if f, openErr := os.Open(name); openErr == nil { 443 h := sha256.New() 444 if _, copyErr := io.Copy(h, f); copyErr != nil { 445 return fmt.Errorf("failed to copy to sha256: %w", copyErr) 446 } 447 448 f.Close() 449 var out2 OutputID 450 h.Sum(out2[:0]) 451 if out == out2 { 452 return nil 453 } 454 } 455 // Hash did not match. Fall through and rewrite file. 456 } 457 458 // Copy file to cache directory. 
459 mode := os.O_RDWR | os.O_CREATE 460 if err == nil && info.Size() > size { // shouldn't happen but fix in case 461 mode |= os.O_TRUNC 462 } 463 f, err := os.OpenFile(name, mode, 0666) 464 if err != nil { 465 return err 466 } 467 defer f.Close() 468 if size == 0 { 469 // File now exists with correct size. 470 // Only one possible zero-length file, so contents are OK too. 471 // Early return here makes sure there's a "last byte" for code below. 472 return nil 473 } 474 475 // From here on, if any of the I/O writing the file fails, 476 // we make a best-effort attempt to truncate the file f 477 // before returning, to avoid leaving bad bytes in the file. 478 479 // Copy file to f, but also into h to double-check hash. 480 if _, err = file.Seek(0, 0); err != nil { 481 _ = f.Truncate(0) 482 return err 483 } 484 h := sha256.New() 485 w := io.MultiWriter(f, h) 486 if _, err = io.CopyN(w, file, size-1); err != nil { 487 _ = f.Truncate(0) 488 return err 489 } 490 // Check last byte before writing it; writing it will make the size match 491 // what other processes expect to find and might cause them to start 492 // using the file. 493 buf := make([]byte, 1) 494 if _, err = file.Read(buf); err != nil { 495 _ = f.Truncate(0) 496 return err 497 } 498 if n, wErr := h.Write(buf); n != len(buf) { 499 return fmt.Errorf("wrote to hash %d/%d bytes with error %w", n, len(buf), wErr) 500 } 501 502 sum := h.Sum(nil) 503 if !bytes.Equal(sum, out[:]) { 504 _ = f.Truncate(0) 505 return errors.New("file content changed underfoot") 506 } 507 508 // Commit cache file entry. 509 if _, err = f.Write(buf); err != nil { 510 _ = f.Truncate(0) 511 return err 512 } 513 if err = f.Close(); err != nil { 514 // Data might not have been written, 515 // but file may look like it is the right size. 516 // To be extra careful, remove cached file. 
517 os.Remove(name) 518 return err 519 } 520 if err = os.Chtimes(name, c.now(), c.now()); err != nil { // mainly for tests 521 return fmt.Errorf("failed to change time of file %s: %w", name, err) 522 } 523 524 return nil 525 }