// Origin: github.com/gagliardetto/golang-go@v0.0.0-20201020153340-53909ea70814/cmd/go/not-internal/cache/cache.go

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package cache implements a build artifact cache.
package cache

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/gagliardetto/golang-go/cmd/go/not-internal/renameio"
)

// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
type ActionID [HashSize]byte

// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
	dir string           // root directory of the cache, as passed to Open
	now func() time.Time // clock consulted for mtime updates and trimming; Open sets it to time.Now
}

// Open opens and returns the cache in the given directory.
//
// It is safe for multiple processes on a single machine to use the
// same cache directory in a local file system simultaneously.
// They will coordinate using operating system file locks and may
// duplicate effort but will not corrupt the cache.
//
// However, it is NOT safe for multiple processes on different machines
// to share a cache directory (for example, if the directory were stored
// in a network file system). File locking is notoriously unreliable in
// network file systems and may not suffice to protect the cache.
50 // 51 func Open(dir string) (*Cache, error) { 52 info, err := os.Stat(dir) 53 if err != nil { 54 return nil, err 55 } 56 if !info.IsDir() { 57 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 58 } 59 for i := 0; i < 256; i++ { 60 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 61 if err := os.MkdirAll(name, 0777); err != nil { 62 return nil, err 63 } 64 } 65 c := &Cache{ 66 dir: dir, 67 now: time.Now, 68 } 69 return c, nil 70 } 71 72 // fileName returns the name of the file corresponding to the given id. 73 func (c *Cache) fileName(id [HashSize]byte, key string) string { 74 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 75 } 76 77 // An entryNotFoundError indicates that a cache entry was not found, with an 78 // optional underlying reason. 79 type entryNotFoundError struct { 80 Err error 81 } 82 83 func (e *entryNotFoundError) Error() string { 84 if e.Err == nil { 85 return "cache entry not found" 86 } 87 return fmt.Sprintf("cache entry not found: %v", e.Err) 88 } 89 90 func (e *entryNotFoundError) Unwrap() error { 91 return e.Err 92 } 93 94 const ( 95 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 96 hexSize = HashSize * 2 97 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 98 ) 99 100 // verify controls whether to run the cache in verify mode. 101 // In verify mode, the cache always returns errMissing from Get 102 // but then double-checks in Put that the data being written 103 // exactly matches any existing entry. This provides an easy 104 // way to detect program behavior that would have been different 105 // had the cache entry been returned from Get. 106 // 107 // verify is enabled by setting the environment variable 108 // GODEBUG=gocacheverify=1. 
109 var verify = false 110 111 var errVerifyMode = errors.New("gocachverify=1") 112 113 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 114 var DebugTest = false 115 116 func init() { initEnv() } 117 118 func initEnv() { 119 verify = false 120 debugHash = false 121 debug := strings.Split(os.Getenv("GODEBUG"), ",") 122 for _, f := range debug { 123 if f == "gocacheverify=1" { 124 verify = true 125 } 126 if f == "gocachehash=1" { 127 debugHash = true 128 } 129 if f == "gocachetest=1" { 130 DebugTest = true 131 } 132 } 133 } 134 135 // Get looks up the action ID in the cache, 136 // returning the corresponding output ID and file size, if any. 137 // Note that finding an output ID does not guarantee that the 138 // saved file for that output ID is still available. 139 func (c *Cache) Get(id ActionID) (Entry, error) { 140 if verify { 141 return Entry{}, &entryNotFoundError{Err: errVerifyMode} 142 } 143 return c.get(id) 144 } 145 146 type Entry struct { 147 OutputID OutputID 148 Size int64 149 Time time.Time 150 } 151 152 // get is Get but does not respect verify mode, so that Put can use it. 
153 func (c *Cache) get(id ActionID) (Entry, error) { 154 missing := func(reason error) (Entry, error) { 155 return Entry{}, &entryNotFoundError{Err: reason} 156 } 157 f, err := os.Open(c.fileName(id, "a")) 158 if err != nil { 159 return missing(err) 160 } 161 defer f.Close() 162 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 163 if n, err := io.ReadFull(f, entry); n > entrySize { 164 return missing(errors.New("too long")) 165 } else if err != io.ErrUnexpectedEOF { 166 if err == io.EOF { 167 return missing(errors.New("file is empty")) 168 } 169 return missing(err) 170 } else if n < entrySize { 171 return missing(errors.New("entry file incomplete")) 172 } 173 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 174 return missing(errors.New("invalid header")) 175 } 176 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 177 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 178 esize, entry := entry[1:1+20], entry[1+20:] 179 etime, entry := entry[1:1+20], entry[1+20:] 180 var buf [HashSize]byte 181 if _, err := hex.Decode(buf[:], eid); err != nil { 182 return missing(fmt.Errorf("decoding ID: %v", err)) 183 } else if buf != id { 184 return missing(errors.New("mismatched ID")) 185 } 186 if _, err := hex.Decode(buf[:], eout); err != nil { 187 return missing(fmt.Errorf("decoding output ID: %v", err)) 188 } 189 i := 0 190 for i < len(esize) && esize[i] == ' ' { 191 i++ 192 } 193 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 194 if err != nil { 195 return missing(fmt.Errorf("parsing size: %v", err)) 196 } else if size < 0 { 197 return missing(errors.New("negative size")) 198 } 199 i = 0 200 for i < len(etime) && etime[i] == ' ' { 201 i++ 202 } 203 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 204 if err != nil { 205 return missing(fmt.Errorf("parsing timestamp: %v", err)) 206 } else if 
tm < 0 { 207 return missing(errors.New("negative timestamp")) 208 } 209 210 c.used(c.fileName(id, "a")) 211 212 return Entry{buf, size, time.Unix(0, tm)}, nil 213 } 214 215 // GetFile looks up the action ID in the cache and returns 216 // the name of the corresponding data file. 217 func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) { 218 entry, err = c.Get(id) 219 if err != nil { 220 return "", Entry{}, err 221 } 222 file = c.OutputFile(entry.OutputID) 223 info, err := os.Stat(file) 224 if err != nil { 225 return "", Entry{}, &entryNotFoundError{Err: err} 226 } 227 if info.Size() != entry.Size { 228 return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")} 229 } 230 return file, entry, nil 231 } 232 233 // GetBytes looks up the action ID in the cache and returns 234 // the corresponding output bytes. 235 // GetBytes should only be used for data that can be expected to fit in memory. 236 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 237 entry, err := c.Get(id) 238 if err != nil { 239 return nil, entry, err 240 } 241 data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID)) 242 if sha256.Sum256(data) != entry.OutputID { 243 return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")} 244 } 245 return data, entry, nil 246 } 247 248 // OutputFile returns the name of the cache file storing output with the given OutputID. 249 func (c *Cache) OutputFile(out OutputID) string { 250 file := c.fileName(out, "d") 251 c.used(file) 252 return file 253 } 254 255 // Time constants for cache expiration. 256 // 257 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 258 // to avoid causing many unnecessary inode updates. The mtimes therefore 259 // roughly reflect "time of last use" but may in fact be older by at most an hour. 260 // 261 // We scan the cache for entries to delete at most once per trimInterval (1 day). 
262 // 263 // When we do scan the cache, we delete entries that have not been used for 264 // at least trimLimit (5 days). Statistics gathered from a month of usage by 265 // Go developers found that essentially all reuse of cached entries happened 266 // within 5 days of the previous reuse. See golang.org/issue/22990. 267 const ( 268 mtimeInterval = 1 * time.Hour 269 trimInterval = 24 * time.Hour 270 trimLimit = 5 * 24 * time.Hour 271 ) 272 273 // used makes a best-effort attempt to update mtime on file, 274 // so that mtime reflects cache access time. 275 // 276 // Because the reflection only needs to be approximate, 277 // and to reduce the amount of disk activity caused by using 278 // cache entries, used only updates the mtime if the current 279 // mtime is more than an hour old. This heuristic eliminates 280 // nearly all of the mtime updates that would otherwise happen, 281 // while still keeping the mtimes useful for cache trimming. 282 func (c *Cache) used(file string) { 283 info, err := os.Stat(file) 284 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval { 285 return 286 } 287 os.Chtimes(file, c.now(), c.now()) 288 } 289 290 // Trim removes old cache entries that are likely not to be reused. 291 func (c *Cache) Trim() { 292 now := c.now() 293 294 // We maintain in dir/trim.txt the time of the last completed cache trim. 295 // If the cache has been trimmed recently enough, do nothing. 296 // This is the common case. 297 data, _ := renameio.ReadFile(filepath.Join(c.dir, "trim.txt")) 298 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) 299 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { 300 return 301 } 302 303 // Trim each of the 256 subdirectories. 304 // We subtract an additional mtimeInterval 305 // to account for the imprecision of our "last used" mtimes. 
306 cutoff := now.Add(-trimLimit - mtimeInterval) 307 for i := 0; i < 256; i++ { 308 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 309 c.trimSubdir(subdir, cutoff) 310 } 311 312 // Ignore errors from here: if we don't write the complete timestamp, the 313 // cache will appear older than it is, and we'll trim it again next time. 314 renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) 315 } 316 317 // trimSubdir trims a single cache subdirectory. 318 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 319 // Read all directory entries from subdir before removing 320 // any files, in case removing files invalidates the file offset 321 // in the directory scan. Also, ignore error from f.Readdirnames, 322 // because we don't care about reporting the error and we still 323 // want to process any entries found before the error. 324 f, err := os.Open(subdir) 325 if err != nil { 326 return 327 } 328 names, _ := f.Readdirnames(-1) 329 f.Close() 330 331 for _, name := range names { 332 // Remove only cache entries (xxxx-a and xxxx-d). 333 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 334 continue 335 } 336 entry := filepath.Join(subdir, name) 337 info, err := os.Stat(entry) 338 if err == nil && info.ModTime().Before(cutoff) { 339 os.Remove(entry) 340 } 341 } 342 } 343 344 // putIndexEntry adds an entry to the cache recording that executing the action 345 // with the given id produces an output with the given output id (hash) and size. 346 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 347 // Note: We expect that for one reason or another it may happen 348 // that repeating an action produces a different output hash 349 // (for example, if the output contains a time stamp or temp dir name). 350 // While not ideal, this is also not a correctness problem, so we 351 // don't make a big deal about it. 
In particular, we leave the action 352 // cache entries writable specifically so that they can be overwritten. 353 // 354 // Setting GODEBUG=gocacheverify=1 does make a big deal: 355 // in verify mode we are double-checking that the cache entries 356 // are entirely reproducible. As just noted, this may be unrealistic 357 // in some cases but the check is also useful for shaking out real bugs. 358 entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()) 359 if verify && allowVerify { 360 old, err := c.get(id) 361 if err == nil && (old.OutputID != out || old.Size != size) { 362 // panic to show stack trace, so we can see what code is generating this cache entry. 363 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 364 panic(msg) 365 } 366 } 367 file := c.fileName(id, "a") 368 369 // Copy file to cache directory. 370 mode := os.O_WRONLY | os.O_CREATE 371 f, err := os.OpenFile(file, mode, 0666) 372 if err != nil { 373 return err 374 } 375 _, err = f.WriteString(entry) 376 if err == nil { 377 // Truncate the file only *after* writing it. 378 // (This should be a no-op, but truncate just in case of previous corruption.) 379 // 380 // This differs from ioutil.WriteFile, which truncates to 0 *before* writing 381 // via os.O_TRUNC. Truncating only after writing ensures that a second write 382 // of the same content to the same file is idempotent, and does not — even 383 // temporarily! — undo the effect of the first write. 384 err = f.Truncate(int64(len(entry))) 385 } 386 if closeErr := f.Close(); err == nil { 387 err = closeErr 388 } 389 if err != nil { 390 // TODO(bcmills): This Remove potentially races with another go command writing to file. 391 // Can we eliminate it? 
392 os.Remove(file) 393 return err 394 } 395 os.Chtimes(file, c.now(), c.now()) // mainly for tests 396 397 return nil 398 } 399 400 // Put stores the given output in the cache as the output for the action ID. 401 // It may read file twice. The content of file must not change between the two passes. 402 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 403 return c.put(id, file, true) 404 } 405 406 // PutNoVerify is like Put but disables the verify check 407 // when GODEBUG=goverifycache=1 is set. 408 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 409 // like test output containing times and the like. 410 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 411 return c.put(id, file, false) 412 } 413 414 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 415 // Compute output ID. 416 h := sha256.New() 417 if _, err := file.Seek(0, 0); err != nil { 418 return OutputID{}, 0, err 419 } 420 size, err := io.Copy(h, file) 421 if err != nil { 422 return OutputID{}, 0, err 423 } 424 var out OutputID 425 h.Sum(out[:0]) 426 427 // Copy to cached output file (if not already present). 428 if err := c.copyFile(file, out, size); err != nil { 429 return out, size, err 430 } 431 432 // Add to cache index. 433 return out, size, c.putIndexEntry(id, out, size, allowVerify) 434 } 435 436 // PutBytes stores the given bytes in the cache as the output for the action ID. 437 func (c *Cache) PutBytes(id ActionID, data []byte) error { 438 _, _, err := c.Put(id, bytes.NewReader(data)) 439 return err 440 } 441 442 // copyFile copies file into the cache, expecting it to have the given 443 // output ID and size, if that file is not present already. 
444 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 445 name := c.fileName(out, "d") 446 info, err := os.Stat(name) 447 if err == nil && info.Size() == size { 448 // Check hash. 449 if f, err := os.Open(name); err == nil { 450 h := sha256.New() 451 io.Copy(h, f) 452 f.Close() 453 var out2 OutputID 454 h.Sum(out2[:0]) 455 if out == out2 { 456 return nil 457 } 458 } 459 // Hash did not match. Fall through and rewrite file. 460 } 461 462 // Copy file to cache directory. 463 mode := os.O_RDWR | os.O_CREATE 464 if err == nil && info.Size() > size { // shouldn't happen but fix in case 465 mode |= os.O_TRUNC 466 } 467 f, err := os.OpenFile(name, mode, 0666) 468 if err != nil { 469 return err 470 } 471 defer f.Close() 472 if size == 0 { 473 // File now exists with correct size. 474 // Only one possible zero-length file, so contents are OK too. 475 // Early return here makes sure there's a "last byte" for code below. 476 return nil 477 } 478 479 // From here on, if any of the I/O writing the file fails, 480 // we make a best-effort attempt to truncate the file f 481 // before returning, to avoid leaving bad bytes in the file. 482 483 // Copy file to f, but also into h to double-check hash. 484 if _, err := file.Seek(0, 0); err != nil { 485 f.Truncate(0) 486 return err 487 } 488 h := sha256.New() 489 w := io.MultiWriter(f, h) 490 if _, err := io.CopyN(w, file, size-1); err != nil { 491 f.Truncate(0) 492 return err 493 } 494 // Check last byte before writing it; writing it will make the size match 495 // what other processes expect to find and might cause them to start 496 // using the file. 497 buf := make([]byte, 1) 498 if _, err := file.Read(buf); err != nil { 499 f.Truncate(0) 500 return err 501 } 502 h.Write(buf) 503 sum := h.Sum(nil) 504 if !bytes.Equal(sum, out[:]) { 505 f.Truncate(0) 506 return fmt.Errorf("file content changed underfoot") 507 } 508 509 // Commit cache file entry. 
510 if _, err := f.Write(buf); err != nil { 511 f.Truncate(0) 512 return err 513 } 514 if err := f.Close(); err != nil { 515 // Data might not have been written, 516 // but file may look like it is the right size. 517 // To be extra careful, remove cached file. 518 os.Remove(name) 519 return err 520 } 521 os.Chtimes(name, c.now(), c.now()) // mainly for tests 522 523 return nil 524 }