github.com/sirkon/goproxy@v1.4.8/internal/cache/cache.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package cache implements a build artifact cache. 6 package cache 7 8 import ( 9 "bytes" 10 "crypto/sha256" 11 "encoding/hex" 12 "errors" 13 "fmt" 14 "io" 15 "io/ioutil" 16 "os" 17 "path/filepath" 18 "strconv" 19 "strings" 20 "time" 21 ) 22 23 // An ActionID is a cache action key, the hash of a complete description of a 24 // repeatable computation (command line, environment variables, 25 // input file contents, executable contents). 26 type ActionID [HashSize]byte 27 28 // An OutputID is a cache output key, the hash of an output of a computation. 29 type OutputID [HashSize]byte 30 31 // A Cache is a package cache, backed by a file system directory tree. 32 type Cache struct { 33 dir string 34 log *os.File 35 now func() time.Time 36 } 37 38 // Open opens and returns the cache in the given directory. 39 // 40 // It is safe for multiple processes on a single machine to use the 41 // same cache directory in a local file system simultaneously. 42 // They will coordinate using operating system file locks and may 43 // duplicate effort but will not corrupt the cache. 44 // 45 // However, it is NOT safe for multiple processes on different machines 46 // to share a cache directory (for example, if the directory were stored 47 // in a network file system). File locking is notoriously unreliable in 48 // network file systems and may not suffice to protect the cache. 49 // 50 func Open(dir string) (*Cache, error) { 51 info, err := os.Stat(dir) 52 if err != nil { 53 return nil, err 54 } 55 if !info.IsDir() { 56 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 57 } 58 for i := 0; i < 256; i++ { 59 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 60 if err := os.MkdirAll(name, 0777); err != nil { 61 return nil, err 62 } 63 } 64 f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666) 65 if err != nil { 66 return nil, err 67 } 68 c := &Cache{ 69 dir: dir, 70 log: f, 71 now: time.Now, 72 } 73 return c, nil 74 } 75 76 // fileName returns the name of the file corresponding to the given id. 77 func (c *Cache) fileName(id [HashSize]byte, key string) string { 78 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 79 } 80 81 var errMissing = errors.New("cache entry not found") 82 83 const ( 84 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 85 hexSize = HashSize * 2 86 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 87 ) 88 89 // verify controls whether to run the cache in verify mode. 90 // In verify mode, the cache always returns errMissing from Get 91 // but then double-checks in Put that the data being written 92 // exactly matches any existing entry. This provides an easy 93 // way to detect program behavior that would have been different 94 // had the cache entry been returned from Get. 95 // 96 // verify is enabled by setting the environment variable 97 // GODEBUG=gocacheverify=1. 98 var verify = false 99 100 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 101 var DebugTest = false 102 103 func init() { initEnv() } 104 105 func initEnv() { 106 verify = false 107 debugHash = false 108 debug := strings.Split(os.Getenv("GODEBUG"), ",") 109 for _, f := range debug { 110 if f == "gocacheverify=1" { 111 verify = true 112 } 113 if f == "gocachehash=1" { 114 debugHash = true 115 } 116 if f == "gocachetest=1" { 117 DebugTest = true 118 } 119 } 120 } 121 122 // Get looks up the action ID in the cache, 123 // returning the corresponding output ID and file size, if any. 124 // Note that finding an output ID does not guarantee that the 125 // saved file for that output ID is still available. 126 func (c *Cache) Get(id ActionID) (Entry, error) { 127 if verify { 128 return Entry{}, errMissing 129 } 130 return c.get(id) 131 } 132 133 type Entry struct { 134 OutputID OutputID 135 Size int64 136 Time time.Time 137 } 138 139 // get is Get but does not respect verify mode, so that Put can use it. 140 func (c *Cache) get(id ActionID) (Entry, error) { 141 missing := func() (Entry, error) { 142 fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id) 143 return Entry{}, errMissing 144 } 145 f, err := os.Open(c.fileName(id, "a")) 146 if err != nil { 147 return missing() 148 } 149 defer f.Close() 150 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 151 if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF { 152 return missing() 153 } 154 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 155 return missing() 156 } 157 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 158 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 159 esize, entry := entry[1:1+20], entry[1+20:] 160 etime, entry := entry[1:1+20], entry[1+20:] 161 var buf [HashSize]byte 162 if _, err := hex.Decode(buf[:], eid); err != nil || buf != id { 163 return missing() 164 } 165 if _, err := hex.Decode(buf[:], eout); err != nil { 166 return missing() 167 } 168 i := 0 169 for i < len(esize) && esize[i] == ' ' { 170 i++ 171 } 172 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 173 if err != nil || size < 0 { 174 return missing() 175 } 176 i = 0 177 for i < len(etime) && etime[i] == ' ' { 178 i++ 179 } 180 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 181 if err != nil || size < 0 { 182 return missing() 183 } 184 185 fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id) 186 187 c.used(c.fileName(id, "a")) 188 189 return Entry{buf, size, time.Unix(0, tm)}, nil 190 } 191 192 // GetFile looks up the action ID in the cache and returns 193 // the name of the corresponding data file. 194 func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) { 195 entry, err = c.Get(id) 196 if err != nil { 197 return "", Entry{}, err 198 } 199 file = c.OutputFile(entry.OutputID) 200 info, err := os.Stat(file) 201 if err != nil || info.Size() != entry.Size { 202 return "", Entry{}, errMissing 203 } 204 return file, entry, nil 205 } 206 207 // GetBytes looks up the action ID in the cache and returns 208 // the corresponding output bytes. 209 // GetBytes should only be used for data that can be expected to fit in memory. 210 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 211 entry, err := c.Get(id) 212 if err != nil { 213 return nil, entry, err 214 } 215 data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID)) 216 if sha256.Sum256(data) != entry.OutputID { 217 return nil, entry, errMissing 218 } 219 return data, entry, nil 220 } 221 222 // OutputFile returns the name of the cache file storing output with the given OutputID. 223 func (c *Cache) OutputFile(out OutputID) string { 224 file := c.fileName(out, "d") 225 c.used(file) 226 return file 227 } 228 229 // Time constants for cache expiration. 230 // 231 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 232 // to avoid causing many unnecessary inode updates. The mtimes therefore 233 // roughly reflect "time of last use" but may in fact be older by at most an hour. 234 // 235 // We scan the cache for entries to delete at most once per trimInterval (1 day). 236 // 237 // When we do scan the cache, we delete entries that have not been used for 238 // at least trimLimit (5 days). Statistics gathered from a month of usage by 239 // Go developers found that essentially all reuse of cached entries happened 240 // within 5 days of the previous reuse. See golang.org/issue/22990. 241 const ( 242 mtimeInterval = 1 * time.Hour 243 trimInterval = 24 * time.Hour 244 trimLimit = 5 * 24 * time.Hour 245 ) 246 247 // used makes a best-effort attempt to update mtime on file, 248 // so that mtime reflects cache access time. 249 // 250 // Because the reflection only needs to be approximate, 251 // and to reduce the amount of disk activity caused by using 252 // cache entries, used only updates the mtime if the current 253 // mtime is more than an hour old. This heuristic eliminates 254 // nearly all of the mtime updates that would otherwise happen, 255 // while still keeping the mtimes useful for cache trimming. 256 func (c *Cache) used(file string) { 257 info, err := os.Stat(file) 258 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval { 259 return 260 } 261 os.Chtimes(file, c.now(), c.now()) 262 } 263 264 // Trim removes old cache entries that are likely not to be reused. 265 func (c *Cache) Trim() { 266 now := c.now() 267 268 // We maintain in dir/trim.txt the time of the last completed cache trim. 269 // If the cache has been trimmed recently enough, do nothing. 270 // This is the common case. 271 data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt")) 272 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) 273 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { 274 return 275 } 276 277 // Trim each of the 256 subdirectories. 278 // We subtract an additional mtimeInterval 279 // to account for the imprecision of our "last used" mtimes. 280 cutoff := now.Add(-trimLimit - mtimeInterval) 281 for i := 0; i < 256; i++ { 282 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 283 c.trimSubdir(subdir, cutoff) 284 } 285 286 ioutil.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) 287 } 288 289 // trimSubdir trims a single cache subdirectory. 290 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 291 // Read all directory entries from subdir before removing 292 // any files, in case removing files invalidates the file offset 293 // in the directory scan. Also, ignore error from f.Readdirnames, 294 // because we don't care about reporting the error and we still 295 // want to process any entries found before the error. 296 f, err := os.Open(subdir) 297 if err != nil { 298 return 299 } 300 names, _ := f.Readdirnames(-1) 301 f.Close() 302 303 for _, name := range names { 304 // Remove only cache entries (xxxx-a and xxxx-d). 305 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 306 continue 307 } 308 entry := filepath.Join(subdir, name) 309 info, err := os.Stat(entry) 310 if err == nil && info.ModTime().Before(cutoff) { 311 os.Remove(entry) 312 } 313 } 314 } 315 316 // putIndexEntry adds an entry to the cache recording that executing the action 317 // with the given id produces an output with the given output id (hash) and size. 318 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 319 // Note: We expect that for one reason or another it may happen 320 // that repeating an action produces a different output hash 321 // (for example, if the output contains a time stamp or temp dir name). 322 // While not ideal, this is also not a correctness problem, so we 323 // don't make a big deal about it. In particular, we leave the action 324 // cache entries writable specifically so that they can be overwritten. 325 // 326 // Setting GODEBUG=gocacheverify=1 does make a big deal: 327 // in verify mode we are double-checking that the cache entries 328 // are entirely reproducible. As just noted, this may be unrealistic 329 // in some cases but the check is also useful for shaking out real bugs. 330 entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())) 331 if verify && allowVerify { 332 old, err := c.get(id) 333 if err == nil && (old.OutputID != out || old.Size != size) { 334 // panic to show stack trace, so we can see what code is generating this cache entry. 335 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 336 panic(msg) 337 } 338 } 339 file := c.fileName(id, "a") 340 if err := ioutil.WriteFile(file, entry, 0666); err != nil { 341 os.Remove(file) 342 return err 343 } 344 os.Chtimes(file, c.now(), c.now()) // mainly for tests 345 346 fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size) 347 return nil 348 } 349 350 // Put stores the given output in the cache as the output for the action ID. 351 // It may read file twice. The content of file must not change between the two passes. 352 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 353 return c.put(id, file, true) 354 } 355 356 // PutNoVerify is like Put but disables the verify check 357 // when GODEBUG=goverifycache=1 is set. 358 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 359 // like test output containing times and the like. 360 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 361 return c.put(id, file, false) 362 } 363 364 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 365 // Compute output ID. 366 h := sha256.New() 367 if _, err := file.Seek(0, 0); err != nil { 368 return OutputID{}, 0, err 369 } 370 size, err := io.Copy(h, file) 371 if err != nil { 372 return OutputID{}, 0, err 373 } 374 var out OutputID 375 h.Sum(out[:0]) 376 377 // Copy to cached output file (if not already present). 378 if err := c.copyFile(file, out, size); err != nil { 379 return out, size, err 380 } 381 382 // Add to cache index. 383 return out, size, c.putIndexEntry(id, out, size, allowVerify) 384 } 385 386 // PutBytes stores the given bytes in the cache as the output for the action ID. 387 func (c *Cache) PutBytes(id ActionID, data []byte) error { 388 _, _, err := c.Put(id, bytes.NewReader(data)) 389 return err 390 } 391 392 // copyFile copies file into the cache, expecting it to have the given 393 // output ID and size, if that file is not present already. 394 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 395 name := c.fileName(out, "d") 396 info, err := os.Stat(name) 397 if err == nil && info.Size() == size { 398 // Check hash. 399 if f, err := os.Open(name); err == nil { 400 h := sha256.New() 401 io.Copy(h, f) 402 f.Close() 403 var out2 OutputID 404 h.Sum(out2[:0]) 405 if out == out2 { 406 return nil 407 } 408 } 409 // Hash did not match. Fall through and rewrite file. 410 } 411 412 // Copy file to cache directory. 413 mode := os.O_RDWR | os.O_CREATE 414 if err == nil && info.Size() > size { // shouldn't happen but fix in case 415 mode |= os.O_TRUNC 416 } 417 f, err := os.OpenFile(name, mode, 0666) 418 if err != nil { 419 return err 420 } 421 defer f.Close() 422 if size == 0 { 423 // File now exists with correct size. 424 // Only one possible zero-length file, so contents are OK too. 425 // Early return here makes sure there's a "last byte" for code below. 426 return nil 427 } 428 429 // From here on, if any of the I/O writing the file fails, 430 // we make a best-effort attempt to truncate the file f 431 // before returning, to avoid leaving bad bytes in the file. 432 433 // Copy file to f, but also into h to double-check hash. 434 if _, err := file.Seek(0, 0); err != nil { 435 f.Truncate(0) 436 return err 437 } 438 h := sha256.New() 439 w := io.MultiWriter(f, h) 440 if _, err := io.CopyN(w, file, size-1); err != nil { 441 f.Truncate(0) 442 return err 443 } 444 // Check last byte before writing it; writing it will make the size match 445 // what other processes expect to find and might cause them to start 446 // using the file. 447 buf := make([]byte, 1) 448 if _, err := file.Read(buf); err != nil { 449 f.Truncate(0) 450 return err 451 } 452 h.Write(buf) 453 sum := h.Sum(nil) 454 if !bytes.Equal(sum, out[:]) { 455 f.Truncate(0) 456 return fmt.Errorf("file content changed underfoot") 457 } 458 459 // Commit cache file entry. 460 if _, err := f.Write(buf); err != nil { 461 f.Truncate(0) 462 return err 463 } 464 if err := f.Close(); err != nil { 465 // Data might not have been written, 466 // but file may look like it is the right size. 467 // To be extra careful, remove cached file. 468 os.Remove(name) 469 return err 470 } 471 os.Chtimes(name, c.now(), c.now()) // mainly for tests 472 473 return nil 474 }