// github.com/stingnevermore/go@v0.0.0-20180120041312-3810f5bfed72/src/cmd/go/internal/cache/cache.go (about)

// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package cache implements a build artifact cache.
package cache

import (
	"bytes"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"
)

// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
type ActionID [HashSize]byte

// An OutputID is a cache output key, the hash of an output of a computation.
type OutputID [HashSize]byte

// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
	// dir is the root directory of the cache.
	dir string
	// log is the file that lookups and stores are logged to.
	log *os.File
	// now returns the current time; it is used for log timestamps
	// and for the modification times written to cache files.
	now func() time.Time
}

// Open opens and returns the cache in the given directory.
//
// It is safe for multiple processes on a single machine to use the
// same cache directory in a local file system simultaneously.
// They will coordinate using operating system file locks and may
// duplicate effort but will not corrupt the cache.
//
// However, it is NOT safe for multiple processes on different machines
// to share a cache directory (for example, if the directory were stored
// in a network file system). File locking is notoriously unreliable in
// network file systems and may not suffice to protect the cache.
49 // 50 func Open(dir string) (*Cache, error) { 51 info, err := os.Stat(dir) 52 if err != nil { 53 return nil, err 54 } 55 if !info.IsDir() { 56 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 57 } 58 for i := 0; i < 256; i++ { 59 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 60 if err := os.MkdirAll(name, 0777); err != nil { 61 return nil, err 62 } 63 } 64 f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666) 65 if err != nil { 66 return nil, err 67 } 68 c := &Cache{ 69 dir: dir, 70 log: f, 71 now: time.Now, 72 } 73 return c, nil 74 } 75 76 // fileName returns the name of the file corresponding to the given id. 77 func (c *Cache) fileName(id [HashSize]byte, key string) string { 78 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 79 } 80 81 var errMissing = errors.New("cache entry not found") 82 83 const ( 84 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n" 85 hexSize = HashSize * 2 86 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1 87 ) 88 89 // verify controls whether to run the cache in verify mode. 90 // In verify mode, the cache always returns errMissing from Get 91 // but then double-checks in Put that the data being written 92 // exactly matches any existing entry. This provides an easy 93 // way to detect program behavior that would have been different 94 // had the cache entry been returned from Get. 95 // 96 // verify is enabled by setting the environment variable 97 // GODEBUG=gocacheverify=1. 98 var verify = false 99 100 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment. 
101 var DebugTest = false 102 103 func init() { initEnv() } 104 105 func initEnv() { 106 verify = false 107 debugHash = false 108 debug := strings.Split(os.Getenv("GODEBUG"), ",") 109 for _, f := range debug { 110 if f == "gocacheverify=1" { 111 verify = true 112 } 113 if f == "gocachehash=1" { 114 debugHash = true 115 } 116 if f == "gocachetest=1" { 117 DebugTest = true 118 } 119 } 120 } 121 122 // Get looks up the action ID in the cache, 123 // returning the corresponding output ID and file size, if any. 124 // Note that finding an output ID does not guarantee that the 125 // saved file for that output ID is still available. 126 func (c *Cache) Get(id ActionID) (Entry, error) { 127 if verify { 128 return Entry{}, errMissing 129 } 130 return c.get(id) 131 } 132 133 type Entry struct { 134 OutputID OutputID 135 Size int64 136 Time time.Time 137 } 138 139 // get is Get but does not respect verify mode, so that Put can use it. 140 func (c *Cache) get(id ActionID) (Entry, error) { 141 missing := func() (Entry, error) { 142 fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id) 143 return Entry{}, errMissing 144 } 145 f, err := os.Open(c.fileName(id, "a")) 146 if err != nil { 147 return missing() 148 } 149 defer f.Close() 150 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 151 if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF { 152 return missing() 153 } 154 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' { 155 return missing() 156 } 157 eid, entry := entry[3:3+hexSize], entry[3+hexSize:] 158 eout, entry := entry[1:1+hexSize], entry[1+hexSize:] 159 esize, entry := entry[1:1+20], entry[1+20:] 160 etime, entry := entry[1:1+20], entry[1+20:] 161 var buf [HashSize]byte 162 if _, err := hex.Decode(buf[:], eid); err != nil || buf != id { 163 return missing() 164 } 165 if _, err 
:= hex.Decode(buf[:], eout); err != nil { 166 return missing() 167 } 168 i := 0 169 for i < len(esize) && esize[i] == ' ' { 170 i++ 171 } 172 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 173 if err != nil || size < 0 { 174 return missing() 175 } 176 i = 0 177 for i < len(etime) && etime[i] == ' ' { 178 i++ 179 } 180 tm, err := strconv.ParseInt(string(etime[i:]), 10, 64) 181 if err != nil || size < 0 { 182 return missing() 183 } 184 185 fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id) 186 187 c.used(c.fileName(id, "a")) 188 189 return Entry{buf, size, time.Unix(0, tm)}, nil 190 } 191 192 // GetBytes looks up the action ID in the cache and returns 193 // the corresponding output bytes. 194 // GetBytes should only be used for data that can be expected to fit in memory. 195 func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) { 196 entry, err := c.Get(id) 197 if err != nil { 198 return nil, entry, err 199 } 200 data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID)) 201 if sha256.Sum256(data) != entry.OutputID { 202 return nil, entry, errMissing 203 } 204 return data, entry, nil 205 } 206 207 // OutputFile returns the name of the cache file storing output with the given OutputID. 208 func (c *Cache) OutputFile(out OutputID) string { 209 file := c.fileName(out, "d") 210 c.used(file) 211 return file 212 } 213 214 // Time constants for cache expiration. 215 // 216 // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour), 217 // to avoid causing many unnecessary inode updates. The mtimes therefore 218 // roughly reflect "time of last use" but may in fact be older by at most an hour. 219 // 220 // We scan the cache for entries to delete at most once per trimInterval (1 day). 221 // 222 // When we do scan the cache, we delete entries that have not been used for 223 // at least trimLimit (5 days). 
Statistics gathered from a month of usage by 224 // Go developers found that essentially all reuse of cached entries happened 225 // within 5 days of the previous reuse. See golang.org/issue/22990. 226 const ( 227 mtimeInterval = 1 * time.Hour 228 trimInterval = 24 * time.Hour 229 trimLimit = 5 * 24 * time.Hour 230 ) 231 232 // used makes a best-effort attempt to update mtime on file, 233 // so that mtime reflects cache access time. 234 // 235 // Because the reflection only needs to be approximate, 236 // and to reduce the amount of disk activity caused by using 237 // cache entries, used only updates the mtime if the current 238 // mtime is more than an hour old. This heuristic eliminates 239 // nearly all of the mtime updates that would otherwise happen, 240 // while still keeping the mtimes useful for cache trimming. 241 func (c *Cache) used(file string) { 242 info, err := os.Stat(file) 243 if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval { 244 return 245 } 246 os.Chtimes(file, c.now(), c.now()) 247 } 248 249 // Trim removes old cache entries that are likely not to be reused. 250 func (c *Cache) Trim() { 251 now := c.now() 252 253 // We maintain in dir/trim.txt the time of the last completed cache trim. 254 // If the cache has been trimmed recently enough, do nothing. 255 // This is the common case. 256 data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt")) 257 t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64) 258 if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval { 259 return 260 } 261 262 // Trim each of the 256 subdirectories. 263 // We subtract an additional mtimeInterval 264 // to account for the imprecision of our "last used" mtimes. 
265 cutoff := now.Add(-trimLimit - mtimeInterval) 266 for i := 0; i < 256; i++ { 267 subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i)) 268 c.trimSubdir(subdir, cutoff) 269 } 270 271 ioutil.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0666) 272 } 273 274 // trimSubdir trims a single cache subdirectory. 275 func (c *Cache) trimSubdir(subdir string, cutoff time.Time) { 276 // Read all directory entries from subdir before removing 277 // any files, in case removing files invalidates the file offset 278 // in the directory scan. Also, ignore error from f.Readdirnames, 279 // because we don't care about reporting the error and we still 280 // want to process any entries found before the error. 281 f, err := os.Open(subdir) 282 if err != nil { 283 return 284 } 285 names, _ := f.Readdirnames(-1) 286 f.Close() 287 288 for _, name := range names { 289 // Remove only cache entries (xxxx-a and xxxx-d). 290 if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") { 291 continue 292 } 293 entry := filepath.Join(subdir, name) 294 info, err := os.Stat(entry) 295 if err == nil && info.ModTime().Before(cutoff) { 296 os.Remove(entry) 297 } 298 } 299 } 300 301 // putIndexEntry adds an entry to the cache recording that executing the action 302 // with the given id produces an output with the given output id (hash) and size. 303 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 304 // Note: We expect that for one reason or another it may happen 305 // that repeating an action produces a different output hash 306 // (for example, if the output contains a time stamp or temp dir name). 307 // While not ideal, this is also not a correctness problem, so we 308 // don't make a big deal about it. In particular, we leave the action 309 // cache entries writable specifically so that they can be overwritten. 
310 // 311 // Setting GODEBUG=gocacheverify=1 does make a big deal: 312 // in verify mode we are double-checking that the cache entries 313 // are entirely reproducible. As just noted, this may be unrealistic 314 // in some cases but the check is also useful for shaking out real bugs. 315 entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())) 316 if verify && allowVerify { 317 old, err := c.get(id) 318 if err == nil && (old.OutputID != out || old.Size != size) { 319 // panic to show stack trace, so we can see what code is generating this cache entry. 320 msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size) 321 panic(msg) 322 } 323 } 324 file := c.fileName(id, "a") 325 if err := ioutil.WriteFile(file, entry, 0666); err != nil { 326 os.Remove(file) 327 return err 328 } 329 os.Chtimes(file, c.now(), c.now()) // mainly for tests 330 331 fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size) 332 return nil 333 } 334 335 // Put stores the given output in the cache as the output for the action ID. 336 // It may read file twice. The content of file must not change between the two passes. 337 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 338 return c.put(id, file, true) 339 } 340 341 // PutNoVerify is like Put but disables the verify check 342 // when GODEBUG=goverifycache=1 is set. 343 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 344 // like test output containing times and the like. 345 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 346 return c.put(id, file, false) 347 } 348 349 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 350 // Compute output ID. 
351 h := sha256.New() 352 if _, err := file.Seek(0, 0); err != nil { 353 return OutputID{}, 0, err 354 } 355 size, err := io.Copy(h, file) 356 if err != nil { 357 return OutputID{}, 0, err 358 } 359 var out OutputID 360 h.Sum(out[:0]) 361 362 // Copy to cached output file (if not already present). 363 if err := c.copyFile(file, out, size); err != nil { 364 return out, size, err 365 } 366 367 // Add to cache index. 368 return out, size, c.putIndexEntry(id, out, size, allowVerify) 369 } 370 371 // PutBytes stores the given bytes in the cache as the output for the action ID. 372 func (c *Cache) PutBytes(id ActionID, data []byte) error { 373 _, _, err := c.Put(id, bytes.NewReader(data)) 374 return err 375 } 376 377 // copyFile copies file into the cache, expecting it to have the given 378 // output ID and size, if that file is not present already. 379 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 380 name := c.fileName(out, "d") 381 info, err := os.Stat(name) 382 if err == nil && info.Size() == size { 383 // Check hash. 384 if f, err := os.Open(name); err == nil { 385 h := sha256.New() 386 io.Copy(h, f) 387 f.Close() 388 var out2 OutputID 389 h.Sum(out2[:0]) 390 if out == out2 { 391 return nil 392 } 393 } 394 // Hash did not match. Fall through and rewrite file. 395 } 396 397 // Copy file to cache directory. 398 mode := os.O_RDWR | os.O_CREATE 399 if err == nil && info.Size() > size { // shouldn't happen but fix in case 400 mode |= os.O_TRUNC 401 } 402 f, err := os.OpenFile(name, mode, 0666) 403 if err != nil { 404 return err 405 } 406 defer f.Close() 407 if size == 0 { 408 // File now exists with correct size. 409 // Only one possible zero-length file, so contents are OK too. 410 // Early return here makes sure there's a "last byte" for code below. 
411 return nil 412 } 413 414 // From here on, if any of the I/O writing the file fails, 415 // we make a best-effort attempt to truncate the file f 416 // before returning, to avoid leaving bad bytes in the file. 417 418 // Copy file to f, but also into h to double-check hash. 419 if _, err := file.Seek(0, 0); err != nil { 420 f.Truncate(0) 421 return err 422 } 423 h := sha256.New() 424 w := io.MultiWriter(f, h) 425 if _, err := io.CopyN(w, file, size-1); err != nil { 426 f.Truncate(0) 427 return err 428 } 429 // Check last byte before writing it; writing it will make the size match 430 // what other processes expect to find and might cause them to start 431 // using the file. 432 buf := make([]byte, 1) 433 if _, err := file.Read(buf); err != nil { 434 f.Truncate(0) 435 return err 436 } 437 h.Write(buf) 438 sum := h.Sum(nil) 439 if !bytes.Equal(sum, out[:]) { 440 f.Truncate(0) 441 return fmt.Errorf("file content changed underfoot") 442 } 443 444 // Commit cache file entry. 445 if _, err := f.Write(buf); err != nil { 446 f.Truncate(0) 447 return err 448 } 449 if err := f.Close(); err != nil { 450 // Data might not have been written, 451 // but file may look like it is the right size. 452 // To be extra careful, remove cached file. 453 os.Remove(name) 454 return err 455 } 456 os.Chtimes(name, c.now(), c.now()) // mainly for tests 457 458 return nil 459 }