github.com/goproxy0/go@v0.0.0-20171111080102-49cc0c489d2c/src/cmd/go/internal/cache/cache.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package cache implements a build artifact cache. 6 package cache 7 8 import ( 9 "bytes" 10 "crypto/sha256" 11 "encoding/hex" 12 "errors" 13 "fmt" 14 "io" 15 "io/ioutil" 16 "os" 17 "path/filepath" 18 "strconv" 19 "strings" 20 "time" 21 ) 22 23 // An ActionID is a cache action key, the hash of a complete description of a 24 // repeatable computation (command line, environment variables, 25 // input file contents, executable contents). 26 type ActionID [HashSize]byte 27 28 // An OutputID is a cache output key, the hash of an output of a computation. 29 type OutputID [HashSize]byte 30 31 // A Cache is a package cache, backed by a file system directory tree. 32 type Cache struct { 33 dir string 34 log *os.File 35 now func() time.Time 36 } 37 38 // Open opens and returns the cache in the given directory. 39 // 40 // It is safe for multiple processes on a single machine to use the 41 // same cache directory in a local file system simultaneously. 42 // They will coordinate using operating system file locks and may 43 // duplicate effort but will not corrupt the cache. 44 // 45 // However, it is NOT safe for multiple processes on different machines 46 // to share a cache directory (for example, if the directory were stored 47 // in a network file system). File locking is notoriously unreliable in 48 // network file systems and may not suffice to protect the cache. 49 // 50 func Open(dir string) (*Cache, error) { 51 info, err := os.Stat(dir) 52 if err != nil { 53 return nil, err 54 } 55 if !info.IsDir() { 56 return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")} 57 } 58 for i := 0; i < 256; i++ { 59 name := filepath.Join(dir, fmt.Sprintf("%02x", i)) 60 if err := os.MkdirAll(name, 0777); err != nil { 61 return nil, err 62 } 63 } 64 f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0666) 65 if err != nil { 66 return nil, err 67 } 68 c := &Cache{ 69 dir: dir, 70 log: f, 71 now: time.Now, 72 } 73 return c, nil 74 } 75 76 // fileName returns the name of the file corresponding to the given id. 77 func (c *Cache) fileName(id [HashSize]byte, key string) string { 78 return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key) 79 } 80 81 var errMissing = errors.New("cache entry not found") 82 83 const ( 84 // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes>\n" 85 hexSize = HashSize * 2 86 entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 87 ) 88 89 // verify controls whether to run the cache in verify mode. 90 // In verify mode, the cache always returns errMissing from Get 91 // but then double-checks in Put that the data being written 92 // exactly matches any existing entry. This provides an easy 93 // way to detect program behavior that would have been different 94 // had the cache entry been returned from Get. 95 // 96 // verify is enabled by setting the environment variable 97 // GODEBUG=gocacheverify=1. 98 var verify = false 99 100 func init() { initEnv() } 101 102 func initEnv() { 103 verify = false 104 debugHash = false 105 debug := strings.Split(os.Getenv("GODEBUG"), ",") 106 for _, f := range debug { 107 if f == "gocacheverify=1" { 108 verify = true 109 } 110 if f == "gocachehash=1" { 111 debugHash = true 112 } 113 } 114 } 115 116 // Get looks up the action ID in the cache, 117 // returning the corresponding output ID and file size, if any. 118 // Note that finding an output ID does not guarantee that the 119 // saved file for that output ID is still available. 120 func (c *Cache) Get(id ActionID) (OutputID, int64, error) { 121 if verify { 122 return OutputID{}, 0, errMissing 123 } 124 return c.get(id) 125 } 126 127 // get is Get but does not respect verify mode, so that Put can use it. 128 func (c *Cache) get(id ActionID) (OutputID, int64, error) { 129 missing := func() (OutputID, int64, error) { 130 fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id) 131 return OutputID{}, 0, errMissing 132 } 133 f, err := os.Open(c.fileName(id, "a")) 134 if err != nil { 135 return missing() 136 } 137 defer f.Close() 138 entry := make([]byte, entrySize+1) // +1 to detect whether f is too long 139 if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF { 140 return missing() 141 } 142 if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+64] != ' ' || entry[entrySize-1] != '\n' { 143 return missing() 144 } 145 eid, eout, esize := entry[3:3+hexSize], entry[3+hexSize+1:3+hexSize+1+hexSize], entry[3+hexSize+1+hexSize+1:entrySize-1] 146 var buf [HashSize]byte 147 if _, err := hex.Decode(buf[:], eid); err != nil || buf != id { 148 return missing() 149 } 150 if _, err := hex.Decode(buf[:], eout); err != nil { 151 return missing() 152 } 153 i := 0 154 for i < len(esize) && esize[i] == ' ' { 155 i++ 156 } 157 size, err := strconv.ParseInt(string(esize[i:]), 10, 64) 158 if err != nil || size < 0 { 159 return missing() 160 } 161 162 fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id) 163 164 // Best-effort attempt to update mtime on file, 165 // so that mtime reflects cache access time. 166 os.Chtimes(c.fileName(id, "a"), c.now(), c.now()) 167 168 return buf, size, nil 169 } 170 171 // GetBytes looks up the action ID in the cache and returns 172 // the corresponding output bytes. 173 // GetBytes should only be used for data that can be expected to fit in memory. 174 func (c *Cache) GetBytes(id ActionID) ([]byte, error) { 175 out, _, err := c.Get(id) 176 if err != nil { 177 return nil, err 178 } 179 data, _ := ioutil.ReadFile(c.OutputFile(out)) 180 if sha256.Sum256(data) != out { 181 return nil, errMissing 182 } 183 return data, nil 184 } 185 186 // OutputFile returns the name of the cache file storing output with the given OutputID. 187 func (c *Cache) OutputFile(out OutputID) string { 188 file := c.fileName(out, "d") 189 190 // Best-effort attempt to update mtime on file, 191 // so that mtime reflects cache access time. 192 os.Chtimes(file, c.now(), c.now()) 193 194 return file 195 } 196 197 // putIndexEntry adds an entry to the cache recording that executing the action 198 // with the given id produces an output with the given output id (hash) and size. 199 func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error { 200 // Note: We expect that for one reason or another it may happen 201 // that repeating an action produces a different output hash 202 // (for example, if the output contains a time stamp or temp dir name). 203 // While not ideal, this is also not a correctness problem, so we 204 // don't make a big deal about it. In particular, we leave the action 205 // cache entries writable specifically so that they can be overwritten. 206 // 207 // Setting GODEBUG=gocacheverify=1 does make a big deal: 208 // in verify mode we are double-checking that the cache entries 209 // are entirely reproducible. As just noted, this may be unrealistic 210 // in some cases but the check is also useful for shaking out real bugs. 211 entry := []byte(fmt.Sprintf("v1 %x %x %20d\n", id, out, size)) 212 if verify && allowVerify { 213 oldOut, oldSize, err := c.get(id) 214 if err == nil && (oldOut != out || oldSize != size) { 215 fmt.Fprintf(os.Stderr, "go: internal cache error: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d\n", id, reverseHash(id), out, size, oldOut, oldSize) 216 // panic to show stack trace, so we can see what code is generating this cache entry. 217 panic("cache verify failed") 218 } 219 } 220 file := c.fileName(id, "a") 221 if err := ioutil.WriteFile(file, entry, 0666); err != nil { 222 os.Remove(file) 223 return err 224 } 225 226 fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size) 227 return nil 228 } 229 230 // Put stores the given output in the cache as the output for the action ID. 231 // It may read file twice. The content of file must not change between the two passes. 232 func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 233 return c.put(id, file, true) 234 } 235 236 // PutNoVerify is like Put but disables the verify check 237 // when GODEBUG=goverifycache=1 is set. 238 // It is meant for data that is OK to cache but that we expect to vary slightly from run to run, 239 // like test output containing times and the like. 240 func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) { 241 return c.put(id, file, false) 242 } 243 244 func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) { 245 // Compute output ID. 246 h := sha256.New() 247 if _, err := file.Seek(0, 0); err != nil { 248 return OutputID{}, 0, err 249 } 250 size, err := io.Copy(h, file) 251 if err != nil { 252 return OutputID{}, 0, err 253 } 254 var out OutputID 255 h.Sum(out[:0]) 256 257 // Copy to cached output file (if not already present). 258 if err := c.copyFile(file, out, size); err != nil { 259 return out, size, err 260 } 261 262 // Add to cache index. 263 return out, size, c.putIndexEntry(id, out, size, allowVerify) 264 } 265 266 // PutBytes stores the given bytes in the cache as the output for the action ID. 267 func (c *Cache) PutBytes(id ActionID, data []byte) error { 268 _, _, err := c.Put(id, bytes.NewReader(data)) 269 return err 270 } 271 272 // copyFile copies file into the cache, expecting it to have the given 273 // output ID and size, if that file is not present already. 274 func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error { 275 name := c.fileName(out, "d") 276 info, err := os.Stat(name) 277 if err == nil && info.Size() == size { 278 // Check hash. 279 if f, err := os.Open(name); err == nil { 280 h := sha256.New() 281 io.Copy(h, f) 282 f.Close() 283 var out2 OutputID 284 h.Sum(out2[:0]) 285 if out == out2 { 286 return nil 287 } 288 } 289 // Hash did not match. Fall through and rewrite file. 290 } 291 292 // Copy file to cache directory. 293 mode := os.O_RDWR | os.O_CREATE 294 if err == nil && info.Size() > size { // shouldn't happen but fix in case 295 mode |= os.O_TRUNC 296 } 297 f, err := os.OpenFile(name, mode, 0666) 298 if err != nil { 299 return err 300 } 301 defer f.Close() 302 if size == 0 { 303 // File now exists with correct size. 304 // Only one possible zero-length file, so contents are OK too. 305 // Early return here makes sure there's a "last byte" for code below. 306 return nil 307 } 308 309 // From here on, if any of the I/O writing the file fails, 310 // we make a best-effort attempt to truncate the file f 311 // before returning, to avoid leaving bad bytes in the file. 312 313 // Copy file to f, but also into h to double-check hash. 314 if _, err := file.Seek(0, 0); err != nil { 315 f.Truncate(0) 316 return err 317 } 318 h := sha256.New() 319 w := io.MultiWriter(f, h) 320 if _, err := io.CopyN(w, file, size-1); err != nil { 321 f.Truncate(0) 322 return err 323 } 324 // Check last byte before writing it; writing it will make the size match 325 // what other processes expect to find and might cause them to start 326 // using the file. 327 buf := make([]byte, 1) 328 if _, err := file.Read(buf); err != nil { 329 f.Truncate(0) 330 return err 331 } 332 h.Write(buf) 333 sum := h.Sum(nil) 334 if !bytes.Equal(sum, out[:]) { 335 f.Truncate(0) 336 return fmt.Errorf("file content changed underfoot") 337 } 338 339 // Commit cache file entry. 340 if _, err := f.Write(buf); err != nil { 341 f.Truncate(0) 342 return err 343 } 344 if err := f.Close(); err != nil { 345 // Data might not have been written, 346 // but file may look like it is the right size. 347 // To be extra careful, remove cached file. 348 os.Remove(name) 349 return err 350 } 351 352 return nil 353 }