// Source: github.phpd.cn/thought-machine/please@v12.2.0+incompatible/tools/cache/server/cache.go

// Package server contains core functionality for our cache servers; storing & retrieving files etc.
package server

import (
	"bytes"
	"fmt"
	"io/ioutil"
	"os"
	"path"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	"github.com/djherbis/atime"
	"github.com/dustin/go-humanize"
	"github.com/streamrail/concurrent-map"

	pb "cache/proto/rpc_cache"
	"core"
	"fs"
)

// metadataFileName is the filename we store metadata in.
// StoreMetadata writes it inside the artifact's directory.
const metadataFileName = ".plz_metadata"

// metadataTemplate is the fmt template for writing the metadata files.
// Its verbs are filled, in order, with: address, hostname, whether a peer
// was given, and the peer itself.
const metadataTemplate = `Address: %s
Hostname: %s
Replicated: %v
Peer: %s
`

// A cachedFile stores metadata about a file stored in our cache.
type cachedFile struct {
	// Arbitrates single access to this file: readers take the read lock,
	// writers and removers take the write lock.
	sync.RWMutex
	// Time the file was last locked for reading or writing; for entries found
	// by the initial scan it is the file's on-disk access time.
	lastReadTime time.Time
	// Number of times the file has been read
	readCount int
	// Size of the file in bytes, as counted towards Cache.totalSize
	size int64
}

// A Cache is the underlying implementation of our HTTP and RPC caches that handles storing & retrieving artifacts.
type Cache struct {
	// Maps artifact paths (relative to rootPath) to their *cachedFile.
	cachedFiles cmap.ConcurrentMap
	// Sum of the sizes of all tracked files, in bytes. Updated with sync/atomic.
	totalSize int64
	// Directory on disk under which all artifacts are stored.
	rootPath string
}

// NewCache initialises the cache and fires off a background cleaner goroutine which runs every
// cleanFrequency seconds. The high and low water marks control a (soft) max size and a (harder)
// minimum size.
58 func NewCache(path string, cleanFrequency, maxArtifactAge time.Duration, lowWaterMark, highWaterMark uint64) *Cache { 59 log.Notice("Initialising cache with settings:\n Path: %s\n Clean frequency: %s\n Max artifact age: %s\n Low water mark: %s\n High water mark: %s", 60 path, cleanFrequency, maxArtifactAge, humanize.Bytes(lowWaterMark), humanize.Bytes(highWaterMark)) 61 cache := newCache(path) 62 go cache.clean(cleanFrequency, maxArtifactAge, int64(lowWaterMark), int64(highWaterMark)) 63 return cache 64 } 65 66 // newCache is an internal constructor intended mostly for testing. It doesn't start the cleaner goroutine. 67 func newCache(path string) *Cache { 68 cache := &Cache{rootPath: path} 69 cache.scan() 70 return cache 71 } 72 73 // TotalSize returns the current total size monitored by the cache, in bytes. 74 func (cache *Cache) TotalSize() int64 { 75 return cache.totalSize 76 } 77 78 // NumFiles returns the number of files currently monitored by the cache. 79 func (cache *Cache) NumFiles() int { 80 return cache.cachedFiles.Count() 81 } 82 83 // scan scans the directory tree for files. 84 func (cache *Cache) scan() { 85 cache.cachedFiles = cmap.New() 86 cache.totalSize = 0 87 88 if !core.PathExists(cache.rootPath) { 89 if err := os.MkdirAll(cache.rootPath, core.DirPermissions); err != nil { 90 log.Fatalf("Failed to create cache directory %s: %s", cache.rootPath, err) 91 } 92 return 93 } 94 95 log.Info("Scanning cache directory %s...", cache.rootPath) 96 filepath.Walk(cache.rootPath, func(name string, info os.FileInfo, err error) error { 97 if err != nil { 98 log.Fatalf("%s", err) 99 } else if !info.IsDir() { // We don't have directory entries. 
100 name = name[len(cache.rootPath)+1:] 101 log.Debug("Found file %s", name) 102 size := info.Size() 103 cache.cachedFiles.Set(name, &cachedFile{ 104 lastReadTime: atime.Get(info), 105 readCount: 0, 106 size: size, 107 }) 108 cache.totalSize += size 109 } 110 return nil 111 }) 112 log.Info("Scan complete, found %d entries", cache.cachedFiles.Count()) 113 } 114 115 // lockFile locks a file for reading or writing. 116 // It returns a locked mutex corresponding to that file or nil if there is none. 117 // The caller should .Unlock() the mutex once they're done with it. 118 func (cache *Cache) lockFile(path string, write bool, size int64) *cachedFile { 119 filei, present := cache.cachedFiles.Get(path) 120 var file *cachedFile 121 if !present { 122 // If we're writing we insert a new one, if we're reading we don't. 123 if !write { 124 return nil 125 } 126 file = &cachedFile{ 127 readCount: 0, 128 size: size, 129 } 130 file.Lock() 131 cache.cachedFiles.Set(path, file) 132 atomic.AddInt64(&cache.totalSize, size) 133 } else { 134 file = filei.(*cachedFile) 135 if write { 136 file.Lock() 137 } else { 138 file.RLock() 139 file.readCount++ 140 } 141 } 142 file.lastReadTime = time.Now() 143 return file 144 } 145 146 // removeFile deletes a file from the cache map. It does not remove the on-disk file. 147 func (cache *Cache) removeFile(path string, file *cachedFile) { 148 cache.cachedFiles.Remove(path) 149 atomic.AddInt64(&cache.totalSize, -file.size) 150 log.Debug("Removing file %s, saves %d, new size will be %d", path, file.size, cache.totalSize) 151 } 152 153 // removeAndDeleteFile deletes a file from the cache map and on-disk. 
154 func (cache *Cache) removeAndDeleteFile(p string, file *cachedFile) { 155 cache.removeFile(p, file) 156 p = path.Join(cache.rootPath, p) 157 if err := os.RemoveAll(p); err != nil { 158 log.Error("Failed to delete file: %s", p) 159 } 160 } 161 162 // RetrieveArtifact takes in the artifact path as a parameter and checks in the base server 163 // file directory to see if the file exists in the given path. If found, the function will 164 // return whatever's been stored there, which might be a directory and therefore contain 165 // multiple files to be returned. 166 func (cache *Cache) RetrieveArtifact(artPath string) ([]*pb.Artifact, error) { 167 ret := []*pb.Artifact{} 168 if fs.IsGlob(artPath) { 169 // N.B. strictly speaking we should have a real config here about what BUILD file names are, 170 // but likely the only time it would make a difference is if we'd been asked to cache a file named BUILD 171 // when the BUILD file name had been changed to something else. 172 for _, art := range fs.Glob(nil, cache.rootPath, []string{artPath}, nil, nil, true) { 173 fullPath := path.Join(cache.rootPath, art) 174 lock := cache.lockFile(fullPath, false, 0) 175 body, err := ioutil.ReadFile(fullPath) 176 if lock != nil { 177 lock.RUnlock() 178 } 179 if err != nil { 180 return nil, err 181 } 182 ret = append(ret, &pb.Artifact{File: art, Body: body}) 183 } 184 return ret, nil 185 } 186 187 fullPath := path.Join(cache.rootPath, artPath) 188 lock := cache.lockFile(artPath, false, 0) 189 if lock == nil { 190 // Can happen if artPath is a directory; we only store artifacts as files. 191 // (This is a debatable choice; it's a bit crap either way). 
192 if info, err := os.Stat(fullPath); err == nil && info.IsDir() { 193 return cache.retrieveDir(artPath) 194 } 195 return nil, os.ErrNotExist 196 } 197 defer lock.RUnlock() 198 199 if info, err := os.Lstat(fullPath); err == nil && (info.Mode()&os.ModeSymlink) != 0 { 200 dest, err := os.Readlink(fullPath) 201 if err != nil { 202 return nil, err 203 } 204 ret = append(ret, &pb.Artifact{ 205 File: fullPath[len(cache.rootPath)+1:], 206 Symlink: dest, 207 }) 208 } else if err := fs.Walk(fullPath, func(name string, isDir bool) error { 209 if !isDir { 210 body, err := ioutil.ReadFile(name) 211 if err != nil { 212 return err 213 } 214 ret = append(ret, &pb.Artifact{ 215 File: name[len(cache.rootPath)+1:], 216 Body: body, 217 }) 218 } 219 return nil 220 }); err != nil { 221 return nil, err 222 } 223 return ret, nil 224 } 225 226 // retrieveDir retrieves a directory of artifacts. We don't track the directory itself 227 // but allow its traversal to retrieve them. 228 func (cache *Cache) retrieveDir(artPath string) ([]*pb.Artifact, error) { 229 log.Debug("Searching dir %s for artifacts", artPath) 230 ret := []*pb.Artifact{} 231 fullPath := path.Join(cache.rootPath, artPath) 232 err := fs.Walk(fullPath, func(name string, isDir bool) error { 233 if !isDir { 234 // Must strip cache path off the front of this. 235 arts, err := cache.RetrieveArtifact(name[len(cache.rootPath)+1:]) 236 if err != nil { 237 return err 238 } 239 ret = append(ret, arts...) 240 } 241 return nil 242 }) 243 return ret, err 244 } 245 246 // StoreArtifact takes in the artifact content and path as parameters and creates a file with 247 // the given content in the given path. 248 // The function will return the first error found in the process, or nil if the process is successful. 
249 func (cache *Cache) StoreArtifact(artPath string, key []byte, symlink string) error { 250 log.Info("Storing artifact %s", artPath) 251 lock := cache.lockFile(artPath, true, int64(len(key))) 252 defer lock.Unlock() 253 254 fullPath := path.Join(cache.rootPath, artPath) 255 dirPath := path.Dir(fullPath) 256 if err := os.MkdirAll(dirPath, core.DirPermissions); err != nil { 257 log.Warning("Couldn't create path %s in http cache: %s", dirPath, err) 258 cache.removeAndDeleteFile(artPath, lock) 259 os.RemoveAll(dirPath) 260 return err 261 } 262 if symlink != "" { 263 if err := os.Symlink(symlink, fullPath); err != nil { 264 log.Errorf("Could not create %s symlink: %s", fullPath, err) 265 cache.removeAndDeleteFile(artPath, lock) 266 return err 267 } 268 } else { 269 log.Debug("Writing artifact to %s", fullPath) 270 if err := fs.WriteFile(bytes.NewReader(key), fullPath, 0); err != nil { 271 log.Errorf("Could not create %s artifact: %s", fullPath, err) 272 cache.removeAndDeleteFile(artPath, lock) 273 return err 274 } 275 } 276 return nil 277 } 278 279 // StoreMetadata stores some metadata about the given artifact in a simple format. 280 // This mostly just identifies where it came from. 281 func (cache *Cache) StoreMetadata(artPath, hostname, address, peer string) error { 282 log.Info("Storing metadata for %s", artPath) 283 lock := cache.lockFile(artPath, true, 0) 284 defer lock.Unlock() 285 fullPath := path.Join(cache.rootPath, artPath, metadataFileName) 286 contents := fmt.Sprintf(metadataTemplate, address, hostname, peer != "", peer) 287 if err := ioutil.WriteFile(fullPath, []byte(contents), 0644); err != nil { 288 log.Error("Could not write metadata file: %s", err) 289 return err 290 } 291 return nil 292 } 293 294 // DeleteArtifact takes in the artifact path as a parameter and removes the artifact from disk. 295 // The function will return the first error found in the process, or nil if the process is successful. 
296 func (cache *Cache) DeleteArtifact(artPath string) error { 297 log.Info("Deleting artifact %s", artPath) 298 // We need to search the entire map for prefixes. Pessimism follows... 299 paths := cachedFilePaths{} 300 for t := range cache.cachedFiles.IterBuffered() { 301 if strings.HasPrefix(t.Key, artPath) { 302 paths = append(paths, cachedFilePath{file: t.Val.(*cachedFile), path: t.Key}) 303 } 304 } 305 // NB. We can't do this in the loop above because there's a risk of deadlock. 306 // We create the temporary slice in preference to calling .Items() and duplicating 307 // the entire map. 308 for _, p := range paths { 309 p.file.Lock() 310 cache.removeFile(p.path, p.file) 311 p.file.Unlock() 312 } 313 return os.RemoveAll(path.Join(cache.rootPath, artPath)) 314 } 315 316 // DeleteAllArtifacts will remove all files in the cache directory. 317 // The function will return the first error found in the process, or nil if the process is successful. 318 func (cache *Cache) DeleteAllArtifacts() error { 319 // Empty entire cache now. 320 log.Warning("Deleting entire cache") 321 cache.cachedFiles = cmap.New() 322 cache.totalSize = 0 323 return core.AsyncDeleteDir(cache.rootPath) 324 } 325 326 // clean implements a periodic clean of the cache to remove old artifacts. 327 func (cache *Cache) clean(cleanFrequency, maxArtifactAge time.Duration, lowWaterMark, highWaterMark int64) { 328 for range time.NewTicker(cleanFrequency).C { 329 cache.cleanOldFiles(maxArtifactAge) 330 cache.singleClean(lowWaterMark, highWaterMark) 331 } 332 } 333 334 // cleanOldFiles cleans any files whose last access time is older than the given duration. 
335 func (cache *Cache) cleanOldFiles(maxArtifactAge time.Duration) bool { 336 log.Debug("Searching for old files...") 337 oldestTime := time.Now().Add(-maxArtifactAge) 338 cleaned := 0 339 for t := range cache.cachedFiles.IterBuffered() { 340 f := t.Val.(*cachedFile) 341 if f.lastReadTime.Before(oldestTime) { 342 lock := cache.lockFile(t.Key, true, f.size) 343 cache.removeAndDeleteFile(t.Key, f) 344 lock.Unlock() 345 cleaned++ 346 } 347 } 348 log.Notice("Removed %d old files, new size: %d, %d files", cleaned, cache.totalSize, cache.cachedFiles.Count()) 349 return cleaned > 0 350 } 351 352 // singleClean runs a single clean of the cache. It's split out for testing purposes. 353 func (cache *Cache) singleClean(lowWaterMark, highWaterMark int64) bool { 354 log.Debug("Total size: %d High water mark: %d", cache.totalSize, highWaterMark) 355 if cache.totalSize > highWaterMark { 356 log.Info("Cleaning cache...") 357 files := cache.filesToClean(lowWaterMark) 358 log.Info("Identified %d files to clean...", len(files)) 359 for _, file := range files { 360 lock := cache.lockFile(file.path, true, file.file.size) 361 cache.removeAndDeleteFile(file.path, file.file) 362 lock.Unlock() 363 } 364 return true 365 } 366 return false 367 } 368 369 // cachedFilePath embeds a cachedFile but with the path too. 370 type cachedFilePath struct { 371 file *cachedFile 372 path string 373 } 374 375 type cachedFilePaths []cachedFilePath 376 377 func (c cachedFilePaths) Len() int { return len(c) } 378 func (c cachedFilePaths) Swap(i, j int) { c[i], c[j] = c[j], c[i] } 379 func (c cachedFilePaths) Less(i, j int) bool { 380 return c[i].file.lastReadTime.Before(c[j].file.lastReadTime) 381 } 382 383 // filesToClean returns a list of files that should be cleaned, ie. the least interesting 384 // artifacts in the cache according to some heuristic. Removing all of them will be 385 // sufficient to reduce the cache size below lowWaterMark. 
386 func (cache *Cache) filesToClean(lowWaterMark int64) cachedFilePaths { 387 ret := make(cachedFilePaths, 0, len(cache.cachedFiles)) 388 for t := range cache.cachedFiles.IterBuffered() { 389 ret = append(ret, cachedFilePath{file: t.Val.(*cachedFile), path: t.Key}) 390 } 391 sort.Sort(&ret) 392 393 sizeToDelete := cache.totalSize - lowWaterMark 394 var sizeDeleted int64 395 for i, file := range ret { 396 if sizeDeleted >= sizeToDelete { 397 return ret[0:i] 398 } 399 sizeDeleted += file.file.size 400 } 401 return ret 402 }