github.phpd.cn/thought-machine/please@v12.2.0+incompatible/tools/cache/server/cache.go (about)

     1  // Package server contains core functionality for our cache servers; storing & retrieving files etc.
     2  package server
     3  
     4  import (
     5  	"bytes"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"sort"
    12  	"strings"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"github.com/djherbis/atime"
    18  	"github.com/dustin/go-humanize"
    19  	"github.com/streamrail/concurrent-map"
    20  
    21  	pb "cache/proto/rpc_cache"
    22  	"core"
    23  	"fs"
    24  )
    25  
    26  // metadataFileName is the filename we store metadata in.
    27  const metadataFileName = ".plz_metadata"
    28  
    29  // metadataTemplate is the template for writing the metadata files
    30  const metadataTemplate = `Address:    %s
    31  Hostname:   %s
    32  Replicated: %v
    33  Peer:       %s
    34  `
    35  
// A cachedFile stores metadata about a file stored in our cache.
// Instances are the values held in Cache.cachedFiles, and a locked
// *cachedFile is what lockFile hands back to its callers.
type cachedFile struct {
	// Arbitrates single access to this file
	sync.RWMutex
	// Time the file was last read. It is seeded from the on-disk access time
	// during scan and set to time.Now() by every successful lockFile call.
	lastReadTime time.Time
	// Number of times the file has been read (bumped by lockFile for read locks
	// on entries that already exist).
	readCount int
	// Size of the file in bytes, as recorded when the entry was created.
	size int64
}
    47  
// A Cache is the underlying implementation of our HTTP and RPC caches that handles storing & retrieving artifacts.
type Cache struct {
	// cachedFiles maps artifact paths, relative to rootPath, to *cachedFile entries.
	cachedFiles cmap.ConcurrentMap
	// totalSize is the running sum of the sizes of all tracked entries, in bytes.
	// lockFile and removeFile adjust it with atomic.AddInt64.
	totalSize   int64
	// rootPath is the directory on disk under which artifacts are stored.
	rootPath    string
}
    54  
    55  // NewCache initialises the cache and fires off a background cleaner goroutine which runs every
    56  // cleanFrequency seconds. The high and low water marks control a (soft) max size and a (harder)
    57  // minimum size.
    58  func NewCache(path string, cleanFrequency, maxArtifactAge time.Duration, lowWaterMark, highWaterMark uint64) *Cache {
    59  	log.Notice("Initialising cache with settings:\n  Path: %s\n  Clean frequency: %s\n  Max artifact age: %s\n  Low water mark: %s\n  High water mark: %s",
    60  		path, cleanFrequency, maxArtifactAge, humanize.Bytes(lowWaterMark), humanize.Bytes(highWaterMark))
    61  	cache := newCache(path)
    62  	go cache.clean(cleanFrequency, maxArtifactAge, int64(lowWaterMark), int64(highWaterMark))
    63  	return cache
    64  }
    65  
    66  // newCache is an internal constructor intended mostly for testing. It doesn't start the cleaner goroutine.
    67  func newCache(path string) *Cache {
    68  	cache := &Cache{rootPath: path}
    69  	cache.scan()
    70  	return cache
    71  }
    72  
    73  // TotalSize returns the current total size monitored by the cache, in bytes.
    74  func (cache *Cache) TotalSize() int64 {
    75  	return cache.totalSize
    76  }
    77  
    78  // NumFiles returns the number of files currently monitored by the cache.
    79  func (cache *Cache) NumFiles() int {
    80  	return cache.cachedFiles.Count()
    81  }
    82  
    83  // scan scans the directory tree for files.
    84  func (cache *Cache) scan() {
    85  	cache.cachedFiles = cmap.New()
    86  	cache.totalSize = 0
    87  
    88  	if !core.PathExists(cache.rootPath) {
    89  		if err := os.MkdirAll(cache.rootPath, core.DirPermissions); err != nil {
    90  			log.Fatalf("Failed to create cache directory %s: %s", cache.rootPath, err)
    91  		}
    92  		return
    93  	}
    94  
    95  	log.Info("Scanning cache directory %s...", cache.rootPath)
    96  	filepath.Walk(cache.rootPath, func(name string, info os.FileInfo, err error) error {
    97  		if err != nil {
    98  			log.Fatalf("%s", err)
    99  		} else if !info.IsDir() { // We don't have directory entries.
   100  			name = name[len(cache.rootPath)+1:]
   101  			log.Debug("Found file %s", name)
   102  			size := info.Size()
   103  			cache.cachedFiles.Set(name, &cachedFile{
   104  				lastReadTime: atime.Get(info),
   105  				readCount:    0,
   106  				size:         size,
   107  			})
   108  			cache.totalSize += size
   109  		}
   110  		return nil
   111  	})
   112  	log.Info("Scan complete, found %d entries", cache.cachedFiles.Count())
   113  }
   114  
   115  // lockFile locks a file for reading or writing.
   116  // It returns a locked mutex corresponding to that file or nil if there is none.
   117  // The caller should .Unlock() the mutex once they're done with it.
   118  func (cache *Cache) lockFile(path string, write bool, size int64) *cachedFile {
   119  	filei, present := cache.cachedFiles.Get(path)
   120  	var file *cachedFile
   121  	if !present {
   122  		// If we're writing we insert a new one, if we're reading we don't.
   123  		if !write {
   124  			return nil
   125  		}
   126  		file = &cachedFile{
   127  			readCount: 0,
   128  			size:      size,
   129  		}
   130  		file.Lock()
   131  		cache.cachedFiles.Set(path, file)
   132  		atomic.AddInt64(&cache.totalSize, size)
   133  	} else {
   134  		file = filei.(*cachedFile)
   135  		if write {
   136  			file.Lock()
   137  		} else {
   138  			file.RLock()
   139  			file.readCount++
   140  		}
   141  	}
   142  	file.lastReadTime = time.Now()
   143  	return file
   144  }
   145  
   146  // removeFile deletes a file from the cache map. It does not remove the on-disk file.
   147  func (cache *Cache) removeFile(path string, file *cachedFile) {
   148  	cache.cachedFiles.Remove(path)
   149  	atomic.AddInt64(&cache.totalSize, -file.size)
   150  	log.Debug("Removing file %s, saves %d, new size will be %d", path, file.size, cache.totalSize)
   151  }
   152  
   153  // removeAndDeleteFile deletes a file from the cache map and on-disk.
   154  func (cache *Cache) removeAndDeleteFile(p string, file *cachedFile) {
   155  	cache.removeFile(p, file)
   156  	p = path.Join(cache.rootPath, p)
   157  	if err := os.RemoveAll(p); err != nil {
   158  		log.Error("Failed to delete file: %s", p)
   159  	}
   160  }
   161  
   162  // RetrieveArtifact takes in the artifact path as a parameter and checks in the base server
   163  // file directory to see if the file exists in the given path. If found, the function will
   164  // return whatever's been stored there, which might be a directory and therefore contain
   165  // multiple files to be returned.
   166  func (cache *Cache) RetrieveArtifact(artPath string) ([]*pb.Artifact, error) {
   167  	ret := []*pb.Artifact{}
   168  	if fs.IsGlob(artPath) {
   169  		// N.B. strictly speaking we should have a real config here about what BUILD file names are,
   170  		// but likely the only time it would make a difference is if we'd been asked to cache a file named BUILD
   171  		// when the BUILD file name had been changed to something else.
   172  		for _, art := range fs.Glob(nil, cache.rootPath, []string{artPath}, nil, nil, true) {
   173  			fullPath := path.Join(cache.rootPath, art)
   174  			lock := cache.lockFile(fullPath, false, 0)
   175  			body, err := ioutil.ReadFile(fullPath)
   176  			if lock != nil {
   177  				lock.RUnlock()
   178  			}
   179  			if err != nil {
   180  				return nil, err
   181  			}
   182  			ret = append(ret, &pb.Artifact{File: art, Body: body})
   183  		}
   184  		return ret, nil
   185  	}
   186  
   187  	fullPath := path.Join(cache.rootPath, artPath)
   188  	lock := cache.lockFile(artPath, false, 0)
   189  	if lock == nil {
   190  		// Can happen if artPath is a directory; we only store artifacts as files.
   191  		// (This is a debatable choice; it's a bit crap either way).
   192  		if info, err := os.Stat(fullPath); err == nil && info.IsDir() {
   193  			return cache.retrieveDir(artPath)
   194  		}
   195  		return nil, os.ErrNotExist
   196  	}
   197  	defer lock.RUnlock()
   198  
   199  	if info, err := os.Lstat(fullPath); err == nil && (info.Mode()&os.ModeSymlink) != 0 {
   200  		dest, err := os.Readlink(fullPath)
   201  		if err != nil {
   202  			return nil, err
   203  		}
   204  		ret = append(ret, &pb.Artifact{
   205  			File:    fullPath[len(cache.rootPath)+1:],
   206  			Symlink: dest,
   207  		})
   208  	} else if err := fs.Walk(fullPath, func(name string, isDir bool) error {
   209  		if !isDir {
   210  			body, err := ioutil.ReadFile(name)
   211  			if err != nil {
   212  				return err
   213  			}
   214  			ret = append(ret, &pb.Artifact{
   215  				File: name[len(cache.rootPath)+1:],
   216  				Body: body,
   217  			})
   218  		}
   219  		return nil
   220  	}); err != nil {
   221  		return nil, err
   222  	}
   223  	return ret, nil
   224  }
   225  
   226  // retrieveDir retrieves a directory of artifacts. We don't track the directory itself
   227  // but allow its traversal to retrieve them.
   228  func (cache *Cache) retrieveDir(artPath string) ([]*pb.Artifact, error) {
   229  	log.Debug("Searching dir %s for artifacts", artPath)
   230  	ret := []*pb.Artifact{}
   231  	fullPath := path.Join(cache.rootPath, artPath)
   232  	err := fs.Walk(fullPath, func(name string, isDir bool) error {
   233  		if !isDir {
   234  			// Must strip cache path off the front of this.
   235  			arts, err := cache.RetrieveArtifact(name[len(cache.rootPath)+1:])
   236  			if err != nil {
   237  				return err
   238  			}
   239  			ret = append(ret, arts...)
   240  		}
   241  		return nil
   242  	})
   243  	return ret, err
   244  }
   245  
   246  // StoreArtifact takes in the artifact content and path as parameters and creates a file with
   247  // the given content in the given path.
   248  // The function will return the first error found in the process, or nil if the process is successful.
   249  func (cache *Cache) StoreArtifact(artPath string, key []byte, symlink string) error {
   250  	log.Info("Storing artifact %s", artPath)
   251  	lock := cache.lockFile(artPath, true, int64(len(key)))
   252  	defer lock.Unlock()
   253  
   254  	fullPath := path.Join(cache.rootPath, artPath)
   255  	dirPath := path.Dir(fullPath)
   256  	if err := os.MkdirAll(dirPath, core.DirPermissions); err != nil {
   257  		log.Warning("Couldn't create path %s in http cache: %s", dirPath, err)
   258  		cache.removeAndDeleteFile(artPath, lock)
   259  		os.RemoveAll(dirPath)
   260  		return err
   261  	}
   262  	if symlink != "" {
   263  		if err := os.Symlink(symlink, fullPath); err != nil {
   264  			log.Errorf("Could not create %s symlink: %s", fullPath, err)
   265  			cache.removeAndDeleteFile(artPath, lock)
   266  			return err
   267  		}
   268  	} else {
   269  		log.Debug("Writing artifact to %s", fullPath)
   270  		if err := fs.WriteFile(bytes.NewReader(key), fullPath, 0); err != nil {
   271  			log.Errorf("Could not create %s artifact: %s", fullPath, err)
   272  			cache.removeAndDeleteFile(artPath, lock)
   273  			return err
   274  		}
   275  	}
   276  	return nil
   277  }
   278  
   279  // StoreMetadata stores some metadata about the given artifact in a simple format.
   280  // This mostly just identifies where it came from.
   281  func (cache *Cache) StoreMetadata(artPath, hostname, address, peer string) error {
   282  	log.Info("Storing metadata for %s", artPath)
   283  	lock := cache.lockFile(artPath, true, 0)
   284  	defer lock.Unlock()
   285  	fullPath := path.Join(cache.rootPath, artPath, metadataFileName)
   286  	contents := fmt.Sprintf(metadataTemplate, address, hostname, peer != "", peer)
   287  	if err := ioutil.WriteFile(fullPath, []byte(contents), 0644); err != nil {
   288  		log.Error("Could not write metadata file: %s", err)
   289  		return err
   290  	}
   291  	return nil
   292  }
   293  
   294  // DeleteArtifact takes in the artifact path as a parameter and removes the artifact from disk.
   295  // The function will return the first error found in the process, or nil if the process is successful.
   296  func (cache *Cache) DeleteArtifact(artPath string) error {
   297  	log.Info("Deleting artifact %s", artPath)
   298  	// We need to search the entire map for prefixes. Pessimism follows...
   299  	paths := cachedFilePaths{}
   300  	for t := range cache.cachedFiles.IterBuffered() {
   301  		if strings.HasPrefix(t.Key, artPath) {
   302  			paths = append(paths, cachedFilePath{file: t.Val.(*cachedFile), path: t.Key})
   303  		}
   304  	}
   305  	// NB. We can't do this in the loop above because there's a risk of deadlock.
   306  	//     We create the temporary slice in preference to calling .Items() and duplicating
   307  	//     the entire map.
   308  	for _, p := range paths {
   309  		p.file.Lock()
   310  		cache.removeFile(p.path, p.file)
   311  		p.file.Unlock()
   312  	}
   313  	return os.RemoveAll(path.Join(cache.rootPath, artPath))
   314  }
   315  
   316  // DeleteAllArtifacts will remove all files in the cache directory.
   317  // The function will return the first error found in the process, or nil if the process is successful.
   318  func (cache *Cache) DeleteAllArtifacts() error {
   319  	// Empty entire cache now.
   320  	log.Warning("Deleting entire cache")
   321  	cache.cachedFiles = cmap.New()
   322  	cache.totalSize = 0
   323  	return core.AsyncDeleteDir(cache.rootPath)
   324  }
   325  
   326  // clean implements a periodic clean of the cache to remove old artifacts.
   327  func (cache *Cache) clean(cleanFrequency, maxArtifactAge time.Duration, lowWaterMark, highWaterMark int64) {
   328  	for range time.NewTicker(cleanFrequency).C {
   329  		cache.cleanOldFiles(maxArtifactAge)
   330  		cache.singleClean(lowWaterMark, highWaterMark)
   331  	}
   332  }
   333  
   334  // cleanOldFiles cleans any files whose last access time is older than the given duration.
   335  func (cache *Cache) cleanOldFiles(maxArtifactAge time.Duration) bool {
   336  	log.Debug("Searching for old files...")
   337  	oldestTime := time.Now().Add(-maxArtifactAge)
   338  	cleaned := 0
   339  	for t := range cache.cachedFiles.IterBuffered() {
   340  		f := t.Val.(*cachedFile)
   341  		if f.lastReadTime.Before(oldestTime) {
   342  			lock := cache.lockFile(t.Key, true, f.size)
   343  			cache.removeAndDeleteFile(t.Key, f)
   344  			lock.Unlock()
   345  			cleaned++
   346  		}
   347  	}
   348  	log.Notice("Removed %d old files, new size: %d, %d files", cleaned, cache.totalSize, cache.cachedFiles.Count())
   349  	return cleaned > 0
   350  }
   351  
   352  // singleClean runs a single clean of the cache. It's split out for testing purposes.
   353  func (cache *Cache) singleClean(lowWaterMark, highWaterMark int64) bool {
   354  	log.Debug("Total size: %d High water mark: %d", cache.totalSize, highWaterMark)
   355  	if cache.totalSize > highWaterMark {
   356  		log.Info("Cleaning cache...")
   357  		files := cache.filesToClean(lowWaterMark)
   358  		log.Info("Identified %d files to clean...", len(files))
   359  		for _, file := range files {
   360  			lock := cache.lockFile(file.path, true, file.file.size)
   361  			cache.removeAndDeleteFile(file.path, file.file)
   362  			lock.Unlock()
   363  		}
   364  		return true
   365  	}
   366  	return false
   367  }
   368  
// cachedFilePath pairs a pointer to a cachedFile with the path (map key) it is
// stored under.
type cachedFilePath struct {
	file *cachedFile
	path string
}
   374  
   375  type cachedFilePaths []cachedFilePath
   376  
   377  func (c cachedFilePaths) Len() int      { return len(c) }
   378  func (c cachedFilePaths) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
   379  func (c cachedFilePaths) Less(i, j int) bool {
   380  	return c[i].file.lastReadTime.Before(c[j].file.lastReadTime)
   381  }
   382  
   383  // filesToClean returns a list of files that should be cleaned, ie. the least interesting
   384  // artifacts in the cache according to some heuristic. Removing all of them will be
   385  // sufficient to reduce the cache size below lowWaterMark.
   386  func (cache *Cache) filesToClean(lowWaterMark int64) cachedFilePaths {
   387  	ret := make(cachedFilePaths, 0, len(cache.cachedFiles))
   388  	for t := range cache.cachedFiles.IterBuffered() {
   389  		ret = append(ret, cachedFilePath{file: t.Val.(*cachedFile), path: t.Key})
   390  	}
   391  	sort.Sort(&ret)
   392  
   393  	sizeToDelete := cache.totalSize - lowWaterMark
   394  	var sizeDeleted int64
   395  	for i, file := range ret {
   396  		if sizeDeleted >= sizeToDelete {
   397  			return ret[0:i]
   398  		}
   399  		sizeDeleted += file.file.size
   400  	}
   401  	return ret
   402  }