github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/stats/cache.go (about)

     1  package stats
     2  
     3  import (
     4  	"context"
     5  	"encoding/base32"
     6  	"encoding/json"
     7  	"errors"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"math"
    11  	"os"
    12  	"path/filepath"
    13  	"sync"
    14  
    15  	"github.com/qri-io/dataset"
    16  	"github.com/qri-io/didmod"
    17  	"github.com/qri-io/qfs"
    18  )
    19  
    20  var (
    21  	// ErrCacheMiss indicates a requested path isn't in the cache
    22  	ErrCacheMiss = fmt.Errorf("stats: cache miss")
    23  	// ErrNoCache indicates there is no cache
    24  	ErrNoCache = fmt.Errorf("stats: no cache")
    25  	// ErrCacheCorrupt indicates a faulty stats cache
    26  	ErrCacheCorrupt = fmt.Errorf("stats: cache is corrupt")
    27  )
    28  
    29  // Props is an alias of didmod.Props, the file properties type this cache records
    30  type Props = didmod.Props
    31  
    32  // Cache is a store of stats components.
    33  // Consumers of a cache must not rely on the cache for persistence.
    34  // Implementations are expected to maintain their own size bounding
    35  // semantics internally.
    36  // Cache implementations must be safe for concurrent use, and must be
    37  // nil-callable
    38  type Cache interface {
    39  	// PutStats places a stats component in the cache under the given key.
    40  	// NOTE(review): earlier wording described modTime-based expiry, but this signature takes no modTime - confirm intended semantics
    41  	PutStats(ctx context.Context, key string, sa *dataset.Stats) error
    42  	// GetStats gets the stats component for a given key
    43  	GetStats(ctx context.Context, key string) (sa *dataset.Stats, err error)
    44  }
    45  
    46  // nilCache is a stand in for not having a cache: its PutStats returns
    47  // ErrNoCache and its GetStats returns ErrCacheMiss
    48  type nilCache bool
    49  
    50  var _ Cache = (*nilCache)(nil)
    51  
    52  // PutStats stores nothing and always returns ErrNoCache
    53  func (nilCache) PutStats(ctx context.Context, key string, sa *dataset.Stats) error {
    54  	return ErrNoCache
    55  }
    56  
    57  // GetStats never finds anything: it always returns nil stats and ErrCacheMiss
    58  func (nilCache) GetStats(ctx context.Context, key string) (sa *dataset.Stats, err error) {
    59  	return nil, ErrCacheMiss
    60  }
    61  
    62  // localCache is a stats cache stored in a directory on the local operating system
    63  type localCache struct {
    64  	root    string // directory holding the component files and the info file
    65  	maxSize int64  // byte limit applied to a single component and to the summed size of cached components
    66  
    67  	info   *cacheInfo // index of file properties for cached components and their targets
    68  	infoLk sync.Mutex // taken by addAndPurgeExpired, readCacheInfo and writeCacheInfo around info
    69  }
    70  
    71  var _ Cache = (*localCache)(nil)
    72  
    73  // NewLocalCache creates a cache in a local directory. LocalCache is sensitive
    74  // to added keys that match the qfs.PathKind of "local". When a stats component
    75  // is added with a local filepath as it's key, LocalCache will record the
    76  // status of that file,  and return ErrCacheMiss if that filepath is altered on
    77  // retrieval
    78  func NewLocalCache(rootDir string, maxSize int64) (Cache, error) {
    79  	c := &localCache{
    80  		root:    rootDir,
    81  		maxSize: maxSize,
    82  		info:    newCacheInfo(),
    83  	}
    84  
    85  	err := c.readCacheInfo()
    86  	if errors.Is(err, ErrCacheCorrupt) {
    87  		log.Warn("Cache of stats data is corrupt. Removing all cached stats data as a precaution. This isn't too big a deal, as stats data can be recalculated.")
    88  		err = os.RemoveAll(rootDir)
    89  		return c, err
    90  	}
    91  
    92  	// ensure base directory exists
    93  	if err := os.MkdirAll(rootDir, os.ModePerm); err != nil {
    94  		return nil, err
    95  	}
    96  
    97  	return c, err
    98  }
    99  
   100  // Put places stats in the cache, keyed by path
   101  func (c *localCache) PutStats(ctx context.Context, key string, sa *dataset.Stats) (err error) {
   102  	var statProps, targetProps didmod.Props
   103  	if qfs.PathKind(key) == "local" {
   104  		targetProps, _ = didmod.NewProps(key)
   105  	}
   106  
   107  	key = c.cacheKey(key)
   108  	filename := c.componentFilepath(key)
   109  	data, err := json.Marshal(sa)
   110  	if err != nil {
   111  		return err
   112  	}
   113  
   114  	if int64(len(data)) > c.maxSize {
   115  		return fmt.Errorf("stats component size exceeds maximum size of cache")
   116  	}
   117  
   118  	if err := ioutil.WriteFile(filename, data, 0644); err != nil {
   119  		return err
   120  	}
   121  	statProps, err = didmod.NewProps(filename)
   122  	if err != nil {
   123  		return err
   124  	}
   125  
   126  	c.addAndPurgeExpired(key, statProps, targetProps)
   127  
   128  	return c.writeCacheInfo()
   129  }
   130  
   131  // Stats gets cached byte data for a path
   132  func (c *localCache) GetStats(ctx context.Context, key string) (sa *dataset.Stats, err error) {
   133  	cacheKey := c.cacheKey(key)
   134  	log.Debugw("getting stats", "key", key, "cacheKey", cacheKey)
   135  
   136  	targetFileProps, exists := c.info.TargetFileProps[cacheKey]
   137  	if !exists {
   138  		return nil, ErrCacheMiss
   139  	}
   140  
   141  	if qfs.PathKind(key) == "local" {
   142  		if fileProps, err := didmod.NewProps(key); err == nil {
   143  			if !targetFileProps.Equal(fileProps) {
   144  				// note: returning ErrCacheMiss here will probably lead to re-calcualtion
   145  				// and subsequent overwriting by cache consumers, so we shouldn't need
   146  				// to proactively drop the stale cache here
   147  				return nil, ErrCacheMiss
   148  			}
   149  		}
   150  	}
   151  
   152  	f, err := os.Open(c.componentFilepath(cacheKey))
   153  	if err != nil {
   154  		return nil, err
   155  	}
   156  	defer f.Close()
   157  
   158  	sa = &dataset.Stats{}
   159  	err = json.NewDecoder(f).Decode(sa)
   160  	return sa, err
   161  }
   162  
   163  var b32Enc = base32.NewEncoding("abcdefghijklmnopqrstuvwxyz234567").WithPadding(base32.NoPadding)
   164  
   165  func (c *localCache) componentFilepath(cacheKey string) string {
   166  	return filepath.Join(c.root, fmt.Sprintf("%s.json", cacheKey))
   167  }
   168  
   169  func (c *localCache) cacheKey(key string) string {
   170  	return b32Enc.EncodeToString([]byte(key))
   171  }
   172  
   173  func (c *localCache) addAndPurgeExpired(cacheKey string, statProps, targetProps didmod.Props) {
   174  	c.infoLk.Lock()
   175  	defer c.infoLk.Unlock()
   176  	log.Debugw("adding stat props", "cacheKey", cacheKey, "statProps", statProps, "targetProps", targetProps)
   177  	c.info.StatFileProps[cacheKey] = statProps
   178  	c.info.TargetFileProps[cacheKey] = targetProps
   179  
   180  	var (
   181  		lowestKey     string
   182  		lowestModTime int64
   183  	)
   184  
   185  	for c.info.Size() > c.maxSize {
   186  		lowestKey = ""
   187  		lowestModTime = math.MaxInt64
   188  
   189  		for key, fileProps := range c.info.StatFileProps {
   190  			if fileProps.Mtime.Unix() < lowestModTime && key != cacheKey {
   191  				lowestKey = key
   192  			}
   193  		}
   194  		if lowestKey == "" {
   195  			break
   196  		}
   197  		log.Debugw("dropping stats component from local cache", "path", lowestKey, "size", c.info.StatFileProps[lowestKey].Size)
   198  		if err := os.Remove(c.componentFilepath(lowestKey)); err != nil {
   199  			break
   200  		}
   201  		delete(c.info.StatFileProps, lowestKey)
   202  		delete(c.info.TargetFileProps, lowestKey)
   203  	}
   204  }
   205  
   206  const localCacheInfoFilename = "info.json" // name of the info file kept in the cache root
   207  
        // cacheInfo is the cache index that is serialized to and from the info file.
        // Both maps are keyed by cache key.
   208  type cacheInfo struct {
   209  	StatFileProps   map[string]didmod.Props // props of each cached component file
   210  	TargetFileProps map[string]didmod.Props // props recorded for the target each component was stored for
   211  }
   212  
   213  func newCacheInfo() *cacheInfo {
   214  	return &cacheInfo{
   215  		StatFileProps:   map[string]didmod.Props{},
   216  		TargetFileProps: map[string]didmod.Props{},
   217  	}
   218  }
   219  
   220  func (ci cacheInfo) Size() (size int64) {
   221  	for _, p := range ci.StatFileProps {
   222  		size += p.Size
   223  	}
   224  	return size
   225  }
   226  
   227  func (c *localCache) readCacheInfo() error {
   228  	c.infoLk.Lock()
   229  	defer c.infoLk.Unlock()
   230  
   231  	name := filepath.Join(c.root, localCacheInfoFilename)
   232  	f, err := os.Open(name)
   233  	if err != nil {
   234  		if os.IsNotExist(err) {
   235  			c.info = newCacheInfo()
   236  			return nil
   237  		}
   238  		return err
   239  	}
   240  
   241  	defer f.Close()
   242  
   243  	c.info = newCacheInfo()
   244  	if err := json.NewDecoder(f).Decode(c.info); err != nil {
   245  		// corrupt cache
   246  		return fmt.Errorf("%w decoding stats info: %s", ErrCacheCorrupt, err)
   247  	}
   248  
   249  	return nil
   250  }
   251  
   252  func (c *localCache) writeCacheInfo() error {
   253  	c.infoLk.Lock()
   254  	defer c.infoLk.Unlock()
   255  
   256  	name := filepath.Join(c.root, localCacheInfoFilename)
   257  	data, err := json.Marshal(c.info)
   258  	if err != nil {
   259  		return err
   260  	}
   261  
   262  	return ioutil.WriteFile(name, data, 0644)
   263  }