kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/platform/cache/cache.go (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package cache implements a simple in-memory file cache and provides a simple
    18  // Fetcher wrapper that uses the cache for its Fetch operations.
    19  package cache // import "kythe.io/kythe/go/platform/cache"
    20  
    21  import (
    22  	"container/heap"
    23  	"fmt"
    24  	"sync"
    25  
    26  	"kythe.io/kythe/go/platform/analysis"
    27  )
    28  
// cachedFetcher pairs a Cache with an underlying analysis.Fetcher.  The
// fetcher is embedded, so its methods are promoted to cachedFetcher except
// for Fetch, which cachedFetcher overrides to read through the cache.
type cachedFetcher struct {
	cache *Cache // Consulted before, and filled after, each delegated fetch.
	analysis.Fetcher
}
    33  
    34  // Fetch implements the corresponding method of analysis.Fetcher by reading
    35  // through the cache.
    36  func (c cachedFetcher) Fetch(path, digest string) ([]byte, error) {
    37  	key := fmt.Sprintf("%s\x00%s", path, digest)
    38  	if data := c.cache.Get(key); data != nil {
    39  		return data, nil
    40  	}
    41  	data, err := c.Fetcher.Fetch(path, digest)
    42  	if err == nil {
    43  		c.cache.Put(key, data)
    44  	}
    45  	return data, err
    46  }
    47  
    48  // Fetcher creates an analysis.Fetcher that implements fetches through the
    49  // cache, and delegates all other operations to f.  If cache == nil, f is
    50  // returned unmodified.  The returned value is safe for concurrent use if f is.
    51  func Fetcher(f analysis.Fetcher, cache *Cache) analysis.Fetcher {
    52  	if cache == nil {
    53  		return f
    54  	}
    55  	return cachedFetcher{
    56  		cache:   cache,
    57  		Fetcher: f,
    58  	}
    59  }
    60  
    61  // New returns a new empty cache with a capacity of maxBytes.
    62  // Returns nil if maxBytes <= 0.
    63  func New(maxBytes int) *Cache {
    64  	if maxBytes <= 0 {
    65  		return nil
    66  	}
    67  	return &Cache{
    68  		maxBytes: maxBytes,
    69  		data:     make(map[string]*entry),
    70  	}
    71  }
    72  
// A Cache implements a limited-size cache of key-value pairs, where keys are
// strings and values are byte slices.  Entries are evicted from the cache
// using a least-frequently used policy, based on how many times a given key
// has been fetched with Get.  A *Cache is safe for concurrent use.
type Cache struct {
	mu sync.RWMutex // Guards all of the fields below.

	curBytes int               // Size of resident data.
	maxBytes int               // Total allowed capacity of cache.
	data     map[string]*entry // Cached entries.
	usage    countHeap         // Count-ordered heap for eviction.

	// Number of Get calls that did and did not find their key, respectively.
	hits, misses int
}
    87  
    88  // Has returns whether the specified key is resident in the cache.  This does
    89  // not affect the usage count of the key for purposes of cache eviction.
    90  func (c *Cache) Has(key string) bool {
    91  	c.mu.RLock()
    92  	defer c.mu.RUnlock()
    93  	return c.data[key] != nil
    94  }
    95  
    96  // Get fetches the specified key from the cache, returning nil if the key is
    97  // not present.  A successful fetch counts as a usage for the purposes of the
    98  // cache eviction policy.
    99  func (c *Cache) Get(key string) []byte {
   100  	c.mu.Lock()
   101  	defer c.mu.Unlock()
   102  
   103  	if e := c.data[key]; e != nil {
   104  		increment(&c.usage, e)
   105  		c.hits++
   106  		return e.data
   107  	}
   108  	c.misses++
   109  	return nil
   110  }
   111  
   112  // Stats returns usage statistics for the cache.
   113  func (c *Cache) Stats() (residentBytes, numHits, numMisses int) {
   114  	if c == nil {
   115  		return 0, 0, 0
   116  	}
   117  	c.mu.RLock()
   118  	defer c.mu.RUnlock()
   119  	return c.curBytes, c.hits, c.misses
   120  }
   121  
   122  // Put adds the specified key and data to the cache if it is not already
   123  // present.  If necessary, existing keys are evicted to maintain size.
   124  func (c *Cache) Put(key string, data []byte) {
   125  	c.mu.Lock()
   126  	defer c.mu.Unlock()
   127  
   128  	// Quick return if this key is already recorded, or if data itself exceeds
   129  	// the cache's total capacity.  There's no point in evicting other keys in
   130  	// that case.
   131  	if c.data[key] != nil || len(data) > c.maxBytes {
   132  		return
   133  	}
   134  
   135  	// At this point we know that there is room for the data, save that we may
   136  	// need to evict some of the existing entries (if there are no entries, we
   137  	// have enough room by construction).
   138  	newBytes := c.curBytes + len(data)
   139  	for newBytes > c.maxBytes {
   140  		goat := heap.Pop(&c.usage).(*entry)
   141  		delete(c.data, goat.key)
   142  		newBytes -= len(goat.data)
   143  	}
   144  
   145  	e := &entry{
   146  		key:  key,
   147  		data: data,
   148  	}
   149  	c.data[key] = e
   150  	heap.Push(&c.usage, e)
   151  	c.curBytes = newBytes
   152  }
   153  
// An entry is a single cached key-value pair, together with the bookkeeping
// that the eviction heap needs to locate and order it.
type entry struct {
	key   string
	data  []byte
	index int // Entry's position in the heap.
	count int // Number of fetches on this key.
}
   160  
   161  // countHeap implements a min-heap of entries based on their count value.
   162  // This permits an efficient implementation of a least-frequently used cache
   163  // eviction policy.
   164  type countHeap []*entry
   165  
   166  // Len implements a method of heap.Interface.
   167  func (h countHeap) Len() int { return len(h) }
   168  
   169  // Less implements a method of heap.Interface.
   170  func (h countHeap) Less(i, j int) bool { return h[i].count < h[j].count }
   171  
   172  // Swap implements a method of heap.Interface.
   173  func (h countHeap) Swap(i, j int) {
   174  	h[i], h[j] = h[j], h[i]
   175  	h[i].index = i
   176  	h[j].index = j
   177  }
   178  
   179  // Push implements a method of heap.Interface.
   180  func (h *countHeap) Push(item any) {
   181  	if e, ok := item.(*entry); ok {
   182  		n := len(*h)
   183  		e.index = n
   184  		*h = append(*h, e)
   185  	}
   186  }
   187  
   188  // Pop implements a method of heap.Interface.
   189  func (h *countHeap) Pop() any {
   190  	if n := len(*h) - 1; n >= 0 {
   191  		e := (*h)[n]
   192  		e.index = -1
   193  		*h = (*h)[:n]
   194  		return e
   195  	}
   196  	return nil
   197  }
   198  
   199  // increment increases the count of e by 1 and updates its location in the heap.
   200  func increment(h *countHeap, e *entry) {
   201  	heap.Remove(h, e.index)
   202  	e.count++
   203  	heap.Push(h, e)
   204  }
   205  
   206  // A ByteSize implements the flag.Value interface to allow specifying a cache
   207  // size on the command line.  Legal size values are as parsed by ParseByteSize.
   208  type ByteSize int
   209  
   210  // String implements a method of the flag.Value interface.
   211  func (b *ByteSize) String() string { return fmt.Sprintf("%d", *b) }
   212  
   213  // Get implements a method of the flag.Value interface.
   214  func (b *ByteSize) Get() any { return *b }
   215  
   216  // Set implements a method of the flag.Value interface.
   217  func (b *ByteSize) Set(s string) error {
   218  	v, err := ParseByteSize(s)
   219  	if err != nil {
   220  		return err
   221  	}
   222  	*b = ByteSize(v)
   223  	return nil
   224  }
   225  
   226  // ParseByteSize parses a string specifying a possibly-fractional number with
   227  // units and returns an equivalent value in bytes.  Fractions are rounded down.
   228  // Returns -1 and an error in case of invalid format.
   229  //
   230  // The supported unit labels are:
   231  //
   232  //	B    * 2^0  bytes
   233  //	K    * 2^10 bytes
   234  //	M    * 2^20 bytes
   235  //	G    * 2^30 bytes
   236  //	T    * 2^40 bytes
   237  //
   238  // The labels are case-insensitive ("10g" is the same as "10G")
   239  //
   240  // Examples:
   241  //
   242  //	ParseByteSize("25")    ==> 25
   243  //	ParseByteSize("1k")    ==> 1024
   244  //	ParseByteSize("2.5G")  ==> 2684354560
   245  //	ParseByteSize("10.3k") ==> 10547
   246  //	ParseByteSize("-45xx") ==> -1 [error]
   247  func ParseByteSize(s string) (int, error) {
   248  	var value float64
   249  	var unit string
   250  
   251  	n, err := fmt.Sscanf(s, "%f%s", &value, &unit)
   252  	if err != nil && n != 1 {
   253  		return -1, fmt.Errorf("invalid byte size: %q", s)
   254  	}
   255  	if value < 0 {
   256  		return -1, fmt.Errorf("invalid byte size: %q", s)
   257  	}
   258  	switch unit {
   259  	case "", "b", "B":
   260  		break
   261  	case "k", "K":
   262  		value *= 1 << 10
   263  	case "m", "M":
   264  		value *= 1 << 20
   265  	case "g", "G":
   266  		value *= 1 << 30
   267  	case "t", "T":
   268  		value *= 1 << 40
   269  	default:
   270  		return -1, fmt.Errorf("invalid byte size: %q", s)
   271  	}
   272  	return int(value), nil
   273  }