github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/remotestorage/map_chunk_cache.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package remotestorage
    16  
    17  import (
    18  	"sync"
    19  
    20  	"github.com/dolthub/dolt/go/store/hash"
    21  	"github.com/dolthub/dolt/go/store/nbs"
    22  )
    23  
// mapChunkCache is a ChunkCache implementation that stores everything in an in memory map.
// All methods take mu before reading or writing the maps, so a single instance can be shared
// between goroutines. Chunks are never evicted by this type; the capacity monitor only decides
// whether a new chunk may still be inserted.
type mapChunkCache struct {
	mu          *sync.Mutex                       // locked by every method before it touches the fields below
	hashToChunk map[hash.Hash]nbs.CompressedChunk // every cached chunk, keyed by its hash
	toFlush     map[hash.Hash]nbs.CompressedChunk // newly cached chunks awaiting the next GetAndClearChunksToFlush call
	cm          CapacityMonitor                   // consulted before each insertion into hashToChunk
}
    31  
    32  func newMapChunkCache() *mapChunkCache {
    33  	return &mapChunkCache{
    34  		&sync.Mutex{},
    35  		make(map[hash.Hash]nbs.CompressedChunk),
    36  		make(map[hash.Hash]nbs.CompressedChunk),
    37  		NewUncappedCapacityMonitor(),
    38  	}
    39  }
    40  
    41  // used by DoltHub API
    42  func NewMapChunkCacheWithMaxCapacity(maxCapacity int64) *mapChunkCache {
    43  	return &mapChunkCache{
    44  		&sync.Mutex{},
    45  		make(map[hash.Hash]nbs.CompressedChunk),
    46  		make(map[hash.Hash]nbs.CompressedChunk),
    47  		NewFixedCapacityMonitor(maxCapacity),
    48  	}
    49  }
    50  
    51  // Put puts a slice of chunks into the cache.
    52  func (mcc *mapChunkCache) Put(chnks []nbs.CompressedChunk) bool {
    53  	mcc.mu.Lock()
    54  	defer mcc.mu.Unlock()
    55  
    56  	for i := 0; i < len(chnks); i++ {
    57  		c := chnks[i]
    58  		h := c.Hash()
    59  
    60  		if curr, ok := mcc.hashToChunk[h]; ok {
    61  			if !curr.IsEmpty() {
    62  				continue
    63  			}
    64  		}
    65  
    66  		if mcc.cm.CapacityExceeded(len(c.FullCompressedChunk)) {
    67  			return true
    68  		}
    69  
    70  		mcc.hashToChunk[h] = c
    71  
    72  		if !c.IsEmpty() {
    73  			mcc.toFlush[h] = c
    74  		}
    75  	}
    76  
    77  	return false
    78  }
    79  
    80  // Get gets a map of hash to chunk for a set of hashes.  In the event that a chunk is not in the cache, chunks.Empty.
    81  // is put in it's place
    82  func (mcc *mapChunkCache) Get(hashes hash.HashSet) map[hash.Hash]nbs.CompressedChunk {
    83  	hashToChunk := make(map[hash.Hash]nbs.CompressedChunk)
    84  
    85  	mcc.mu.Lock()
    86  	defer mcc.mu.Unlock()
    87  
    88  	for h := range hashes {
    89  		if c, ok := mcc.hashToChunk[h]; ok {
    90  			hashToChunk[h] = c
    91  		} else {
    92  			hashToChunk[h] = nbs.EmptyCompressedChunk
    93  		}
    94  	}
    95  
    96  	return hashToChunk
    97  }
    98  
    99  // Has takes a set of hashes and returns the set of hashes that the cache currently does not have in it.
   100  func (mcc *mapChunkCache) Has(hashes hash.HashSet) (absent hash.HashSet) {
   101  	absent = make(hash.HashSet)
   102  
   103  	mcc.mu.Lock()
   104  	defer mcc.mu.Unlock()
   105  
   106  	for h := range hashes {
   107  		if _, ok := mcc.hashToChunk[h]; !ok {
   108  			absent[h] = struct{}{}
   109  		}
   110  	}
   111  
   112  	return absent
   113  }
   114  
   115  func (mcc *mapChunkCache) PutChunk(ch nbs.CompressedChunk) bool {
   116  	mcc.mu.Lock()
   117  	defer mcc.mu.Unlock()
   118  
   119  	h := ch.Hash()
   120  	if existing, ok := mcc.hashToChunk[h]; !ok || existing.IsEmpty() {
   121  		if mcc.cm.CapacityExceeded(len(ch.FullCompressedChunk)) {
   122  			return true
   123  		}
   124  		mcc.hashToChunk[h] = ch
   125  		mcc.toFlush[h] = ch
   126  	}
   127  
   128  	return false
   129  }
   130  
   131  // GetAndClearChunksToFlush gets a map of hash to chunk which includes all the chunks that were put in the cache
   132  // between the last time GetAndClearChunksToFlush was called and now.
   133  func (mcc *mapChunkCache) GetAndClearChunksToFlush() map[hash.Hash]nbs.CompressedChunk {
   134  	newToFlush := make(map[hash.Hash]nbs.CompressedChunk)
   135  
   136  	mcc.mu.Lock()
   137  	defer mcc.mu.Unlock()
   138  
   139  	toFlush := mcc.toFlush
   140  	mcc.toFlush = newToFlush
   141  
   142  	return toFlush
   143  }