github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/nbs/table_persister.go

// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

package nbs

import (
	"bytes"
	"crypto/sha512"
	"encoding/binary"
	"sort"
	"sync"

	"github.com/attic-labs/noms/go/d"
	"github.com/attic-labs/noms/go/util/sizecache"
)

// tablePersister allows interaction with persistent storage. It provides
// primitives for pushing the contents of a memTable to persistent storage,
// opening persistent tables for reading, and conjoining a number of existing
// chunkSources into one. A tablePersister implementation must be goroutine-
// safe.
type tablePersister interface {
	// Persist makes the contents of mt durable. Chunks already present in
	// |haver| may be dropped in the process.
	Persist(mt *memTable, haver chunkReader, stats *Stats) chunkSource

	// ConjoinAll conjoins all chunks in |sources| into a single, new
	// chunkSource.
	ConjoinAll(sources chunkSources, stats *Stats) chunkSource

	// Open a table named |name|, containing |chunkCount| chunks.
	Open(name addr, chunkCount uint32, stats *Stats) chunkSource
}
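
// Illustrative sketch only, not code in this package: a caller holding some
// concrete tablePersister |p| might flush a memTable, conjoin the result with
// another source, and reopen the conjoined table roughly like this. |mt|,
// |haver|, |other| and |stats| are assumed to exist.
//
//	src := p.Persist(mt, haver, stats)
//	conjoined := p.ConjoinAll(chunkSources{src, other}, stats)
//	reopened := p.Open(conjoined.hash(), conjoined.count(), stats)
//	_ = reopened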

// indexCache provides sized storage for table indices. While getting and/or
// setting the cache entry for a given table name, the caller MUST hold the
// lock for that entry.
type indexCache struct {
	cache  *sizecache.SizeCache
	cond   *sync.Cond
	locked map[addr]struct{}
}

// Returns an indexCache which will burn roughly |size| bytes of memory.
func newIndexCache(size uint64) *indexCache {
	return &indexCache{sizecache.New(size), sync.NewCond(&sync.Mutex{}), map[addr]struct{}{}}
}

// Take an exclusive lock on the cache entry for |name|. Callers must do this
// before calling get(addr) or put(addr, index).
func (sic *indexCache) lockEntry(name addr) {
	sic.cond.L.Lock()
	defer sic.cond.L.Unlock()

	for {
		if _, present := sic.locked[name]; !present {
			sic.locked[name] = struct{}{}
			break
		}
		sic.cond.Wait()
	}
}

func (sic *indexCache) unlockEntry(name addr) {
	sic.cond.L.Lock()
	defer sic.cond.L.Unlock()

	_, ok := sic.locked[name]
	d.PanicIfFalse(ok)
	delete(sic.locked, name)

	sic.cond.Broadcast()
}

func (sic *indexCache) get(name addr) (tableIndex, bool) {
	if idx, found := sic.cache.Get(name); found {
		return idx.(tableIndex), true
	}
	return tableIndex{}, false
}

func (sic *indexCache) put(name addr, idx tableIndex) {
	indexSize := uint64(idx.chunkCount) * (addrSize + ordinalSize + lengthSize + uint64Size)
	sic.cache.Add(name, indexSize, idx)
}
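
// Illustrative sketch only: the intended calling pattern around get and put,
// per the locking contract above. |cache|, |name| and |buff| are assumptions,
// as is using parseTableIndex to build the index on a miss.
//
//	cache.lockEntry(name)
//	defer cache.unlockEntry(name)
//	idx, found := cache.get(name)
//	if !found {
//		idx = parseTableIndex(buff)
//		cache.put(name, idx)
//	}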

type chunkSourcesByAscendingCount chunkSources

func (csbc chunkSourcesByAscendingCount) Len() int { return len(csbc) }
func (csbc chunkSourcesByAscendingCount) Less(i, j int) bool {
	srcI, srcJ := csbc[i], csbc[j]
	if srcI.count() == srcJ.count() {
		hi, hj := srcI.hash(), srcJ.hash()
		return bytes.Compare(hi[:], hj[:]) < 0
	}
	return srcI.count() < srcJ.count()
}
func (csbc chunkSourcesByAscendingCount) Swap(i, j int) { csbc[i], csbc[j] = csbc[j], csbc[i] }

type chunkSourcesByDescendingDataSize []sourceWithSize

func (csbds chunkSourcesByDescendingDataSize) Len() int { return len(csbds) }
func (csbds chunkSourcesByDescendingDataSize) Less(i, j int) bool {
	swsI, swsJ := csbds[i], csbds[j]
	if swsI.dataLen == swsJ.dataLen {
		hi, hj := swsI.source.hash(), swsJ.source.hash()
		return bytes.Compare(hi[:], hj[:]) < 0
	}
	return swsI.dataLen > swsJ.dataLen
}
func (csbds chunkSourcesByDescendingDataSize) Swap(i, j int) { csbds[i], csbds[j] = csbds[j], csbds[i] }

type sourceWithSize struct {
	source  chunkSource
	dataLen uint64
}

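// compactionPlan is the precomputed layout for conjoining |sources| into a
// single table. mergedIndex holds the conjoined table's index: chunkCount
// prefix tuples (addr prefix + ordinal), then chunkCount chunk lengths, then
// chunkCount addr suffixes, followed by the table footer.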
type compactionPlan struct {
	sources             chunkSourcesByDescendingDataSize
	mergedIndex         []byte
	chunkCount          uint32
	totalCompressedData uint64
}

func (cp compactionPlan) lengths() []byte {
	lengthsStart := uint64(cp.chunkCount) * prefixTupleSize
	return cp.mergedIndex[lengthsStart : lengthsStart+uint64(cp.chunkCount)*lengthSize]
}

func (cp compactionPlan) suffixes() []byte {
	suffixesStart := uint64(cp.chunkCount) * (prefixTupleSize + lengthSize)
	return cp.mergedIndex[suffixesStart : suffixesStart+uint64(cp.chunkCount)*addrSuffixSize]
}

func planConjoin(sources chunkSources, stats *Stats) (plan compactionPlan) {
	var totalUncompressedData uint64
	for _, src := range sources {
		totalUncompressedData += src.uncompressedLen()
		index := src.index()
		plan.chunkCount += index.chunkCount

		// Calculate the amount of chunk data in |src|
		chunkDataLen := calcChunkDataLen(index)
		plan.sources = append(plan.sources, sourceWithSize{src, chunkDataLen})
		plan.totalCompressedData += chunkDataLen
	}
	sort.Sort(plan.sources)

	lengthsPos := lengthsOffset(plan.chunkCount)
	suffixesPos := suffixesOffset(plan.chunkCount)
	plan.mergedIndex = make([]byte, indexSize(plan.chunkCount)+footerSize)

	prefixIndexRecs := make(prefixIndexSlice, 0, plan.chunkCount)
	var ordinalOffset uint32
	for _, sws := range plan.sources {
		index := sws.source.index()

		// Add all the prefix tuples from this index to the list of all
		// prefixIndexRecs, modifying the ordinals such that all entries from
		// the 1st item in sources come after those in the 0th and so on.
		for j, prefix := range index.prefixes {
			rec := prefixIndexRec{prefix: prefix, order: ordinalOffset + index.ordinals[j]}
			prefixIndexRecs = append(prefixIndexRecs, rec)
		}
		ordinalOffset += sws.source.count()

		// TODO: copy the lengths and suffixes as a byte-copy from src BUG #3438
		// Bring over the lengths block, in order
		for _, length := range index.lengths {
			binary.BigEndian.PutUint32(plan.mergedIndex[lengthsPos:], length)
			lengthsPos += lengthSize
		}

		// Bring over the suffixes block, in order
		n := copy(plan.mergedIndex[suffixesPos:], index.suffixes)
		d.Chk.True(n == len(index.suffixes))
		suffixesPos += uint64(n)
	}

	// Sort all prefixTuples by hash and then insert them starting at the
	// beginning of plan.mergedIndex
	sort.Sort(prefixIndexRecs)
	var pfxPos uint64
	for _, pi := range prefixIndexRecs {
		binary.BigEndian.PutUint64(plan.mergedIndex[pfxPos:], pi.prefix)
		pfxPos += addrPrefixSize
		binary.BigEndian.PutUint32(plan.mergedIndex[pfxPos:], pi.order)
		pfxPos += ordinalSize
	}

	writeFooter(plan.mergedIndex[uint64(len(plan.mergedIndex))-footerSize:], plan.chunkCount, totalUncompressedData)

	stats.BytesPerConjoin.Sample(uint64(plan.totalCompressedData) + uint64(len(plan.mergedIndex)))
	return plan
}
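
// Illustrative sketch only: a tablePersister's ConjoinAll might consume a
// compactionPlan roughly like this. |w|, an io.Writer for the new table, and
// the per-source chunk-data copy are assumptions, not code in this file.
//
//	plan := planConjoin(sources, stats)
//	for _, sws := range plan.sources {
//		// copy sws.dataLen bytes of chunk data from sws.source to w, in plan order
//	}
//	w.Write(plan.mergedIndex)
//	name := nameFromSuffixes(plan.suffixes())
//	_ = name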

func nameFromSuffixes(suffixes []byte) (name addr) {
	sha := sha512.New()
	sha.Write(suffixes)

	var h []byte
	h = sha.Sum(h) // Appends hash to h
	copy(name[:], h)
	return
}

func calcChunkDataLen(index tableIndex) uint64 {
	return index.offsets[index.chunkCount-1] + uint64(index.lengths[index.chunkCount-1])
}