github.com/ndau/noms@v1.0.5/go/types/opcache.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  // opCache stores build operations on a graph of nested Maps whose leaves can
     6  // in turn be Set, Map, or List collections containing any Noms Value.
     7  // OpCacheIterator returns operations in sorted order.
     8  //
     9  // OpCache uses a special encoding of the information supplied by the MapSet(),
    10  // ListAppend(), or SetInsert() operation stored in the ldbKey combined with
    11  // custom ldb Comparer object implemented in opcache_compare.go to make this
    12  // happen.
    13  //
    14  // Ldb keys are encoded byte arrays that contain the following information:
    15  //     4-bytes -- uint32 in BigEndian order which identifies this key/value
    16  //                as belonging to a particular graph
    17  //     1-byte  -- a NomsKind value that represents the collection type that is
    18  //                being acted on. This will either be MapKind, SetKind, or ListKind.
    19  //     1-byte  -- uint8 representing the number of NomsValues encoded in this key
    20  //
    21  // After this 6-byte header, there is a section of bytes for each value encoded
    22  // into the key. Each value has a 1-byte prefix:
    23  //     1-byte  -- a NomsKind value that represents the type of value that is
    24  //                being encoded.
    25  //     The 1-byte NomsKind value determines what follows, if this value is
    26  //     BoolKind, NumberKind, or StringKind, the rest of the bytes are:
    27  //         4-bytes -- uint32 length of the Value serialization
    28  //         n-bytes -- the serialized value
    29  //     If the NomsKind byte has any other value, it is followed by:
    30  //         20-bytes -- digest of Value's hash
    31  //
    32  // Whenever the value is encoded as a hash digest in the ldbKey, its actual value
    33  // needs to get stored in the ldbValue. (More about this later)
    34  //
    35  // There are 3 operation types on opCache: MapSet(), SetInsert(), and ListAppend().
    36  // Each one stores slightly different things in the ldbKey.
    37  // MapSet() -- stores each graphKey and the key to the final Map
    38  // SetInsert() -- stores each graphKey and the Value being inserted into the set
    39  // ListAppend() -- stores each graphKey and a Number() containing an int64 value
    40  //    that is shared across all collections and lists which is incremented each time
    41  //    ListAppend() is called.
    42  //
    43  // The ldbValue also stores different information for each mutation operation. An
    44  // ldbValue has a 1-byte uint8 header that is the number of values that are encoded
    45  // into it.
    46  //    1-byte -- uint8 indicating number of values encoded into this byte array
    47  // Then for each encoded value it contains:
    48  //    4-byte -- uint32 indicating length of value serialization
    49  //    n-bytes -- the serialized value
    50  //
    51  // The ldbValue contains the following values for each type of mutation:
    52  // MapSet() -- stores any graphKeys that were encoded as a hash digest in
    53  //    the ldbKey. The mapKey if it was encoded as a hash digest in the ldbKey
    54  //    and the value being set in the map.
    55  // SetInsert() -- stores any graphKeys that were encoded as a hash digest in
    56  //    the ldbKey. The value being inserted into the set if it was encoded into the
    57  //    ldbKey as a hash digest.
    58  // ListAppend() -- stores any graphKeys that were encoded as a hash digest in the
    59  //    ldbKey. The value being appended to the list.
    60  //
    61  
    62  package types
    63  
    64  import (
    65  	"encoding/binary"
    66  	"io/ioutil"
    67  	"os"
    68  	"sync/atomic"
    69  
    70  	"github.com/ndau/noms/go/d"
    71  	"github.com/ndau/noms/go/hash"
    72  	"github.com/syndtr/goleveldb/leveldb"
    73  	ldbIterator "github.com/syndtr/goleveldb/leveldb/iterator"
    74  	"github.com/syndtr/goleveldb/leveldb/opt"
    75  	"github.com/syndtr/goleveldb/leveldb/util"
    76  )
    77  
// uint32Size is the number of bytes occupied by an encoded uint32.
const uint32Size = 4
    79  
// opCacheStore is a factory for opCache instances plus the means to dispose
// of whatever storage backs them.
type opCacheStore interface {
	// opCache returns an opCache handed out by this store.
	opCache() opCache
	// destroy tears the store down and reports any error encountered while
	// doing so.
	destroy() error
}
    84  
// opCache records graph-building operations (map sets, set inserts, and list
// appends) and exposes them again through an opCacheIterator.
type opCache interface {
	// GraphMapSet records that mapKey is set to mapVal in the Map reached by
	// following keys. This method can be called from multiple goroutines.
	GraphMapSet(keys ValueSlice, mapKey Value, mapVal Value)

	// GraphSetInsert records that val is inserted into the Set reached by
	// following keys. This method can be called from multiple goroutines.
	GraphSetInsert(keys ValueSlice, val Value)

	// GraphListAppend records that val is appended to the List reached by
	// following keys. This method can be called from multiple goroutines,
	// however items will be appended to the list based on the order in which
	// the goroutines execute this method.
	GraphListAppend(keys ValueSlice, val Value)

	// NewIterator returns an iterator over the operations recorded in this
	// opCache.
	NewIterator() opCacheIterator
}
    99  
// opCacheIterator walks the operations recorded in an opCache.
type opCacheIterator interface {
	// GraphOp decodes the operation at the iterator's current position and
	// returns the graph keys leading to the leaf collection, the kind of that
	// collection, and the item being added to it.
	GraphOp() (ValueSlice, NomsKind, sequenceItem)
	// Next moves to the next operation and reports whether the move succeeded.
	Next() bool
	// Release frees the resources held by the iterator.
	Release()
}
   105  
// ldbOpCacheStore is an opCacheStore backed by a single LevelDB database
// living in a temporary directory. Every opCache it hands out shares that
// database and is namespaced by its own collection id.
type ldbOpCacheStore struct {
	ldb          *leveldb.DB     // database shared by all caches from this store
	dbDir        string          // directory holding the database files; removed by destroy
	collectionId uint32          // last collection id handed out; incremented atomically by opCache
	vrw          ValueReadWriter // passed on to every opCache created by this store
}
   112  
   113  type ldbOpCache struct {
   114  	vrw     ValueReadWriter
   115  	colId   uint32
   116  	listIdx int64
   117  	ldb     *leveldb.DB
   118  }
   119  
// ldbOpCacheIterator is an opCacheIterator that reads operations back out of
// LevelDB.
type ldbOpCacheIterator struct {
	iter ldbIterator.Iterator // underlying LevelDB iterator
	vrw  ValueReadWriter      // passed to decodeValue when decoding stored values
}
   124  
   125  func newLdbOpCacheStore(vrw ValueReadWriter) *ldbOpCacheStore {
   126  	dir, err := ioutil.TempDir("", "")
   127  	d.Chk.NoError(err)
   128  	db, err := leveldb.OpenFile(dir, &opt.Options{
   129  		Compression:            opt.NoCompression,
   130  		Comparer:               opCacheComparer{},
   131  		OpenFilesCacheCapacity: 24,
   132  		// This data does not have to be durable. LDB is acting as temporary
   133  		// storage that can be larger than main memory.
   134  		NoSync:      true,
   135  		WriteBuffer: 1 << 27, // 128MiB
   136  	})
   137  	d.Chk.NoError(err, "opening put cache in %s", dir)
   138  	return &ldbOpCacheStore{ldb: db, dbDir: dir, vrw: vrw}
   139  }
   140  
   141  func (store *ldbOpCacheStore) destroy() error {
   142  	d.Chk.NoError(store.ldb.Close())
   143  	return os.RemoveAll(store.dbDir)
   144  }
   145  
   146  func (store *ldbOpCacheStore) opCache() opCache {
   147  	colId := atomic.AddUint32(&store.collectionId, 1)
   148  	return &ldbOpCache{vrw: store.vrw, colId: colId, ldb: store.ldb}
   149  }
   150  
   151  // insertLdbOp encodes allKeys into the ldb key. Bool, Number, and String values
   152  // are encoded directly into the ldb key bytes. All other types are encoded as
   153  // their Hash() digest. Their actual value is then stored in ldb value.
   154  func (opc *ldbOpCache) insertLdbOp(allKeys ValueSlice, opKind NomsKind, val Value) {
   155  	if len(allKeys) > 0x00FF {
   156  		d.Panic("Number of keys in GraphMapSet exceeds max of 256")
   157  	}
   158  	ldbKeyBytes := [initialBufferSize]byte{}
   159  	ldbValBytes := [initialBufferSize]byte{}
   160  
   161  	ldbKey, valuesToEncode := encodeKeys(ldbKeyBytes[:0], opc.colId, opKind, allKeys)
   162  
   163  	// val may be nil when dealing with sets, since the val is the key.
   164  	if val != nil {
   165  		valuesToEncode = append(valuesToEncode, val)
   166  	}
   167  	ldbVal := encodeValues(ldbValBytes[:0], valuesToEncode)
   168  
   169  	err := opc.ldb.Put(ldbKey, ldbVal, nil)
   170  	d.Chk.NoError(err)
   171  }
   172  
   173  func (opc *ldbOpCache) GraphMapSet(graphKeys ValueSlice, mapKey, mapVal Value) {
   174  	allKeys := append(graphKeys, mapKey)
   175  	opc.insertLdbOp(allKeys, MapKind, mapVal)
   176  }
   177  
   178  func (opc *ldbOpCache) GraphSetInsert(graphKeys ValueSlice, val Value) {
   179  	allKeys := append(graphKeys, val)
   180  	opc.insertLdbOp(allKeys, SetKind, val)
   181  }
   182  
   183  func (opc *ldbOpCache) GraphListAppend(graphKeys ValueSlice, val Value) {
   184  	idx := atomic.AddInt64(&opc.listIdx, 1)
   185  	allKeys := append(graphKeys, Number(idx))
   186  	opc.insertLdbOp(allKeys, ListKind, val)
   187  }
   188  
   189  func (i *ldbOpCacheIterator) GraphOp() (ValueSlice, NomsKind, sequenceItem) {
   190  	ldbKey := i.iter.Key()
   191  	ldbVal := i.iter.Value()
   192  
   193  	// skip over 4 bytes of colId and get opKind, and numKeys from bytes 4 & 5
   194  	opKind := NomsKind(ldbKey[4])
   195  	numKeys := uint8(ldbKey[5])
   196  	ldbKey = ldbKey[6:]
   197  
   198  	// Call decodeValue for each encoded graphKey. nil will be appended to
   199  	// graphKeys for any keys that were encoded as hash digests.
   200  	graphKeys := ValueSlice{}
   201  	for pos := uint8(0); pos < numKeys; pos++ {
   202  		var gk Value
   203  		ldbKey, gk = decodeValue(ldbKey, false, i.vrw)
   204  		graphKeys = append(graphKeys, gk)
   205  	}
   206  
   207  	// Get the number of values whose value was encoded in ldbVal
   208  	numEncodedValues := uint8(ldbVal[0])
   209  	ldbVal = ldbVal[1:]
   210  
   211  	// Call decodeValue for each non-primitive key stored in ldbVal. Replace
   212  	// the nil value in graphKeys with the new decodedValue.
   213  	values := ValueSlice{}
   214  	for pos := uint8(0); pos < numEncodedValues; pos++ {
   215  		var gk Value
   216  		ldbVal, gk = decodeValue(ldbVal, true, i.vrw)
   217  		values = append(values, gk)
   218  	}
   219  
   220  	// Fold in any non-primitive key values that were stored in ldbVal
   221  	pos := 0
   222  	for idx, k1 := range graphKeys {
   223  		if k1 == nil {
   224  			graphKeys[idx] = values[pos]
   225  			pos++
   226  		}
   227  	}
   228  
   229  	// Remove the last key in graphKeys. The last key in graphKeys is the
   230  	// mapkey for Maps, the item for Sets, and the index for Lists.
   231  	key := graphKeys[len(graphKeys)-1]
   232  	graphKeys = graphKeys[:len(graphKeys)-1]
   233  
   234  	var item sequenceItem
   235  	switch opKind {
   236  	case MapKind:
   237  		val := values[len(values)-1]
   238  		item = mapEntry{key, val}
   239  	case SetKind:
   240  		item = key
   241  	case ListKind:
   242  		item = values[len(values)-1]
   243  	}
   244  
   245  	return graphKeys, opKind, item
   246  }
   247  
   248  func (opc *ldbOpCache) NewIterator() opCacheIterator {
   249  	prefix := [4]byte{}
   250  	binary.BigEndian.PutUint32(prefix[:], opc.colId)
   251  	return &ldbOpCacheIterator{iter: opc.ldb.NewIterator(util.BytesPrefix(prefix[:]), nil), vrw: opc.vrw}
   252  }
   253  
   254  func (i *ldbOpCacheIterator) Next() bool {
   255  	return i.iter.Next()
   256  }
   257  
// Release releases the underlying LevelDB iterator.
func (i *ldbOpCacheIterator) Release() {
	i.iter.Release()
}
   261  
   262  // encodeKeys() serializes a list of keys to the byte slice |bs|.
   263  func encodeKeys(bs []byte, colId uint32, opKind NomsKind, keys []Value) ([]byte, []Value) {
   264  	// All ldb keys start with a 4-byte collection id that serves as a namespace
   265  	// that keeps them separate from other collections.
   266  	idHolder := [4]byte{}
   267  	idHolderSlice := idHolder[:4]
   268  	binary.BigEndian.PutUint32(idHolderSlice, colId)
   269  	bs = append(bs, idHolderSlice...)
   270  
   271  	// bs[4] is a NomsKind value which represents the type of leaf
   272  	//   collection being operated on (i.e. MapKind, SetKind, or ListKind)
   273  	// bs[5] is a single uint8 value representing the number of keys
   274  	//   encoded in the ldb key.
   275  	bs = append(bs, byte(opKind), byte(len(keys)))
   276  
   277  	valuesToEncode := ValueSlice{}
   278  	for _, gk := range keys {
   279  		bs = encodeGraphKey(bs, gk)
   280  		if !isKindOrderedByValue(gk.Kind()) {
   281  			valuesToEncode = append(valuesToEncode, gk)
   282  		}
   283  	}
   284  	return bs, valuesToEncode
   285  }
   286  
   287  func encodeValues(bs []byte, valuesToEncode []Value) []byte {
   288  	// Encode allValues into the ldbVal byte slice.
   289  	bs = append(bs, uint8(len(valuesToEncode)))
   290  	for _, k := range valuesToEncode {
   291  		bs = encodeGraphValue(bs, k)
   292  	}
   293  	return bs
   294  }
   295  
   296  func encodeGraphKey(bs []byte, v Value) []byte {
   297  	return encodeForGraph(bs, v, false)
   298  }
   299  
   300  func encodeGraphValue(bs []byte, v Value) []byte {
   301  	return encodeForGraph(bs, v, true)
   302  }
   303  
   304  func encodeForGraph(bs []byte, v Value, asValue bool) []byte {
   305  	// Note: encToSlice() and append() will both grow the backing store of |bs|
   306  	// as necessary. Always call them when writing to |bs|.
   307  	if asValue || isKindOrderedByValue(v.Kind()) {
   308  		// if we're encoding value, then put:
   309  		// noms-kind(1-byte), serialization-len(4-bytes), serialization(n-bytes)
   310  		buf := [initialBufferSize]byte{}
   311  		uint32buf := [4]byte{}
   312  		encodedVal := encToSlice(v, buf[:])
   313  		binary.BigEndian.PutUint32(uint32buf[:], uint32(len(encodedVal)))
   314  		bs = append(bs, uint8(v.Kind()))
   315  		bs = append(bs, uint32buf[:]...)
   316  		bs = append(bs, encodedVal...)
   317  	} else {
   318  		// if we're encoding hash values, we know the length, so we can leave that out
   319  		bs = append(bs, uint8(v.Kind()))
   320  		h := v.Hash()
   321  		bs = append(bs, h[:]...)
   322  	}
   323  	return bs
   324  }
   325  
   326  func decodeValue(bs []byte, asValue bool, vrw ValueReadWriter) ([]byte, Value) {
   327  	kind := NomsKind(bs[0])
   328  	var v Value
   329  	if asValue || isKindOrderedByValue(kind) {
   330  		encodedLen := binary.BigEndian.Uint32(bs[1:5])
   331  		// The bytes in bs gets reused by LDB. The data of a chunk must
   332  		// never change since we are backing the values by this data.
   333  		data := make([]byte, encodedLen)
   334  		copy(data, bs[5:5+encodedLen])
   335  		v = DecodeFromBytes(data, vrw)
   336  		return bs[5+encodedLen:], v
   337  	}
   338  	return bs[1+hash.ByteLen:], nil
   339  }
   340  
   341  // Note that, if 'v' are prolly trees, any in-memory child chunks will be written to vw at this time.
   342  func encToSlice(v Value, initBuf []byte) []byte {
   343  	// TODO: Are there enough calls to this that it's worth re-using a nomsWriter?
   344  	w := &binaryNomsWriter{initBuf, 0}
   345  	v.writeTo(w)
   346  	return w.data()
   347  }