// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// opCache stores build operations on a graph of nested Maps whose leaves can
// in turn be Set, Map, or List collections containing any Noms Value.
// OpCacheIterator returns operations in sorted order.
//
// OpCache uses a special encoding of the information supplied by the MapSet(),
// ListAppend(), or SetInsert() operation stored in the ldbKey, combined with a
// custom ldb Comparer object implemented in opcache_compare.go, to make this
// happen.
//
// Ldb keys are encoded byte arrays that contain the following information:
//     4-bytes -- uint32 in BigEndian order which identifies this key/value
//          as belonging to a particular graph
//     1-byte  -- a NomsKind value that represents the collection type that is
//          being acted on. This will either be MapKind, SetKind, or ListKind.
//     1-byte  -- uint8 representing the number of NomsValues encoded in this key
//
// After this 6-byte header, there is a section of bytes for each value encoded
// into the key. Each value has a 1-byte prefix:
//     1-byte  -- a NomsKind value that represents the type of value that is
//          being encoded.
//     The 1-byte NomsKind value determines what follows. If this value is
//     BoolKind, NumberKind, or StringKind, the rest of the bytes are:
//     4-bytes -- uint32 length of the Value serialization
//     n-bytes -- the serialized value
//     If the NomsKind byte has any other value, it is followed by:
//     20-bytes -- digest of Value's hash
//
// Whenever the value is encoded as a hash digest in the ldbKey, its actual value
// needs to get stored in the ldbValue. (More about this later)
//
// There are 3 operation types on opCache: MapSet(), SetInsert(), and ListAppend().
// Each one stores slightly different things in the ldbKey.
//     MapSet() -- stores each graphKey and the key to the final Map
//     SetInsert() -- stores each graphKey and the Value being inserted into the set
//     ListAppend() -- stores each graphKey and a Number() containing an int64 value
//          that is shared across all collections and lists which is incremented each time
//          ListAppend() is called.
//
// The ldbValue also stores different information for each mutation operation. An
// ldbValue has a 1-byte uint8 header that is the number of values that are encoded
// into it.
//     1-byte -- uint8 indicating number of values encoded into this byte array
//     Then for each encoded value it contains:
//     1-byte -- the NomsKind of the value
//     4-byte -- uint32 indicating length of value serialization
//     n-bytes -- the serialized value
//
// The ldbValue contains the following values for each type of mutation:
//     MapSet() -- stores any graphKeys that were encoded as a hash digest in
//          the ldbKey. The mapKey if it was encoded as a hash digest in the ldbKey
//          and the value being set in the map.
//     SetInsert() -- stores any graphKeys that were encoded as a hash digest in
//          the ldbKey. The value being inserted into the set if it was encoded into the
//          ldbKey as a hash digest.
//     ListAppend() -- stores any graphKeys that were encoded as a hash digest in the
//          ldbKey. The value being appended to the list.
60 // 61 62 package types 63 64 import ( 65 "encoding/binary" 66 "io/ioutil" 67 "os" 68 "sync/atomic" 69 70 "github.com/ndau/noms/go/d" 71 "github.com/ndau/noms/go/hash" 72 "github.com/syndtr/goleveldb/leveldb" 73 ldbIterator "github.com/syndtr/goleveldb/leveldb/iterator" 74 "github.com/syndtr/goleveldb/leveldb/opt" 75 "github.com/syndtr/goleveldb/leveldb/util" 76 ) 77 78 const uint32Size = 4 79 80 type opCacheStore interface { 81 opCache() opCache 82 destroy() error 83 } 84 85 type opCache interface { 86 // This method can be called from multiple go routines. 87 GraphMapSet(keys ValueSlice, mapKey Value, mapVal Value) 88 89 // This method can be called from multiple go routines. 90 GraphSetInsert(keys ValueSlice, val Value) 91 92 // This method can be called from multiple go routines, however items will 93 // be appended to the list based on the order that routines execute 94 // this method. 95 GraphListAppend(keys ValueSlice, val Value) 96 97 NewIterator() opCacheIterator 98 } 99 100 type opCacheIterator interface { 101 GraphOp() (ValueSlice, NomsKind, sequenceItem) 102 Next() bool 103 Release() 104 } 105 106 type ldbOpCacheStore struct { 107 ldb *leveldb.DB 108 dbDir string 109 collectionId uint32 110 vrw ValueReadWriter 111 } 112 113 type ldbOpCache struct { 114 vrw ValueReadWriter 115 colId uint32 116 listIdx int64 117 ldb *leveldb.DB 118 } 119 120 type ldbOpCacheIterator struct { 121 iter ldbIterator.Iterator 122 vrw ValueReadWriter 123 } 124 125 func newLdbOpCacheStore(vrw ValueReadWriter) *ldbOpCacheStore { 126 dir, err := ioutil.TempDir("", "") 127 d.Chk.NoError(err) 128 db, err := leveldb.OpenFile(dir, &opt.Options{ 129 Compression: opt.NoCompression, 130 Comparer: opCacheComparer{}, 131 OpenFilesCacheCapacity: 24, 132 // This data does not have to be durable. LDB is acting as temporary 133 // storage that can be larger than main memory. 
134 NoSync: true, 135 WriteBuffer: 1 << 27, // 128MiB 136 }) 137 d.Chk.NoError(err, "opening put cache in %s", dir) 138 return &ldbOpCacheStore{ldb: db, dbDir: dir, vrw: vrw} 139 } 140 141 func (store *ldbOpCacheStore) destroy() error { 142 d.Chk.NoError(store.ldb.Close()) 143 return os.RemoveAll(store.dbDir) 144 } 145 146 func (store *ldbOpCacheStore) opCache() opCache { 147 colId := atomic.AddUint32(&store.collectionId, 1) 148 return &ldbOpCache{vrw: store.vrw, colId: colId, ldb: store.ldb} 149 } 150 151 // insertLdbOp encodes allKeys into the ldb key. Bool, Number, and String values 152 // are encoded directly into the ldb key bytes. All other types are encoded as 153 // their Hash() digest. Their actual value is then stored in ldb value. 154 func (opc *ldbOpCache) insertLdbOp(allKeys ValueSlice, opKind NomsKind, val Value) { 155 if len(allKeys) > 0x00FF { 156 d.Panic("Number of keys in GraphMapSet exceeds max of 256") 157 } 158 ldbKeyBytes := [initialBufferSize]byte{} 159 ldbValBytes := [initialBufferSize]byte{} 160 161 ldbKey, valuesToEncode := encodeKeys(ldbKeyBytes[:0], opc.colId, opKind, allKeys) 162 163 // val may be nil when dealing with sets, since the val is the key. 
164 if val != nil { 165 valuesToEncode = append(valuesToEncode, val) 166 } 167 ldbVal := encodeValues(ldbValBytes[:0], valuesToEncode) 168 169 err := opc.ldb.Put(ldbKey, ldbVal, nil) 170 d.Chk.NoError(err) 171 } 172 173 func (opc *ldbOpCache) GraphMapSet(graphKeys ValueSlice, mapKey, mapVal Value) { 174 allKeys := append(graphKeys, mapKey) 175 opc.insertLdbOp(allKeys, MapKind, mapVal) 176 } 177 178 func (opc *ldbOpCache) GraphSetInsert(graphKeys ValueSlice, val Value) { 179 allKeys := append(graphKeys, val) 180 opc.insertLdbOp(allKeys, SetKind, val) 181 } 182 183 func (opc *ldbOpCache) GraphListAppend(graphKeys ValueSlice, val Value) { 184 idx := atomic.AddInt64(&opc.listIdx, 1) 185 allKeys := append(graphKeys, Number(idx)) 186 opc.insertLdbOp(allKeys, ListKind, val) 187 } 188 189 func (i *ldbOpCacheIterator) GraphOp() (ValueSlice, NomsKind, sequenceItem) { 190 ldbKey := i.iter.Key() 191 ldbVal := i.iter.Value() 192 193 // skip over 4 bytes of colId and get opKind, and numKeys from bytes 4 & 5 194 opKind := NomsKind(ldbKey[4]) 195 numKeys := uint8(ldbKey[5]) 196 ldbKey = ldbKey[6:] 197 198 // Call decodeValue for each encoded graphKey. nil will be appended to 199 // graphKeys for any keys that were encoded as hash digests. 200 graphKeys := ValueSlice{} 201 for pos := uint8(0); pos < numKeys; pos++ { 202 var gk Value 203 ldbKey, gk = decodeValue(ldbKey, false, i.vrw) 204 graphKeys = append(graphKeys, gk) 205 } 206 207 // Get the number of values whose value was encoded in ldbVal 208 numEncodedValues := uint8(ldbVal[0]) 209 ldbVal = ldbVal[1:] 210 211 // Call decodeValue for each non-primitive key stored in ldbVal. Replace 212 // the nil value in graphKeys with the new decodedValue. 
213 values := ValueSlice{} 214 for pos := uint8(0); pos < numEncodedValues; pos++ { 215 var gk Value 216 ldbVal, gk = decodeValue(ldbVal, true, i.vrw) 217 values = append(values, gk) 218 } 219 220 // Fold in any non-primitive key values that were stored in ldbVal 221 pos := 0 222 for idx, k1 := range graphKeys { 223 if k1 == nil { 224 graphKeys[idx] = values[pos] 225 pos++ 226 } 227 } 228 229 // Remove the last key in graphKeys. The last key in graphKeys is the 230 // mapkey for Maps, the item for Sets, and the index for Lists. 231 key := graphKeys[len(graphKeys)-1] 232 graphKeys = graphKeys[:len(graphKeys)-1] 233 234 var item sequenceItem 235 switch opKind { 236 case MapKind: 237 val := values[len(values)-1] 238 item = mapEntry{key, val} 239 case SetKind: 240 item = key 241 case ListKind: 242 item = values[len(values)-1] 243 } 244 245 return graphKeys, opKind, item 246 } 247 248 func (opc *ldbOpCache) NewIterator() opCacheIterator { 249 prefix := [4]byte{} 250 binary.BigEndian.PutUint32(prefix[:], opc.colId) 251 return &ldbOpCacheIterator{iter: opc.ldb.NewIterator(util.BytesPrefix(prefix[:]), nil), vrw: opc.vrw} 252 } 253 254 func (i *ldbOpCacheIterator) Next() bool { 255 return i.iter.Next() 256 } 257 258 func (i *ldbOpCacheIterator) Release() { 259 i.iter.Release() 260 } 261 262 // encodeKeys() serializes a list of keys to the byte slice |bs|. 263 func encodeKeys(bs []byte, colId uint32, opKind NomsKind, keys []Value) ([]byte, []Value) { 264 // All ldb keys start with a 4-byte collection id that serves as a namespace 265 // that keeps them separate from other collections. 266 idHolder := [4]byte{} 267 idHolderSlice := idHolder[:4] 268 binary.BigEndian.PutUint32(idHolderSlice, colId) 269 bs = append(bs, idHolderSlice...) 270 271 // bs[4] is a NomsKind value which represents the type of leaf 272 // collection being operated on (i.e. MapKind, SetKind, or ListKind) 273 // bs[5] is a single uint8 value representing the number of keys 274 // encoded in the ldb key. 
275 bs = append(bs, byte(opKind), byte(len(keys))) 276 277 valuesToEncode := ValueSlice{} 278 for _, gk := range keys { 279 bs = encodeGraphKey(bs, gk) 280 if !isKindOrderedByValue(gk.Kind()) { 281 valuesToEncode = append(valuesToEncode, gk) 282 } 283 } 284 return bs, valuesToEncode 285 } 286 287 func encodeValues(bs []byte, valuesToEncode []Value) []byte { 288 // Encode allValues into the ldbVal byte slice. 289 bs = append(bs, uint8(len(valuesToEncode))) 290 for _, k := range valuesToEncode { 291 bs = encodeGraphValue(bs, k) 292 } 293 return bs 294 } 295 296 func encodeGraphKey(bs []byte, v Value) []byte { 297 return encodeForGraph(bs, v, false) 298 } 299 300 func encodeGraphValue(bs []byte, v Value) []byte { 301 return encodeForGraph(bs, v, true) 302 } 303 304 func encodeForGraph(bs []byte, v Value, asValue bool) []byte { 305 // Note: encToSlice() and append() will both grow the backing store of |bs| 306 // as necessary. Always call them when writing to |bs|. 307 if asValue || isKindOrderedByValue(v.Kind()) { 308 // if we're encoding value, then put: 309 // noms-kind(1-byte), serialization-len(4-bytes), serialization(n-bytes) 310 buf := [initialBufferSize]byte{} 311 uint32buf := [4]byte{} 312 encodedVal := encToSlice(v, buf[:]) 313 binary.BigEndian.PutUint32(uint32buf[:], uint32(len(encodedVal))) 314 bs = append(bs, uint8(v.Kind())) 315 bs = append(bs, uint32buf[:]...) 316 bs = append(bs, encodedVal...) 317 } else { 318 // if we're encoding hash values, we know the length, so we can leave that out 319 bs = append(bs, uint8(v.Kind())) 320 h := v.Hash() 321 bs = append(bs, h[:]...) 322 } 323 return bs 324 } 325 326 func decodeValue(bs []byte, asValue bool, vrw ValueReadWriter) ([]byte, Value) { 327 kind := NomsKind(bs[0]) 328 var v Value 329 if asValue || isKindOrderedByValue(kind) { 330 encodedLen := binary.BigEndian.Uint32(bs[1:5]) 331 // The bytes in bs gets reused by LDB. The data of a chunk must 332 // never change since we are backing the values by this data. 
333 data := make([]byte, encodedLen) 334 copy(data, bs[5:5+encodedLen]) 335 v = DecodeFromBytes(data, vrw) 336 return bs[5+encodedLen:], v 337 } 338 return bs[1+hash.ByteLen:], nil 339 } 340 341 // Note that, if 'v' are prolly trees, any in-memory child chunks will be written to vw at this time. 342 func encToSlice(v Value, initBuf []byte) []byte { 343 // TODO: Are there enough calls to this that it's worth re-using a nomsWriter? 344 w := &binaryNomsWriter{initBuf, 0} 345 v.writeTo(w) 346 return w.data() 347 }