github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/storage/memdb_arena.go (about) 1 // Copyright 2022 zGraph Authors. All rights reserved. 2 // 3 // Copyright 2020 PingCAP, Inc. 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package storage 18 19 import ( 20 "encoding/binary" 21 "math" 22 "unsafe" 23 24 "github.com/vescale/zgraph/storage/kv" 25 ) 26 27 const ( 28 alignMask = 1<<32 - 8 // 29 bit 1 and 3 bit 0. 29 30 nullBlockOffset = math.MaxUint32 31 maxBlockSize = 128 << 20 32 initBlockSize = 4 * 1024 33 ) 34 35 var ( 36 nullAddr = memdbArenaAddr{math.MaxUint32, math.MaxUint32} 37 endian = binary.LittleEndian 38 ) 39 40 type memdbArenaAddr struct { 41 idx uint32 42 off uint32 43 } 44 45 func (addr memdbArenaAddr) isNull() bool { 46 if addr == nullAddr { 47 return true 48 } 49 if addr.idx == math.MaxUint32 || addr.off == math.MaxUint32 { 50 // TODO: warning 51 // defensive programming, the code should never run to here. 52 // it always means something wrong... (maybe caused by data race?) 53 // because we never set part of idx/off to math.MaxUint64 54 return true 55 } 56 return false 57 } 58 59 // store and load is used by vlog, due to pointer in vlog is not aligned. 60 61 func (addr memdbArenaAddr) store(dst []byte) { 62 endian.PutUint32(dst, addr.idx) 63 endian.PutUint32(dst[4:], addr.off) 64 } 65 66 func (addr *memdbArenaAddr) load(src []byte) { 67 addr.idx = endian.Uint32(src) 68 addr.off = endian.Uint32(src[4:]) 69 } 70 71 type memdbArena struct { 72 blockSize int 73 blocks []memdbArenaBlock 74 // the total size of all blocks, also the approximate memory footprint of the arena. 75 capacity uint64 76 // when it enlarges or shrinks, call this function with the current memory footprint (in bytes) 77 memChangeHook func() 78 } 79 80 func (a *memdbArena) alloc(size int, align bool) (memdbArenaAddr, []byte) { 81 if size > maxBlockSize { 82 panic("alloc size is larger than max block size") 83 } 84 85 if len(a.blocks) == 0 { 86 a.enlarge(size, initBlockSize) 87 } 88 89 addr, data := a.allocInLastBlock(size, align) 90 if !addr.isNull() { 91 return addr, data 92 } 93 94 a.enlarge(size, a.blockSize<<1) 95 return a.allocInLastBlock(size, align) 96 } 97 98 func (a *memdbArena) enlarge(allocSize, blockSize int) { 99 a.blockSize = blockSize 100 for a.blockSize <= allocSize { 101 a.blockSize <<= 1 102 } 103 // Size will never larger than maxBlockSize. 104 if a.blockSize > maxBlockSize { 105 a.blockSize = maxBlockSize 106 } 107 a.blocks = append(a.blocks, memdbArenaBlock{ 108 buf: make([]byte, a.blockSize), 109 }) 110 a.capacity += uint64(a.blockSize) 111 a.onMemChange() 112 } 113 114 func (a *memdbArena) onMemChange() { 115 if a.memChangeHook != nil { 116 a.memChangeHook() 117 } 118 } 119 120 func (a *memdbArena) allocInLastBlock(size int, align bool) (memdbArenaAddr, []byte) { 121 idx := len(a.blocks) - 1 122 offset, data := a.blocks[idx].alloc(size, align) 123 if offset == nullBlockOffset { 124 return nullAddr, nil 125 } 126 return memdbArenaAddr{uint32(idx), offset}, data 127 } 128 129 func (a *memdbArena) reset() { 130 for i := range a.blocks { 131 a.blocks[i].reset() 132 } 133 a.blocks = a.blocks[:0] 134 a.blockSize = 0 135 a.capacity = 0 136 a.onMemChange() 137 } 138 139 type memdbArenaBlock struct { 140 buf []byte 141 length int 142 } 143 144 func (a *memdbArenaBlock) alloc(size int, align bool) (uint32, []byte) { 145 offset := a.length 146 if align { 147 // We must align the allocated address for node 148 // to make runtime.checkptrAlignment happy. 149 offset = (a.length + 7) & alignMask 150 } 151 newLen := offset + size 152 if newLen > len(a.buf) { 153 return nullBlockOffset, nil 154 } 155 a.length = newLen 156 return uint32(offset), a.buf[offset : offset+size] 157 } 158 159 func (a *memdbArenaBlock) reset() { 160 a.buf = nil 161 a.length = 0 162 } 163 164 // MemDBCheckpoint is the checkpoint of memory DB. 165 type MemDBCheckpoint struct { 166 blockSize int 167 blocks int 168 offsetInBlock int 169 } 170 171 func (cp *MemDBCheckpoint) isSamePosition(other *MemDBCheckpoint) bool { 172 return cp.blocks == other.blocks && cp.offsetInBlock == other.offsetInBlock 173 } 174 175 func (a *memdbArena) checkpoint() MemDBCheckpoint { 176 snap := MemDBCheckpoint{ 177 blockSize: a.blockSize, 178 blocks: len(a.blocks), 179 } 180 if len(a.blocks) > 0 { 181 snap.offsetInBlock = a.blocks[len(a.blocks)-1].length 182 } 183 return snap 184 } 185 186 func (a *memdbArena) truncate(snap *MemDBCheckpoint) { 187 for i := snap.blocks; i < len(a.blocks); i++ { 188 a.blocks[i] = memdbArenaBlock{} 189 } 190 a.blocks = a.blocks[:snap.blocks] 191 if len(a.blocks) > 0 { 192 a.blocks[len(a.blocks)-1].length = snap.offsetInBlock 193 } 194 a.blockSize = snap.blockSize 195 196 a.capacity = 0 197 for _, block := range a.blocks { 198 a.capacity += uint64(block.length) 199 } 200 a.onMemChange() 201 } 202 203 type nodeAllocator struct { 204 memdbArena 205 206 // Dummy node, so that we can make X.left.up = X. 207 // We then use this instead of NULL to mean the top or bottom 208 // end of the rb tree. It is a black node. 209 nullNode memdbNode 210 } 211 212 func (a *nodeAllocator) init() { 213 a.nullNode = memdbNode{ 214 up: nullAddr, 215 left: nullAddr, 216 right: nullAddr, 217 vptr: nullAddr, 218 } 219 } 220 221 func (a *nodeAllocator) getNode(addr memdbArenaAddr) *memdbNode { 222 if addr.isNull() { 223 return &a.nullNode 224 } 225 226 return (*memdbNode)(unsafe.Pointer(&a.blocks[addr.idx].buf[addr.off])) 227 } 228 229 func (a *nodeAllocator) allocNode(key []byte) (memdbArenaAddr, *memdbNode) { 230 nodeSize := 8*4 + 2 + kv.FlagBytes + len(key) 231 addr, mem := a.alloc(nodeSize, true) 232 n := (*memdbNode)(unsafe.Pointer(&mem[0])) 233 n.vptr = nullAddr 234 n.klen = uint16(len(key)) 235 copy(n.getKey(), key) 236 return addr, n 237 } 238 239 var testMode = false 240 241 func (a *nodeAllocator) freeNode(addr memdbArenaAddr) { 242 if testMode { 243 // Make it easier for debug. 244 n := a.getNode(addr) 245 badAddr := nullAddr 246 badAddr.idx-- 247 n.left = badAddr 248 n.right = badAddr 249 n.up = badAddr 250 n.vptr = badAddr 251 return 252 } 253 // TODO: reuse freed nodes. 254 } 255 256 func (a *nodeAllocator) reset() { 257 a.memdbArena.reset() 258 a.init() 259 } 260 261 type memdbVlog struct { 262 memdbArena 263 memdb *MemDB 264 } 265 266 const memdbVlogHdrSize = 8 + 8 + 4 267 268 type memdbVlogHdr struct { 269 nodeAddr memdbArenaAddr 270 oldValue memdbArenaAddr 271 valueLen uint32 272 } 273 274 func (hdr *memdbVlogHdr) store(dst []byte) { 275 cursor := 0 276 endian.PutUint32(dst[cursor:], hdr.valueLen) 277 cursor += 4 278 hdr.oldValue.store(dst[cursor:]) 279 cursor += 8 280 hdr.nodeAddr.store(dst[cursor:]) 281 } 282 283 func (hdr *memdbVlogHdr) load(src []byte) { 284 cursor := 0 285 hdr.valueLen = endian.Uint32(src[cursor:]) 286 cursor += 4 287 hdr.oldValue.load(src[cursor:]) 288 cursor += 8 289 hdr.nodeAddr.load(src[cursor:]) 290 } 291 292 func (l *memdbVlog) appendValue(nodeAddr memdbArenaAddr, oldValue memdbArenaAddr, value []byte) memdbArenaAddr { 293 size := memdbVlogHdrSize + len(value) 294 addr, mem := l.alloc(size, false) 295 296 copy(mem, value) 297 hdr := memdbVlogHdr{nodeAddr, oldValue, uint32(len(value))} 298 hdr.store(mem[len(value):]) 299 300 addr.off += uint32(size) 301 return addr 302 } 303 304 // A pure function that gets a value. 305 func (l *memdbVlog) getValue(addr memdbArenaAddr) []byte { 306 lenOff := addr.off - memdbVlogHdrSize 307 block := l.blocks[addr.idx].buf 308 valueLen := endian.Uint32(block[lenOff:]) 309 if valueLen == 0 { 310 return tombstone 311 } 312 valueOff := lenOff - valueLen 313 return block[valueOff:lenOff:lenOff] 314 } 315 316 func (l *memdbVlog) getSnapshotValue(addr memdbArenaAddr, snap *MemDBCheckpoint) ([]byte, bool) { 317 result := l.selectValueHistory(addr, func(addr memdbArenaAddr) bool { 318 return !l.canModify(snap, addr) 319 }) 320 if result.isNull() { 321 return nil, false 322 } 323 return l.getValue(addr), true 324 } 325 326 func (l *memdbVlog) selectValueHistory(addr memdbArenaAddr, predicate func(memdbArenaAddr) bool) memdbArenaAddr { 327 for !addr.isNull() { 328 if predicate(addr) { 329 return addr 330 } 331 var hdr memdbVlogHdr 332 hdr.load(l.blocks[addr.idx].buf[addr.off-memdbVlogHdrSize:]) 333 addr = hdr.oldValue 334 } 335 return nullAddr 336 } 337 338 func (l *memdbVlog) revertToCheckpoint(db *MemDB, cp *MemDBCheckpoint) { 339 cursor := l.checkpoint() 340 for !cp.isSamePosition(&cursor) { 341 hdrOff := cursor.offsetInBlock - memdbVlogHdrSize 342 block := l.blocks[cursor.blocks-1].buf 343 var hdr memdbVlogHdr 344 hdr.load(block[hdrOff:]) 345 node := db.getNode(hdr.nodeAddr) 346 347 node.vptr = hdr.oldValue 348 db.size -= int(hdr.valueLen) 349 // oldValue.isNull() == true means this is a newly added value. 350 if hdr.oldValue.isNull() { 351 // If there are no flags associated with this key, we need to delete this node. 352 keptFlags := node.getKeyFlags().AndPersistent() 353 if keptFlags == 0 { 354 db.deleteNode(node) 355 } else { 356 node.setKeyFlags(keptFlags) 357 db.dirty = true 358 } 359 } else { 360 db.size += len(l.getValue(hdr.oldValue)) 361 } 362 363 l.moveBackCursor(&cursor, &hdr) 364 } 365 } 366 367 func (l *memdbVlog) inspectKVInLog(db *MemDB, head, tail *MemDBCheckpoint, f func([]byte, kv.KeyFlags, []byte)) { 368 cursor := *tail 369 for !head.isSamePosition(&cursor) { 370 cursorAddr := memdbArenaAddr{idx: uint32(cursor.blocks - 1), off: uint32(cursor.offsetInBlock)} 371 hdrOff := cursorAddr.off - memdbVlogHdrSize 372 block := l.blocks[cursorAddr.idx].buf 373 var hdr memdbVlogHdr 374 hdr.load(block[hdrOff:]) 375 node := db.allocator.getNode(hdr.nodeAddr) 376 377 // Skip older versions. 378 if node.vptr == cursorAddr { 379 value := block[hdrOff-hdr.valueLen : hdrOff] 380 f(node.getKey(), node.getKeyFlags(), value) 381 } 382 383 l.moveBackCursor(&cursor, &hdr) 384 } 385 } 386 387 func (l *memdbVlog) moveBackCursor(cursor *MemDBCheckpoint, hdr *memdbVlogHdr) { 388 cursor.offsetInBlock -= (memdbVlogHdrSize + int(hdr.valueLen)) 389 if cursor.offsetInBlock == 0 { 390 cursor.blocks-- 391 if cursor.blocks > 0 { 392 cursor.offsetInBlock = l.blocks[cursor.blocks-1].length 393 } 394 } 395 } 396 397 func (l *memdbVlog) canModify(cp *MemDBCheckpoint, addr memdbArenaAddr) bool { 398 if cp == nil { 399 return true 400 } 401 if int(addr.idx) > cp.blocks-1 { 402 return true 403 } 404 if int(addr.idx) == cp.blocks-1 && int(addr.off) > cp.offsetInBlock { 405 return true 406 } 407 return false 408 }