github.com/vescale/zgraph@v0.0.0-20230410094002-959c02d50f95/storage/memdb_arena.go (about)

     1  // Copyright 2022 zGraph Authors. All rights reserved.
     2  //
     3  // Copyright 2020 PingCAP, Inc.
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package storage
    18  
    19  import (
    20  	"encoding/binary"
    21  	"math"
    22  	"unsafe"
    23  
    24  	"github.com/vescale/zgraph/storage/kv"
    25  )
    26  
    27  const (
    28  	alignMask = 1<<32 - 8 // 29 bit 1 and 3 bit 0.
    29  
    30  	nullBlockOffset = math.MaxUint32
    31  	maxBlockSize    = 128 << 20
    32  	initBlockSize   = 4 * 1024
    33  )
    34  
    35  var (
    36  	nullAddr = memdbArenaAddr{math.MaxUint32, math.MaxUint32}
    37  	endian   = binary.LittleEndian
    38  )
    39  
    40  type memdbArenaAddr struct {
    41  	idx uint32
    42  	off uint32
    43  }
    44  
    45  func (addr memdbArenaAddr) isNull() bool {
    46  	if addr == nullAddr {
    47  		return true
    48  	}
    49  	if addr.idx == math.MaxUint32 || addr.off == math.MaxUint32 {
    50  		// TODO: warning
    51  		// defensive programming, the code should never run to here.
    52  		// it always means something wrong... (maybe caused by data race?)
    53  		// because we never set part of idx/off to math.MaxUint64
    54  		return true
    55  	}
    56  	return false
    57  }
    58  
    59  // store and load is used by vlog, due to pointer in vlog is not aligned.
    60  
    61  func (addr memdbArenaAddr) store(dst []byte) {
    62  	endian.PutUint32(dst, addr.idx)
    63  	endian.PutUint32(dst[4:], addr.off)
    64  }
    65  
    66  func (addr *memdbArenaAddr) load(src []byte) {
    67  	addr.idx = endian.Uint32(src)
    68  	addr.off = endian.Uint32(src[4:])
    69  }
    70  
// memdbArena hands out byte ranges from a growing list of blocks. New
// allocations are always served from the last block; when it is full a new,
// larger block is appended.
type memdbArena struct {
	// blockSize is the size of the most recently created block; blocks holds
	// every block in creation order.
	blockSize int
	blocks    []memdbArenaBlock
	// the total size of all blocks, also the approximate memory footprint of the arena.
	capacity uint64
	// memChangeHook, when non-nil, is called (without arguments) whenever the
	// arena enlarges, shrinks or is reset; the hook has to read the current
	// footprint itself.
	memChangeHook func()
}
    79  
    80  func (a *memdbArena) alloc(size int, align bool) (memdbArenaAddr, []byte) {
    81  	if size > maxBlockSize {
    82  		panic("alloc size is larger than max block size")
    83  	}
    84  
    85  	if len(a.blocks) == 0 {
    86  		a.enlarge(size, initBlockSize)
    87  	}
    88  
    89  	addr, data := a.allocInLastBlock(size, align)
    90  	if !addr.isNull() {
    91  		return addr, data
    92  	}
    93  
    94  	a.enlarge(size, a.blockSize<<1)
    95  	return a.allocInLastBlock(size, align)
    96  }
    97  
    98  func (a *memdbArena) enlarge(allocSize, blockSize int) {
    99  	a.blockSize = blockSize
   100  	for a.blockSize <= allocSize {
   101  		a.blockSize <<= 1
   102  	}
   103  	// Size will never larger than maxBlockSize.
   104  	if a.blockSize > maxBlockSize {
   105  		a.blockSize = maxBlockSize
   106  	}
   107  	a.blocks = append(a.blocks, memdbArenaBlock{
   108  		buf: make([]byte, a.blockSize),
   109  	})
   110  	a.capacity += uint64(a.blockSize)
   111  	a.onMemChange()
   112  }
   113  
   114  func (a *memdbArena) onMemChange() {
   115  	if a.memChangeHook != nil {
   116  		a.memChangeHook()
   117  	}
   118  }
   119  
   120  func (a *memdbArena) allocInLastBlock(size int, align bool) (memdbArenaAddr, []byte) {
   121  	idx := len(a.blocks) - 1
   122  	offset, data := a.blocks[idx].alloc(size, align)
   123  	if offset == nullBlockOffset {
   124  		return nullAddr, nil
   125  	}
   126  	return memdbArenaAddr{uint32(idx), offset}, data
   127  }
   128  
   129  func (a *memdbArena) reset() {
   130  	for i := range a.blocks {
   131  		a.blocks[i].reset()
   132  	}
   133  	a.blocks = a.blocks[:0]
   134  	a.blockSize = 0
   135  	a.capacity = 0
   136  	a.onMemChange()
   137  }
   138  
// memdbArenaBlock is one buffer of an arena. Bytes are handed out
// sequentially from the front of buf.
type memdbArenaBlock struct {
	// buf is the backing storage; length is the number of leading bytes of buf
	// that are already in use (including any alignment padding).
	buf    []byte
	length int
}
   143  
   144  func (a *memdbArenaBlock) alloc(size int, align bool) (uint32, []byte) {
   145  	offset := a.length
   146  	if align {
   147  		// We must align the allocated address for node
   148  		// to make runtime.checkptrAlignment happy.
   149  		offset = (a.length + 7) & alignMask
   150  	}
   151  	newLen := offset + size
   152  	if newLen > len(a.buf) {
   153  		return nullBlockOffset, nil
   154  	}
   155  	a.length = newLen
   156  	return uint32(offset), a.buf[offset : offset+size]
   157  }
   158  
   159  func (a *memdbArenaBlock) reset() {
   160  	a.buf = nil
   161  	a.length = 0
   162  }
   163  
   164  // MemDBCheckpoint is the checkpoint of memory DB.
   165  type MemDBCheckpoint struct {
   166  	blockSize     int
   167  	blocks        int
   168  	offsetInBlock int
   169  }
   170  
   171  func (cp *MemDBCheckpoint) isSamePosition(other *MemDBCheckpoint) bool {
   172  	return cp.blocks == other.blocks && cp.offsetInBlock == other.offsetInBlock
   173  }
   174  
   175  func (a *memdbArena) checkpoint() MemDBCheckpoint {
   176  	snap := MemDBCheckpoint{
   177  		blockSize: a.blockSize,
   178  		blocks:    len(a.blocks),
   179  	}
   180  	if len(a.blocks) > 0 {
   181  		snap.offsetInBlock = a.blocks[len(a.blocks)-1].length
   182  	}
   183  	return snap
   184  }
   185  
   186  func (a *memdbArena) truncate(snap *MemDBCheckpoint) {
   187  	for i := snap.blocks; i < len(a.blocks); i++ {
   188  		a.blocks[i] = memdbArenaBlock{}
   189  	}
   190  	a.blocks = a.blocks[:snap.blocks]
   191  	if len(a.blocks) > 0 {
   192  		a.blocks[len(a.blocks)-1].length = snap.offsetInBlock
   193  	}
   194  	a.blockSize = snap.blockSize
   195  
   196  	a.capacity = 0
   197  	for _, block := range a.blocks {
   198  		a.capacity += uint64(block.length)
   199  	}
   200  	a.onMemChange()
   201  }
   202  
// nodeAllocator is an arena from which memdbNode values are allocated and
// resolved by address.
type nodeAllocator struct {
	memdbArena

	// Dummy node, so that we can make X.left.up = X.
	// We then use this instead of NULL to mean the top or bottom
	// end of the rb tree. It is a black node.
	// getNode returns a pointer to it for every null address.
	nullNode memdbNode
}
   211  
   212  func (a *nodeAllocator) init() {
   213  	a.nullNode = memdbNode{
   214  		up:    nullAddr,
   215  		left:  nullAddr,
   216  		right: nullAddr,
   217  		vptr:  nullAddr,
   218  	}
   219  }
   220  
   221  func (a *nodeAllocator) getNode(addr memdbArenaAddr) *memdbNode {
   222  	if addr.isNull() {
   223  		return &a.nullNode
   224  	}
   225  
   226  	return (*memdbNode)(unsafe.Pointer(&a.blocks[addr.idx].buf[addr.off]))
   227  }
   228  
   229  func (a *nodeAllocator) allocNode(key []byte) (memdbArenaAddr, *memdbNode) {
   230  	nodeSize := 8*4 + 2 + kv.FlagBytes + len(key)
   231  	addr, mem := a.alloc(nodeSize, true)
   232  	n := (*memdbNode)(unsafe.Pointer(&mem[0]))
   233  	n.vptr = nullAddr
   234  	n.klen = uint16(len(key))
   235  	copy(n.getKey(), key)
   236  	return addr, n
   237  }
   238  
   239  var testMode = false
   240  
   241  func (a *nodeAllocator) freeNode(addr memdbArenaAddr) {
   242  	if testMode {
   243  		// Make it easier for debug.
   244  		n := a.getNode(addr)
   245  		badAddr := nullAddr
   246  		badAddr.idx--
   247  		n.left = badAddr
   248  		n.right = badAddr
   249  		n.up = badAddr
   250  		n.vptr = badAddr
   251  		return
   252  	}
   253  	// TODO: reuse freed nodes.
   254  }
   255  
// reset releases all blocks of the embedded arena and then re-initializes the
// sentinel nullNode.
func (a *nodeAllocator) reset() {
	a.memdbArena.reset()
	a.init()
}
   260  
// memdbVlog is the arena that stores value records. Each record consists of
// the value bytes immediately followed by a serialized memdbVlogHdr.
type memdbVlog struct {
	memdbArena
	// NOTE(review): presumably the MemDB owning this log; it is not referenced
	// by the memdbVlog methods in this file — confirm against callers.
	memdb *MemDB
}
   265  
   266  const memdbVlogHdrSize = 8 + 8 + 4
   267  
   268  type memdbVlogHdr struct {
   269  	nodeAddr memdbArenaAddr
   270  	oldValue memdbArenaAddr
   271  	valueLen uint32
   272  }
   273  
   274  func (hdr *memdbVlogHdr) store(dst []byte) {
   275  	cursor := 0
   276  	endian.PutUint32(dst[cursor:], hdr.valueLen)
   277  	cursor += 4
   278  	hdr.oldValue.store(dst[cursor:])
   279  	cursor += 8
   280  	hdr.nodeAddr.store(dst[cursor:])
   281  }
   282  
   283  func (hdr *memdbVlogHdr) load(src []byte) {
   284  	cursor := 0
   285  	hdr.valueLen = endian.Uint32(src[cursor:])
   286  	cursor += 4
   287  	hdr.oldValue.load(src[cursor:])
   288  	cursor += 8
   289  	hdr.nodeAddr.load(src[cursor:])
   290  }
   291  
   292  func (l *memdbVlog) appendValue(nodeAddr memdbArenaAddr, oldValue memdbArenaAddr, value []byte) memdbArenaAddr {
   293  	size := memdbVlogHdrSize + len(value)
   294  	addr, mem := l.alloc(size, false)
   295  
   296  	copy(mem, value)
   297  	hdr := memdbVlogHdr{nodeAddr, oldValue, uint32(len(value))}
   298  	hdr.store(mem[len(value):])
   299  
   300  	addr.off += uint32(size)
   301  	return addr
   302  }
   303  
   304  // A pure function that gets a value.
   305  func (l *memdbVlog) getValue(addr memdbArenaAddr) []byte {
   306  	lenOff := addr.off - memdbVlogHdrSize
   307  	block := l.blocks[addr.idx].buf
   308  	valueLen := endian.Uint32(block[lenOff:])
   309  	if valueLen == 0 {
   310  		return tombstone
   311  	}
   312  	valueOff := lenOff - valueLen
   313  	return block[valueOff:lenOff:lenOff]
   314  }
   315  
   316  func (l *memdbVlog) getSnapshotValue(addr memdbArenaAddr, snap *MemDBCheckpoint) ([]byte, bool) {
   317  	result := l.selectValueHistory(addr, func(addr memdbArenaAddr) bool {
   318  		return !l.canModify(snap, addr)
   319  	})
   320  	if result.isNull() {
   321  		return nil, false
   322  	}
   323  	return l.getValue(addr), true
   324  }
   325  
   326  func (l *memdbVlog) selectValueHistory(addr memdbArenaAddr, predicate func(memdbArenaAddr) bool) memdbArenaAddr {
   327  	for !addr.isNull() {
   328  		if predicate(addr) {
   329  			return addr
   330  		}
   331  		var hdr memdbVlogHdr
   332  		hdr.load(l.blocks[addr.idx].buf[addr.off-memdbVlogHdrSize:])
   333  		addr = hdr.oldValue
   334  	}
   335  	return nullAddr
   336  }
   337  
   338  func (l *memdbVlog) revertToCheckpoint(db *MemDB, cp *MemDBCheckpoint) {
   339  	cursor := l.checkpoint()
   340  	for !cp.isSamePosition(&cursor) {
   341  		hdrOff := cursor.offsetInBlock - memdbVlogHdrSize
   342  		block := l.blocks[cursor.blocks-1].buf
   343  		var hdr memdbVlogHdr
   344  		hdr.load(block[hdrOff:])
   345  		node := db.getNode(hdr.nodeAddr)
   346  
   347  		node.vptr = hdr.oldValue
   348  		db.size -= int(hdr.valueLen)
   349  		// oldValue.isNull() == true means this is a newly added value.
   350  		if hdr.oldValue.isNull() {
   351  			// If there are no flags associated with this key, we need to delete this node.
   352  			keptFlags := node.getKeyFlags().AndPersistent()
   353  			if keptFlags == 0 {
   354  				db.deleteNode(node)
   355  			} else {
   356  				node.setKeyFlags(keptFlags)
   357  				db.dirty = true
   358  			}
   359  		} else {
   360  			db.size += len(l.getValue(hdr.oldValue))
   361  		}
   362  
   363  		l.moveBackCursor(&cursor, &hdr)
   364  	}
   365  }
   366  
   367  func (l *memdbVlog) inspectKVInLog(db *MemDB, head, tail *MemDBCheckpoint, f func([]byte, kv.KeyFlags, []byte)) {
   368  	cursor := *tail
   369  	for !head.isSamePosition(&cursor) {
   370  		cursorAddr := memdbArenaAddr{idx: uint32(cursor.blocks - 1), off: uint32(cursor.offsetInBlock)}
   371  		hdrOff := cursorAddr.off - memdbVlogHdrSize
   372  		block := l.blocks[cursorAddr.idx].buf
   373  		var hdr memdbVlogHdr
   374  		hdr.load(block[hdrOff:])
   375  		node := db.allocator.getNode(hdr.nodeAddr)
   376  
   377  		// Skip older versions.
   378  		if node.vptr == cursorAddr {
   379  			value := block[hdrOff-hdr.valueLen : hdrOff]
   380  			f(node.getKey(), node.getKeyFlags(), value)
   381  		}
   382  
   383  		l.moveBackCursor(&cursor, &hdr)
   384  	}
   385  }
   386  
   387  func (l *memdbVlog) moveBackCursor(cursor *MemDBCheckpoint, hdr *memdbVlogHdr) {
   388  	cursor.offsetInBlock -= (memdbVlogHdrSize + int(hdr.valueLen))
   389  	if cursor.offsetInBlock == 0 {
   390  		cursor.blocks--
   391  		if cursor.blocks > 0 {
   392  			cursor.offsetInBlock = l.blocks[cursor.blocks-1].length
   393  		}
   394  	}
   395  }
   396  
   397  func (l *memdbVlog) canModify(cp *MemDBCheckpoint, addr memdbArenaAddr) bool {
   398  	if cp == nil {
   399  		return true
   400  	}
   401  	if int(addr.idx) > cp.blocks-1 {
   402  		return true
   403  	}
   404  	if int(addr.idx) == cp.blocks-1 && int(addr.off) > cp.offsetInBlock {
   405  		return true
   406  	}
   407  	return false
   408  }