github.com/KinWaiYuen/client-go/v2@v2.5.4/internal/unionstore/memdb_arena.go (about)

     1  // Copyright 2021 TiKV Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // NOTE: The code in this file is based on code from the
    16  // TiDB project, licensed under the Apache License v 2.0
    17  //
    18  // https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/unionstore/memdb_arena.go
    19  //
    20  
    21  // Copyright 2020 PingCAP, Inc.
    22  //
    23  // Licensed under the Apache License, Version 2.0 (the "License");
    24  // you may not use this file except in compliance with the License.
    25  // You may obtain a copy of the License at
    26  //
    27  //     http://www.apache.org/licenses/LICENSE-2.0
    28  //
    29  // Unless required by applicable law or agreed to in writing, software
    30  // distributed under the License is distributed on an "AS IS" BASIS,
    31  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    32  // See the License for the specific language governing permissions and
    33  // limitations under the License.
    34  
    35  package unionstore
    36  
    37  import (
    38  	"encoding/binary"
    39  	"math"
    40  	"unsafe"
    41  
    42  	"github.com/KinWaiYuen/client-go/v2/kv"
    43  )
    44  
    45  const (
    46  	alignMask = 1<<32 - 8 // 29 bit 1 and 3 bit 0.
    47  
    48  	nullBlockOffset = math.MaxUint32
    49  	maxBlockSize    = 128 << 20
    50  	initBlockSize   = 4 * 1024
    51  )
    52  
    53  var (
    54  	nullAddr = memdbArenaAddr{math.MaxUint32, math.MaxUint32}
    55  	endian   = binary.LittleEndian
    56  )
    57  
    58  type memdbArenaAddr struct {
    59  	idx uint32
    60  	off uint32
    61  }
    62  
    63  func (addr memdbArenaAddr) isNull() bool {
    64  	return addr == nullAddr
    65  }
    66  
    67  // store and load are used by the vlog, because addresses kept in the vlog are not aligned.
    68  
    69  func (addr memdbArenaAddr) store(dst []byte) {
    70  	endian.PutUint32(dst, addr.idx)
    71  	endian.PutUint32(dst[4:], addr.off)
    72  }
    73  
    74  func (addr *memdbArenaAddr) load(src []byte) {
    75  	addr.idx = endian.Uint32(src)
    76  	addr.off = endian.Uint32(src[4:])
    77  }
    78  
    79  type memdbArena struct {
    80  	blockSize int
    81  	blocks    []memdbArenaBlock
    82  }
    83  
    84  func (a *memdbArena) alloc(size int, align bool) (memdbArenaAddr, []byte) {
    85  	if size > maxBlockSize {
    86  		panic("alloc size is larger than max block size")
    87  	}
    88  
    89  	if len(a.blocks) == 0 {
    90  		a.enlarge(size, initBlockSize)
    91  	}
    92  
    93  	addr, data := a.allocInLastBlock(size, align)
    94  	if !addr.isNull() {
    95  		return addr, data
    96  	}
    97  
    98  	a.enlarge(size, a.blockSize<<1)
    99  	return a.allocInLastBlock(size, align)
   100  }
   101  
   102  func (a *memdbArena) enlarge(allocSize, blockSize int) {
   103  	a.blockSize = blockSize
   104  	for a.blockSize <= allocSize {
   105  		a.blockSize <<= 1
   106  	}
   107  	// Size will never larger than maxBlockSize.
   108  	if a.blockSize > maxBlockSize {
   109  		a.blockSize = maxBlockSize
   110  	}
   111  	a.blocks = append(a.blocks, memdbArenaBlock{
   112  		buf: make([]byte, a.blockSize),
   113  	})
   114  }
   115  
   116  func (a *memdbArena) allocInLastBlock(size int, align bool) (memdbArenaAddr, []byte) {
   117  	idx := len(a.blocks) - 1
   118  	offset, data := a.blocks[idx].alloc(size, align)
   119  	if offset == nullBlockOffset {
   120  		return nullAddr, nil
   121  	}
   122  	return memdbArenaAddr{uint32(idx), offset}, data
   123  }
   124  
   125  func (a *memdbArena) reset() {
   126  	for i := range a.blocks {
   127  		a.blocks[i].reset()
   128  	}
   129  	a.blocks = a.blocks[:0]
   130  	a.blockSize = 0
   131  }
   132  
   133  type memdbArenaBlock struct {
   134  	buf    []byte
   135  	length int
   136  }
   137  
   138  func (a *memdbArenaBlock) alloc(size int, align bool) (uint32, []byte) {
   139  	offset := a.length
   140  	if align {
   141  		// We must align the allocated address for node
   142  		// to make runtime.checkptrAlignment happy.
   143  		offset = (a.length + 7) & alignMask
   144  	}
   145  	newLen := offset + size
   146  	if newLen > len(a.buf) {
   147  		return nullBlockOffset, nil
   148  	}
   149  	a.length = newLen
   150  	return uint32(offset), a.buf[offset : offset+size]
   151  }
   152  
   153  func (a *memdbArenaBlock) reset() {
   154  	a.buf = nil
   155  	a.length = 0
   156  }
   157  
   158  type memdbCheckpoint struct {
   159  	blockSize     int
   160  	blocks        int
   161  	offsetInBlock int
   162  }
   163  
   164  func (cp *memdbCheckpoint) isSamePosition(other *memdbCheckpoint) bool {
   165  	return cp.blocks == other.blocks && cp.offsetInBlock == other.offsetInBlock
   166  }
   167  
   168  func (a *memdbArena) checkpoint() memdbCheckpoint {
   169  	snap := memdbCheckpoint{
   170  		blockSize: a.blockSize,
   171  		blocks:    len(a.blocks),
   172  	}
   173  	if len(a.blocks) > 0 {
   174  		snap.offsetInBlock = a.blocks[len(a.blocks)-1].length
   175  	}
   176  	return snap
   177  }
   178  
   179  func (a *memdbArena) truncate(snap *memdbCheckpoint) {
   180  	for i := snap.blocks; i < len(a.blocks); i++ {
   181  		a.blocks[i] = memdbArenaBlock{}
   182  	}
   183  	a.blocks = a.blocks[:snap.blocks]
   184  	if len(a.blocks) > 0 {
   185  		a.blocks[len(a.blocks)-1].length = snap.offsetInBlock
   186  	}
   187  	a.blockSize = snap.blockSize
   188  }
   189  
   190  type nodeAllocator struct {
   191  	memdbArena
   192  
   193  	// Dummy node, so that we can make X.left.up = X.
   194  	// We then use this instead of NULL to mean the top or bottom
   195  	// end of the rb tree. It is a black node.
   196  	nullNode memdbNode
   197  }
   198  
   199  func (a *nodeAllocator) init() {
   200  	a.nullNode = memdbNode{
   201  		up:    nullAddr,
   202  		left:  nullAddr,
   203  		right: nullAddr,
   204  		vptr:  nullAddr,
   205  	}
   206  }
   207  
   208  func (a *nodeAllocator) getNode(addr memdbArenaAddr) *memdbNode {
   209  	if addr.isNull() {
   210  		return &a.nullNode
   211  	}
   212  
   213  	return (*memdbNode)(unsafe.Pointer(&a.blocks[addr.idx].buf[addr.off]))
   214  }
   215  
   216  func (a *nodeAllocator) allocNode(key []byte) (memdbArenaAddr, *memdbNode) {
   217  	nodeSize := 8*4 + 2 + kv.FlagBytes + len(key)
   218  	addr, mem := a.alloc(nodeSize, true)
   219  	n := (*memdbNode)(unsafe.Pointer(&mem[0]))
   220  	n.vptr = nullAddr
   221  	n.klen = uint16(len(key))
   222  	copy(n.getKey(), key)
   223  	return addr, n
   224  }
   225  
   226  var testMode = false
   227  
   228  func (a *nodeAllocator) freeNode(addr memdbArenaAddr) {
   229  	if testMode {
   230  		// Make it easier for debug.
   231  		n := a.getNode(addr)
   232  		badAddr := nullAddr
   233  		badAddr.idx--
   234  		n.left = badAddr
   235  		n.right = badAddr
   236  		n.up = badAddr
   237  		n.vptr = badAddr
   238  		return
   239  	}
   240  	// TODO: reuse freed nodes.
   241  }
   242  
   243  func (a *nodeAllocator) reset() {
   244  	a.memdbArena.reset()
   245  	a.init()
   246  }
   247  
   248  type memdbVlog struct {
   249  	memdbArena
   250  }
   251  
   252  const memdbVlogHdrSize = 8 + 8 + 4
   253  
   254  type memdbVlogHdr struct {
   255  	nodeAddr memdbArenaAddr
   256  	oldValue memdbArenaAddr
   257  	valueLen uint32
   258  }
   259  
   260  func (hdr *memdbVlogHdr) store(dst []byte) {
   261  	cursor := 0
   262  	endian.PutUint32(dst[cursor:], hdr.valueLen)
   263  	cursor += 4
   264  	hdr.oldValue.store(dst[cursor:])
   265  	cursor += 8
   266  	hdr.nodeAddr.store(dst[cursor:])
   267  }
   268  
   269  func (hdr *memdbVlogHdr) load(src []byte) {
   270  	cursor := 0
   271  	hdr.valueLen = endian.Uint32(src[cursor:])
   272  	cursor += 4
   273  	hdr.oldValue.load(src[cursor:])
   274  	cursor += 8
   275  	hdr.nodeAddr.load(src[cursor:])
   276  }
   277  
   278  func (l *memdbVlog) appendValue(nodeAddr memdbArenaAddr, oldValue memdbArenaAddr, value []byte) memdbArenaAddr {
   279  	size := memdbVlogHdrSize + len(value)
   280  	addr, mem := l.alloc(size, false)
   281  
   282  	copy(mem, value)
   283  	hdr := memdbVlogHdr{nodeAddr, oldValue, uint32(len(value))}
   284  	hdr.store(mem[len(value):])
   285  
   286  	addr.off += uint32(size)
   287  	return addr
   288  }
   289  
   290  func (l *memdbVlog) getValue(addr memdbArenaAddr) []byte {
   291  	lenOff := addr.off - memdbVlogHdrSize
   292  	block := l.blocks[addr.idx].buf
   293  	valueLen := endian.Uint32(block[lenOff:])
   294  	if valueLen == 0 {
   295  		return tombstone
   296  	}
   297  	valueOff := lenOff - valueLen
   298  	return block[valueOff:lenOff:lenOff]
   299  }
   300  
   301  func (l *memdbVlog) getSnapshotValue(addr memdbArenaAddr, snap *memdbCheckpoint) ([]byte, bool) {
   302  	result := l.selectValueHistory(addr, func(addr memdbArenaAddr) bool {
   303  		return !l.canModify(snap, addr)
   304  	})
   305  	if result.isNull() {
   306  		return nil, false
   307  	}
   308  	return l.getValue(addr), true
   309  }
   310  
   311  func (l *memdbVlog) selectValueHistory(addr memdbArenaAddr, predicate func(memdbArenaAddr) bool) memdbArenaAddr {
   312  	for !addr.isNull() {
   313  		if predicate(addr) {
   314  			return addr
   315  		}
   316  		var hdr memdbVlogHdr
   317  		hdr.load(l.blocks[addr.idx].buf[addr.off-memdbVlogHdrSize:])
   318  		addr = hdr.oldValue
   319  	}
   320  	return nullAddr
   321  }
   322  
   323  func (l *memdbVlog) revertToCheckpoint(db *MemDB, cp *memdbCheckpoint) {
   324  	cursor := l.checkpoint()
   325  	for !cp.isSamePosition(&cursor) {
   326  		hdrOff := cursor.offsetInBlock - memdbVlogHdrSize
   327  		block := l.blocks[cursor.blocks-1].buf
   328  		var hdr memdbVlogHdr
   329  		hdr.load(block[hdrOff:])
   330  		node := db.getNode(hdr.nodeAddr)
   331  
   332  		node.vptr = hdr.oldValue
   333  		db.size -= int(hdr.valueLen)
   334  		// oldValue.isNull() == true means this is a newly added value.
   335  		if hdr.oldValue.isNull() {
   336  			// If there are no flags associated with this key, we need to delete this node.
   337  			keptFlags := node.getKeyFlags().AndPersistent()
   338  			if keptFlags == 0 {
   339  				db.deleteNode(node)
   340  			} else {
   341  				node.setKeyFlags(keptFlags)
   342  				db.dirty = true
   343  			}
   344  		} else {
   345  			db.size += len(l.getValue(hdr.oldValue))
   346  		}
   347  
   348  		l.moveBackCursor(&cursor, &hdr)
   349  	}
   350  }
   351  
   352  func (l *memdbVlog) inspectKVInLog(db *MemDB, head, tail *memdbCheckpoint, f func([]byte, kv.KeyFlags, []byte)) {
   353  	cursor := *tail
   354  	for !head.isSamePosition(&cursor) {
   355  		cursorAddr := memdbArenaAddr{idx: uint32(cursor.blocks - 1), off: uint32(cursor.offsetInBlock)}
   356  		hdrOff := cursorAddr.off - memdbVlogHdrSize
   357  		block := l.blocks[cursorAddr.idx].buf
   358  		var hdr memdbVlogHdr
   359  		hdr.load(block[hdrOff:])
   360  		node := db.allocator.getNode(hdr.nodeAddr)
   361  
   362  		// Skip older versions.
   363  		if node.vptr == cursorAddr {
   364  			value := block[hdrOff-hdr.valueLen : hdrOff]
   365  			f(node.getKey(), node.getKeyFlags(), value)
   366  		}
   367  
   368  		l.moveBackCursor(&cursor, &hdr)
   369  	}
   370  }
   371  
   372  func (l *memdbVlog) moveBackCursor(cursor *memdbCheckpoint, hdr *memdbVlogHdr) {
   373  	cursor.offsetInBlock -= (memdbVlogHdrSize + int(hdr.valueLen))
   374  	if cursor.offsetInBlock == 0 {
   375  		cursor.blocks--
   376  		if cursor.blocks > 0 {
   377  			cursor.offsetInBlock = l.blocks[cursor.blocks-1].length
   378  		}
   379  	}
   380  }
   381  
   382  func (l *memdbVlog) canModify(cp *memdbCheckpoint, addr memdbArenaAddr) bool {
   383  	if cp == nil {
   384  		return true
   385  	}
   386  	if int(addr.idx) > cp.blocks-1 {
   387  		return true
   388  	}
   389  	if int(addr.idx) == cp.blocks-1 && int(addr.off) > cp.offsetInBlock {
   390  		return true
   391  	}
   392  	return false
   393  }