github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/bitrie.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bitpage
    16  
    17  import (
    18  	"bytes"
    19  	"container/list"
    20  	"encoding/binary"
    21  	"fmt"
    22  	"sort"
    23  )
    24  
// Format constants for a serialized Bitrie.
const (
	// BitrieVersion is the version number NewBitrie stores in the header.
	BitrieVersion    = 1
	// BitrieHeaderSize is the encoded header length in bytes
	// (2+2+2+4+4+4, matching readHeader/writeHeader).
	BitrieHeaderSize = 18
	// BitrieKeySize is the width in bytes of one slot in the key area.
	BitrieKeySize    = 1
	// BitrieIndexSize is the width in bytes of one slot in the index area.
	BitrieIndexSize  = 4
	// PruneKeySize is not referenced in this file; presumably the width of
	// the prune-length prefix written by writeNode — TODO confirm.
	PruneKeySize     = 2
	// PruneKeyValFlag is not referenced in this file — TODO confirm its use.
	PruneKeyValFlag  = 1
)
    33  
// Bit flags OR-ed into the first ("kind") byte of every serialized node, and
// the child-index thresholds that select which child encoding writeNode uses.
const (
	// InternalKindKeyPrune: a 2-byte length followed by the prune bytes is present.
	InternalKindKeyPrune      uint8 = 1
	// InternalKindHasValue: the node's trailing bytes are its value.
	InternalKindHasValue      uint8 = 2
	// InternalKindHasChildrenL1: 1-byte child count followed by a 2-byte child index.
	InternalKindHasChildrenL1 uint8 = 4
	// InternalKindHasChildrenL2: one 4-byte word, child index in the upper
	// 24 bits and child count in the low 8 bits.
	InternalKindHasChildrenL2 uint8 = 8
	// InternalKindHasChildrenL3: 1-byte child count followed by a 4-byte child index.
	InternalKindHasChildrenL3 uint8 = 16

	// KindChildrenL1Step: child indexes below this value use the L1 encoding.
	KindChildrenL1Step uint32 = 65536
	// KindChildrenL2Step: child indexes below this value (and not below
	// KindChildrenL1Step) use the L2 encoding; anything larger uses L3.
	KindChildrenL2Step uint32 = 16777216
)
    44  
// Header is the fixed-size preamble of a serialized trie. writeHeader encodes
// the fields big-endian, in declaration order, into BitrieHeaderSize bytes.
type Header struct {
	version     uint16 // format version; NewBitrie sets it to BitrieVersion
	reserved    uint16 // written and read back, otherwise unused in this file
	keyOffset   uint16 // start of the key area (one byte per node)
	indexOffset uint32 // start of the index area (one uint32 per node)
	dataOffset  uint32 // start of the encoded node bodies
	size        uint32 // value of tblsize() when Serialize finished
}
    53  
// Bitrie is a byte-wise trie with collapsed ("pruned") single-child chains.
// It is built in memory through InitWriter/Add, flattened by Serialize, and
// queried with Get after SetReader has attached the serialized bytes.
type Bitrie struct {
	header   Header              // header of the serialized form
	length   uint32              // node count including the root slot; starts at 1
	data     []byte              // buffer set by SetReader and read by Get
	children map[uint8]*trienode // first-level nodes of the in-memory build tree
}
    60  
// trienode is one node of the in-memory tree built by Add.
type trienode struct {
	key      uint8               // byte under which the parent indexes this node
	prune    []byte              // key bytes that follow key and are folded into this node
	value    []byte              // stored value; empty when no key ends at this node
	children map[uint8]*trienode // child nodes, indexed by their key byte
}
    67  
// disknode is the decoded (or to-be-encoded) body of a serialized node.
type disknode struct {
	prune      []byte // folded key bytes, empty when absent
	value      []byte // stored value, empty when absent
	childCount uint8  // number of children minus one (Serialize stores len-1)
	childIndex uint32 // item index of the first child; 0 means no children
}
    74  
    75  func NewBitrie() *Bitrie {
    76  	root := &Bitrie{
    77  		header:   Header{version: BitrieVersion},
    78  		length:   1,
    79  		data:     nil,
    80  		children: nil,
    81  	}
    82  
    83  	return root
    84  }
    85  
    86  func (bt *Bitrie) InitWriter() {
    87  	bt.length = 1
    88  	bt.children = make(map[uint8]*trienode, 1<<10)
    89  }
    90  
    91  func (bt *Bitrie) SetReader(d []byte, offset uint32) bool {
    92  	if d == nil {
    93  		return false
    94  	}
    95  
    96  	bt.data = d
    97  	bt.header = bt.readHeader(bt.data[offset:])
    98  
    99  	return len(d) >= int(bt.header.size)
   100  }
   101  
   102  func (bt *Bitrie) Size() uint32 {
   103  	return bt.header.size
   104  }
   105  
   106  func (bt *Bitrie) Add(key []byte, value []byte) {
   107  	keyLength := len(key)
   108  	if keyLength <= 0 || len(value) <= 0 {
   109  		return
   110  	}
   111  
   112  	var ok bool
   113  	var childNode *trienode
   114  
   115  	children := bt.children
   116  
   117  	for i := 0; i < keyLength; i++ {
   118  		if childNode, ok = children[key[i]]; !ok {
   119  			newNode := &trienode{
   120  				key:      key[i],
   121  				prune:    key[i+1:],
   122  				value:    value,
   123  				children: make(map[byte]*trienode, 1<<3),
   124  			}
   125  			children[key[i]] = newNode
   126  			bt.length += 1
   127  			break
   128  		} else if pruneKeyLength := len(childNode.prune); pruneKeyLength > 0 {
   129  			m := 0
   130  			n := i + 1
   131  			for m < pruneKeyLength && n < keyLength {
   132  				if childNode.prune[m] != key[n] {
   133  					break
   134  				}
   135  
   136  				m++
   137  				n++
   138  			}
   139  
   140  			tailKeyLength := keyLength - i - 1
   141  			if m == 0 {
   142  				if pruneKeyLength > tailKeyLength {
   143  					if n <= keyLength-1 {
   144  						bt.newPruneChildByNode(childNode, m)
   145  						bt.newPruneChildByKey(key[n:], value, childNode)
   146  						childNode.prune = nil
   147  						bt.length += 2
   148  					} else if n > keyLength-1 {
   149  						bt.newPruneChildByNode(childNode, m)
   150  						childNode.prune = nil
   151  						childNode.value = value
   152  						bt.length += 1
   153  					}
   154  				} else if pruneKeyLength == tailKeyLength {
   155  					if n <= keyLength-1 {
   156  						bt.newPruneChildByNode(childNode, m)
   157  						bt.newPruneChildByKey(key[n:], value, childNode)
   158  						childNode.prune = nil
   159  						bt.length += 2
   160  					} else if n > keyLength-1 {
   161  						childNode.value = value
   162  					}
   163  				} else if pruneKeyLength < tailKeyLength {
   164  					bt.newPruneChildByNode(childNode, m)
   165  					bt.newPruneChildByKey(key[n:], value, childNode)
   166  					childNode.prune = nil
   167  					bt.length += 2
   168  				}
   169  				break
   170  			} else if m > 0 {
   171  				if pruneKeyLength > tailKeyLength {
   172  					if n <= keyLength-1 {
   173  						bt.newPruneChildByNode(childNode, m)
   174  						bt.newPruneChildByKey(key[n:], value, childNode)
   175  						childNode.prune = childNode.prune[:m]
   176  						bt.length += 2
   177  					} else if n > keyLength-1 {
   178  						bt.newPruneChildByNode(childNode, m)
   179  						childNode.value = value
   180  						childNode.prune = childNode.prune[:m]
   181  						bt.length += 1
   182  					}
   183  					break
   184  				} else if pruneKeyLength == tailKeyLength {
   185  					if n <= keyLength-1 {
   186  						bt.newPruneChildByNode(childNode, m)
   187  						bt.newPruneChildByKey(key[n:], value, childNode)
   188  						childNode.prune = childNode.prune[:m]
   189  						bt.length += 2
   190  					} else if n > keyLength-1 {
   191  						childNode.value = value
   192  					}
   193  					break
   194  				} else if pruneKeyLength < tailKeyLength {
   195  					if m <= pruneKeyLength-1 {
   196  						bt.newPruneChildByNode(childNode, m)
   197  						bt.newPruneChildByKey(key[n:], value, childNode)
   198  						childNode.prune = childNode.prune[:m]
   199  						bt.length += 2
   200  						break
   201  					} else if m > pruneKeyLength-1 {
   202  						i += m
   203  					}
   204  				}
   205  			}
   206  		}
   207  
   208  		children = childNode.children
   209  	}
   210  }
   211  
   212  func (bt *Bitrie) Finish() {
   213  	bt.children = nil
   214  }
   215  
   216  func (bt *Bitrie) Serialize(
   217  	tblalloc func(uint32) uint32,
   218  	tblbytes func(uint32, uint32) []byte,
   219  	tblsize func() uint32) bool {
   220  	if bt.length <= 0 {
   221  		return false
   222  	}
   223  
   224  	itemIndex := uint32(1)
   225  
   226  	headerOffset := tblalloc(BitrieHeaderSize + bt.length)
   227  	keyOffset := headerOffset + BitrieHeaderSize
   228  
   229  	idxSize := bt.length * BitrieIndexSize
   230  	indexOffset := tblalloc(idxSize)
   231  
   232  	dataOffset := indexOffset + idxSize
   233  
   234  	bt.header.keyOffset = uint16(keyOffset)
   235  	bt.header.indexOffset = indexOffset
   236  	bt.header.dataOffset = dataOffset
   237  
   238  	wrBuf := make([]byte, 256<<10)
   239  	dkNode := disknode{
   240  		prune:      nil,
   241  		value:      nil,
   242  		childCount: 0,
   243  		childIndex: 0,
   244  	}
   245  
   246  	if len(bt.children) > 0 {
   247  		bt.writeKey(tblbytes(keyOffset, BitrieKeySize), 0)
   248  		keyOffset += BitrieKeySize
   249  
   250  		bt.writeIndex(tblbytes(indexOffset, BitrieIndexSize), dataOffset)
   251  		indexOffset += BitrieIndexSize
   252  
   253  		dkNode.childIndex = itemIndex
   254  		dkNode.childCount = uint8(len(bt.children) - 1)
   255  
   256  		wbuf, wsize := bt.writeNode(wrBuf[0:], &dkNode)
   257  		offset := tblalloc(wsize)
   258  		copy(tblbytes(offset, wsize), wbuf)
   259  		dataOffset += wsize
   260  	} else {
   261  		return false
   262  	}
   263  
   264  	Queue := list.New()
   265  	bt.pushQueue(Queue, bt.children)
   266  
   267  	for Queue.Len() > 0 {
   268  		elem := Queue.Front()
   269  		node := elem.Value.(*trienode)
   270  
   271  		bt.writeKey(tblbytes(keyOffset, BitrieKeySize), node.key)
   272  		keyOffset += BitrieKeySize
   273  
   274  		bt.writeIndex(tblbytes(indexOffset, BitrieIndexSize), dataOffset)
   275  
   276  		dkNode.prune = node.prune
   277  		dkNode.value = node.value
   278  		if len(node.children) > 0 {
   279  			dkNode.childIndex = itemIndex + uint32(Queue.Len())
   280  			dkNode.childCount = uint8(len(node.children) - 1)
   281  		} else {
   282  			dkNode.childIndex = 0
   283  			dkNode.childCount = 0
   284  		}
   285  		itemIndex++
   286  
   287  		wbuf, wsize := bt.writeNode(wrBuf[0:], &dkNode)
   288  		offset := tblalloc(wsize)
   289  		copy(tblbytes(offset, wsize), wbuf)
   290  
   291  		indexOffset += BitrieIndexSize
   292  		dataOffset += wsize
   293  
   294  		bt.pushQueue(Queue, node.children)
   295  		Queue.Remove(elem)
   296  	}
   297  
   298  	bt.header.size = tblsize()
   299  	bt.writeHeader(tblbytes(headerOffset, BitrieHeaderSize), bt.header)
   300  
   301  	return true
   302  }
   303  
   304  func (bt *Bitrie) Get(key []byte) ([]byte, bool) {
   305  	keyOffset := uint32(bt.header.keyOffset)
   306  	indexOffset := bt.header.indexOffset
   307  
   308  	node := bt.readNode(bt.data[bt.header.dataOffset:], 0)
   309  	childCount := node.childCount
   310  	childIndex := node.childIndex
   311  
   312  	keyLength := len(key)
   313  	for i := 0; i < keyLength; i++ {
   314  		tmpChildCount := uint32(childCount)
   315  		if childIndex > 0 {
   316  			tmpChildCount++
   317  		}
   318  
   319  		find, childPos := bt.findNode(key[i], bt.data[keyOffset+childIndex:], tmpChildCount)
   320  		if find {
   321  			childIndex += childPos
   322  			nsize, offset := bt.getNodeSizeAndOffset(bt.data, indexOffset+childIndex*BitrieIndexSize)
   323  			node = bt.readNode(bt.data[offset:], nsize)
   324  
   325  			valueLength := len(node.value)
   326  			pruneLength := len(node.prune)
   327  			if pruneLength > 0 {
   328  				curKeyPos := i + 1 + pruneLength
   329  				if curKeyPos <= keyLength && bytes.Equal(node.prune, key[i+1:curKeyPos]) {
   330  					i += pruneLength
   331  					if i == keyLength-1 && valueLength > 0 {
   332  						return node.value, true
   333  					} else {
   334  						childCount = node.childCount
   335  						childIndex = node.childIndex
   336  						continue
   337  					}
   338  				} else {
   339  					return nil, false
   340  				}
   341  			}
   342  			childCount = node.childCount
   343  			childIndex = node.childIndex
   344  
   345  			if i == keyLength-1 && valueLength > 0 {
   346  				return node.value, true
   347  			}
   348  		} else {
   349  			return nil, false
   350  		}
   351  	}
   352  
   353  	return nil, false
   354  }
   355  
   356  func (bt *Bitrie) ToBytes() []byte {
   357  	buf := make([]byte, 0, 1024)
   358  
   359  	queue := list.New()
   360  	bt.pushQueue(queue, bt.children)
   361  
   362  	for queue.Len() > 0 {
   363  		elem := queue.Front()
   364  		node := elem.Value.(*trienode)
   365  
   366  		buf = append(buf, fmt.Sprintf("key=%c; prune=%s; value=%s; ", node.key, node.prune, node.value)...)
   367  		if len(node.children) > 0 {
   368  			buf = append(buf, fmt.Sprintf("children[%d]=[", len(node.children))...)
   369  			for k, _ := range node.children {
   370  				buf = append(buf, fmt.Sprintf("k=%c, ", k)...)
   371  			}
   372  			buf = append(buf, "]\n"...)
   373  		} else {
   374  			buf = append(buf, "children=[0]\n"...)
   375  		}
   376  
   377  		bt.pushQueue(queue, node.children)
   378  		queue.Remove(elem)
   379  	}
   380  
   381  	return buf
   382  }
   383  
   384  func (bt *Bitrie) AnalyzeBytes() []byte {
   385  	buf := make([]byte, 0, 10<<10)
   386  
   387  	keyOffset := uint32(bt.header.keyOffset)
   388  	indexOffset := bt.header.indexOffset
   389  
   390  	key := uint8(0)
   391  	offset_next := uint32(0)
   392  
   393  	count := indexOffset - keyOffset
   394  	buf = append(buf, fmt.Sprintf("Header version=%d; keyOffset=%d; indexOffset=%d; dataOffset=%d; itemCount=%d; size=%d\n", bt.header.version, keyOffset, indexOffset, bt.header.dataOffset, count, bt.header.size)...)
   395  	for i := uint32(0); i < count; i++ {
   396  		tmp_kpos := keyOffset + i
   397  		tmp_ipos := indexOffset + BitrieIndexSize*i
   398  		tmp_dpos := binary.BigEndian.Uint32(bt.data[tmp_ipos:])
   399  		if i == count-1 {
   400  			offset_next = bt.header.size
   401  		} else {
   402  			offset_next = binary.BigEndian.Uint32(bt.data[tmp_ipos+BitrieIndexSize:])
   403  		}
   404  		node := bt.readNode(bt.data[tmp_dpos:], offset_next-tmp_dpos)
   405  
   406  		key = bt.data[tmp_kpos]
   407  		if key == 0 {
   408  			key = ' '
   409  		}
   410  
   411  		if len(node.prune) == 0 {
   412  			node.prune = []byte(" ")
   413  		}
   414  		if len(node.value) == 0 {
   415  			node.value = []byte(" ")
   416  		}
   417  
   418  		tmpChildCount := uint32(node.childCount)
   419  		if node.childIndex > 0 {
   420  			tmpChildCount++
   421  		}
   422  
   423  		buf = append(buf, fmt.Sprintf("Item-%d keyOffset=%d; indexOffset=%d; dataOffset=%d; node.key=%c; node.prune=%s; node.value=%s; node.childCount=%v; node.childIndex=%v\n", i, tmp_ipos, tmp_ipos, tmp_dpos, key, node.prune, node.value, tmpChildCount, node.childIndex)...)
   424  	}
   425  
   426  	return buf
   427  }
   428  
   429  func (bt *Bitrie) newPruneChildByNode(node *trienode, offset int) *trienode {
   430  	pruneKeyLen := len(node.prune) - 1
   431  	if offset > pruneKeyLen {
   432  		return nil
   433  	}
   434  
   435  	newNode := &trienode{
   436  		key:   node.prune[offset],
   437  		value: node.value,
   438  	}
   439  
   440  	if len(node.children) > 0 {
   441  		newNode.children = node.children
   442  		node.children = make(map[byte]*trienode, 1<<3)
   443  	} else {
   444  		newNode.children = make(map[byte]*trienode, 1<<3)
   445  	}
   446  
   447  	node.children[newNode.key] = newNode
   448  
   449  	if offset < pruneKeyLen {
   450  		newNode.prune = node.prune[offset+1:]
   451  	} else {
   452  		newNode.prune = nil
   453  	}
   454  
   455  	node.value = nil
   456  
   457  	return newNode
   458  }
   459  
   460  func (bt *Bitrie) newPruneChildByKey(key []byte, value []byte, node *trienode) *trienode {
   461  	newNode := &trienode{
   462  		key:      key[0],
   463  		prune:    key[1:],
   464  		value:    value,
   465  		children: make(map[byte]*trienode, 1<<3),
   466  	}
   467  
   468  	node.children[newNode.key] = newNode
   469  
   470  	return newNode
   471  }
   472  
   473  func (bt *Bitrie) readHeader(buf []byte) Header {
   474  	header := Header{
   475  		version:     binary.BigEndian.Uint16(buf[0:]),
   476  		reserved:    binary.BigEndian.Uint16(buf[2:]),
   477  		keyOffset:   binary.BigEndian.Uint16(buf[4:]),
   478  		indexOffset: binary.BigEndian.Uint32(buf[6:]),
   479  		dataOffset:  binary.BigEndian.Uint32(buf[10:]),
   480  		size:        binary.BigEndian.Uint32(buf[14:]),
   481  	}
   482  
   483  	return header
   484  }
   485  
   486  func (bt *Bitrie) writeHeader(buf []byte, header Header) {
   487  	binary.BigEndian.PutUint16(buf[0:], header.version)
   488  	binary.BigEndian.PutUint16(buf[2:], header.reserved)
   489  	binary.BigEndian.PutUint16(buf[4:], header.keyOffset)
   490  	binary.BigEndian.PutUint32(buf[6:], header.indexOffset)
   491  	binary.BigEndian.PutUint32(buf[10:], header.dataOffset)
   492  	binary.BigEndian.PutUint32(buf[14:], header.size)
   493  }
   494  
   495  func (bt *Bitrie) getNodeSizeAndOffset(buf []byte, offset uint32) (uint32, uint32) {
   496  	lo := binary.BigEndian.Uint32(buf[offset:])
   497  
   498  	var ro uint32
   499  	offsetNext := offset + BitrieIndexSize
   500  	if offsetNext < bt.header.dataOffset {
   501  		ro = binary.BigEndian.Uint32(buf[offsetNext:])
   502  	} else {
   503  		ro = bt.header.size
   504  	}
   505  
   506  	return ro - lo, lo
   507  }
   508  
   509  func (bt *Bitrie) readNode(buf []byte, size uint32) disknode {
   510  	dkNode := disknode{
   511  		prune:      nil,
   512  		value:      nil,
   513  		childCount: 0,
   514  		childIndex: 0,
   515  	}
   516  
   517  	kind := buf[0]
   518  	offset := uint32(1)
   519  
   520  	if kind&InternalKindKeyPrune == InternalKindKeyPrune {
   521  		ksize := uint32(binary.BigEndian.Uint16(buf[offset:]))
   522  		offset += 2
   523  		dkNode.prune = buf[offset : offset+ksize]
   524  		offset += ksize
   525  	}
   526  
   527  	if kind&InternalKindHasChildrenL1 == InternalKindHasChildrenL1 {
   528  		dkNode.childCount = buf[offset]
   529  		offset += 1
   530  		dkNode.childIndex = uint32(binary.BigEndian.Uint16(buf[offset:]))
   531  		offset += 2
   532  	} else if kind&InternalKindHasChildrenL2 == InternalKindHasChildrenL2 {
   533  		childIndex := binary.BigEndian.Uint32(buf[offset:])
   534  		dkNode.childCount = uint8(childIndex & 0xff)
   535  		dkNode.childIndex = childIndex >> 8
   536  		offset += 4
   537  	} else if kind&InternalKindHasChildrenL3 == InternalKindHasChildrenL3 {
   538  		dkNode.childCount = buf[offset]
   539  		offset += 1
   540  		dkNode.childIndex = binary.BigEndian.Uint32(buf[offset:])
   541  		offset += 4
   542  	}
   543  
   544  	if kind&InternalKindHasValue == InternalKindHasValue && offset < size {
   545  		dkNode.value = buf[offset:size]
   546  	}
   547  
   548  	return dkNode
   549  }
   550  
   551  func (bt *Bitrie) writeKey(buf []byte, key uint8) {
   552  	buf[0] = key
   553  }
   554  
   555  func (bt *Bitrie) writeIndex(buf []byte, idx uint32) {
   556  	binary.BigEndian.PutUint32(buf[0:], idx)
   557  }
   558  
   559  func (bt *Bitrie) writeNode(buf []byte, dkNode *disknode) ([]byte, uint32) {
   560  	kind := uint8(0)
   561  	offset := uint32(1)
   562  
   563  	pruneLength := uint32(len(dkNode.prune))
   564  	if pruneLength > 0 {
   565  		kind |= InternalKindKeyPrune
   566  		binary.BigEndian.PutUint16(buf[offset:], uint16(pruneLength))
   567  		offset += 2
   568  		copy(buf[offset:offset+pruneLength], dkNode.prune)
   569  		offset += pruneLength
   570  	}
   571  
   572  	if dkNode.childIndex > 0 {
   573  		if dkNode.childIndex < KindChildrenL1Step {
   574  			kind |= InternalKindHasChildrenL1
   575  			buf[offset] = dkNode.childCount
   576  			offset += 1
   577  			binary.BigEndian.PutUint16(buf[offset:], uint16(dkNode.childIndex))
   578  			offset += 2
   579  		} else if dkNode.childIndex < KindChildrenL2Step {
   580  			kind |= InternalKindHasChildrenL2
   581  			childIndex := dkNode.childIndex<<8 | uint32(dkNode.childCount)
   582  			binary.BigEndian.PutUint32(buf[offset:], childIndex)
   583  			offset += 4
   584  		} else {
   585  			kind |= InternalKindHasChildrenL3
   586  			buf[offset] = dkNode.childCount
   587  			offset += 1
   588  			binary.BigEndian.PutUint32(buf[offset:], dkNode.childIndex)
   589  			offset += 4
   590  		}
   591  	}
   592  
   593  	valueLength := uint32(len(dkNode.value))
   594  	if valueLength > 0 {
   595  		kind |= InternalKindHasValue
   596  		copy(buf[offset:offset+valueLength], dkNode.value)
   597  		offset += valueLength
   598  	}
   599  
   600  	buf[0] = kind
   601  
   602  	return buf[0:offset], offset
   603  }
   604  
   605  func (bt *Bitrie) findNode(key uint8, buf []byte, n uint32) (bool, uint32) {
   606  	i, j := uint32(0), n
   607  	for i < j {
   608  		h := (i + j) >> 1
   609  		if buf[h] < key {
   610  			i = h + 1
   611  		} else {
   612  			j = h
   613  		}
   614  	}
   615  
   616  	if i < n && buf[i] == key {
   617  		return true, i
   618  	}
   619  
   620  	return false, 0
   621  }
   622  
   623  func (bt *Bitrie) pushQueue(queue *list.List, children map[uint8]*trienode) {
   624  	childCount := len(children)
   625  	if childCount <= 0 {
   626  		return
   627  	}
   628  
   629  	sortedKeys := make([]int, 0, childCount)
   630  
   631  	for k, _ := range children {
   632  		sortedKeys = append(sortedKeys, int(k))
   633  	}
   634  
   635  	sort.Ints(sortedKeys)
   636  
   637  	for _, v := range sortedKeys {
   638  		queue.PushBack(children[uint8(v)])
   639  	}
   640  }