github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/page_block.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bitpage
    16  
    17  import (
    18  	"encoding/binary"
    19  	"sort"
    20  )
    21  
    22  const (
    23  	pbHeaderSize   = 10
    24  	pbHeaderOffset = 0
    25  	pbDataOffset   = pbHeaderOffset + pbHeaderSize
    26  )
    27  
    28  var (
    29  	dataExpandBuf [2 << 10]byte
    30  	intIndexSize  uint32 = 3 << 10
    31  )
    32  
// pageBlock is a block of key/value items held in one byte buffer: a fixed
// header, the encoded items, and finally an index of big-endian uint32 item
// offsets (the "int index"). The same type is used to build a block
// (writeItem / writeFinish) and to read one back (openPageBlock / get*).
type pageBlock struct {
	header        pbHeader // in-memory copy of the encoded block header
	itemsOffset   []uint32 // start offset of every item written so far (write path)
	data          []byte   // block buffer: header, items, then the int index
	intIndex      []byte   // view of data covering the int index
	size          uint32   // number of bytes of data currently in use
	num           int      // number of items in the block
	prevKey       []byte   // copy of the latest key, not yet written (prefix-compress mode)
	prevValue     []byte   // value paired with prevKey; references the caller's slice
	prevHasShared bool     // whether prevKey is stored relative to sharedKey
	sharedKey     []byte   // prefix currently shared by consecutive keys
	sharedNum     int      // value of num at the time sharedKey was established
}
    46  
// pbHeader is the decoded form of the 10-byte block header. All fields are
// stored big-endian by writeHeader and decoded by readHeader.
type pbHeader struct {
	version        uint16 // block format version, bytes [0,2)
	num            uint32 // number of items, bytes [2,6)
	intIndexOffset uint32 // offset of the int index inside the block, bytes [6,10)
}
    52  
    53  /*var pbPool = sync.Pool{
    54  	New: func() interface{} {
    55  		return &pageBlock{}
    56  	},
    57  }*/
    58  
    59  func newPageBlock(version uint16, blockSize uint32) *pageBlock {
    60  	pb := &pageBlock{
    61  		itemsOffset:   make([]uint32, 0, 1<<8),
    62  		data:          make([]byte, blockSize+intIndexSize),
    63  		size:          pbDataOffset,
    64  		num:           0,
    65  		prevKey:       nil,
    66  		prevValue:     nil,
    67  		prevHasShared: false,
    68  		sharedKey:     nil,
    69  		sharedNum:     0,
    70  	}
    71  
    72  	pb.header.version = version
    73  
    74  	return pb
    75  }
    76  
    77  func openPageBlock(pb *pageBlock, buf []byte) {
    78  	pb.data = buf
    79  
    80  	pb.readHeader()
    81  	pb.num = int(pb.header.num)
    82  	pb.size = uint32(len(buf))
    83  	pb.intIndex = pb.data[pb.header.intIndexOffset:pb.size]
    84  }
    85  
    86  func (p *pageBlock) reset(version uint16) {
    87  	p.header.intIndexOffset = 0
    88  	p.header.num = 0
    89  	p.header.version = version
    90  
    91  	p.itemsOffset = p.itemsOffset[0:0]
    92  	p.size = pbDataOffset
    93  	p.num = 0
    94  	p.prevKey = nil
    95  	p.prevValue = nil
    96  	p.prevHasShared = false
    97  	p.sharedKey = nil
    98  	p.sharedNum = 0
    99  }
   100  
   101  func (p *pageBlock) writeHeader() {
   102  	binary.BigEndian.PutUint16(p.data[0:2], p.header.version)
   103  	binary.BigEndian.PutUint32(p.data[2:6], p.header.num)
   104  	binary.BigEndian.PutUint32(p.data[6:10], p.header.intIndexOffset)
   105  }
   106  
   107  func (p *pageBlock) readHeader() {
   108  	p.header.version = binary.BigEndian.Uint16(p.data[0:2])
   109  	p.header.num = binary.BigEndian.Uint32(p.data[2:6])
   110  	p.header.intIndexOffset = binary.BigEndian.Uint32(p.data[6:10])
   111  }
   112  
   113  func (p *pageBlock) getVersion() uint32 {
   114  	return uint32(p.header.version)
   115  }
   116  
   117  func (p *pageBlock) getIntIndexSize() uint32 {
   118  	return uint32(p.num * itemOffsetSize)
   119  }
   120  
   121  func (p *pageBlock) writeItem(key, value []byte) (n uint32) {
   122  	if p.header.version == atVersionPrefixBlockCompress {
   123  		n = p.writeItemPrefixCompress(key, value)
   124  	} else {
   125  		n = p.writeItemDefault(key, value)
   126  	}
   127  
   128  	p.num++
   129  	return n
   130  }
   131  
   132  func (p *pageBlock) writeSharedInternal(shared int) uint32 {
   133  	var keySize uint32
   134  
   135  	if !p.prevHasShared {
   136  		if shared >= itemSharedMinLength {
   137  			keySize = uint32(len(p.prevKey)) + 3
   138  		} else {
   139  			keySize = uint32(len(p.prevKey)) + 1
   140  		}
   141  	} else {
   142  		keySize = uint32(len(p.prevKey[len(p.sharedKey):])) + 1
   143  	}
   144  
   145  	valueSize := uint32(len(p.prevValue))
   146  	sz := keySize + valueSize + itemHeaderLen
   147  	nextSize := p.allocBuf(sz)
   148  
   149  	p.itemsOffset = append(p.itemsOffset, p.size)
   150  	itemBuf := p.data[p.size:nextSize]
   151  	binary.BigEndian.PutUint16(itemBuf[0:itemHeaderLen], uint16(keySize))
   152  
   153  	if !p.prevHasShared {
   154  		if shared >= itemSharedMinLength {
   155  			itemBuf[itemHeaderLen] = itemSharedMin
   156  			binary.BigEndian.PutUint16(itemBuf[itemHeaderLen+1:itemHeaderLen+3], uint16(len(p.sharedKey)))
   157  			copy(itemBuf[itemHeaderLen+3:itemHeaderLen+keySize], p.prevKey)
   158  		} else {
   159  			itemBuf[itemHeaderLen] = itemSharedMax
   160  			copy(itemBuf[itemHeaderLen+1:itemHeaderLen+keySize], p.prevKey)
   161  		}
   162  	} else {
   163  		itemBuf[itemHeaderLen] = uint8(p.num - p.sharedNum)
   164  		copy(itemBuf[itemHeaderLen+1:itemHeaderLen+keySize], p.prevKey[len(p.sharedKey):])
   165  	}
   166  
   167  	copy(itemBuf[itemHeaderLen+keySize:sz], p.prevValue)
   168  
   169  	p.size = nextSize
   170  
   171  	return sz
   172  }
   173  
   174  func (p *pageBlock) writeItemDefault(key, value []byte) uint32 {
   175  	keySize := uint32(len(key))
   176  	valueSize := uint32(len(value))
   177  
   178  	sz := keySize + valueSize + itemHeaderLen
   179  	nextSize := p.allocBuf(sz)
   180  
   181  	p.itemsOffset = append(p.itemsOffset, p.size)
   182  	itemBuf := p.data[p.size:nextSize]
   183  	binary.BigEndian.PutUint16(itemBuf[0:itemHeaderLen], uint16(keySize))
   184  	copy(itemBuf[itemHeaderLen:itemHeaderLen+keySize], key)
   185  	copy(itemBuf[itemHeaderLen+keySize:sz], value)
   186  	p.size = nextSize
   187  
   188  	return sz
   189  }
   190  
   191  func (p *pageBlock) writeItemPrefixCompress(key, value []byte) uint32 {
   192  	if p.prevKey == nil && p.prevValue == nil {
   193  		p.prevKey = make([]byte, 0, 1<<7)
   194  		p.prevKey = append(p.prevKey[:0], key...)
   195  		p.prevValue = value
   196  		p.prevHasShared = false
   197  		p.sharedKey = make([]byte, 0, 1<<6)
   198  		return 0
   199  	}
   200  
   201  	shared := 0
   202  	n := len(key)
   203  	if n > len(p.prevKey) {
   204  		n = len(p.prevKey)
   205  	}
   206  	asUint64 := func(b []byte, i int) uint64 {
   207  		return binary.LittleEndian.Uint64(b[i:])
   208  	}
   209  
   210  	for shared < n-7 && asUint64(key, shared) == asUint64(p.prevKey, shared) {
   211  		shared += 8
   212  	}
   213  	for shared < n && key[shared] == p.prevKey[shared] {
   214  		shared++
   215  	}
   216  
   217  	if shared > 0 {
   218  		sharedKeyLength := len(p.sharedKey)
   219  		if shared < itemSharedMinLength || shared < sharedKeyLength {
   220  			shared = 0
   221  		} else {
   222  			if !p.prevHasShared {
   223  				p.sharedKey = append(p.sharedKey[:0], key[:shared]...)
   224  				p.sharedNum = p.num
   225  			} else if shared > sharedKeyLength+4 || p.num-p.sharedNum >= itemSharedRestart {
   226  				shared = 0
   227  			} else {
   228  				shared = sharedKeyLength
   229  			}
   230  		}
   231  	}
   232  
   233  	sz := p.writeSharedInternal(shared)
   234  
   235  	p.prevKey = append(p.prevKey[:0], key...)
   236  	p.prevValue = value
   237  	if shared >= itemSharedMinLength {
   238  		p.prevHasShared = true
   239  	} else {
   240  		p.prevHasShared = false
   241  		p.sharedKey = p.sharedKey[:0]
   242  		p.sharedNum = 0
   243  	}
   244  
   245  	return sz
   246  }
   247  
   248  func (p *pageBlock) writeFinish() {
   249  	if p.header.version == atVersionPrefixBlockCompress && p.prevKey != nil && p.prevValue != nil {
   250  		_ = p.writeSharedInternal(0)
   251  	}
   252  
   253  	nextSize := p.allocBuf(p.getIntIndexSize())
   254  
   255  	intIndexBuf := p.data[p.size:nextSize]
   256  	intIndexPos := 0
   257  	for i := range p.itemsOffset {
   258  		binary.BigEndian.PutUint32(intIndexBuf[intIndexPos:intIndexPos+itemOffsetSize], p.itemsOffset[i])
   259  		intIndexPos += itemOffsetSize
   260  	}
   261  
   262  	p.header.num = uint32(p.num)
   263  	p.header.intIndexOffset = p.size
   264  	p.writeHeader()
   265  
   266  	p.size = nextSize
   267  	p.intIndex = p.data[p.header.intIndexOffset:p.size]
   268  
   269  	p.prevKey = nil
   270  	p.prevValue = nil
   271  	p.sharedKey = nil
   272  }
   273  
   274  func (p *pageBlock) getSharedKey(i int, key []byte, sharedCache *sharedInfo) ([]byte, []byte) {
   275  	offset := key[0]
   276  	switch offset {
   277  	case itemSharedMin:
   278  		return key[3:], nil
   279  	case itemSharedMax:
   280  		return key[1:], nil
   281  	default:
   282  		idx := i - int(offset)
   283  
   284  		if sharedCache != nil && sharedCache.idx == idx && len(sharedCache.key) >= itemSharedMinLength {
   285  			return sharedCache.key, key[1:]
   286  		}
   287  
   288  		itemOffset := p.getItemOffset(idx)
   289  		if itemOffset == 0 {
   290  			return nil, nil
   291  		}
   292  
   293  		keyOffset := itemOffset + itemHeaderLen
   294  		sharedKeySize := uint32(binary.BigEndian.Uint16(p.data[itemOffset:keyOffset]))
   295  		sharedKey := p.data[keyOffset : keyOffset+sharedKeySize]
   296  
   297  		sharedKeyOffset := sharedKey[0]
   298  		if sharedKeyOffset != itemSharedMin {
   299  			return nil, nil
   300  		}
   301  
   302  		sharedKeyLenght := uint32(binary.BigEndian.Uint16(sharedKey[1:]) + 3)
   303  		if sharedKeyLenght > sharedKeySize {
   304  			return nil, nil
   305  		}
   306  
   307  		if sharedCache != nil && sharedCache.idx != idx {
   308  			sharedCache.idx = idx
   309  			sharedCache.key = sharedKey[3:sharedKeyLenght]
   310  		}
   311  
   312  		return sharedKey[3:sharedKeyLenght], key[1:]
   313  	}
   314  }
   315  
   316  func (p *pageBlock) getItemOffset(i int) uint32 {
   317  	if i < 0 || i >= p.num {
   318  		return 0
   319  	}
   320  
   321  	pos := i * itemOffsetSize
   322  	itemOffset := binary.BigEndian.Uint32(p.intIndex[pos : pos+itemOffsetSize])
   323  
   324  	return itemOffset
   325  }
   326  
   327  func (p *pageBlock) getKey(i int) ([]byte, []byte) {
   328  	itemOffset := p.getItemOffset(i)
   329  	if itemOffset == 0 {
   330  		return nil, nil
   331  	}
   332  
   333  	keyOffset := itemOffset + itemHeaderLen
   334  	keySize := uint32(binary.BigEndian.Uint16(p.data[itemOffset:keyOffset]))
   335  	key := p.data[keyOffset : keyOffset+keySize]
   336  
   337  	if p.header.version == atVersionPrefixBlockCompress {
   338  		return p.getSharedKey(i, key, nil)
   339  	}
   340  
   341  	return key, nil
   342  }
   343  
   344  func (p *pageBlock) getMaxKey() []byte {
   345  	pos := p.num - 1
   346  	itemOffset := p.getItemOffset(pos)
   347  	if itemOffset == 0 {
   348  		return nil
   349  	}
   350  
   351  	keyOffset := itemOffset + itemHeaderLen
   352  	keySize := uint32(binary.BigEndian.Uint16(p.data[itemOffset:keyOffset]))
   353  	key := p.data[keyOffset : keyOffset+keySize]
   354  
   355  	if p.header.version == atVersionPrefixBlockCompress {
   356  		sharedKey1, sharedKey2 := p.getSharedKey(pos, key, nil)
   357  		sk1l := len(sharedKey1)
   358  		sk2l := len(sharedKey2)
   359  		rawKey := make([]byte, sk1l+sk2l)
   360  		copy(rawKey[:sk1l], sharedKey1)
   361  		copy(rawKey[sk1l:], sharedKey2)
   362  
   363  		return rawKey
   364  	}
   365  
   366  	return key
   367  }
   368  
   369  func (p *pageBlock) getKV(i int) ([]byte, []byte) {
   370  	itemOffset := p.getItemOffset(i)
   371  	if itemOffset == 0 {
   372  		return nil, nil
   373  	}
   374  
   375  	var itemSize uint32
   376  	if i == p.num-1 {
   377  		itemSize = p.header.intIndexOffset - itemOffset
   378  	} else {
   379  		itemSize = p.getItemOffset(i+1) - itemOffset
   380  	}
   381  
   382  	keyOffset := itemOffset + itemHeaderLen
   383  	keySize := uint32(binary.BigEndian.Uint16(p.data[itemOffset:keyOffset]))
   384  	key := p.data[keyOffset : keyOffset+keySize]
   385  	valueSize := itemSize - keySize - itemHeaderLen
   386  	value := p.data[keyOffset+keySize : keyOffset+keySize+valueSize]
   387  
   388  	return key, value
   389  }
   390  
   391  func (p *pageBlock) getSharedKV(i int, sharedCache *sharedInfo) ([]byte, []byte, []byte) {
   392  	key, value := p.getKV(i)
   393  	if key == nil {
   394  		return nil, nil, nil
   395  	}
   396  
   397  	if p.header.version == atVersionPrefixBlockCompress {
   398  		sharedKey1, sharedKey2 := p.getSharedKey(i, key, sharedCache)
   399  		return sharedKey1, sharedKey2, value
   400  	}
   401  
   402  	return key, nil, value
   403  
   404  }
   405  
   406  func (p *pageBlock) get(key []byte) ([]byte, bool, internalKeyKind) {
   407  	pos := p.findKeyByIntIndex(key)
   408  	sharedKey1, sharedKey2, value := p.getSharedKV(pos, nil)
   409  	if sharedKey1 == nil || !atEqual(sharedKey1, sharedKey2, key) {
   410  		return nil, false, internalKeyKindInvalid
   411  	}
   412  
   413  	return value, true, internalKeyKindSet
   414  }
   415  
   416  func (p *pageBlock) newIter(o *iterOptions) *pageBlockIterator {
   417  	iter := pbIterPool.Get().(*pageBlockIterator)
   418  	iter.pb = p
   419  
   420  	if p.header.version == atVersionPrefixBlockCompress {
   421  		if cap(iter.keyBuf) == 0 {
   422  			iter.keyBuf = make([]byte, 0, 1<<7)
   423  		}
   424  		if iter.sharedCache == nil {
   425  			iter.sharedCache = &sharedInfo{idx: -1, key: nil}
   426  		}
   427  	}
   428  
   429  	return iter
   430  }
   431  
   432  func (p *pageBlock) allocBuf(sz uint32) uint32 {
   433  	newSize := int(p.size + sz)
   434  
   435  	for {
   436  		if newSize >= len(p.data) {
   437  			p.data = append(p.data, dataExpandBuf[:]...)
   438  		} else {
   439  			break
   440  		}
   441  	}
   442  
   443  	return uint32(newSize)
   444  }
   445  
   446  func (p *pageBlock) bytes() []byte {
   447  	return p.data
   448  }
   449  
   450  func (p *pageBlock) inuseBytes() uint32 {
   451  	return p.size
   452  }
   453  
   454  func (p *pageBlock) totalBytes() uint64 {
   455  	return uint64(cap(p.data))
   456  }
   457  
   458  func (p *pageBlock) close() error {
   459  	return nil
   460  }
   461  
   462  func (p *pageBlock) empty() bool {
   463  	return p.num == 0
   464  }
   465  
   466  func (p *pageBlock) itemCount() int {
   467  	return p.num
   468  }
   469  
   470  func (p *pageBlock) findKeyByIntIndex(key []byte) int {
   471  	return sort.Search(p.num, func(i int) bool {
   472  		sharedKey1, sharedKey2 := p.getKey(i)
   473  		return atCompare(sharedKey1, sharedKey2, key) != -1
   474  	})
   475  }