github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/internal/bindex/succinct_map.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bindex
    16  
    17  import (
    18  	"arena"
    19  	"encoding/binary"
    20  	"sort"
    21  )
    22  
// Format constants for the serialized succinct map.
const (
	// SuccinctVersion is the format version stored in the header.
	SuccinctVersion      = 1
	// SuccinctShardItemMax is the divisor InitWriter applies to the expected
	// item count when sizing the shard table; it is also the initial
	// per-shard capacity used by Add (Store starts at half of it).
	SuccinctShardItemMax = 1024
	// SuccinctHeaderSize is the encoded header size in bytes:
	// version (2) + reserved (2) + shards (4).
	SuccinctHeaderSize   = 8
	// SuccinctShardSize is the encoded size of one shard table entry in
	// bytes: offset (4) + length (4).
	SuccinctShardSize    = 8
	// SuccinctItem32Size is the encoded size of an Item32: key (4) + value (4).
	SuccinctItem32Size   = 8
	// SuccinctItem64Size is the encoded size of an Item64: key (4) + value (8).
	SuccinctItem64Size   = 12
)
    31  
// SuccinctMap maps uint32 keys to uint32 or uint64 values. Items are spread
// over shards by key % shards. The write path (InitWriter, Store/Add,
// Serialize) builds per-shard slices and encodes them into a flat byte
// buffer; the read path (SetReader, Get) looks keys up directly in that
// buffer.
type SuccinctMap struct {
	// header is the format header: written by Serialize, parsed by SetReader.
	header Header
	// size is the serialized byte size accumulated on the write path.
	size   uint32
	// length is the expected item count passed to InitWriter.
	length uint32
	// type32 selects uint32 values (data32) when true, uint64 values
	// (data64) when false.
	type32 bool
	// data is the serialized buffer: supplied through SetReader/SetWriter or
	// allocated by Serialize when none was supplied.
	data   []byte
	// data32 holds the per-shard items while building a uint32-valued map.
	data32 []Item32Array
	// data64 holds the per-shard items while building a uint64-valued map.
	data64 []Item64Array
	// arena backs the write-path allocations; created by InitWriter and
	// released by Finish.
	arena  *arena.Arena
}
    42  
// Header is the fixed-size prefix of the serialized map. It is encoded
// big-endian as version (2 bytes), reserved (2 bytes), shards (4 bytes).
type Header struct {
	// version is the format version (SuccinctVersion when written here).
	version  uint16
	// reserved is written as zero by this file and not interpreted.
	reserved uint16
	// shards is the number of entries in the shard table that follows.
	shards   uint32
}
    48  
// Shard is one shard table entry, encoded big-endian as offset (4 bytes)
// followed by length (4 bytes).
type Shard struct {
	// offset is the byte position, from the start of the buffer, of the
	// shard's first item.
	offset uint32
	// length is the number of items in the shard (not a byte count).
	length uint32
}
    53  
// Item32 is a key/value pair with a 32-bit value. It is encoded big-endian
// as key (4 bytes) followed by value (4 bytes).
type Item32 struct {
	key   uint32
	value uint32
}
    58  
// Item64 is a key/value pair with a 64-bit value. It is encoded big-endian
// as key (4 bytes) followed by value (8 bytes).
type Item64 struct {
	key   uint32
	value uint64
}
    63  
    64  type Item32Array []Item32
    65  
    66  func (i32 Item32Array) Len() int {
    67  	return len(i32)
    68  }
    69  
    70  func (i32 Item32Array) Swap(i, j int) {
    71  	i32[i], i32[j] = i32[j], i32[i]
    72  }
    73  
    74  func (i32 Item32Array) Less(i, j int) bool {
    75  	return i32[i].key < i32[j].key
    76  }
    77  
    78  type Item64Array []Item64
    79  
    80  func (i64 Item64Array) Len() int {
    81  	return len(i64)
    82  }
    83  
    84  func (i64 Item64Array) Swap(i, j int) {
    85  	i64[i], i64[j] = i64[j], i64[i]
    86  }
    87  
    88  func (i64 Item64Array) Less(i, j int) bool {
    89  	return i64[i].key < i64[j].key
    90  }
    91  
    92  func NewSuccinctMap(type32 bool) *SuccinctMap {
    93  	m := &SuccinctMap{
    94  		header: Header{version: SuccinctVersion, reserved: 0, shards: 0},
    95  		size:   0,
    96  		length: 0,
    97  		type32: type32,
    98  		data:   nil,
    99  		data32: nil,
   100  		data64: nil,
   101  		arena:  nil,
   102  	}
   103  
   104  	return m
   105  }
   106  
// Size returns the serialized byte size accumulated on the write path:
// header, shard table and one entry per stored item. SetReader does not
// update it, and Finish resets it to SuccinctHeaderSize.
func (s *SuccinctMap) Size() uint32 {
	return s.size
}
   110  
// Length returns the item count that was passed to InitWriter, or 0 after
// Finish. It is not recomputed from the items actually stored.
func (s *SuccinctMap) Length() uint32 {
	return s.length
}
   114  
// GetData returns the current serialized buffer without copying it; the
// result is nil if no buffer has been set or produced yet.
func (s *SuccinctMap) GetData() []byte {
	return s.data
}
   118  
   119  func (s *SuccinctMap) SetReader(d []byte) bool {
   120  	if d == nil || len(d) <= SuccinctHeaderSize {
   121  		return false
   122  	}
   123  
   124  	s.data = d
   125  	s.header = s.readHeader(s.data)
   126  
   127  	return true
   128  }
   129  
   130  func (s *SuccinctMap) InitWriter(count uint32) {
   131  	shards := (count / SuccinctShardItemMax) + 1<<4
   132  
   133  	s.header = Header{version: SuccinctVersion, reserved: 0, shards: shards}
   134  	s.size = SuccinctHeaderSize + shards*SuccinctShardSize
   135  	s.length = count
   136  	s.data = nil
   137  	s.arena = arena.NewArena()
   138  
   139  	if s.type32 {
   140  		s.data32 = arena.MakeSlice[Item32Array](s.arena, int(shards), int(shards))
   141  	} else {
   142  		s.data64 = arena.MakeSlice[Item64Array](s.arena, int(shards), int(shards))
   143  	}
   144  }
   145  
   146  func (s *SuccinctMap) SetWriter(d []byte) bool {
   147  	if d == nil || len(d) < int(s.size) || cap(d) < int(s.size) {
   148  		return false
   149  	}
   150  
   151  	s.data = d
   152  
   153  	return true
   154  }
   155  
   156  func (s *SuccinctMap) Store(key uint32, value any) {
   157  	switch value.(type) {
   158  	case uint32:
   159  		if s.type32 {
   160  			s.store32Internal(key, value.(uint32))
   161  		}
   162  		return
   163  	case uint64:
   164  		if !s.type32 {
   165  			s.store64Internal(key, value.(uint64))
   166  		}
   167  		return
   168  	default:
   169  		return
   170  	}
   171  }
   172  
   173  func (s *SuccinctMap) Add(key uint32, value any) {
   174  	switch value.(type) {
   175  	case uint32:
   176  		if s.type32 {
   177  			s.add32Internal(key, value.(uint32))
   178  		}
   179  		return
   180  	case uint64:
   181  		if !s.type32 {
   182  			s.add64Internal(key, value.(uint64))
   183  		}
   184  		return
   185  	default:
   186  		return
   187  	}
   188  }
   189  
   190  func (s *SuccinctMap) Serialize() bool {
   191  	if s.type32 {
   192  		return s.serialize32Internal()
   193  	} else {
   194  		return s.serialize64Internal()
   195  	}
   196  }
   197  
   198  func (s *SuccinctMap) Load(key uint32) (any, bool) {
   199  	if s.type32 {
   200  		return s.load32Internal(key)
   201  	} else {
   202  		return s.load64Internal(key)
   203  	}
   204  }
   205  
   206  func (s *SuccinctMap) Get(key uint32) (any, bool) {
   207  	if s.type32 {
   208  		return s.get32Internal(key)
   209  	} else {
   210  		return s.get64Internal(key)
   211  	}
   212  }
   213  
   214  func (s *SuccinctMap) store32Internal(key uint32, value uint32) {
   215  	if s.header.shards <= 0 {
   216  		return
   217  	}
   218  
   219  	sid := key % s.header.shards
   220  
   221  	if len(s.data32[sid]) == 0 {
   222  		s.data32[sid] = arena.MakeSlice[Item32](s.arena, 0, SuccinctShardItemMax/2)
   223  	}
   224  
   225  	itemArray := &s.data32[sid]
   226  
   227  	index := sort.Search(len(*itemArray),
   228  		func(i int) bool {
   229  			var ret int
   230  			if (*itemArray)[i].key == key {
   231  				ret = 0
   232  			} else if (*itemArray)[i].key < key {
   233  				ret = -1
   234  			} else {
   235  				ret = 1
   236  			}
   237  			return ret != -1
   238  		},
   239  	)
   240  
   241  	exist := len(*itemArray) > 0 && index < len(*itemArray) && (*itemArray)[index].key == key
   242  	if !exist {
   243  		*itemArray = append(*itemArray, Item32{})
   244  		copy((*itemArray)[index+1:], (*itemArray)[index:])
   245  	}
   246  
   247  	item := &(*itemArray)[index]
   248  	item.key = key
   249  	item.value = value
   250  
   251  	s.size += SuccinctItem32Size
   252  }
   253  
   254  func (s *SuccinctMap) store64Internal(key uint32, value uint64) {
   255  	if s.header.shards <= 0 {
   256  		return
   257  	}
   258  
   259  	sid := key % s.header.shards
   260  
   261  	if len(s.data64[sid]) == 0 {
   262  		s.data64[sid] = arena.MakeSlice[Item64](s.arena, 0, SuccinctShardItemMax/2)
   263  	}
   264  
   265  	itemArray := &s.data64[sid]
   266  
   267  	index := sort.Search(len(*itemArray),
   268  		func(i int) bool {
   269  			var ret int
   270  			if (*itemArray)[i].key == key {
   271  				ret = 0
   272  			} else if (*itemArray)[i].key < key {
   273  				ret = -1
   274  			} else {
   275  				ret = 1
   276  			}
   277  			return ret != -1
   278  		},
   279  	)
   280  
   281  	exist := len(*itemArray) > 0 && index < len(*itemArray) && (*itemArray)[index].key == key
   282  	if !exist {
   283  		*itemArray = append(*itemArray, Item64{})
   284  		copy((*itemArray)[index+1:], (*itemArray)[index:])
   285  	}
   286  
   287  	item := &(*itemArray)[index]
   288  	item.key = key
   289  	item.value = value
   290  
   291  	s.size += SuccinctItem64Size
   292  }
   293  
   294  func (s *SuccinctMap) add32Internal(key uint32, value uint32) {
   295  	if s.header.shards <= 0 {
   296  		return
   297  	}
   298  
   299  	sid := key % s.header.shards
   300  
   301  	if len(s.data32[sid]) == 0 {
   302  		s.data32[sid] = arena.MakeSlice[Item32](s.arena, 0, SuccinctShardItemMax)
   303  	}
   304  
   305  	s.data32[sid] = append(s.data32[sid], Item32{key: key, value: value})
   306  
   307  	s.size += SuccinctItem32Size
   308  }
   309  
   310  func (s *SuccinctMap) add64Internal(key uint32, value uint64) {
   311  	if s.header.shards <= 0 {
   312  		return
   313  	}
   314  
   315  	sid := key % s.header.shards
   316  
   317  	if len(s.data64[sid]) == 0 {
   318  		s.data64[sid] = arena.MakeSlice[Item64](s.arena, 0, SuccinctShardItemMax)
   319  	}
   320  
   321  	s.data64[sid] = append(s.data64[sid], Item64{key: key, value: value})
   322  
   323  	s.size += SuccinctItem64Size
   324  }
   325  
   326  func (s *SuccinctMap) serialize32Internal() bool {
   327  	if s.size <= SuccinctHeaderSize || s.length <= 0 || len(s.data32) <= 0 {
   328  		return false
   329  	}
   330  
   331  	shardOffset := uint32(0)
   332  	itemOffset := SuccinctHeaderSize + s.header.shards*SuccinctShardSize
   333  
   334  	if s.data == nil {
   335  		s.data = arena.MakeSlice[byte](s.arena, int(s.size), int(s.size))
   336  	}
   337  
   338  	s.writeHeader(s.data[shardOffset:], s.header)
   339  	shardOffset += SuccinctHeaderSize
   340  
   341  	for i := uint32(0); i < s.header.shards; i++ {
   342  		itemsLen := uint32(len(s.data32[i]))
   343  		s.writeShard(s.data[shardOffset:], Shard{offset: itemOffset, length: itemsLen})
   344  		shardOffset += SuccinctShardSize
   345  
   346  		if itemsLen <= 0 {
   347  			continue
   348  		}
   349  
   350  		sort.Sort(s.data32[i])
   351  		for j := uint32(0); j < itemsLen; j++ {
   352  			s.writeItem32(s.data[itemOffset:], s.data32[i][j])
   353  			itemOffset += SuccinctItem32Size
   354  		}
   355  	}
   356  
   357  	return true
   358  }
   359  
   360  func (s *SuccinctMap) serialize64Internal() bool {
   361  	if s.size <= SuccinctHeaderSize || s.length <= 0 || len(s.data64) <= 0 {
   362  		return false
   363  	}
   364  
   365  	shardOffset := uint32(0)
   366  	itemOffset := SuccinctHeaderSize + s.header.shards*SuccinctShardSize
   367  
   368  	if s.data == nil {
   369  		s.data = arena.MakeSlice[byte](s.arena, int(s.size), int(s.size))
   370  	}
   371  
   372  	s.writeHeader(s.data[shardOffset:], s.header)
   373  	shardOffset += SuccinctHeaderSize
   374  
   375  	for i := uint32(0); i < s.header.shards; i++ {
   376  		itemsLen := uint32(len(s.data64[i]))
   377  		s.writeShard(s.data[shardOffset:], Shard{offset: itemOffset, length: itemsLen})
   378  		shardOffset += SuccinctShardSize
   379  
   380  		if itemsLen <= 0 {
   381  			continue
   382  		}
   383  
   384  		sort.Sort(s.data64[i])
   385  		for j := uint32(0); j < itemsLen; j++ {
   386  			s.writeItem64(s.data[itemOffset:], s.data64[i][j])
   387  			itemOffset += SuccinctItem64Size
   388  		}
   389  	}
   390  
   391  	return true
   392  }
   393  
   394  func (s *SuccinctMap) load32Internal(key uint32) (uint32, bool) {
   395  	if len(s.data32) < int(s.header.shards) || s.header.shards <= 0 {
   396  		return 0, false
   397  	}
   398  
   399  	sid := key % s.header.shards
   400  
   401  	if len(s.data32[sid]) == 0 {
   402  		return 0, false
   403  	}
   404  
   405  	itemArray := &s.data32[sid]
   406  
   407  	ok, idx := s.findItem32Arr(key, *itemArray, len(*itemArray))
   408  	if !ok {
   409  		return 0, false
   410  	}
   411  
   412  	return (*itemArray)[idx].value, true
   413  }
   414  
   415  func (s *SuccinctMap) load64Internal(key uint32) (uint64, bool) {
   416  	if len(s.data64) < int(s.header.shards) || s.header.shards <= 0 {
   417  		return 0, false
   418  	}
   419  
   420  	sid := key % s.header.shards
   421  
   422  	if len(s.data64[sid]) == 0 {
   423  		return 0, false
   424  	}
   425  
   426  	itemArray := &s.data64[sid]
   427  
   428  	ok, idx := s.findItem64Arr(key, *itemArray, len(*itemArray))
   429  	if !ok {
   430  		return 0, false
   431  	}
   432  
   433  	return (*itemArray)[idx].value, true
   434  }
   435  
   436  func (s *SuccinctMap) get32Internal(key uint32) (uint32, bool) {
   437  	if len(s.data) <= SuccinctHeaderSize || s.header.shards <= 0 {
   438  		return 0, false
   439  	}
   440  
   441  	sid := key % s.header.shards
   442  	curOffset := SuccinctHeaderSize + sid*SuccinctShardSize
   443  
   444  	shard := s.readShard(s.data[curOffset:])
   445  	if shard.length <= 0 {
   446  		return 0, false
   447  	}
   448  
   449  	curOffset = shard.offset
   450  
   451  	ok, idx := s.findItem32(key, s.data[curOffset:], int(shard.length))
   452  	if !ok {
   453  		return 0, false
   454  	}
   455  
   456  	curOffset += uint32(idx * SuccinctItem32Size)
   457  	item32 := s.readItem32(s.data[curOffset:])
   458  
   459  	return item32.value, true
   460  }
   461  
   462  func (s *SuccinctMap) get64Internal(key uint32) (uint64, bool) {
   463  	if len(s.data) <= SuccinctHeaderSize || s.header.shards <= 0 {
   464  		return 0, false
   465  	}
   466  
   467  	sid := key % s.header.shards
   468  	curOffset := SuccinctHeaderSize + sid*SuccinctShardSize
   469  
   470  	shard := s.readShard(s.data[curOffset:])
   471  	if shard.length <= 0 {
   472  		return 0, false
   473  	}
   474  
   475  	curOffset = shard.offset
   476  
   477  	ok, idx := s.findItem64(key, s.data[curOffset:], int(shard.length))
   478  	if !ok {
   479  		return 0, false
   480  	}
   481  
   482  	curOffset += uint32(idx * SuccinctItem64Size)
   483  	item64 := s.readItem64(s.data[curOffset:])
   484  
   485  	return item64.value, true
   486  }
   487  
   488  func (s *SuccinctMap) Finish() {
   489  	s.size = SuccinctHeaderSize
   490  	s.length = 0
   491  	s.data32 = nil
   492  	s.data64 = nil
   493  	if s.arena != nil {
   494  		s.arena.Free()
   495  		s.arena = nil
   496  	}
   497  }
   498  
   499  func (s *SuccinctMap) writeHeader(buf []byte, header Header) {
   500  	binary.BigEndian.PutUint16(buf[0:], header.version)
   501  	binary.BigEndian.PutUint16(buf[2:], header.reserved)
   502  	binary.BigEndian.PutUint32(buf[4:], header.shards)
   503  }
   504  
   505  func (s *SuccinctMap) writeShard(buf []byte, shard Shard) {
   506  	binary.BigEndian.PutUint32(buf[0:], shard.offset)
   507  	binary.BigEndian.PutUint32(buf[4:], shard.length)
   508  }
   509  
   510  func (s *SuccinctMap) writeItem32(buf []byte, item32 Item32) {
   511  	binary.BigEndian.PutUint32(buf[0:], item32.key)
   512  	binary.BigEndian.PutUint32(buf[4:], item32.value)
   513  }
   514  
   515  func (s *SuccinctMap) writeItem64(buf []byte, item64 Item64) {
   516  	binary.BigEndian.PutUint32(buf[0:], item64.key)
   517  	binary.BigEndian.PutUint64(buf[4:], item64.value)
   518  }
   519  
   520  func (s *SuccinctMap) readHeader(buf []byte) Header {
   521  	header := Header{
   522  		version:  binary.BigEndian.Uint16(buf[0:]),
   523  		reserved: binary.BigEndian.Uint16(buf[2:]),
   524  		shards:   binary.BigEndian.Uint32(buf[4:]),
   525  	}
   526  
   527  	return header
   528  }
   529  
   530  func (s *SuccinctMap) readShard(buf []byte) Shard {
   531  	shard := Shard{
   532  		offset: binary.BigEndian.Uint32(buf[0:]),
   533  		length: binary.BigEndian.Uint32(buf[4:]),
   534  	}
   535  
   536  	return shard
   537  }
   538  
   539  func (s *SuccinctMap) readItem32(buf []byte) Item32 {
   540  	item32 := Item32{
   541  		key:   binary.BigEndian.Uint32(buf[0:]),
   542  		value: binary.BigEndian.Uint32(buf[4:]),
   543  	}
   544  
   545  	return item32
   546  }
   547  
   548  func (s *SuccinctMap) readItem64(buf []byte) Item64 {
   549  	item64 := Item64{
   550  		key:   binary.BigEndian.Uint32(buf[0:]),
   551  		value: binary.BigEndian.Uint64(buf[4:]),
   552  	}
   553  
   554  	return item64
   555  }
   556  
   557  func (s *SuccinctMap) findItem32(key uint32, buf []byte, n int) (bool, int) {
   558  	i, j := 0, n
   559  	for i < j {
   560  		h := int(uint(i+j) >> 1)
   561  		if binary.BigEndian.Uint32(buf[SuccinctItem32Size*h:]) < key {
   562  			i = h + 1
   563  		} else {
   564  			j = h
   565  		}
   566  	}
   567  
   568  	if i < n && binary.BigEndian.Uint32(buf[SuccinctItem32Size*i:]) == key {
   569  		return true, i
   570  	}
   571  
   572  	return false, 0
   573  }
   574  
   575  func (s *SuccinctMap) findItem32Arr(key uint32, arr Item32Array, n int) (bool, int) {
   576  	i, j := 0, n
   577  	for i < j {
   578  		h := int(uint(i+j) >> 1)
   579  		if arr[h].key < key {
   580  			i = h + 1
   581  		} else {
   582  			j = h
   583  		}
   584  	}
   585  
   586  	if i < n && arr[i].key == key {
   587  		return true, i
   588  	}
   589  
   590  	return false, 0
   591  }
   592  
   593  func (s *SuccinctMap) findItem64Arr(key uint32, arr Item64Array, n int) (bool, int) {
   594  	i, j := 0, n
   595  	for i < j {
   596  		h := int(uint(i+j) >> 1)
   597  		if arr[h].key < key {
   598  			i = h + 1
   599  		} else {
   600  			j = h
   601  		}
   602  	}
   603  
   604  	if i < n && arr[i].key == key {
   605  		return true, i
   606  	}
   607  
   608  	return false, 0
   609  }
   610  
   611  func (s *SuccinctMap) findItem64(key uint32, buf []byte, n int) (bool, int) {
   612  	i, j := 0, n
   613  	for i < j {
   614  		h := int(uint(i+j) >> 1)
   615  		if binary.BigEndian.Uint32(buf[SuccinctItem64Size*h:]) < key {
   616  			i = h + 1
   617  		} else {
   618  			j = h
   619  		}
   620  	}
   621  
   622  	if i < n && binary.BigEndian.Uint32(buf[SuccinctItem64Size*i:]) == key {
   623  		return true, i
   624  	}
   625  
   626  	return false, 0
   627  }