github.com/matrixorigin/matrixone@v0.7.0/pkg/container/hashtable/string_hash_map.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashtable
    16  
    17  import (
    18  	"unsafe"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    21  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    22  )
    23  
// StringRef is a pointer/length pair describing a run of bytes.
// NOTE(review): presumably a non-owning reference to string data; it is not
// used by the code in this file, so confirm against callers.
type StringRef struct {
	Ptr *byte // address of the first byte
	Len int   // number of bytes starting at Ptr
}
    28  
// StringHashMapCell is one slot of a StringHashMap.
//
// Cells are viewed directly over raw byte buffers via unsafe.Slice, so the
// field order and sizes define the in-memory layout and must not be changed
// casually.
type StringHashMapCell struct {
	// HashState is the three-word hash state of the stored key. Word 0 selects
	// the starting slot and the whole array is compared to detect a match.
	HashState [3]uint64
	// Mapped is the id assigned to the key. Zero marks an empty cell; inserted
	// keys receive ids starting from 1.
	Mapped uint64
}
    33  
// StrKeyPadding is a zero-initialised 16-byte array. It is not referenced in
// this file.
// NOTE(review): presumably used by callers to pad short keys — TODO confirm.
var StrKeyPadding [16]byte
    35  
// StringHashMap is an open-addressing hash table that maps three-word hash
// states to ids. Its cells live in one or more equally sized blocks whose
// memory is obtained from an mpool.MPool.
type StringHashMap struct {
	// blockCellCntBits is log2 of blockCellCnt.
	blockCellCntBits uint8
	// blockCellCnt is the number of cells in each block.
	blockCellCnt uint64
	// blockMaxElemCnt is the number of elements one block may hold before the
	// table is grown (blockCellCnt scaled by the load factor).
	blockMaxElemCnt uint64
	//confCnt     uint64

	// cellCnt is the total number of cells across all blocks. It is used as
	// cellCnt-1 to mask hash values, so it is expected to be a power of two.
	cellCnt uint64
	// elemCnt is the number of keys stored; it is also the last id handed out.
	elemCnt uint64
	// rawData holds the byte buffers backing each block.
	rawData [][]byte
	// cells holds typed views over the corresponding rawData buffers.
	cells [][]StringHashMapCell
}
    47  
    48  var strCellSize int64
    49  
    50  func init() {
    51  	strCellSize = int64(unsafe.Sizeof(StringHashMapCell{}))
    52  }
    53  
    54  func (ht *StringHashMap) Free(m *mpool.MPool) {
    55  	for i := range ht.rawData {
    56  		if len(ht.rawData[i]) > 0 {
    57  			m.Free(ht.rawData[i])
    58  		}
    59  		ht.rawData[i], ht.cells[i] = nil, nil
    60  	}
    61  	ht.rawData, ht.cells = nil, nil
    62  }
    63  
    64  func (ht *StringHashMap) Init(m *mpool.MPool) (err error) {
    65  	ht.blockCellCntBits = kInitialCellCntBits
    66  	ht.blockCellCnt = kInitialCellCnt
    67  	ht.blockMaxElemCnt = kInitialCellCnt * kLoadFactorNumerator / kLoadFactorDenominator
    68  	ht.elemCnt = 0
    69  	ht.cellCnt = kInitialCellCnt
    70  
    71  	ht.rawData = make([][]byte, 1)
    72  	ht.cells = make([][]StringHashMapCell, 1)
    73  	if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt) * int(strCellSize)); err == nil {
    74  		ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt)
    75  	}
    76  	return
    77  }
    78  
    79  func (ht *StringHashMap) InsertStringBatch(states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error {
    80  	if err := ht.resizeOnDemand(uint64(len(keys)), m); err != nil {
    81  		return err
    82  	}
    83  
    84  	BytesBatchGenHashStates(&keys[0], &states[0], len(keys))
    85  
    86  	for i := range keys {
    87  		cell := ht.findCell(&states[i])
    88  		if cell.Mapped == 0 {
    89  			ht.elemCnt++
    90  			cell.HashState = states[i]
    91  			cell.Mapped = ht.elemCnt
    92  		}
    93  		values[i] = cell.Mapped
    94  	}
    95  	return nil
    96  }
    97  
    98  func (ht *StringHashMap) InsertStringBatchWithRing(zValues []int64, states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error {
    99  	if err := ht.resizeOnDemand(uint64(len(keys)), m); err != nil {
   100  		return err
   101  	}
   102  
   103  	BytesBatchGenHashStates(&keys[0], &states[0], len(keys))
   104  
   105  	for i := range keys {
   106  		if zValues[i] == 0 {
   107  			continue
   108  		}
   109  
   110  		cell := ht.findCell(&states[i])
   111  		if cell.Mapped == 0 {
   112  			ht.elemCnt++
   113  			cell.HashState = states[i]
   114  			cell.Mapped = ht.elemCnt
   115  		}
   116  		values[i] = cell.Mapped
   117  	}
   118  	return nil
   119  }
   120  
   121  func (ht *StringHashMap) FindStringBatch(states [][3]uint64, keys [][]byte, values []uint64) {
   122  	BytesBatchGenHashStates(&keys[0], &states[0], len(keys))
   123  
   124  	for i := range keys {
   125  		cell := ht.findCell(&states[i])
   126  		values[i] = cell.Mapped
   127  	}
   128  }
   129  
   130  func (ht *StringHashMap) FindString24Batch(states [][3]uint64, keys [][3]uint64, values []uint64) {
   131  	Int192BatchGenHashStates(&keys[0], &states[0], len(keys))
   132  
   133  	for i := range keys {
   134  		cell := ht.findCell(&states[i])
   135  		values[i] = cell.Mapped
   136  	}
   137  }
   138  
   139  func (ht *StringHashMap) FindString32Batch(states [][3]uint64, keys [][4]uint64, values []uint64) {
   140  	Int256BatchGenHashStates(&keys[0], &states[0], len(keys))
   141  
   142  	for i := range keys {
   143  		cell := ht.findCell(&states[i])
   144  		values[i] = cell.Mapped
   145  	}
   146  }
   147  
   148  func (ht *StringHashMap) FindString40Batch(states [][3]uint64, keys [][5]uint64, values []uint64) {
   149  	Int320BatchGenHashStates(&keys[0], &states[0], len(keys))
   150  
   151  	for i := range keys {
   152  		cell := ht.findCell(&states[i])
   153  		values[i] = cell.Mapped
   154  	}
   155  }
   156  
// FindStringBatchWithRing is currently a no-op: it ignores all of its
// arguments and leaves values untouched.
func (ht *StringHashMap) FindStringBatchWithRing(states [][3]uint64, zValues []int64, keys [][]byte, values []uint64) {
	// XXX I think it is no use now.
}
   160  
   161  func (ht *StringHashMap) FindHashStateBatch(states [][3]uint64, values []uint64) {
   162  	for i := range states {
   163  		cell := ht.findCell(&states[i])
   164  		values[i] = cell.Mapped
   165  	}
   166  }
   167  
   168  func (ht *StringHashMap) findCell(state *[3]uint64) *StringHashMapCell {
   169  	mask := ht.cellCnt - 1
   170  	for idx := state[0] & mask; true; idx = (idx + 1) & mask {
   171  		blockId := idx / ht.blockCellCnt
   172  		cellId := idx % ht.blockCellCnt
   173  		cell := &ht.cells[blockId][cellId]
   174  		if cell.Mapped == 0 || cell.HashState == *state {
   175  			return cell
   176  		}
   177  	}
   178  	return nil
   179  }
   180  
   181  func (ht *StringHashMap) findEmptyCell(state *[3]uint64) *StringHashMapCell {
   182  	mask := ht.cellCnt - 1
   183  	for idx := state[0] & mask; true; idx = (idx + 1) & mask {
   184  		blockId := idx / ht.blockCellCnt
   185  		cellId := idx % ht.blockCellCnt
   186  		cell := &ht.cells[blockId][cellId]
   187  		if cell.Mapped == 0 {
   188  			return cell
   189  		}
   190  	}
   191  	return nil
   192  }
   193  
   194  func (ht *StringHashMap) resizeOnDemand(n uint64, m *mpool.MPool) error {
   195  	targetCnt := ht.elemCnt + n
   196  	if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt {
   197  		return nil
   198  	}
   199  
   200  	var err error
   201  	if len(ht.rawData) == 1 {
   202  		newCellCntBits := ht.blockCellCntBits + 2
   203  		newCellCnt := uint64(1 << newCellCntBits)
   204  		newBlockMaxElemCnt := newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator
   205  		for newBlockMaxElemCnt < targetCnt {
   206  			newCellCntBits++
   207  			newCellCnt <<= 1
   208  			newBlockMaxElemCnt = newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator
   209  		}
   210  
   211  		oldCellCnt := ht.blockCellCnt
   212  		oldCells0 := ht.cells[0]
   213  		oldData0 := ht.rawData[0]
   214  
   215  		newAlloc := int(newCellCnt) * int(strCellSize)
   216  		if newAlloc <= mpool.GB {
   217  			// update hashTable cnt.
   218  			ht.blockCellCntBits = newCellCntBits
   219  			ht.cellCnt = newCellCnt
   220  			ht.blockCellCnt = newCellCnt
   221  			ht.blockMaxElemCnt = newBlockMaxElemCnt
   222  
   223  			ht.rawData[0], err = m.Alloc(newAlloc)
   224  			if err != nil {
   225  				return err
   226  			}
   227  			blockData := ht.rawData[0]
   228  			for i := range blockData {
   229  				blockData[i] = 0
   230  			}
   231  			ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt)
   232  
   233  			// rearrange the cells
   234  			for i := uint64(0); i < oldCellCnt; i++ {
   235  				cell := &oldCells0[i]
   236  				if cell.Mapped != 0 {
   237  					newCell := ht.findEmptyCell(&cell.HashState)
   238  					*newCell = *cell
   239  				}
   240  			}
   241  
   242  			m.Free(oldData0)
   243  			return nil
   244  		}
   245  	}
   246  
   247  	// double the blocks
   248  	oldBlockNum := len(ht.rawData)
   249  	oldCells := ht.cells
   250  	oldData := ht.rawData
   251  
   252  	ht.rawData = make([][]byte, oldBlockNum*2)
   253  	ht.cells = make([][]StringHashMapCell, oldBlockNum*2)
   254  	ht.cellCnt = ht.blockCellCnt * uint64(len(ht.rawData))
   255  
   256  	for i := range ht.rawData {
   257  		ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt) * int(strCellSize))
   258  		if err != nil {
   259  			return err
   260  		}
   261  		blockData := ht.rawData[i]
   262  		for j := range blockData {
   263  			blockData[j] = 0
   264  		}
   265  		ht.cells[i] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt)
   266  	}
   267  
   268  	// rearrange the cells
   269  	for i := 0; i < oldBlockNum; i++ {
   270  		for j := uint64(0); j < ht.blockCellCnt; j++ {
   271  			cell := &oldCells[i][j]
   272  			if cell.Mapped != 0 {
   273  				newCell := ht.findEmptyCell(&cell.HashState)
   274  				*newCell = *cell
   275  			}
   276  		}
   277  		m.Free(oldData[i])
   278  	}
   279  
   280  	return nil
   281  }
   282  
// Cardinality returns the number of keys currently stored in the table.
func (ht *StringHashMap) Cardinality() uint64 {
	return ht.elemCnt
}
   286  
   287  func (ht *StringHashMap) Size() int64 {
   288  	// 33 is the origin size of StringHashMaps
   289  	ret := int64(33)
   290  	for i := range ht.rawData {
   291  		ret += int64(len(ht.rawData[i]))
   292  		// 32 is the len of ht.cells[i]
   293  		ret += 32
   294  	}
   295  	return ret
   296  }
   297  
// StringHashMapIterator walks over the occupied cells of a StringHashMap in
// cell-index order.
type StringHashMapIterator struct {
	table *StringHashMap // table being iterated
	pos   uint64         // global index of the next cell to examine
}
   302  
   303  func (it *StringHashMapIterator) Init(ht *StringHashMap) {
   304  	it.table = ht
   305  }
   306  
   307  func (it *StringHashMapIterator) Next() (cell *StringHashMapCell, err error) {
   308  	for it.pos < it.table.cellCnt {
   309  		blockId := it.pos / it.table.blockCellCnt
   310  		cellId := it.pos % it.table.blockCellCnt
   311  		cell = &it.table.cells[blockId][cellId]
   312  		if cell.Mapped != 0 {
   313  			break
   314  		}
   315  		it.pos++
   316  	}
   317  
   318  	if it.pos >= it.table.cellCnt {
   319  		err = moerr.NewInternalErrorNoCtx("out of range")
   320  		return
   321  	}
   322  	it.pos++
   323  
   324  	return
   325  }