github.com/matrixorigin/matrixone@v0.7.0/pkg/container/hashtable/int64_hash_map.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashtable
    16  
    17  import (
    18  	"unsafe"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    21  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    22  )
    23  
    24  type Int64HashMapCell struct {
    25  	Key    uint64
    26  	Mapped uint64
    27  }
    28  
    29  type Int64HashMap struct {
    30  	blockCellCntBits uint8
    31  	blockCellCnt     uint64
    32  	blockMaxElemCnt  uint64
    33  	cellCntMask      uint64
    34  	//confCnt     uint64
    35  
    36  	cellCnt uint64
    37  	elemCnt uint64
    38  	rawData [][]byte
    39  	cells   [][]Int64HashMapCell
    40  }
    41  
    42  var intCellSize int64
    43  
    44  func init() {
    45  	intCellSize = int64(unsafe.Sizeof(Int64HashMapCell{}))
    46  }
    47  
    48  func (ht *Int64HashMap) Free(m *mpool.MPool) {
    49  	for i := range ht.rawData {
    50  		if len(ht.rawData[i]) > 0 {
    51  			m.Free(ht.rawData[i])
    52  		}
    53  		ht.rawData[i], ht.cells[i] = nil, nil
    54  	}
    55  	ht.rawData, ht.cells = nil, nil
    56  }
    57  
    58  func (ht *Int64HashMap) Init(m *mpool.MPool) (err error) {
    59  	ht.blockCellCntBits = kInitialCellCntBits
    60  	ht.blockCellCnt = kInitialCellCnt
    61  	ht.blockMaxElemCnt = kInitialCellCnt * kLoadFactorNumerator / kLoadFactorDenominator
    62  	ht.cellCntMask = kInitialCellCnt - 1
    63  	ht.elemCnt = 0
    64  	ht.cellCnt = kInitialCellCnt
    65  
    66  	ht.rawData = make([][]byte, 1)
    67  	ht.cells = make([][]Int64HashMapCell, 1)
    68  	if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt) * int(intCellSize)); err == nil {
    69  		ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt)
    70  	}
    71  	return
    72  }
    73  
    74  func (ht *Int64HashMap) InsertBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error {
    75  	if err := ht.resizeOnDemand(n, m); err != nil {
    76  		return err
    77  	}
    78  
    79  	if hashes[0] == 0 {
    80  		Int64BatchHash(keysPtr, &hashes[0], n)
    81  	}
    82  
    83  	keys := unsafe.Slice((*uint64)(keysPtr), n)
    84  	for i, key := range keys {
    85  		cell := ht.findCell(hashes[i], key)
    86  		if cell.Mapped == 0 {
    87  			ht.elemCnt++
    88  			cell.Key = key
    89  			cell.Mapped = ht.elemCnt
    90  		}
    91  		values[i] = cell.Mapped
    92  	}
    93  	return nil
    94  }
    95  
    96  func (ht *Int64HashMap) InsertBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error {
    97  	if err := ht.resizeOnDemand(n, m); err != nil {
    98  		return err
    99  	}
   100  
   101  	if hashes[0] == 0 {
   102  		Int64BatchHash(keysPtr, &hashes[0], n)
   103  	}
   104  
   105  	keys := unsafe.Slice((*uint64)(keysPtr), n)
   106  	for i, key := range keys {
   107  		if zValues[i] == 0 {
   108  			continue
   109  		}
   110  		cell := ht.findCell(hashes[i], key)
   111  		if cell.Mapped == 0 {
   112  			ht.elemCnt++
   113  			cell.Key = key
   114  			cell.Mapped = ht.elemCnt
   115  		}
   116  		values[i] = cell.Mapped
   117  	}
   118  	return nil
   119  }
   120  
   121  func (ht *Int64HashMap) FindBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) {
   122  	if hashes[0] == 0 {
   123  		Int64BatchHash(keysPtr, &hashes[0], n)
   124  	}
   125  
   126  	keys := unsafe.Slice((*uint64)(keysPtr), n)
   127  	for i, key := range keys {
   128  		cell := ht.findCell(hashes[i], key)
   129  		values[i] = cell.Mapped
   130  	}
   131  }
   132  
   133  func (ht *Int64HashMap) FindBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) {
   134  	if hashes[0] == 0 {
   135  		Int64BatchHash(keysPtr, &hashes[0], n)
   136  	}
   137  
   138  	keys := unsafe.Slice((*uint64)(keysPtr), n)
   139  	for i, key := range keys {
   140  		if zValues[i] == 0 {
   141  			values[i] = 0
   142  			continue
   143  		}
   144  		cell := ht.findCell(hashes[i], key)
   145  		values[i] = cell.Mapped
   146  	}
   147  }
   148  
   149  func (ht *Int64HashMap) findCell(hash uint64, key uint64) *Int64HashMapCell {
   150  	for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask {
   151  		blockId := idx / ht.blockCellCnt
   152  		cellId := idx % ht.blockCellCnt
   153  		cell := &ht.cells[blockId][cellId]
   154  		if cell.Key == key || cell.Mapped == 0 {
   155  			return cell
   156  		}
   157  	}
   158  	return nil
   159  }
   160  
   161  func (ht *Int64HashMap) findEmptyCell(hash uint64, key uint64) *Int64HashMapCell {
   162  	for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask {
   163  		blockId := idx / ht.blockCellCnt
   164  		cellId := idx % ht.blockCellCnt
   165  		cell := &ht.cells[blockId][cellId]
   166  		if cell.Mapped == 0 {
   167  			return cell
   168  		}
   169  	}
   170  	return nil
   171  }
   172  
   173  func (ht *Int64HashMap) resizeOnDemand(n int, m *mpool.MPool) error {
   174  	targetCnt := ht.elemCnt + uint64(n)
   175  	if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt {
   176  		return nil
   177  	}
   178  
   179  	var err error
   180  	if len(ht.rawData) == 1 {
   181  		newCellCntBits := ht.blockCellCntBits + 2
   182  		newCellCnt := uint64(1 << newCellCntBits)
   183  		newBlockMaxElemCnt := newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator
   184  		for newBlockMaxElemCnt < targetCnt {
   185  			newCellCntBits++
   186  			newCellCnt <<= 1
   187  			newBlockMaxElemCnt = newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator
   188  		}
   189  
   190  		newAlloc := int(newCellCnt) * int(intCellSize)
   191  		if newAlloc <= mpool.GB {
   192  			// update hashTable cnt.
   193  			oldCellCnt := ht.blockCellCnt
   194  			oldCells0 := ht.cells[0]
   195  			oldData0 := ht.rawData[0]
   196  
   197  			ht.blockCellCntBits = newCellCntBits
   198  			ht.blockCellCnt = newCellCnt
   199  			ht.blockMaxElemCnt = newBlockMaxElemCnt
   200  			ht.cellCnt = newCellCnt
   201  			ht.cellCntMask = newCellCnt - 1
   202  
   203  			ht.rawData[0], err = m.Alloc(newAlloc)
   204  			if err != nil {
   205  				return err
   206  			}
   207  			blockData := ht.rawData[0]
   208  			// This can be optimized to SIMD by Go compiler, according to https://codereview.appspot.com/137880043
   209  			for i := range blockData {
   210  				blockData[i] = 0
   211  			}
   212  			ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt)
   213  
   214  			// rearrange the cells
   215  			var hashes [256]uint64
   216  			for i := uint64(0); i < oldCellCnt; i += 256 {
   217  				cells := oldCells0[i : i+256]
   218  				Int64CellBatchHash(unsafe.Pointer(&cells[0]), &hashes[0], 256)
   219  				for j := range cells {
   220  					cell := &cells[j]
   221  					if cell.Mapped != 0 {
   222  						newCell := ht.findEmptyCell(hashes[j], cell.Key)
   223  						*newCell = *cell
   224  					}
   225  				}
   226  			}
   227  
   228  			m.Free(oldData0)
   229  			return nil
   230  		}
   231  	}
   232  
   233  	// double the blocks
   234  	oldBlockNum := len(ht.rawData)
   235  	oldCells := ht.cells
   236  	oldData := ht.rawData
   237  
   238  	ht.rawData = make([][]byte, oldBlockNum*2)
   239  	ht.cells = make([][]Int64HashMapCell, oldBlockNum*2)
   240  	ht.cellCnt = ht.blockCellCnt * uint64(len(ht.rawData))
   241  	ht.cellCntMask = ht.cellCnt - 1
   242  
   243  	for i := range ht.rawData {
   244  		ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt) * int(intCellSize))
   245  		if err != nil {
   246  			return err
   247  		}
   248  		blockData := ht.rawData[i]
   249  		for j := range blockData {
   250  			blockData[j] = 0
   251  		}
   252  		ht.cells[i] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt)
   253  	}
   254  
   255  	// rearrange the cells
   256  	var hashes [256]uint64
   257  
   258  	for i := 0; i < oldBlockNum; i++ {
   259  		for j := uint64(0); j < ht.blockCellCnt; j += 256 {
   260  			cells := oldCells[i][j : j+256]
   261  			Int64CellBatchHash(unsafe.Pointer(&cells[0]), &hashes[0], 256)
   262  			for k := range cells {
   263  				cell := &cells[k]
   264  				if cell.Mapped != 0 {
   265  					newCell := ht.findEmptyCell(hashes[k], cell.Key)
   266  					*newCell = *cell
   267  				}
   268  			}
   269  		}
   270  		m.Free(oldData[i])
   271  	}
   272  
   273  	return nil
   274  }
   275  
   276  func (ht *Int64HashMap) Cardinality() uint64 {
   277  	return ht.elemCnt
   278  }
   279  
   280  func (ht *Int64HashMap) Size() int64 {
   281  	// 41 is the fixed size of Int64HashMap
   282  	ret := int64(41)
   283  	for i := range ht.rawData {
   284  		ret += int64(len(ht.rawData[i]))
   285  		// 16 is the len of ht.cells[i]
   286  		ret += 16
   287  	}
   288  	return ret
   289  }
   290  
   291  type Int64HashMapIterator struct {
   292  	table *Int64HashMap
   293  	pos   uint64
   294  }
   295  
   296  func (it *Int64HashMapIterator) Init(ht *Int64HashMap) {
   297  	it.table = ht
   298  }
   299  
   300  func (it *Int64HashMapIterator) Next() (cell *Int64HashMapCell, err error) {
   301  	for it.pos < it.table.cellCnt {
   302  		blockId := it.pos / it.table.blockCellCnt
   303  		cellId := it.pos % it.table.blockCellCnt
   304  		cell = &it.table.cells[blockId][cellId]
   305  		if cell.Mapped != 0 {
   306  			break
   307  		}
   308  		it.pos++
   309  	}
   310  
   311  	if it.pos >= it.table.cellCnt {
   312  		err = moerr.NewInternalErrorNoCtx("out of range")
   313  		return
   314  	}
   315  
   316  	it.pos++
   317  
   318  	return
   319  }