github.com/matrixorigin/matrixone@v0.7.0/pkg/container/hashtable/int64_hash_map.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hashtable 16 17 import ( 18 "unsafe" 19 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/common/mpool" 22 ) 23 24 type Int64HashMapCell struct { 25 Key uint64 26 Mapped uint64 27 } 28 29 type Int64HashMap struct { 30 blockCellCntBits uint8 31 blockCellCnt uint64 32 blockMaxElemCnt uint64 33 cellCntMask uint64 34 //confCnt uint64 35 36 cellCnt uint64 37 elemCnt uint64 38 rawData [][]byte 39 cells [][]Int64HashMapCell 40 } 41 42 var intCellSize int64 43 44 func init() { 45 intCellSize = int64(unsafe.Sizeof(Int64HashMapCell{})) 46 } 47 48 func (ht *Int64HashMap) Free(m *mpool.MPool) { 49 for i := range ht.rawData { 50 if len(ht.rawData[i]) > 0 { 51 m.Free(ht.rawData[i]) 52 } 53 ht.rawData[i], ht.cells[i] = nil, nil 54 } 55 ht.rawData, ht.cells = nil, nil 56 } 57 58 func (ht *Int64HashMap) Init(m *mpool.MPool) (err error) { 59 ht.blockCellCntBits = kInitialCellCntBits 60 ht.blockCellCnt = kInitialCellCnt 61 ht.blockMaxElemCnt = kInitialCellCnt * kLoadFactorNumerator / kLoadFactorDenominator 62 ht.cellCntMask = kInitialCellCnt - 1 63 ht.elemCnt = 0 64 ht.cellCnt = kInitialCellCnt 65 66 ht.rawData = make([][]byte, 1) 67 ht.cells = make([][]Int64HashMapCell, 1) 68 if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt) * int(intCellSize)); err == nil { 69 ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt) 70 } 71 return 72 } 73 74 func (ht *Int64HashMap) InsertBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error { 75 if err := ht.resizeOnDemand(n, m); err != nil { 76 return err 77 } 78 79 if hashes[0] == 0 { 80 Int64BatchHash(keysPtr, &hashes[0], n) 81 } 82 83 keys := unsafe.Slice((*uint64)(keysPtr), n) 84 for i, key := range keys { 85 cell := ht.findCell(hashes[i], key) 86 if cell.Mapped == 0 { 87 ht.elemCnt++ 88 cell.Key = key 89 cell.Mapped = ht.elemCnt 90 } 91 values[i] = cell.Mapped 92 } 93 return nil 94 } 95 96 func (ht *Int64HashMap) InsertBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error { 97 if err := ht.resizeOnDemand(n, m); err != nil { 98 return err 99 } 100 101 if hashes[0] == 0 { 102 Int64BatchHash(keysPtr, &hashes[0], n) 103 } 104 105 keys := unsafe.Slice((*uint64)(keysPtr), n) 106 for i, key := range keys { 107 if zValues[i] == 0 { 108 continue 109 } 110 cell := ht.findCell(hashes[i], key) 111 if cell.Mapped == 0 { 112 ht.elemCnt++ 113 cell.Key = key 114 cell.Mapped = ht.elemCnt 115 } 116 values[i] = cell.Mapped 117 } 118 return nil 119 } 120 121 func (ht *Int64HashMap) FindBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) { 122 if hashes[0] == 0 { 123 Int64BatchHash(keysPtr, &hashes[0], n) 124 } 125 126 keys := unsafe.Slice((*uint64)(keysPtr), n) 127 for i, key := range keys { 128 cell := ht.findCell(hashes[i], key) 129 values[i] = cell.Mapped 130 } 131 } 132 133 func (ht *Int64HashMap) FindBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) { 134 if hashes[0] == 0 { 135 Int64BatchHash(keysPtr, &hashes[0], n) 136 } 137 138 keys := unsafe.Slice((*uint64)(keysPtr), n) 139 for i, key := range keys { 140 if zValues[i] == 0 { 141 values[i] = 0 142 continue 143 } 144 cell := ht.findCell(hashes[i], key) 145 values[i] = cell.Mapped 146 } 147 } 148 149 func (ht *Int64HashMap) findCell(hash uint64, key uint64) *Int64HashMapCell { 150 for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask { 151 blockId := idx / ht.blockCellCnt 152 cellId := idx % ht.blockCellCnt 153 cell := &ht.cells[blockId][cellId] 154 if cell.Key == key || cell.Mapped == 0 { 155 return cell 156 } 157 } 158 return nil 159 } 160 161 func (ht *Int64HashMap) findEmptyCell(hash uint64, key uint64) *Int64HashMapCell { 162 for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask { 163 blockId := idx / ht.blockCellCnt 164 cellId := idx % ht.blockCellCnt 165 cell := &ht.cells[blockId][cellId] 166 if cell.Mapped == 0 { 167 return cell 168 } 169 } 170 return nil 171 } 172 173 func (ht *Int64HashMap) resizeOnDemand(n int, m *mpool.MPool) error { 174 targetCnt := ht.elemCnt + uint64(n) 175 if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt { 176 return nil 177 } 178 179 var err error 180 if len(ht.rawData) == 1 { 181 newCellCntBits := ht.blockCellCntBits + 2 182 newCellCnt := uint64(1 << newCellCntBits) 183 newBlockMaxElemCnt := newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator 184 for newBlockMaxElemCnt < targetCnt { 185 newCellCntBits++ 186 newCellCnt <<= 1 187 newBlockMaxElemCnt = newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator 188 } 189 190 newAlloc := int(newCellCnt) * int(intCellSize) 191 if newAlloc <= mpool.GB { 192 // update hashTable cnt. 193 oldCellCnt := ht.blockCellCnt 194 oldCells0 := ht.cells[0] 195 oldData0 := ht.rawData[0] 196 197 ht.blockCellCntBits = newCellCntBits 198 ht.blockCellCnt = newCellCnt 199 ht.blockMaxElemCnt = newBlockMaxElemCnt 200 ht.cellCnt = newCellCnt 201 ht.cellCntMask = newCellCnt - 1 202 203 ht.rawData[0], err = m.Alloc(newAlloc) 204 if err != nil { 205 return err 206 } 207 blockData := ht.rawData[0] 208 // This can be optimized to SIMD by Go compiler, according to https://codereview.appspot.com/137880043 209 for i := range blockData { 210 blockData[i] = 0 211 } 212 ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt) 213 214 // rearrange the cells 215 var hashes [256]uint64 216 for i := uint64(0); i < oldCellCnt; i += 256 { 217 cells := oldCells0[i : i+256] 218 Int64CellBatchHash(unsafe.Pointer(&cells[0]), &hashes[0], 256) 219 for j := range cells { 220 cell := &cells[j] 221 if cell.Mapped != 0 { 222 newCell := ht.findEmptyCell(hashes[j], cell.Key) 223 *newCell = *cell 224 } 225 } 226 } 227 228 m.Free(oldData0) 229 return nil 230 } 231 } 232 233 // double the blocks 234 oldBlockNum := len(ht.rawData) 235 oldCells := ht.cells 236 oldData := ht.rawData 237 238 ht.rawData = make([][]byte, oldBlockNum*2) 239 ht.cells = make([][]Int64HashMapCell, oldBlockNum*2) 240 ht.cellCnt = ht.blockCellCnt * uint64(len(ht.rawData)) 241 ht.cellCntMask = ht.cellCnt - 1 242 243 for i := range ht.rawData { 244 ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt) * int(intCellSize)) 245 if err != nil { 246 return err 247 } 248 blockData := ht.rawData[i] 249 for j := range blockData { 250 blockData[j] = 0 251 } 252 ht.cells[i] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt) 253 } 254 255 // rearrange the cells 256 var hashes [256]uint64 257 258 for i := 0; i < oldBlockNum; i++ { 259 for j := uint64(0); j < ht.blockCellCnt; j += 256 { 260 cells := oldCells[i][j : j+256] 261 Int64CellBatchHash(unsafe.Pointer(&cells[0]), &hashes[0], 256) 262 for k := range cells { 263 cell := &cells[k] 264 if cell.Mapped != 0 { 265 newCell := ht.findEmptyCell(hashes[k], cell.Key) 266 *newCell = *cell 267 } 268 } 269 } 270 m.Free(oldData[i]) 271 } 272 273 return nil 274 } 275 276 func (ht *Int64HashMap) Cardinality() uint64 { 277 return ht.elemCnt 278 } 279 280 func (ht *Int64HashMap) Size() int64 { 281 // 41 is the fixed size of Int64HashMap 282 ret := int64(41) 283 for i := range ht.rawData { 284 ret += int64(len(ht.rawData[i])) 285 // 16 is the len of ht.cells[i] 286 ret += 16 287 } 288 return ret 289 } 290 291 type Int64HashMapIterator struct { 292 table *Int64HashMap 293 pos uint64 294 } 295 296 func (it *Int64HashMapIterator) Init(ht *Int64HashMap) { 297 it.table = ht 298 } 299 300 func (it *Int64HashMapIterator) Next() (cell *Int64HashMapCell, err error) { 301 for it.pos < it.table.cellCnt { 302 blockId := it.pos / it.table.blockCellCnt 303 cellId := it.pos % it.table.blockCellCnt 304 cell = &it.table.cells[blockId][cellId] 305 if cell.Mapped != 0 { 306 break 307 } 308 it.pos++ 309 } 310 311 if it.pos >= it.table.cellCnt { 312 err = moerr.NewInternalErrorNoCtx("out of range") 313 return 314 } 315 316 it.pos++ 317 318 return 319 }