github.com/matrixorigin/matrixone@v1.2.0/pkg/container/hashtable/int64_hash_map.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hashtable 16 17 import ( 18 "unsafe" 19 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/common/mpool" 22 ) 23 24 type Int64HashMapCell struct { 25 Key uint64 26 Mapped uint64 27 } 28 29 type Int64HashMap struct { 30 blockCellCnt uint64 31 blockMaxElemCnt uint64 32 cellCntMask uint64 33 34 cellCnt uint64 35 elemCnt uint64 36 rawData [][]byte 37 cells [][]Int64HashMapCell 38 } 39 40 var ( 41 intCellSize uint64 42 maxIntCellCntPerBlock uint64 43 ) 44 45 func init() { 46 intCellSize = uint64(unsafe.Sizeof(Int64HashMapCell{})) 47 maxIntCellCntPerBlock = maxBlockSize / intCellSize 48 } 49 50 func (ht *Int64HashMap) Dup() *Int64HashMap { 51 val := &Int64HashMap{ 52 blockCellCnt: ht.blockCellCnt, 53 blockMaxElemCnt: ht.blockMaxElemCnt, 54 cellCntMask: ht.cellCntMask, 55 56 cellCnt: ht.cellCnt, 57 elemCnt: ht.elemCnt, 58 59 rawData: make([][]byte, len(ht.rawData)), 60 cells: make([][]Int64HashMapCell, len(ht.cells)), 61 } 62 63 for i, raw := range ht.rawData { 64 val.rawData[i] = make([]byte, len(raw)) 65 copy(val.rawData[i], raw) 66 } 67 68 for i, cell := range ht.cells { 69 val.cells[i] = make([]Int64HashMapCell, len(cell)) 70 copy(val.cells[i], cell) 71 } 72 73 return val 74 } 75 76 func (ht *Int64HashMap) Free(m *mpool.MPool) { 77 for i := range ht.rawData { 78 if len(ht.rawData[i]) > 0 { 79 m.Free(ht.rawData[i]) 80 } 81 ht.rawData[i], ht.cells[i] = nil, nil 82 } 83 ht.rawData, ht.cells = nil, nil 84 } 85 86 func (ht *Int64HashMap) Init(m *mpool.MPool) (err error) { 87 ht.blockCellCnt = kInitialCellCnt 88 ht.blockMaxElemCnt = maxElemCnt(kInitialCellCnt, intCellSize) 89 ht.cellCntMask = kInitialCellCnt - 1 90 ht.elemCnt = 0 91 ht.cellCnt = kInitialCellCnt 92 93 ht.rawData = make([][]byte, 1) 94 ht.cells = make([][]Int64HashMapCell, 1) 95 if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt * intCellSize)); err == nil { 96 ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt) 97 } 98 return 99 } 100 101 func (ht *Int64HashMap) InsertBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error { 102 if err := ht.ResizeOnDemand(n, m); err != nil { 103 return err 104 } 105 106 if hashes[0] == 0 { 107 Int64BatchHash(keysPtr, &hashes[0], n) 108 } 109 110 for i, hash := range hashes { 111 cell := ht.findCell(hash) 112 if cell.Mapped == 0 { 113 ht.elemCnt++ 114 cell.Key = hash 115 cell.Mapped = ht.elemCnt 116 } 117 values[i] = cell.Mapped 118 } 119 return nil 120 } 121 122 func (ht *Int64HashMap) InsertBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error { 123 if err := ht.ResizeOnDemand(n, m); err != nil { 124 return err 125 } 126 127 if hashes[0] == 0 { 128 Int64BatchHash(keysPtr, &hashes[0], n) 129 } 130 131 for i, hash := range hashes { 132 if zValues[i] == 0 { 133 continue 134 } 135 cell := ht.findCell(hash) 136 if cell.Mapped == 0 { 137 ht.elemCnt++ 138 cell.Key = hash 139 cell.Mapped = ht.elemCnt 140 } 141 values[i] = cell.Mapped 142 } 143 return nil 144 } 145 146 func (ht *Int64HashMap) FindBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) { 147 if hashes[0] == 0 { 148 Int64BatchHash(keysPtr, &hashes[0], n) 149 } 150 151 for i, hash := range hashes { 152 cell := ht.findCell(hash) 153 values[i] = cell.Mapped 154 } 155 } 156 157 func (ht *Int64HashMap) FindBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) { 158 if hashes[0] == 0 { 159 Int64BatchHash(keysPtr, &hashes[0], n) 160 } 161 162 for i, hash := range hashes { 163 if zValues[i] == 0 { 164 values[i] = 0 165 continue 166 } 167 cell := ht.findCell(hash) 168 values[i] = cell.Mapped 169 } 170 } 171 172 func (ht *Int64HashMap) findCell(hash uint64) *Int64HashMapCell { 173 for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask { 174 blockId := idx / ht.blockCellCnt 175 cellId := idx % ht.blockCellCnt 176 cell := &ht.cells[blockId][cellId] 177 if cell.Key == hash || cell.Mapped == 0 { 178 return cell 179 } 180 } 181 return nil 182 } 183 184 func (ht *Int64HashMap) findEmptyCell(hash uint64) *Int64HashMapCell { 185 for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask { 186 blockId := idx / ht.blockCellCnt 187 cellId := idx % ht.blockCellCnt 188 cell := &ht.cells[blockId][cellId] 189 if cell.Mapped == 0 { 190 return cell 191 } 192 } 193 return nil 194 } 195 196 func (ht *Int64HashMap) ResizeOnDemand(n int, m *mpool.MPool) error { 197 var err error 198 199 targetCnt := ht.elemCnt + uint64(n) 200 if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt { 201 return nil 202 } 203 204 newCellCnt := ht.cellCnt << 1 205 newMaxElemCnt := maxElemCnt(newCellCnt, intCellSize) 206 for newMaxElemCnt < targetCnt { 207 newCellCnt <<= 1 208 newMaxElemCnt = maxElemCnt(newCellCnt, intCellSize) 209 } 210 211 newAlloc := int(newCellCnt * intCellSize) 212 if ht.blockCellCnt == maxIntCellCntPerBlock { 213 // double the blocks 214 oldBlockNum := len(ht.rawData) 215 newBlockNum := newAlloc / maxBlockSize 216 217 ht.rawData = append(ht.rawData, make([][]byte, newBlockNum-oldBlockNum)...) 218 ht.cells = append(ht.cells, make([][]Int64HashMapCell, newBlockNum-oldBlockNum)...) 219 ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum) 220 ht.cellCntMask = ht.cellCnt - 1 221 222 for i := oldBlockNum; i < newBlockNum; i++ { 223 ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * intCellSize)) 224 if err != nil { 225 return err 226 } 227 ht.cells[i] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt) 228 } 229 230 // rearrange the cells 231 var block []Int64HashMapCell 232 var emptyCell Int64HashMapCell 233 234 for i := 0; i < oldBlockNum; i++ { 235 block = ht.cells[i] 236 for j := uint64(0); j < ht.blockCellCnt; j++ { 237 cell := &block[j] 238 if cell.Mapped == 0 { 239 continue 240 } 241 newCell := ht.findCell(cell.Key) 242 if newCell != cell { 243 *newCell = *cell 244 *cell = emptyCell 245 } 246 } 247 } 248 249 block = ht.cells[oldBlockNum] 250 for j := uint64(0); j < ht.blockCellCnt; j++ { 251 cell := &block[j] 252 if cell.Mapped == 0 { 253 break 254 } 255 newCell := ht.findCell(cell.Key) 256 if newCell != cell { 257 *newCell = *cell 258 *cell = emptyCell 259 } 260 } 261 } else { 262 oldCells0 := ht.cells[0] 263 oldData0 := ht.rawData[0] 264 ht.cellCnt = newCellCnt 265 ht.cellCntMask = newCellCnt - 1 266 267 if newAlloc <= maxBlockSize { 268 ht.blockCellCnt = newCellCnt 269 ht.blockMaxElemCnt = newMaxElemCnt 270 271 ht.rawData[0], err = m.Alloc(newAlloc) 272 if err != nil { 273 return err 274 } 275 ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt) 276 } else { 277 ht.blockCellCnt = maxIntCellCntPerBlock 278 ht.blockMaxElemCnt = maxElemCnt(ht.blockCellCnt, intCellSize) 279 280 newBlockNum := newAlloc / maxBlockSize 281 ht.rawData = make([][]byte, newBlockNum) 282 ht.cells = make([][]Int64HashMapCell, newBlockNum) 283 ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum) 284 ht.cellCntMask = ht.cellCnt - 1 285 286 for i := 0; i < newBlockNum; i++ { 287 ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * intCellSize)) 288 if err != nil { 289 return err 290 } 291 ht.cells[i] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt) 292 } 293 } 294 295 // rearrange the cells 296 for i := range oldCells0 { 297 cell := &oldCells0[i] 298 if cell.Mapped != 0 { 299 newCell := ht.findEmptyCell(cell.Key) 300 *newCell = *cell 301 } 302 } 303 304 m.Free(oldData0) 305 } 306 307 return nil 308 } 309 310 func (ht *Int64HashMap) Cardinality() uint64 { 311 return ht.elemCnt 312 } 313 314 func (ht *Int64HashMap) Size() int64 { 315 // 41 is the fixed size of Int64HashMap 316 ret := int64(41) 317 for i := range ht.rawData { 318 ret += int64(len(ht.rawData[i])) 319 // 16 is the len of ht.cells[i] 320 ret += 16 321 } 322 return ret 323 } 324 325 type Int64HashMapIterator struct { 326 table *Int64HashMap 327 pos uint64 328 } 329 330 func (it *Int64HashMapIterator) Init(ht *Int64HashMap) { 331 it.table = ht 332 } 333 334 func (it *Int64HashMapIterator) Next() (cell *Int64HashMapCell, err error) { 335 for it.pos < it.table.cellCnt { 336 blockId := it.pos / it.table.blockCellCnt 337 cellId := it.pos % it.table.blockCellCnt 338 cell = &it.table.cells[blockId][cellId] 339 if cell.Mapped != 0 { 340 break 341 } 342 it.pos++ 343 } 344 345 if it.pos >= it.table.cellCnt { 346 err = moerr.NewInternalErrorNoCtx("out of range") 347 return 348 } 349 350 it.pos++ 351 352 return 353 }