github.com/matrixorigin/matrixone@v1.2.0/pkg/container/hashtable/string_hash_map.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hashtable 16 17 import ( 18 "unsafe" 19 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/common/mpool" 22 ) 23 24 type StringRef struct { 25 Ptr *byte 26 Len int 27 } 28 29 type StringHashMapCell struct { 30 HashState [3]uint64 31 Mapped uint64 32 } 33 34 var StrKeyPadding [16]byte 35 36 type StringHashMap struct { 37 blockCellCnt uint64 38 blockMaxElemCnt uint64 39 cellCntMask uint64 40 //confCnt uint64 41 42 cellCnt uint64 43 elemCnt uint64 44 rawData [][]byte 45 cells [][]StringHashMapCell 46 } 47 48 var ( 49 strCellSize uint64 50 maxStrCellCntPerBlock uint64 51 ) 52 53 func init() { 54 strCellSize = uint64(unsafe.Sizeof(StringHashMapCell{})) 55 maxStrCellCntPerBlock = maxBlockSize / strCellSize 56 } 57 58 func (ht *StringHashMap) Free(m *mpool.MPool) { 59 for i := range ht.rawData { 60 if len(ht.rawData[i]) > 0 { 61 m.Free(ht.rawData[i]) 62 } 63 ht.rawData[i], ht.cells[i] = nil, nil 64 } 65 ht.rawData, ht.cells = nil, nil 66 } 67 68 func (ht *StringHashMap) Init(m *mpool.MPool) (err error) { 69 ht.blockCellCnt = kInitialCellCnt 70 ht.blockMaxElemCnt = maxElemCnt(kInitialCellCnt, strCellSize) 71 ht.elemCnt = 0 72 ht.cellCnt = kInitialCellCnt 73 ht.cellCntMask = kInitialCellCnt - 1 74 75 ht.rawData = make([][]byte, 1) 76 ht.cells = make([][]StringHashMapCell, 1) 77 if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt * strCellSize)); err == nil { 78 ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt) 79 } 80 return 81 } 82 83 func (ht *StringHashMap) Dup() *StringHashMap { 84 val := &StringHashMap{ 85 blockCellCnt: ht.blockCellCnt, 86 blockMaxElemCnt: ht.blockMaxElemCnt, 87 cellCntMask: ht.cellCntMask, 88 89 cellCnt: ht.cellCnt, 90 elemCnt: ht.elemCnt, 91 92 rawData: make([][]byte, len(ht.rawData)), 93 cells: make([][]StringHashMapCell, len(ht.cells)), 94 } 95 96 for i, raw := range ht.rawData { 97 val.rawData[i] = make([]byte, len(raw)) 98 copy(val.rawData[i], raw) 99 } 100 101 for i, cell := range ht.cells { 102 val.cells[i] = make([]StringHashMapCell, len(cell)) 103 copy(val.cells[i], cell) 104 } 105 106 return val 107 } 108 109 func (ht *StringHashMap) InsertStringBatch(states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error { 110 if err := ht.ResizeOnDemand(uint64(len(keys)), m); err != nil { 111 return err 112 } 113 114 BytesBatchGenHashStates(&keys[0], &states[0], len(keys)) 115 116 for i := range keys { 117 cell := ht.findCell(&states[i]) 118 if cell.Mapped == 0 { 119 ht.elemCnt++ 120 cell.HashState = states[i] 121 cell.Mapped = ht.elemCnt 122 } 123 values[i] = cell.Mapped 124 } 125 return nil 126 } 127 128 func (ht *StringHashMap) InsertStringBatchWithRing(zValues []int64, states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error { 129 if err := ht.ResizeOnDemand(uint64(len(keys)), m); err != nil { 130 return err 131 } 132 133 BytesBatchGenHashStates(&keys[0], &states[0], len(keys)) 134 135 for i := range keys { 136 if zValues[i] == 0 { 137 continue 138 } 139 140 cell := ht.findCell(&states[i]) 141 if cell.Mapped == 0 { 142 ht.elemCnt++ 143 cell.HashState = states[i] 144 cell.Mapped = ht.elemCnt 145 } 146 values[i] = cell.Mapped 147 } 148 return nil 149 } 150 151 func (ht *StringHashMap) FindStringBatch(states [][3]uint64, keys [][]byte, values []uint64) { 152 BytesBatchGenHashStates(&keys[0], &states[0], len(keys)) 153 154 for i := range keys { 155 cell := ht.findCell(&states[i]) 156 values[i] = cell.Mapped 157 } 158 } 159 160 func (ht *StringHashMap) FindString24Batch(states [][3]uint64, keys [][3]uint64, values []uint64) { 161 Int192BatchGenHashStates(&keys[0], &states[0], len(keys)) 162 163 for i := range keys { 164 cell := ht.findCell(&states[i]) 165 values[i] = cell.Mapped 166 } 167 } 168 169 func (ht *StringHashMap) FindString32Batch(states [][3]uint64, keys [][4]uint64, values []uint64) { 170 Int256BatchGenHashStates(&keys[0], &states[0], len(keys)) 171 172 for i := range keys { 173 cell := ht.findCell(&states[i]) 174 values[i] = cell.Mapped 175 } 176 } 177 178 func (ht *StringHashMap) FindString40Batch(states [][3]uint64, keys [][5]uint64, values []uint64) { 179 Int320BatchGenHashStates(&keys[0], &states[0], len(keys)) 180 181 for i := range keys { 182 cell := ht.findCell(&states[i]) 183 values[i] = cell.Mapped 184 } 185 } 186 187 func (ht *StringHashMap) FindStringBatchWithRing(states [][3]uint64, zValues []int64, keys [][]byte, values []uint64) { 188 // XXX I think it is no use now. 189 } 190 191 func (ht *StringHashMap) FindHashStateBatch(states [][3]uint64, values []uint64) { 192 for i := range states { 193 cell := ht.findCell(&states[i]) 194 values[i] = cell.Mapped 195 } 196 } 197 198 func (ht *StringHashMap) findCell(state *[3]uint64) *StringHashMapCell { 199 for idx := state[0] & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask { 200 blockId := idx / ht.blockCellCnt 201 cellId := idx % ht.blockCellCnt 202 cell := &ht.cells[blockId][cellId] 203 if cell.Mapped == 0 || cell.HashState == *state { 204 return cell 205 } 206 } 207 return nil 208 } 209 210 func (ht *StringHashMap) findEmptyCell(state *[3]uint64) *StringHashMapCell { 211 for idx := state[0] & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask { 212 blockId := idx / ht.blockCellCnt 213 cellId := idx % ht.blockCellCnt 214 cell := &ht.cells[blockId][cellId] 215 if cell.Mapped == 0 { 216 return cell 217 } 218 } 219 return nil 220 } 221 222 func (ht *StringHashMap) ResizeOnDemand(n uint64, m *mpool.MPool) error { 223 var err error 224 225 targetCnt := ht.elemCnt + n 226 if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt { 227 return nil 228 } 229 230 newCellCnt := ht.cellCnt << 1 231 newMaxElemCnt := maxElemCnt(newCellCnt, strCellSize) 232 for newMaxElemCnt < targetCnt { 233 newCellCnt <<= 1 234 newMaxElemCnt = maxElemCnt(newCellCnt, strCellSize) 235 } 236 237 newAlloc := int(newCellCnt * strCellSize) 238 if ht.blockCellCnt == maxStrCellCntPerBlock { 239 // double the blocks 240 oldBlockNum := len(ht.rawData) 241 newBlockNum := newAlloc / maxBlockSize 242 243 ht.rawData = append(ht.rawData, make([][]byte, newBlockNum-oldBlockNum)...) 244 ht.cells = append(ht.cells, make([][]StringHashMapCell, newBlockNum-oldBlockNum)...) 245 ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum) 246 ht.cellCntMask = ht.cellCnt - 1 247 248 for i := oldBlockNum; i < newBlockNum; i++ { 249 ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * strCellSize)) 250 if err != nil { 251 return err 252 } 253 ht.cells[i] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt) 254 } 255 256 // rearrange the cells 257 var block []StringHashMapCell 258 var emptyCell StringHashMapCell 259 260 for i := 0; i < oldBlockNum; i++ { 261 block = ht.cells[i] 262 for j := uint64(0); j < ht.blockCellCnt; j++ { 263 cell := &block[j] 264 if cell.Mapped == 0 { 265 continue 266 } 267 newCell := ht.findCell(&cell.HashState) 268 if newCell != cell { 269 *newCell = *cell 270 *cell = emptyCell 271 } 272 } 273 } 274 275 block = ht.cells[oldBlockNum] 276 for j := uint64(0); j < ht.blockCellCnt; j++ { 277 cell := &block[j] 278 if cell.Mapped == 0 { 279 break 280 } 281 newCell := ht.findCell(&cell.HashState) 282 if newCell != cell { 283 *newCell = *cell 284 *cell = emptyCell 285 } 286 } 287 } else { 288 oldCells0 := ht.cells[0] 289 oldData0 := ht.rawData[0] 290 ht.cellCnt = newCellCnt 291 ht.cellCntMask = ht.cellCnt - 1 292 293 if newAlloc <= maxBlockSize { 294 ht.blockCellCnt = newCellCnt 295 ht.blockMaxElemCnt = newMaxElemCnt 296 297 ht.rawData[0], err = m.Alloc(newAlloc) 298 if err != nil { 299 return err 300 } 301 ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt) 302 } else { 303 ht.blockCellCnt = maxStrCellCntPerBlock 304 ht.blockMaxElemCnt = maxElemCnt(ht.blockCellCnt, strCellSize) 305 306 newBlockNum := newAlloc / maxBlockSize 307 ht.rawData = make([][]byte, newBlockNum) 308 ht.cells = make([][]StringHashMapCell, newBlockNum) 309 ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum) 310 ht.cellCntMask = ht.cellCnt - 1 311 312 for i := 0; i < newBlockNum; i++ { 313 ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * strCellSize)) 314 if err != nil { 315 return err 316 } 317 ht.cells[i] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt) 318 } 319 } 320 321 // rearrange the cells 322 for i := range oldCells0 { 323 cell := &oldCells0[i] 324 if cell.Mapped != 0 { 325 newCell := ht.findEmptyCell(&cell.HashState) 326 *newCell = *cell 327 } 328 } 329 330 m.Free(oldData0) 331 } 332 333 return nil 334 } 335 336 func (ht *StringHashMap) Cardinality() uint64 { 337 return ht.elemCnt 338 } 339 340 func (ht *StringHashMap) Size() int64 { 341 // 33 is the origin size of StringHashMaps 342 ret := int64(33) 343 for i := range ht.rawData { 344 ret += int64(len(ht.rawData[i])) 345 // 32 is the len of ht.cells[i] 346 ret += 32 347 } 348 return ret 349 } 350 351 type StringHashMapIterator struct { 352 table *StringHashMap 353 pos uint64 354 } 355 356 func (it *StringHashMapIterator) Init(ht *StringHashMap) { 357 it.table = ht 358 } 359 360 func (it *StringHashMapIterator) Next() (cell *StringHashMapCell, err error) { 361 for it.pos < it.table.cellCnt { 362 blockId := it.pos / it.table.blockCellCnt 363 cellId := it.pos % it.table.blockCellCnt 364 cell = &it.table.cells[blockId][cellId] 365 if cell.Mapped != 0 { 366 break 367 } 368 it.pos++ 369 } 370 371 if it.pos >= it.table.cellCnt { 372 err = moerr.NewInternalErrorNoCtx("out of range") 373 return 374 } 375 it.pos++ 376 377 return 378 }