github.com/matrixorigin/matrixone@v0.7.0/pkg/container/hashtable/string_hash_map.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hashtable 16 17 import ( 18 "unsafe" 19 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/common/mpool" 22 ) 23 24 type StringRef struct { 25 Ptr *byte 26 Len int 27 } 28 29 type StringHashMapCell struct { 30 HashState [3]uint64 31 Mapped uint64 32 } 33 34 var StrKeyPadding [16]byte 35 36 type StringHashMap struct { 37 blockCellCntBits uint8 38 blockCellCnt uint64 39 blockMaxElemCnt uint64 40 //confCnt uint64 41 42 cellCnt uint64 43 elemCnt uint64 44 rawData [][]byte 45 cells [][]StringHashMapCell 46 } 47 48 var strCellSize int64 49 50 func init() { 51 strCellSize = int64(unsafe.Sizeof(StringHashMapCell{})) 52 } 53 54 func (ht *StringHashMap) Free(m *mpool.MPool) { 55 for i := range ht.rawData { 56 if len(ht.rawData[i]) > 0 { 57 m.Free(ht.rawData[i]) 58 } 59 ht.rawData[i], ht.cells[i] = nil, nil 60 } 61 ht.rawData, ht.cells = nil, nil 62 } 63 64 func (ht *StringHashMap) Init(m *mpool.MPool) (err error) { 65 ht.blockCellCntBits = kInitialCellCntBits 66 ht.blockCellCnt = kInitialCellCnt 67 ht.blockMaxElemCnt = kInitialCellCnt * kLoadFactorNumerator / kLoadFactorDenominator 68 ht.elemCnt = 0 69 ht.cellCnt = kInitialCellCnt 70 71 ht.rawData = make([][]byte, 1) 72 ht.cells = make([][]StringHashMapCell, 1) 73 if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt) * int(strCellSize)); err == nil { 74 ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt) 75 } 76 return 77 } 78 79 func (ht *StringHashMap) InsertStringBatch(states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error { 80 if err := ht.resizeOnDemand(uint64(len(keys)), m); err != nil { 81 return err 82 } 83 84 BytesBatchGenHashStates(&keys[0], &states[0], len(keys)) 85 86 for i := range keys { 87 cell := ht.findCell(&states[i]) 88 if cell.Mapped == 0 { 89 ht.elemCnt++ 90 cell.HashState = states[i] 91 cell.Mapped = ht.elemCnt 92 } 93 values[i] = cell.Mapped 94 } 95 return nil 96 } 97 98 func (ht *StringHashMap) InsertStringBatchWithRing(zValues []int64, states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error { 99 if err := ht.resizeOnDemand(uint64(len(keys)), m); err != nil { 100 return err 101 } 102 103 BytesBatchGenHashStates(&keys[0], &states[0], len(keys)) 104 105 for i := range keys { 106 if zValues[i] == 0 { 107 continue 108 } 109 110 cell := ht.findCell(&states[i]) 111 if cell.Mapped == 0 { 112 ht.elemCnt++ 113 cell.HashState = states[i] 114 cell.Mapped = ht.elemCnt 115 } 116 values[i] = cell.Mapped 117 } 118 return nil 119 } 120 121 func (ht *StringHashMap) FindStringBatch(states [][3]uint64, keys [][]byte, values []uint64) { 122 BytesBatchGenHashStates(&keys[0], &states[0], len(keys)) 123 124 for i := range keys { 125 cell := ht.findCell(&states[i]) 126 values[i] = cell.Mapped 127 } 128 } 129 130 func (ht *StringHashMap) FindString24Batch(states [][3]uint64, keys [][3]uint64, values []uint64) { 131 Int192BatchGenHashStates(&keys[0], &states[0], len(keys)) 132 133 for i := range keys { 134 cell := ht.findCell(&states[i]) 135 values[i] = cell.Mapped 136 } 137 } 138 139 func (ht *StringHashMap) FindString32Batch(states [][3]uint64, keys [][4]uint64, values []uint64) { 140 Int256BatchGenHashStates(&keys[0], &states[0], len(keys)) 141 142 for i := range keys { 143 cell := ht.findCell(&states[i]) 144 values[i] = cell.Mapped 145 } 146 } 147 148 func (ht *StringHashMap) FindString40Batch(states [][3]uint64, keys [][5]uint64, values []uint64) { 149 Int320BatchGenHashStates(&keys[0], &states[0], len(keys)) 150 151 for i := range keys { 152 cell := ht.findCell(&states[i]) 153 values[i] = cell.Mapped 154 } 155 } 156 157 func (ht *StringHashMap) FindStringBatchWithRing(states [][3]uint64, zValues []int64, keys [][]byte, values []uint64) { 158 // XXX I think it is no use now. 159 } 160 161 func (ht *StringHashMap) FindHashStateBatch(states [][3]uint64, values []uint64) { 162 for i := range states { 163 cell := ht.findCell(&states[i]) 164 values[i] = cell.Mapped 165 } 166 } 167 168 func (ht *StringHashMap) findCell(state *[3]uint64) *StringHashMapCell { 169 mask := ht.cellCnt - 1 170 for idx := state[0] & mask; true; idx = (idx + 1) & mask { 171 blockId := idx / ht.blockCellCnt 172 cellId := idx % ht.blockCellCnt 173 cell := &ht.cells[blockId][cellId] 174 if cell.Mapped == 0 || cell.HashState == *state { 175 return cell 176 } 177 } 178 return nil 179 } 180 181 func (ht *StringHashMap) findEmptyCell(state *[3]uint64) *StringHashMapCell { 182 mask := ht.cellCnt - 1 183 for idx := state[0] & mask; true; idx = (idx + 1) & mask { 184 blockId := idx / ht.blockCellCnt 185 cellId := idx % ht.blockCellCnt 186 cell := &ht.cells[blockId][cellId] 187 if cell.Mapped == 0 { 188 return cell 189 } 190 } 191 return nil 192 } 193 194 func (ht *StringHashMap) resizeOnDemand(n uint64, m *mpool.MPool) error { 195 targetCnt := ht.elemCnt + n 196 if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt { 197 return nil 198 } 199 200 var err error 201 if len(ht.rawData) == 1 { 202 newCellCntBits := ht.blockCellCntBits + 2 203 newCellCnt := uint64(1 << newCellCntBits) 204 newBlockMaxElemCnt := newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator 205 for newBlockMaxElemCnt < targetCnt { 206 newCellCntBits++ 207 newCellCnt <<= 1 208 newBlockMaxElemCnt = newCellCnt * kLoadFactorNumerator / kLoadFactorDenominator 209 } 210 211 oldCellCnt := ht.blockCellCnt 212 oldCells0 := ht.cells[0] 213 oldData0 := ht.rawData[0] 214 215 newAlloc := int(newCellCnt) * int(strCellSize) 216 if newAlloc <= mpool.GB { 217 // update hashTable cnt. 218 ht.blockCellCntBits = newCellCntBits 219 ht.cellCnt = newCellCnt 220 ht.blockCellCnt = newCellCnt 221 ht.blockMaxElemCnt = newBlockMaxElemCnt 222 223 ht.rawData[0], err = m.Alloc(newAlloc) 224 if err != nil { 225 return err 226 } 227 blockData := ht.rawData[0] 228 for i := range blockData { 229 blockData[i] = 0 230 } 231 ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt) 232 233 // rearrange the cells 234 for i := uint64(0); i < oldCellCnt; i++ { 235 cell := &oldCells0[i] 236 if cell.Mapped != 0 { 237 newCell := ht.findEmptyCell(&cell.HashState) 238 *newCell = *cell 239 } 240 } 241 242 m.Free(oldData0) 243 return nil 244 } 245 } 246 247 // double the blocks 248 oldBlockNum := len(ht.rawData) 249 oldCells := ht.cells 250 oldData := ht.rawData 251 252 ht.rawData = make([][]byte, oldBlockNum*2) 253 ht.cells = make([][]StringHashMapCell, oldBlockNum*2) 254 ht.cellCnt = ht.blockCellCnt * uint64(len(ht.rawData)) 255 256 for i := range ht.rawData { 257 ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt) * int(strCellSize)) 258 if err != nil { 259 return err 260 } 261 blockData := ht.rawData[i] 262 for j := range blockData { 263 blockData[j] = 0 264 } 265 ht.cells[i] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&blockData[0])), ht.blockCellCnt) 266 } 267 268 // rearrange the cells 269 for i := 0; i < oldBlockNum; i++ { 270 for j := uint64(0); j < ht.blockCellCnt; j++ { 271 cell := &oldCells[i][j] 272 if cell.Mapped != 0 { 273 newCell := ht.findEmptyCell(&cell.HashState) 274 *newCell = *cell 275 } 276 } 277 m.Free(oldData[i]) 278 } 279 280 return nil 281 } 282 283 func (ht *StringHashMap) Cardinality() uint64 { 284 return ht.elemCnt 285 } 286 287 func (ht *StringHashMap) Size() int64 { 288 // 33 is the origin size of StringHashMaps 289 ret := int64(33) 290 for i := range ht.rawData { 291 ret += int64(len(ht.rawData[i])) 292 // 32 is the len of ht.cells[i] 293 ret += 32 294 } 295 return ret 296 } 297 298 type StringHashMapIterator struct { 299 table *StringHashMap 300 pos uint64 301 } 302 303 func (it *StringHashMapIterator) Init(ht *StringHashMap) { 304 it.table = ht 305 } 306 307 func (it *StringHashMapIterator) Next() (cell *StringHashMapCell, err error) { 308 for it.pos < it.table.cellCnt { 309 blockId := it.pos / it.table.blockCellCnt 310 cellId := it.pos % it.table.blockCellCnt 311 cell = &it.table.cells[blockId][cellId] 312 if cell.Mapped != 0 { 313 break 314 } 315 it.pos++ 316 } 317 318 if it.pos >= it.table.cellCnt { 319 err = moerr.NewInternalErrorNoCtx("out of range") 320 return 321 } 322 it.pos++ 323 324 return 325 }