github.com/matrixorigin/matrixone@v1.2.0/pkg/common/hashmap/inthashmap.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//	http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hashmap

import (
	"unsafe"

	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/hashtable"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
)

func init() {
	// zeroUint64 and zeroUint32 are package-level scratch slices declared
	// elsewhere in this package.
	zeroUint64 = make([]uint64, UnitLimit)
	zeroUint32 = make([]uint32, UnitLimit)
}

// NewIntHashMap creates an IntHashMap backed by the given memory pool.
// hasNull controls whether a 1-byte null flag is packed in front of each
// column value when keys are encoded.
func NewIntHashMap(hasNull bool, ibucket, nbucket uint64, m *mpool.MPool) (*IntHashMap, error) {
	mp := &hashtable.Int64HashMap{}
	if err := mp.Init(m); err != nil {
		return nil, err
	}
	return &IntHashMap{
		m:       m,
		rows:    0,
		hasNull: hasNull,
		ibucket: ibucket,
		nbucket: nbucket,
		keys:    make([]uint64, UnitLimit),
		keyOffs: make([]uint32, UnitLimit),
		values:  make([]uint64, UnitLimit),
		zValues: make([]int64, UnitLimit),
		hashes:  make([]uint64, UnitLimit),
		hashMap: mp,
	}, nil
}

func (m *IntHashMap) NewIterator() *intHashMapIterator {
	return &intHashMapIterator{
		mp:      m,
		m:       m.m,
		ibucket: m.ibucket,
		nbucket: m.nbucket,
	}
}

func (m *IntHashMap) HasNull() bool {
	return m.hasNull
}

func (m *IntHashMap) Free() {
	m.hashMap.Free(m.m)
}

func (m *IntHashMap) PreAlloc(n uint64, mp *mpool.MPool) error {
	return m.hashMap.ResizeOnDemand(int(n), mp)
}

func (m *IntHashMap) GroupCount() uint64 {
	return m.rows
}

func (m *IntHashMap) AddGroup() {
	m.rows++
}

func (m *IntHashMap) AddGroups(rows uint64) {
	m.rows += rows
}

func (m *IntHashMap) Size() int64 {
	// TODO: add the size of the other IntHashMap parts
	if m.hashMap == nil {
		return 0
	}
	return m.hashMap.Size()
}

func (m *IntHashMap) Cardinality() uint64 {
	return m.hashMap.Cardinality()
}

// encodeHashKeys packs rows [start, start+count) of every key vector into the
// per-row key buffers. Fixed-size types are dispatched by their byte width;
// everything else is appended as raw bytes.
func (m *IntHashMap) encodeHashKeys(vecs []*vector.Vector, start, count int) {
	for _, vec := range vecs {
		switch vec.GetType().TypeSize() {
		case 1:
			fillKeys[uint8](m, vec, 1, start, count)
		case 2:
			fillKeys[uint16](m, vec, 2, start, count)
		case 4:
			fillKeys[uint32](m, vec, 4, start, count)
		case 8:
			fillKeys[uint64](m, vec, 8, start, count)
		default:
			if !vec.IsConst() && vec.GetArea() == nil {
				fillVarlenaKey(m, vec, start, count)
			} else {
				fillStrKey(m, vec, start, count)
			}
		}
	}
}
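// Typical call pattern, shown as a minimal sketch. The iterator's Insert
// method is defined in iterator.go rather than in this file, so the exact
// call shape below is an assumption; vecs and n stand in for caller-supplied
// key vectors and a row count:
//
//	mp := mpool.MustNewZero()
//	hm, err := hashmap.NewIntHashMap(true, 0, 0, mp)
//	if err != nil {
//		return err
//	}
//	defer hm.Free()
//	itr := hm.NewIterator()
//	// Insert encodes the keys (via encodeHashKeys) and returns one group
//	// number per row.
//	vals, zvals, err := itr.Insert(0, n, vecs)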
// Dup deep-copies the map's scratch buffers and its underlying hash table
// into a new IntHashMap that allocates from pool.
func (m *IntHashMap) Dup(pool *mpool.MPool) *IntHashMap {
	val := &IntHashMap{
		hasNull: m.hasNull,
		rows:    m.rows,

		keys:    make([]uint64, len(m.keys)),
		keyOffs: make([]uint32, len(m.keyOffs)),
		values:  make([]uint64, len(m.values)),
		zValues: make([]int64, len(m.zValues)),
		hashes:  make([]uint64, len(m.hashes)),

		ibucket: m.ibucket,
		nbucket: m.nbucket,

		m: pool,
	}
	copy(val.keys, m.keys)
	copy(val.keyOffs, m.keyOffs)
	copy(val.values, m.values)
	copy(val.zValues, m.zValues)
	copy(val.hashes, m.hashes)
	if m.hashMap != nil {
		val.hashMap = m.hashMap.Dup()
	}

	return val
}

// fillKeys appends one fixed-size column to the keys of rows [start, start+n).
// keys[i] is an 8-byte slot written through unsafe pointers; keyOffs[i] tracks
// how many bytes of row i's key have been filled so far.
func fillKeys[T types.FixedSizeT](m *IntHashMap, vec *vector.Vector, size uint32, start int, n int) {
	keys := m.keys
	keyOffs := m.keyOffs
	if vec.IsConstNull() {
		if m.hasNull {
			// Every row gets a single "is null" flag byte.
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
				keyOffs[i]++
			}
		} else {
			// Nulls are not representable: mark every row as invalid.
			for i := 0; i < n; i++ {
				m.zValues[i] = 0
			}
		}
	} else if vec.IsConst() {
		ptr := vector.GetPtrAt[T](vec, 0)
		// Write the constant value first, then advance all offsets in one
		// pass; the previous implementation could write out of bounds here.
		if !m.hasNull {
			for i := 0; i < n; i++ {
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = *ptr
			}
			uint32AddScalar(size, keyOffs[:n], keyOffs[:n])
		} else {
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i]+1)) = *ptr
			}
			uint32AddScalar(1+size, keyOffs[:n], keyOffs[:n])
		}
	} else if !vec.GetNulls().Any() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				ptr := vector.GetPtrAt[T](vec, int64(i+start))
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i]+1)) = *ptr
			}
			uint32AddScalar(1+size, keyOffs[:n], keyOffs[:n])
		} else {
			for i := 0; i < n; i++ {
				ptr := vector.GetPtrAt[T](vec, int64(i+start))
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = *ptr
			}
			uint32AddScalar(size, keyOffs[:n], keyOffs[:n])
		}
	} else {
		nsp := vec.GetNulls()
		if m.hasNull {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
					keyOffs[i]++
				} else {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
					ptr := vector.GetPtrAt[T](vec, int64(i+start))
					*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i]+1)) = *ptr
					keyOffs[i] += 1 + size
				}
			}
		} else {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					m.zValues[i] = 0
					continue
				}
				ptr := vector.GetPtrAt[T](vec, int64(i+start))
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = *ptr
				keyOffs[i] += size
			}
		}
	}
}
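// Worked example of the packing performed by fillKeys (illustrative; the byte
// order assumes a little-endian platform such as amd64 or arm64). With
// hasNull == true and a 4-byte column value v at a non-null row i, the 8-byte
// slot keys[i] receives
//
//	byte keyOffs[i]+0:    0x00    null flag, "not null"
//	bytes keyOffs[i]+1..4: v      the four value bytes
//
// and keyOffs[i] advances by 1+4 = 5. A null row stores the single flag byte
// 0x01 and advances keyOffs[i] by 1, so null and non-null keys can never
// collide.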
// fillVarlenaKey appends a varlena column whose payloads are all inlined
// (no out-of-line area buffer) to the keys of rows [start, start+n).
func fillVarlenaKey(m *IntHashMap, vec *vector.Vector, start int, n int) {
	keys := m.keys
	keyOffs := m.keyOffs
	vcol, _ := vector.MustVarlenaRawData(vec)
	if !vec.GetNulls().Any() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				v := vcol[i+start].ByteSlice()
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
				m.keyOffs[i] += uint32(len(v) + 1)
			}
		} else {
			for i := 0; i < n; i++ {
				v := vcol[i+start].ByteSlice()
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	} else {
		nsp := vec.GetNulls()
		if m.hasNull {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
					keyOffs[i]++
				} else {
					v := vcol[i+start].ByteSlice()
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
					copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
					m.keyOffs[i] += uint32(len(v) + 1)
				}
			}
		} else {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					m.zValues[i] = 0
					continue
				}
				v := vcol[i+start].ByteSlice()
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	}
}

// fillStrKey appends a string-like column (constant, or with an out-of-line
// area buffer) to the keys of rows [start, start+n) via GetBytesAt.
func fillStrKey(m *IntHashMap, vec *vector.Vector, start int, n int) {
	keys := m.keys
	keyOffs := m.keyOffs
	if vec.IsConstNull() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
				keyOffs[i]++
			}
		} else {
			for i := 0; i < n; i++ {
				m.zValues[i] = 0
			}
		}
	} else if !vec.GetNulls().Any() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
				m.keyOffs[i] += uint32(len(v) + 1)
			}
		} else {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	} else {
		nsp := vec.GetNulls()
		if m.hasNull {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				if nsp.Contains(uint64(i + start)) {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
					keyOffs[i]++
				} else {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
					copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
					m.keyOffs[i] += uint32(len(v) + 1)
				}
			}
		} else {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				if nsp.Contains(uint64(i + start)) {
					m.zValues[i] = 0
					continue
				}
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	}
}

// uint32AddScalar adds x to every element of ys, writing the results into rs;
// callers pass the same slice for both to advance offsets in place.
func uint32AddScalar(x uint32, ys, rs []uint32) []uint32 {
	for i, y := range ys {
		rs[i] = x + y
	}
	return rs
}
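// For instance (illustrative): with offsets {0, 3, 3} and a per-row key width
// of 5, uint32AddScalar(5, offs, offs) leaves offs as {5, 8, 8}; every pending
// key offset advances by the same amount in a single pass.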