github.com/matrixorigin/matrixone@v1.2.0/pkg/common/hashmap/strhashmap.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hashmap 16 17 import ( 18 "unsafe" 19 20 "github.com/matrixorigin/matrixone/pkg/common/mpool" 21 "github.com/matrixorigin/matrixone/pkg/container/hashtable" 22 "github.com/matrixorigin/matrixone/pkg/container/types" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 ) 25 26 func init() { 27 OneInt64s = make([]int64, UnitLimit) 28 for i := range OneInt64s { 29 OneInt64s[i] = 1 30 } 31 OneUInt8s = make([]uint8, UnitLimit) 32 for i := range OneUInt8s { 33 OneUInt8s[i] = 1 34 } 35 } 36 37 func NewStrMap(hasNull bool, ibucket, nbucket uint64, m *mpool.MPool) (*StrHashMap, error) { 38 mp := &hashtable.StringHashMap{} 39 if err := mp.Init(m); err != nil { 40 return nil, err 41 } 42 return &StrHashMap{ 43 m: m, 44 hashMap: mp, 45 hasNull: hasNull, 46 ibucket: ibucket, 47 nbucket: nbucket, 48 values: make([]uint64, UnitLimit), 49 zValues: make([]int64, UnitLimit), 50 keys: make([][]byte, UnitLimit), 51 strHashStates: make([][3]uint64, UnitLimit), 52 }, nil 53 } 54 55 func (m *StrHashMap) NewIterator() Iterator { 56 return &strHashmapIterator{ 57 mp: m, 58 m: m.m, 59 ibucket: m.ibucket, 60 nbucket: m.nbucket, 61 } 62 } 63 64 func (m *StrHashMap) HasNull() bool { 65 return m.hasNull 66 } 67 68 func (m *StrHashMap) Free() { 69 m.hashMap.Free(m.m) 70 } 71 72 func (m *StrHashMap) PreAlloc(n uint64, mp *mpool.MPool) error { 73 return m.hashMap.ResizeOnDemand(n, mp) 74 } 75 76 func (m *StrHashMap) GroupCount() uint64 { 77 return m.rows 78 } 79 80 func (m *StrHashMap) AddGroup() { 81 m.rows++ 82 } 83 84 func (m *StrHashMap) AddGroups(rows uint64) { 85 m.rows += rows 86 } 87 88 func (m *StrHashMap) Size() int64 { 89 // TODO: add the size of the other StrHashMap parts 90 if m.hashMap == nil { 91 return 0 92 } 93 return m.hashMap.Size() 94 } 95 96 func (m *StrHashMap) Cardinality() uint64 { 97 return m.hashMap.Cardinality() 98 } 99 100 // InsertValue insert a value, return true if it is new, otherwise false 101 // never handle null 102 func (m *StrHashMap) InsertValue(val any) (bool, error) { 103 defer func() { m.keys[0] = m.keys[0][:0] }() 104 switch v := val.(type) { 105 case uint8: 106 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 107 case uint16: 108 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 109 case uint32: 110 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 111 case uint64: 112 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 113 case int8: 114 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 115 case int16: 116 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 117 case int32: 118 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 119 case int64: 120 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 121 case float32: 122 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 123 case float64: 124 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 125 case []byte: 126 length := uint16(len(v)) 127 m.keys[0] = append(m.keys[0], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 128 m.keys[0] = append(m.keys[0], v...) 129 case types.Date: 130 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 131 case types.Datetime: 132 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 133 case types.Timestamp: 134 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 135 case types.Decimal64: 136 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 137 case types.Decimal128: 138 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 139 case types.Uuid: 140 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 141 case string: 142 m.keys[0] = append(m.keys[0], []byte(v)...) 143 } 144 if l := len(m.keys[0]); l < 16 { 145 m.keys[0] = append(m.keys[0], hashtable.StrKeyPadding[l:]...) 146 } 147 if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil { 148 return false, err 149 } 150 if m.values[0] > m.rows { 151 m.rows++ 152 return true, nil 153 } 154 return false, nil 155 } 156 157 // Insert a row from multiple columns into the hashmap, return true if it is new, otherwise false 158 func (m *StrHashMap) Insert(vecs []*vector.Vector, row int) (bool, error) { 159 defer func() { m.keys[0] = m.keys[0][:0] }() 160 m.encodeHashKeys(vecs, row, 1) 161 if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil { 162 return false, err 163 } 164 if m.values[0] > m.rows { 165 m.rows++ 166 return true, nil 167 } 168 return false, nil 169 } 170 171 func (m *StrHashMap) encodeHashKeys(vecs []*vector.Vector, start, count int) { 172 for _, vec := range vecs { 173 if vec.GetType().IsFixedLen() { 174 fillGroupStr(m, vec, count, vec.GetType().TypeSize(), start, 0, len(vecs)) 175 } else { 176 fillStringGroupStr(m, vec, count, start, len(vecs)) 177 } 178 } 179 for i := 0; i < count; i++ { 180 if l := len(m.keys[i]); l < 16 { 181 m.keys[i] = append(m.keys[i], hashtable.StrKeyPadding[l:]...) 182 } 183 } 184 } 185 186 // A NULL C 187 // 01A101C 9 bytes 188 // for non-NULL value, give 3 bytes, the first byte is always 0, the last two bytes are the length 189 // of this value,and then append the true bytes of the value 190 // for NULL value, just only one byte, give one byte(1) 191 // these are the rules of multi-cols 192 // for one col, just give the value bytes 193 func fillStringGroupStr(m *StrHashMap, vec *vector.Vector, n int, start int, lenCols int) { 194 if vec.IsConstNull() { 195 if m.hasNull { 196 for i := 0; i < n; i++ { 197 m.keys[i] = append(m.keys[i], byte(1)) 198 } 199 } else { 200 for i := 0; i < n; i++ { 201 m.zValues[i] = 0 202 } 203 } 204 return 205 } 206 if !vec.GetNulls().Any() { 207 if m.hasNull { 208 for i := 0; i < n; i++ { 209 bytes := vec.GetBytesAt(i + start) 210 // for "a","bc" and "ab","c", we need to distinct 211 // this is not null value 212 m.keys[i] = append(m.keys[i], 0) 213 // give the length 214 length := uint16(len(bytes)) 215 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 216 // append the ture value bytes 217 m.keys[i] = append(m.keys[i], bytes...) 218 } 219 } else { 220 for i := 0; i < n; i++ { 221 bytes := vec.GetBytesAt(i + start) 222 // for "a","bc" and "ab","c", we need to distinct 223 // give the length 224 length := uint16(len(bytes)) 225 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 226 // append the ture value bytes 227 m.keys[i] = append(m.keys[i], bytes...) 228 } 229 } 230 } else { 231 nsp := vec.GetNulls() 232 for i := 0; i < n; i++ { 233 hasNull := nsp.Contains(uint64(i + start)) 234 if m.hasNull { 235 if hasNull { 236 m.keys[i] = append(m.keys[i], byte(1)) 237 } else { 238 bytes := vec.GetBytesAt(i + start) 239 // for "a","bc" and "ab","c", we need to distinct 240 // this is not null value 241 m.keys[i] = append(m.keys[i], 0) 242 // give the length 243 length := uint16(len(bytes)) 244 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 245 // append the ture value bytes 246 m.keys[i] = append(m.keys[i], bytes...) 247 } 248 } else { 249 if hasNull { 250 m.zValues[i] = 0 251 continue 252 } 253 bytes := vec.GetBytesAt(i + start) 254 // for "a","bc" and "ab","c", we need to distinct 255 // give the length 256 length := uint16(len(bytes)) 257 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 258 // append the ture value bytes 259 m.keys[i] = append(m.keys[i], bytes...) 260 } 261 } 262 } 263 } 264 265 func fillGroupStr(m *StrHashMap, vec *vector.Vector, n int, sz int, start int, scale int32, lenCols int) { 266 if vec.IsConstNull() { 267 if m.hasNull { 268 for i := 0; i < n; i++ { 269 m.keys[i] = append(m.keys[i], byte(1)) 270 } 271 } else { 272 for i := 0; i < n; i++ { 273 m.zValues[i] = 0 274 } 275 } 276 return 277 } 278 if vec.IsConst() { 279 data := unsafe.Slice(vector.GetPtrAt[byte](vec, 0), sz) 280 if m.hasNull { 281 for i := 0; i < n; i++ { 282 m.keys[i] = append(m.keys[i], 0) 283 m.keys[i] = append(m.keys[i], data...) 284 } 285 } else { 286 for i := 0; i < n; i++ { 287 m.keys[i] = append(m.keys[i], data...) 288 } 289 } 290 return 291 } 292 data := unsafe.Slice(vector.GetPtrAt[byte](vec, 0), (n+start)*sz) 293 if !vec.GetNulls().Any() { 294 if m.hasNull { 295 for i := 0; i < n; i++ { 296 bytes := data[(i+start)*sz : (i+start+1)*sz] 297 m.keys[i] = append(m.keys[i], 0) 298 m.keys[i] = append(m.keys[i], bytes...) 299 } 300 } else { 301 for i := 0; i < n; i++ { 302 bytes := data[(i+start)*sz : (i+start+1)*sz] 303 m.keys[i] = append(m.keys[i], bytes...) 304 } 305 } 306 } else { 307 nsp := vec.GetNulls() 308 for i := 0; i < n; i++ { 309 isNull := nsp.Contains(uint64(i + start)) 310 if m.hasNull { 311 if isNull { 312 m.keys[i] = append(m.keys[i], 1) 313 } else { 314 bytes := data[(i+start)*sz : (i+start+1)*sz] 315 m.keys[i] = append(m.keys[i], 0) 316 m.keys[i] = append(m.keys[i], bytes...) 317 } 318 } else { 319 if isNull { 320 m.zValues[i] = 0 321 continue 322 } 323 bytes := data[(i+start)*sz : (i+start+1)*sz] 324 m.keys[i] = append(m.keys[i], bytes...) 325 } 326 } 327 } 328 } 329 330 func (m *StrHashMap) Dup(pool *mpool.MPool) *StrHashMap { 331 val := &StrHashMap{ 332 hasNull: m.hasNull, 333 rows: m.rows, 334 335 keys: make([][]byte, len(m.keys)), 336 values: make([]uint64, len(m.values)), 337 zValues: make([]int64, len(m.zValues)), 338 strHashStates: make([][3]uint64, len(m.strHashStates)), 339 340 ibucket: m.ibucket, 341 nbucket: m.nbucket, 342 343 m: pool, 344 } 345 copy(val.values, m.values) 346 copy(val.zValues, m.zValues) 347 copy(val.strHashStates, m.strHashStates) 348 for i, key := range m.keys { 349 val.keys[i] = make([]byte, len(key)) 350 copy(val.keys[i], key) 351 } 352 if m.hashMap != nil { 353 val.hashMap = m.hashMap.Dup() 354 } 355 return val 356 }