github.com/matrixorigin/matrixone@v0.7.0/pkg/common/hashmap/strhashmap.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package hashmap 16 17 import ( 18 "unsafe" 19 20 "github.com/matrixorigin/matrixone/pkg/common/mpool" 21 "github.com/matrixorigin/matrixone/pkg/container/hashtable" 22 "github.com/matrixorigin/matrixone/pkg/container/types" 23 "github.com/matrixorigin/matrixone/pkg/container/vector" 24 ) 25 26 func init() { 27 OneInt64s = make([]int64, UnitLimit) 28 for i := range OneInt64s { 29 OneInt64s[i] = 1 30 } 31 OneUInt8s = make([]uint8, UnitLimit) 32 for i := range OneUInt8s { 33 OneUInt8s[i] = 1 34 } 35 } 36 37 func NewStrMap(hasNull bool, ibucket, nbucket uint64, m *mpool.MPool) (*StrHashMap, error) { 38 mp := &hashtable.StringHashMap{} 39 if err := mp.Init(m); err != nil { 40 return nil, err 41 } 42 return &StrHashMap{ 43 m: m, 44 hashMap: mp, 45 hasNull: hasNull, 46 ibucket: ibucket, 47 nbucket: nbucket, 48 values: make([]uint64, UnitLimit), 49 zValues: make([]int64, UnitLimit), 50 keys: make([][]byte, UnitLimit), 51 strHashStates: make([][3]uint64, UnitLimit), 52 }, nil 53 } 54 55 func (m *StrHashMap) NewIterator() Iterator { 56 return &strHashmapIterator{ 57 mp: m, 58 m: m.m, 59 ibucket: m.ibucket, 60 nbucket: m.nbucket, 61 } 62 } 63 64 func (m *StrHashMap) HasNull() bool { 65 return m.hasNull 66 } 67 68 func (m *StrHashMap) Free() { 69 m.hashMap.Free(m.m) 70 } 71 72 func (m *StrHashMap) GroupCount() uint64 { 73 return m.rows 74 } 75 76 func (m *StrHashMap) AddGroup() { 77 m.rows++ 78 } 79 80 func (m *StrHashMap) AddGroups(rows uint64) { 81 m.rows += rows 82 } 83 84 func (m *StrHashMap) Size() int64 { 85 // TODO: add the size of the other StrHashMap parts 86 if m.hashMap == nil { 87 return 0 88 } 89 return m.hashMap.Size() 90 } 91 92 func (m *StrHashMap) Cardinality() uint64 { 93 return m.hashMap.Cardinality() 94 } 95 96 // InsertValue insert a value, return true if it is new, otherwise false 97 // never handle null 98 func (m *StrHashMap) InsertValue(val any) (bool, error) { 99 defer func() { m.keys[0] = m.keys[0][:0] }() 100 switch v := val.(type) { 101 case uint8: 102 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 103 case uint16: 104 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 105 case uint32: 106 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 107 case uint64: 108 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 109 case int8: 110 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 111 case int16: 112 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 113 case int32: 114 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 115 case int64: 116 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 117 case float32: 118 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 119 case float64: 120 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 121 case []byte: 122 m.keys[0] = append(m.keys[0], v...) 123 case types.Date: 124 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 125 case types.Datetime: 126 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 127 case types.Timestamp: 128 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 129 case types.Decimal64: 130 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 131 case types.Decimal128: 132 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 133 case types.Uuid: 134 m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...) 135 case string: 136 m.keys[0] = append(m.keys[0], []byte(v)...) 137 } 138 if l := len(m.keys[0]); l < 16 { 139 m.keys[0] = append(m.keys[0], hashtable.StrKeyPadding[l:]...) 140 } 141 if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil { 142 return false, err 143 } 144 if m.values[0] > m.rows { 145 m.rows++ 146 return true, nil 147 } 148 return false, nil 149 } 150 151 // Insert a row from multiple columns into the hashmap, return true if it is new, otherwise false 152 func (m *StrHashMap) Insert(vecs []*vector.Vector, row int) (bool, error) { 153 defer func() { m.keys[0] = m.keys[0][:0] }() 154 m.encodeHashKeys(vecs, row, 1) 155 if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil { 156 return false, err 157 } 158 if m.values[0] > m.rows { 159 m.rows++ 160 return true, nil 161 } 162 return false, nil 163 } 164 165 func (m *StrHashMap) encodeHashKeys(vecs []*vector.Vector, start, count int) { 166 for _, vec := range vecs { 167 if vec.GetType().IsFixedLen() { 168 fillGroupStr(m, vec, count, vec.GetType().TypeSize(), start, 0, len(vecs)) 169 } else { 170 fillStringGroupStr(m, vec, count, start, len(vecs)) 171 } 172 } 173 for i := 0; i < count; i++ { 174 if l := len(m.keys[i]); l < 16 { 175 m.keys[i] = append(m.keys[i], hashtable.StrKeyPadding[l:]...) 176 } 177 } 178 } 179 180 // A NULL C 181 // 01A101C 9 bytes 182 // for non-NULL value, give 3 bytes, the first byte is always 0, the last two bytes are the length 183 // of this value,and then append the true bytes of the value 184 // for NULL value, just only one byte, give one byte(1) 185 // these are the rules of multi-cols 186 // for one col, just give the value bytes 187 func fillStringGroupStr(m *StrHashMap, vec *vector.Vector, n int, start int, lenCols int) { 188 area := vec.GetArea() 189 vs := vector.MustTCols[types.Varlena](vec) 190 if !vec.GetNulls().Any() { 191 for i := 0; i < n; i++ { 192 bytes := vs[i+start].GetByteSlice(area) 193 if lenCols > 1 { 194 // for "a","bc" and "ab","c", we need to distinct 195 // this is not null value 196 m.keys[i] = append(m.keys[i], 0) 197 // give the length 198 length := uint16(len(bytes)) 199 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 200 } 201 // append the ture value bytes 202 m.keys[i] = append(m.keys[i], bytes...) 203 } 204 } else { 205 nsp := vec.GetNulls() 206 for i := 0; i < n; i++ { 207 hasNull := nsp.Contains(uint64(i + start)) 208 if m.hasNull { 209 if hasNull { 210 m.keys[i] = append(m.keys[i], byte(1)) 211 } else { 212 bytes := vs[i+start].GetByteSlice(area) 213 if lenCols > 1 { 214 // for "a","bc" and "ab","c", we need to distinct 215 // this is not null value 216 m.keys[i] = append(m.keys[i], 0) 217 // give the length 218 length := uint16(len(bytes)) 219 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 220 } 221 // append the ture value bytes 222 m.keys[i] = append(m.keys[i], bytes...) 223 } 224 } else { 225 if hasNull { 226 m.zValues[i] = 0 227 continue 228 } 229 bytes := vs[i+start].GetByteSlice(area) 230 if lenCols > 1 { 231 // for "a","bc" and "ab","c", we need to distinct 232 // this is not null value 233 m.keys[i] = append(m.keys[i], 0) 234 // give the length 235 length := uint16(len(bytes)) 236 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 237 } 238 // append the ture value bytes 239 m.keys[i] = append(m.keys[i], bytes...) 240 } 241 } 242 } 243 } 244 245 func fillGroupStr(m *StrHashMap, vec *vector.Vector, n int, sz int, start int, scale int32, lenCols int) { 246 var data []byte 247 if !vec.IsConst() { 248 data = unsafe.Slice((*byte)(vector.GetPtrAt(vec, 0)), (n+start)*sz) 249 } else { 250 if vec.IsScalarNull() { 251 data = make([]byte, (n+start)*sz) 252 } else { 253 vec = vec.ConstExpand(false, m.m) 254 data = unsafe.Slice((*byte)(vector.GetPtrAt(vec, 0)), (n+start)*sz) 255 } 256 } 257 if !vec.GetNulls().Any() { 258 for i := 0; i < n; i++ { 259 bytes := data[(i+start)*sz : (i+start+1)*sz] 260 if lenCols > 1 { 261 // for "a","bc" and "ab","c", we need to distinct 262 // this is not null value 263 m.keys[i] = append(m.keys[i], 0) 264 // give the length 265 length := uint16(len(bytes)) 266 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 267 } 268 // append the ture value bytes 269 m.keys[i] = append(m.keys[i], bytes...) 270 } 271 } else { 272 nsp := vec.GetNulls() 273 for i := 0; i < n; i++ { 274 isNull := nsp.Contains(uint64(i + start)) 275 if m.hasNull { 276 if isNull { 277 m.keys[i] = append(m.keys[i], byte(1)) 278 } else { 279 bytes := data[(i+start)*sz : (i+start+1)*sz] 280 if lenCols > 1 { 281 // for "a","bc" and "ab","c", we need to distinct 282 // this is not null value 283 m.keys[i] = append(m.keys[i], 0) 284 // give the length 285 length := uint16(len(bytes)) 286 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 287 } 288 // append the ture value bytes 289 m.keys[i] = append(m.keys[i], bytes...) 290 } 291 } else { 292 if isNull { 293 m.zValues[i] = 0 294 continue 295 } 296 bytes := data[(i+start)*sz : (i+start+1)*sz] 297 if lenCols > 1 { 298 // for "a","bc" and "ab","c", we need to distinct 299 // this is not null value 300 m.keys[i] = append(m.keys[i], 0) 301 // give the length 302 length := uint16(len(bytes)) 303 m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...) 304 } 305 // append the ture value bytes 306 m.keys[i] = append(m.keys[i], bytes...) 307 } 308 } 309 } 310 }