github.com/matrixorigin/matrixone@v1.2.0/pkg/common/hashmap/inthashmap.go

// Copyright 2021 Matrix Origin
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package hashmap

import (
	"unsafe"

	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/hashtable"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
)

func init() {
	// zeroUint64 and zeroUint32 are shared all-zero slices used to reset the
	// per-batch key and key-offset buffers.
	zeroUint64 = make([]uint64, UnitLimit)
	zeroUint32 = make([]uint32, UnitLimit)
}

// NewIntHashMap creates an IntHashMap whose per-row keys are packed into a
// single uint64, backed by a hashtable.Int64HashMap allocated from m.
func NewIntHashMap(hasNull bool, ibucket, nbucket uint64, m *mpool.MPool) (*IntHashMap, error) {
	mp := &hashtable.Int64HashMap{}
	if err := mp.Init(m); err != nil {
		return nil, err
	}
	return &IntHashMap{
		m:       m,
		rows:    0,
		hasNull: hasNull,
		ibucket: ibucket,
		nbucket: nbucket,
		keys:    make([]uint64, UnitLimit),
		keyOffs: make([]uint32, UnitLimit),
		values:  make([]uint64, UnitLimit),
		zValues: make([]int64, UnitLimit),
		hashes:  make([]uint64, UnitLimit),
		hashMap: mp,
	}, nil
}

// NewIterator returns an iterator bound to this map.
func (m *IntHashMap) NewIterator() *intHashMapIterator {
	return &intHashMapIterator{
		mp:      m,
		m:       m.m,
		ibucket: m.ibucket,
		nbucket: m.nbucket,
	}
}

// HasNull reports whether the map encodes a null flag for each key column.
func (m *IntHashMap) HasNull() bool {
	return m.hasNull
}

// Free releases the underlying hash table back to the pool.
func (m *IntHashMap) Free() {
	m.hashMap.Free(m.m)
}

// PreAlloc reserves capacity in the underlying hash table for n keys.
func (m *IntHashMap) PreAlloc(n uint64, mp *mpool.MPool) error {
	return m.hashMap.ResizeOnDemand(int(n), mp)
}

// GroupCount returns the number of groups recorded so far.
func (m *IntHashMap) GroupCount() uint64 {
	return m.rows
}

// AddGroup increments the group count by one.
func (m *IntHashMap) AddGroup() {
	m.rows++
}

// AddGroups increments the group count by rows.
func (m *IntHashMap) AddGroups(rows uint64) {
	m.rows += rows
}

// Size reports the memory footprint of the underlying hash table in bytes.
func (m *IntHashMap) Size() int64 {
	// TODO: add the size of the other IntHashMap parts
	if m.hashMap == nil {
		return 0
	}
	return m.hashMap.Size()
}

// Cardinality returns the number of distinct keys in the underlying hash table.
func (m *IntHashMap) Cardinality() uint64 {
	return m.hashMap.Cardinality()
}
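
// The sketch below is hypothetical and not part of the original file: it shows
// how the bookkeeping helpers above are typically combined. Pre-allocate for
// an expected number of groups, bump the group counter as new groups are
// discovered by an iterator, and release the hash table when done.
func exampleIntHashMapLifecycle(pool *mpool.MPool, expected uint64) (uint64, error) {
	m, err := NewIntHashMap(true, 0, 0, pool)
	if err != nil {
		return 0, err
	}
	defer m.Free()
	// Reserve space up front to avoid repeated rehashing while inserting.
	if err := m.PreAlloc(expected, pool); err != nil {
		return 0, err
	}
	// In real use the caller calls AddGroup/AddGroups once per key that the
	// iterator reports as previously unseen; here we simply record expected.
	m.AddGroups(expected)
	return m.GroupCount(), nil
}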

// encodeHashKeys packs the key columns of vecs for rows [start, start+count)
// into the per-row key buffers. Fixed-size types of 1, 2, 4 or 8 bytes are
// copied at their native width; all other types are appended as raw bytes.
func (m *IntHashMap) encodeHashKeys(vecs []*vector.Vector, start, count int) {
	for _, vec := range vecs {
		switch vec.GetType().TypeSize() {
		case 1:
			fillKeys[uint8](m, vec, 1, start, count)
		case 2:
			fillKeys[uint16](m, vec, 2, start, count)
		case 4:
			fillKeys[uint32](m, vec, 4, start, count)
		case 8:
			fillKeys[uint64](m, vec, 8, start, count)
		default:
			if !vec.IsConst() && vec.GetArea() == nil {
				fillVarlenaKey(m, vec, start, count)
			} else {
				fillStrKey(m, vec, start, count)
			}
		}
	}
}
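
// Hypothetical helper, not part of the original file: it makes explicit the
// budget the fill functions below work against. Each fixed-size column
// contributes TypeSize bytes, plus one null-flag byte per column when hasNull
// is true, and the packed row key must fit in the single 8-byte slot of
// m.keys[i] for an IntHashMap to be applicable to a column set.
func exampleEncodedKeyWidth(typeSizes []int, hasNull bool) (int, bool) {
	width := 0
	for _, size := range typeSizes {
		width += size
		if hasNull {
			width++ // one flag byte per column: 0 = present, 1 = null
		}
	}
	return width, width <= 8
}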

// Dup returns a deep copy of the map allocated from pool: the scratch buffers
// are copied and the underlying hash table, if any, is duplicated.
func (m *IntHashMap) Dup(pool *mpool.MPool) *IntHashMap {
	val := &IntHashMap{
		hasNull: m.hasNull,
		rows:    m.rows,

		keys:    make([]uint64, len(m.keys)),
		keyOffs: make([]uint32, len(m.keyOffs)),
		values:  make([]uint64, len(m.values)),
		zValues: make([]int64, len(m.zValues)),
		hashes:  make([]uint64, len(m.hashes)),

		ibucket: m.ibucket,
		nbucket: m.nbucket,

		m: pool,
	}
	copy(val.keys, m.keys)
	copy(val.keyOffs, m.keyOffs)
	copy(val.values, m.values)
	copy(val.zValues, m.zValues)
	copy(val.hashes, m.hashes)
	if m.hashMap != nil {
		val.hashMap = m.hashMap.Dup()
	}

	return val
}
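
// Hypothetical sketch, not part of the original file: Dup produces an
// independent copy (scratch buffers are copied and the underlying hash table
// is duplicated), so the copy can be mutated and freed without affecting the
// original map.
func exampleDupIsolation(m *IntHashMap, pool *mpool.MPool) uint64 {
	cp := m.Dup(pool)
	defer cp.Free()
	cp.AddGroup()
	// The original group count is unchanged by mutations on the copy.
	return m.GroupCount()
}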

// fillKeys appends one fixed-size column to the packed row keys. When hasNull
// is true each value is prefixed with a flag byte (0 = present, 1 = null, in
// which case no value bytes follow); when hasNull is false, null rows are
// instead marked by setting zValues[i] to 0.
func fillKeys[T types.FixedSizeT](m *IntHashMap, vec *vector.Vector, size uint32, start int, n int) {
	keys := m.keys
	keyOffs := m.keyOffs
	if vec.IsConstNull() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
				keyOffs[i]++
			}
		} else {
			for i := 0; i < n; i++ {
				m.zValues[i] = 0
			}
		}
	} else if vec.IsConst() {
		ptr := vector.GetPtrAt[T](vec, 0)
		// Note: an earlier implementation of this branch could write out of
		// bounds of the key buffer.
		if !m.hasNull {
			for i := 0; i < n; i++ {
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = *ptr
			}
			uint32AddScalar(size, keyOffs[:n], keyOffs[:n])
		} else {
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i]+1)) = *ptr
			}
			uint32AddScalar(1+size, keyOffs[:n], keyOffs[:n])
		}
	} else if !vec.GetNulls().Any() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				ptr := vector.GetPtrAt[T](vec, int64(i+start))
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i]+1)) = *ptr
			}
			uint32AddScalar(1+size, keyOffs[:n], keyOffs[:n])
		} else {
			for i := 0; i < n; i++ {
				ptr := vector.GetPtrAt[T](vec, int64(i+start))
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = *ptr
			}
			uint32AddScalar(size, keyOffs[:n], keyOffs[:n])
		}
	} else {
		nsp := vec.GetNulls()
		if m.hasNull {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
					keyOffs[i]++
				} else {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
					ptr := vector.GetPtrAt[T](vec, int64(i+start))
					*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i]+1)) = *ptr
					keyOffs[i] += 1 + size
				}
			}
		} else {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					m.zValues[i] = 0
					continue
				}
				ptr := vector.GetPtrAt[T](vec, int64(i+start))
				*(*T)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = *ptr
				keyOffs[i] += size
			}
		}
	}
}
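
// Hypothetical illustration, not part of the original file, of the layout
// fillKeys produces when hasNull is true: a one-byte flag (0 = present,
// 1 = null) followed by the fixed-size value, packed into the row's uint64
// key slot.
func examplePackNullableInt32(v int32) uint64 {
	var key uint64
	// flag byte at offset 0: value is present
	*(*int8)(unsafe.Pointer(&key)) = 0
	// value bytes start at offset 1
	*(*int32)(unsafe.Add(unsafe.Pointer(&key), 1)) = v
	return key
}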

// fillVarlenaKey appends a variable-length column whose data is stored inline
// in the varlena headers (no separate area) to the packed row keys.
func fillVarlenaKey(m *IntHashMap, vec *vector.Vector, start int, n int) {
	keys := m.keys
	keyOffs := m.keyOffs
	vcol, _ := vector.MustVarlenaRawData(vec)
	if !vec.GetNulls().Any() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				v := vcol[i+start].ByteSlice()
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
				m.keyOffs[i] += uint32(len(v) + 1)
			}
		} else {
			for i := 0; i < n; i++ {
				v := vcol[i+start].ByteSlice()
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	} else {
		nsp := vec.GetNulls()
		if m.hasNull {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
					keyOffs[i]++
				} else {
					v := vcol[i+start].ByteSlice()
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
					copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
					m.keyOffs[i] += uint32(len(v) + 1)
				}
			}
		} else {
			for i := 0; i < n; i++ {
				if nsp.Contains(uint64(i + start)) {
					m.zValues[i] = 0
					continue
				}
				v := vcol[i+start].ByteSlice()
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	}
}

// fillStrKey appends a variable-length column to the packed row keys using
// GetBytesAt; const-null vectors are handled explicitly.
func fillStrKey(m *IntHashMap, vec *vector.Vector, start int, n int) {
	keys := m.keys
	keyOffs := m.keyOffs
	if vec.IsConstNull() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
				keyOffs[i]++
			}
		} else {
			for i := 0; i < n; i++ {
				m.zValues[i] = 0
			}
		}
	} else if !vec.GetNulls().Any() {
		if m.hasNull {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
				m.keyOffs[i] += uint32(len(v) + 1)
			}
		} else {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	} else {
		nsp := vec.GetNulls()
		if m.hasNull {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				if nsp.Contains(uint64(i + start)) {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 1
					keyOffs[i]++
				} else {
					*(*int8)(unsafe.Add(unsafe.Pointer(&keys[i]), keyOffs[i])) = 0
					copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]+1:], v)
					m.keyOffs[i] += uint32(len(v) + 1)
				}
			}
		} else {
			for i := 0; i < n; i++ {
				v := vec.GetBytesAt(i + start)
				if nsp.Contains(uint64(i + start)) {
					m.zValues[i] = 0
					continue
				}
				copy(unsafe.Slice((*byte)(unsafe.Pointer(&keys[i])), 8)[m.keyOffs[i]:], v)
				m.keyOffs[i] += uint32(len(v))
			}
		}
	}
}
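
// Hypothetical illustration, not part of the original file, of how
// fillVarlenaKey and fillStrKey pack a variable-length value: the bytes are
// copied directly into the 8-byte backing storage of the row's uint64 key
// slot, which is why the combined encoded width of all key columns must stay
// within 8 bytes for the IntHashMap to be used.
func examplePackShortBytes(v []byte) uint64 {
	var key uint64
	copy(unsafe.Slice((*byte)(unsafe.Pointer(&key)), 8), v)
	return key
}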

// uint32AddScalar adds x to every element of ys, writing the results into rs
// and returning rs.
func uint32AddScalar(x uint32, ys, rs []uint32) []uint32 {
	for i, y := range ys {
		rs[i] = x + y
	}
	return rs
}