github.com/matrixorigin/matrixone@v1.2.0/pkg/common/hashmap/strhashmap.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashmap
    16  
    17  import (
    18  	"unsafe"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    21  	"github.com/matrixorigin/matrixone/pkg/container/hashtable"
    22  	"github.com/matrixorigin/matrixone/pkg/container/types"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  )
    25  
    26  func init() {
    27  	OneInt64s = make([]int64, UnitLimit)
    28  	for i := range OneInt64s {
    29  		OneInt64s[i] = 1
    30  	}
    31  	OneUInt8s = make([]uint8, UnitLimit)
    32  	for i := range OneUInt8s {
    33  		OneUInt8s[i] = 1
    34  	}
    35  }
    36  
    37  func NewStrMap(hasNull bool, ibucket, nbucket uint64, m *mpool.MPool) (*StrHashMap, error) {
    38  	mp := &hashtable.StringHashMap{}
    39  	if err := mp.Init(m); err != nil {
    40  		return nil, err
    41  	}
    42  	return &StrHashMap{
    43  		m:             m,
    44  		hashMap:       mp,
    45  		hasNull:       hasNull,
    46  		ibucket:       ibucket,
    47  		nbucket:       nbucket,
    48  		values:        make([]uint64, UnitLimit),
    49  		zValues:       make([]int64, UnitLimit),
    50  		keys:          make([][]byte, UnitLimit),
    51  		strHashStates: make([][3]uint64, UnitLimit),
    52  	}, nil
    53  }
    54  
    55  func (m *StrHashMap) NewIterator() Iterator {
    56  	return &strHashmapIterator{
    57  		mp:      m,
    58  		m:       m.m,
    59  		ibucket: m.ibucket,
    60  		nbucket: m.nbucket,
    61  	}
    62  }
    63  
    64  func (m *StrHashMap) HasNull() bool {
    65  	return m.hasNull
    66  }
    67  
    68  func (m *StrHashMap) Free() {
    69  	m.hashMap.Free(m.m)
    70  }
    71  
    72  func (m *StrHashMap) PreAlloc(n uint64, mp *mpool.MPool) error {
    73  	return m.hashMap.ResizeOnDemand(n, mp)
    74  }
    75  
    76  func (m *StrHashMap) GroupCount() uint64 {
    77  	return m.rows
    78  }
    79  
    80  func (m *StrHashMap) AddGroup() {
    81  	m.rows++
    82  }
    83  
    84  func (m *StrHashMap) AddGroups(rows uint64) {
    85  	m.rows += rows
    86  }
    87  
    88  func (m *StrHashMap) Size() int64 {
    89  	// TODO: add the size of the other StrHashMap parts
    90  	if m.hashMap == nil {
    91  		return 0
    92  	}
    93  	return m.hashMap.Size()
    94  }
    95  
    96  func (m *StrHashMap) Cardinality() uint64 {
    97  	return m.hashMap.Cardinality()
    98  }
    99  
   100  // InsertValue insert a value, return true if it is new, otherwise false
   101  // never handle null
   102  func (m *StrHashMap) InsertValue(val any) (bool, error) {
   103  	defer func() { m.keys[0] = m.keys[0][:0] }()
   104  	switch v := val.(type) {
   105  	case uint8:
   106  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   107  	case uint16:
   108  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   109  	case uint32:
   110  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   111  	case uint64:
   112  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   113  	case int8:
   114  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   115  	case int16:
   116  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   117  	case int32:
   118  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   119  	case int64:
   120  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   121  	case float32:
   122  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   123  	case float64:
   124  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   125  	case []byte:
   126  		length := uint16(len(v))
   127  		m.keys[0] = append(m.keys[0], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   128  		m.keys[0] = append(m.keys[0], v...)
   129  	case types.Date:
   130  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   131  	case types.Datetime:
   132  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   133  	case types.Timestamp:
   134  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   135  	case types.Decimal64:
   136  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   137  	case types.Decimal128:
   138  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   139  	case types.Uuid:
   140  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   141  	case string:
   142  		m.keys[0] = append(m.keys[0], []byte(v)...)
   143  	}
   144  	if l := len(m.keys[0]); l < 16 {
   145  		m.keys[0] = append(m.keys[0], hashtable.StrKeyPadding[l:]...)
   146  	}
   147  	if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil {
   148  		return false, err
   149  	}
   150  	if m.values[0] > m.rows {
   151  		m.rows++
   152  		return true, nil
   153  	}
   154  	return false, nil
   155  }
   156  
   157  // Insert a row from multiple columns into the hashmap, return true if it is new, otherwise false
   158  func (m *StrHashMap) Insert(vecs []*vector.Vector, row int) (bool, error) {
   159  	defer func() { m.keys[0] = m.keys[0][:0] }()
   160  	m.encodeHashKeys(vecs, row, 1)
   161  	if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil {
   162  		return false, err
   163  	}
   164  	if m.values[0] > m.rows {
   165  		m.rows++
   166  		return true, nil
   167  	}
   168  	return false, nil
   169  }
   170  
   171  func (m *StrHashMap) encodeHashKeys(vecs []*vector.Vector, start, count int) {
   172  	for _, vec := range vecs {
   173  		if vec.GetType().IsFixedLen() {
   174  			fillGroupStr(m, vec, count, vec.GetType().TypeSize(), start, 0, len(vecs))
   175  		} else {
   176  			fillStringGroupStr(m, vec, count, start, len(vecs))
   177  		}
   178  	}
   179  	for i := 0; i < count; i++ {
   180  		if l := len(m.keys[i]); l < 16 {
   181  			m.keys[i] = append(m.keys[i], hashtable.StrKeyPadding[l:]...)
   182  		}
   183  	}
   184  }
   185  
   186  // A NULL C
   187  // 01A101C 9 bytes
   188  // for non-NULL value, give 3 bytes, the first byte is always 0, the last two bytes are the length
   189  // of this value,and then append the true bytes of the value
   190  // for NULL value, just only one byte, give one byte(1)
   191  // these are the rules of multi-cols
   192  // for one col, just give the value bytes
   193  func fillStringGroupStr(m *StrHashMap, vec *vector.Vector, n int, start int, lenCols int) {
   194  	if vec.IsConstNull() {
   195  		if m.hasNull {
   196  			for i := 0; i < n; i++ {
   197  				m.keys[i] = append(m.keys[i], byte(1))
   198  			}
   199  		} else {
   200  			for i := 0; i < n; i++ {
   201  				m.zValues[i] = 0
   202  			}
   203  		}
   204  		return
   205  	}
   206  	if !vec.GetNulls().Any() {
   207  		if m.hasNull {
   208  			for i := 0; i < n; i++ {
   209  				bytes := vec.GetBytesAt(i + start)
   210  				// for "a","bc" and "ab","c", we need to distinct
   211  				// this is not null value
   212  				m.keys[i] = append(m.keys[i], 0)
   213  				// give the length
   214  				length := uint16(len(bytes))
   215  				m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   216  				// append the ture value bytes
   217  				m.keys[i] = append(m.keys[i], bytes...)
   218  			}
   219  		} else {
   220  			for i := 0; i < n; i++ {
   221  				bytes := vec.GetBytesAt(i + start)
   222  				// for "a","bc" and "ab","c", we need to distinct
   223  				// give the length
   224  				length := uint16(len(bytes))
   225  				m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   226  				// append the ture value bytes
   227  				m.keys[i] = append(m.keys[i], bytes...)
   228  			}
   229  		}
   230  	} else {
   231  		nsp := vec.GetNulls()
   232  		for i := 0; i < n; i++ {
   233  			hasNull := nsp.Contains(uint64(i + start))
   234  			if m.hasNull {
   235  				if hasNull {
   236  					m.keys[i] = append(m.keys[i], byte(1))
   237  				} else {
   238  					bytes := vec.GetBytesAt(i + start)
   239  					// for "a","bc" and "ab","c", we need to distinct
   240  					// this is not null value
   241  					m.keys[i] = append(m.keys[i], 0)
   242  					// give the length
   243  					length := uint16(len(bytes))
   244  					m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   245  					// append the ture value bytes
   246  					m.keys[i] = append(m.keys[i], bytes...)
   247  				}
   248  			} else {
   249  				if hasNull {
   250  					m.zValues[i] = 0
   251  					continue
   252  				}
   253  				bytes := vec.GetBytesAt(i + start)
   254  				// for "a","bc" and "ab","c", we need to distinct
   255  				// give the length
   256  				length := uint16(len(bytes))
   257  				m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   258  				// append the ture value bytes
   259  				m.keys[i] = append(m.keys[i], bytes...)
   260  			}
   261  		}
   262  	}
   263  }
   264  
   265  func fillGroupStr(m *StrHashMap, vec *vector.Vector, n int, sz int, start int, scale int32, lenCols int) {
   266  	if vec.IsConstNull() {
   267  		if m.hasNull {
   268  			for i := 0; i < n; i++ {
   269  				m.keys[i] = append(m.keys[i], byte(1))
   270  			}
   271  		} else {
   272  			for i := 0; i < n; i++ {
   273  				m.zValues[i] = 0
   274  			}
   275  		}
   276  		return
   277  	}
   278  	if vec.IsConst() {
   279  		data := unsafe.Slice(vector.GetPtrAt[byte](vec, 0), sz)
   280  		if m.hasNull {
   281  			for i := 0; i < n; i++ {
   282  				m.keys[i] = append(m.keys[i], 0)
   283  				m.keys[i] = append(m.keys[i], data...)
   284  			}
   285  		} else {
   286  			for i := 0; i < n; i++ {
   287  				m.keys[i] = append(m.keys[i], data...)
   288  			}
   289  		}
   290  		return
   291  	}
   292  	data := unsafe.Slice(vector.GetPtrAt[byte](vec, 0), (n+start)*sz)
   293  	if !vec.GetNulls().Any() {
   294  		if m.hasNull {
   295  			for i := 0; i < n; i++ {
   296  				bytes := data[(i+start)*sz : (i+start+1)*sz]
   297  				m.keys[i] = append(m.keys[i], 0)
   298  				m.keys[i] = append(m.keys[i], bytes...)
   299  			}
   300  		} else {
   301  			for i := 0; i < n; i++ {
   302  				bytes := data[(i+start)*sz : (i+start+1)*sz]
   303  				m.keys[i] = append(m.keys[i], bytes...)
   304  			}
   305  		}
   306  	} else {
   307  		nsp := vec.GetNulls()
   308  		for i := 0; i < n; i++ {
   309  			isNull := nsp.Contains(uint64(i + start))
   310  			if m.hasNull {
   311  				if isNull {
   312  					m.keys[i] = append(m.keys[i], 1)
   313  				} else {
   314  					bytes := data[(i+start)*sz : (i+start+1)*sz]
   315  					m.keys[i] = append(m.keys[i], 0)
   316  					m.keys[i] = append(m.keys[i], bytes...)
   317  				}
   318  			} else {
   319  				if isNull {
   320  					m.zValues[i] = 0
   321  					continue
   322  				}
   323  				bytes := data[(i+start)*sz : (i+start+1)*sz]
   324  				m.keys[i] = append(m.keys[i], bytes...)
   325  			}
   326  		}
   327  	}
   328  }
   329  
   330  func (m *StrHashMap) Dup(pool *mpool.MPool) *StrHashMap {
   331  	val := &StrHashMap{
   332  		hasNull: m.hasNull,
   333  		rows:    m.rows,
   334  
   335  		keys:          make([][]byte, len(m.keys)),
   336  		values:        make([]uint64, len(m.values)),
   337  		zValues:       make([]int64, len(m.zValues)),
   338  		strHashStates: make([][3]uint64, len(m.strHashStates)),
   339  
   340  		ibucket: m.ibucket,
   341  		nbucket: m.nbucket,
   342  
   343  		m: pool,
   344  	}
   345  	copy(val.values, m.values)
   346  	copy(val.zValues, m.zValues)
   347  	copy(val.strHashStates, m.strHashStates)
   348  	for i, key := range m.keys {
   349  		val.keys[i] = make([]byte, len(key))
   350  		copy(val.keys[i], key)
   351  	}
   352  	if m.hashMap != nil {
   353  		val.hashMap = m.hashMap.Dup()
   354  	}
   355  	return val
   356  }