github.com/matrixorigin/matrixone@v0.7.0/pkg/common/hashmap/strhashmap.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashmap
    16  
    17  import (
    18  	"unsafe"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    21  	"github.com/matrixorigin/matrixone/pkg/container/hashtable"
    22  	"github.com/matrixorigin/matrixone/pkg/container/types"
    23  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    24  )
    25  
    26  func init() {
    27  	OneInt64s = make([]int64, UnitLimit)
    28  	for i := range OneInt64s {
    29  		OneInt64s[i] = 1
    30  	}
    31  	OneUInt8s = make([]uint8, UnitLimit)
    32  	for i := range OneUInt8s {
    33  		OneUInt8s[i] = 1
    34  	}
    35  }
    36  
    37  func NewStrMap(hasNull bool, ibucket, nbucket uint64, m *mpool.MPool) (*StrHashMap, error) {
    38  	mp := &hashtable.StringHashMap{}
    39  	if err := mp.Init(m); err != nil {
    40  		return nil, err
    41  	}
    42  	return &StrHashMap{
    43  		m:             m,
    44  		hashMap:       mp,
    45  		hasNull:       hasNull,
    46  		ibucket:       ibucket,
    47  		nbucket:       nbucket,
    48  		values:        make([]uint64, UnitLimit),
    49  		zValues:       make([]int64, UnitLimit),
    50  		keys:          make([][]byte, UnitLimit),
    51  		strHashStates: make([][3]uint64, UnitLimit),
    52  	}, nil
    53  }
    54  
    55  func (m *StrHashMap) NewIterator() Iterator {
    56  	return &strHashmapIterator{
    57  		mp:      m,
    58  		m:       m.m,
    59  		ibucket: m.ibucket,
    60  		nbucket: m.nbucket,
    61  	}
    62  }
    63  
// HasNull reports the hasNull flag the map was created with. When the flag is
// set, the key encoders in this file write NULL values into the key as a
// single marker byte; when it is clear, they zero the row's zValues entry
// and append nothing for that column.
func (m *StrHashMap) HasNull() bool {
	return m.hasNull
}
    67  
// Free calls Free on the underlying hash table, handing it the memory pool
// the map was created with.
// NOTE(review): unlike Size, this does not guard against a nil hashMap.
func (m *StrHashMap) Free() {
	m.hashMap.Free(m.m)
}
    71  
// GroupCount returns the map's row counter, which is advanced by AddGroup,
// AddGroups and by Insert/InsertValue whenever they add a new key.
func (m *StrHashMap) GroupCount() uint64 {
	return m.rows
}
    75  
    76  func (m *StrHashMap) AddGroup() {
    77  	m.rows++
    78  }
    79  
    80  func (m *StrHashMap) AddGroups(rows uint64) {
    81  	m.rows += rows
    82  }
    83  
    84  func (m *StrHashMap) Size() int64 {
    85  	// TODO: add the size of the other StrHashMap parts
    86  	if m.hashMap == nil {
    87  		return 0
    88  	}
    89  	return m.hashMap.Size()
    90  }
    91  
// Cardinality returns the cardinality reported by the underlying hash table.
func (m *StrHashMap) Cardinality() uint64 {
	return m.hashMap.Cardinality()
}
    95  
    96  // InsertValue insert a value, return true if it is new, otherwise false
    97  // never handle null
    98  func (m *StrHashMap) InsertValue(val any) (bool, error) {
    99  	defer func() { m.keys[0] = m.keys[0][:0] }()
   100  	switch v := val.(type) {
   101  	case uint8:
   102  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   103  	case uint16:
   104  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   105  	case uint32:
   106  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   107  	case uint64:
   108  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   109  	case int8:
   110  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   111  	case int16:
   112  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   113  	case int32:
   114  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   115  	case int64:
   116  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   117  	case float32:
   118  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   119  	case float64:
   120  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   121  	case []byte:
   122  		m.keys[0] = append(m.keys[0], v...)
   123  	case types.Date:
   124  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   125  	case types.Datetime:
   126  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   127  	case types.Timestamp:
   128  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   129  	case types.Decimal64:
   130  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   131  	case types.Decimal128:
   132  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   133  	case types.Uuid:
   134  		m.keys[0] = append(m.keys[0], types.EncodeFixed(v)...)
   135  	case string:
   136  		m.keys[0] = append(m.keys[0], []byte(v)...)
   137  	}
   138  	if l := len(m.keys[0]); l < 16 {
   139  		m.keys[0] = append(m.keys[0], hashtable.StrKeyPadding[l:]...)
   140  	}
   141  	if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil {
   142  		return false, err
   143  	}
   144  	if m.values[0] > m.rows {
   145  		m.rows++
   146  		return true, nil
   147  	}
   148  	return false, nil
   149  }
   150  
   151  // Insert a row from multiple columns into the hashmap, return true if it is new, otherwise false
   152  func (m *StrHashMap) Insert(vecs []*vector.Vector, row int) (bool, error) {
   153  	defer func() { m.keys[0] = m.keys[0][:0] }()
   154  	m.encodeHashKeys(vecs, row, 1)
   155  	if err := m.hashMap.InsertStringBatch(m.strHashStates, m.keys[:1], m.values[:1], m.m); err != nil {
   156  		return false, err
   157  	}
   158  	if m.values[0] > m.rows {
   159  		m.rows++
   160  		return true, nil
   161  	}
   162  	return false, nil
   163  }
   164  
   165  func (m *StrHashMap) encodeHashKeys(vecs []*vector.Vector, start, count int) {
   166  	for _, vec := range vecs {
   167  		if vec.GetType().IsFixedLen() {
   168  			fillGroupStr(m, vec, count, vec.GetType().TypeSize(), start, 0, len(vecs))
   169  		} else {
   170  			fillStringGroupStr(m, vec, count, start, len(vecs))
   171  		}
   172  	}
   173  	for i := 0; i < count; i++ {
   174  		if l := len(m.keys[i]); l < 16 {
   175  			m.keys[i] = append(m.keys[i], hashtable.StrKeyPadding[l:]...)
   176  		}
   177  	}
   178  }
   179  
   180  // A NULL C
   181  // 01A101C 9 bytes
   182  // for non-NULL value, give 3 bytes, the first byte is always 0, the last two bytes are the length
   183  // of this value,and then append the true bytes of the value
   184  // for NULL value, just only one byte, give one byte(1)
   185  // these are the rules of multi-cols
   186  // for one col, just give the value bytes
   187  func fillStringGroupStr(m *StrHashMap, vec *vector.Vector, n int, start int, lenCols int) {
   188  	area := vec.GetArea()
   189  	vs := vector.MustTCols[types.Varlena](vec)
   190  	if !vec.GetNulls().Any() {
   191  		for i := 0; i < n; i++ {
   192  			bytes := vs[i+start].GetByteSlice(area)
   193  			if lenCols > 1 {
   194  				// for "a","bc" and "ab","c", we need to distinct
   195  				// this is not null value
   196  				m.keys[i] = append(m.keys[i], 0)
   197  				// give the length
   198  				length := uint16(len(bytes))
   199  				m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   200  			}
   201  			// append the ture value bytes
   202  			m.keys[i] = append(m.keys[i], bytes...)
   203  		}
   204  	} else {
   205  		nsp := vec.GetNulls()
   206  		for i := 0; i < n; i++ {
   207  			hasNull := nsp.Contains(uint64(i + start))
   208  			if m.hasNull {
   209  				if hasNull {
   210  					m.keys[i] = append(m.keys[i], byte(1))
   211  				} else {
   212  					bytes := vs[i+start].GetByteSlice(area)
   213  					if lenCols > 1 {
   214  						// for "a","bc" and "ab","c", we need to distinct
   215  						// this is not null value
   216  						m.keys[i] = append(m.keys[i], 0)
   217  						// give the length
   218  						length := uint16(len(bytes))
   219  						m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   220  					}
   221  					// append the ture value bytes
   222  					m.keys[i] = append(m.keys[i], bytes...)
   223  				}
   224  			} else {
   225  				if hasNull {
   226  					m.zValues[i] = 0
   227  					continue
   228  				}
   229  				bytes := vs[i+start].GetByteSlice(area)
   230  				if lenCols > 1 {
   231  					// for "a","bc" and "ab","c", we need to distinct
   232  					// this is not null value
   233  					m.keys[i] = append(m.keys[i], 0)
   234  					// give the length
   235  					length := uint16(len(bytes))
   236  					m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   237  				}
   238  				// append the ture value bytes
   239  				m.keys[i] = append(m.keys[i], bytes...)
   240  			}
   241  		}
   242  	}
   243  }
   244  
   245  func fillGroupStr(m *StrHashMap, vec *vector.Vector, n int, sz int, start int, scale int32, lenCols int) {
   246  	var data []byte
   247  	if !vec.IsConst() {
   248  		data = unsafe.Slice((*byte)(vector.GetPtrAt(vec, 0)), (n+start)*sz)
   249  	} else {
   250  		if vec.IsScalarNull() {
   251  			data = make([]byte, (n+start)*sz)
   252  		} else {
   253  			vec = vec.ConstExpand(false, m.m)
   254  			data = unsafe.Slice((*byte)(vector.GetPtrAt(vec, 0)), (n+start)*sz)
   255  		}
   256  	}
   257  	if !vec.GetNulls().Any() {
   258  		for i := 0; i < n; i++ {
   259  			bytes := data[(i+start)*sz : (i+start+1)*sz]
   260  			if lenCols > 1 {
   261  				// for "a","bc" and "ab","c", we need to distinct
   262  				// this is not null value
   263  				m.keys[i] = append(m.keys[i], 0)
   264  				// give the length
   265  				length := uint16(len(bytes))
   266  				m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   267  			}
   268  			// append the ture value bytes
   269  			m.keys[i] = append(m.keys[i], bytes...)
   270  		}
   271  	} else {
   272  		nsp := vec.GetNulls()
   273  		for i := 0; i < n; i++ {
   274  			isNull := nsp.Contains(uint64(i + start))
   275  			if m.hasNull {
   276  				if isNull {
   277  					m.keys[i] = append(m.keys[i], byte(1))
   278  				} else {
   279  					bytes := data[(i+start)*sz : (i+start+1)*sz]
   280  					if lenCols > 1 {
   281  						// for "a","bc" and "ab","c", we need to distinct
   282  						// this is not null value
   283  						m.keys[i] = append(m.keys[i], 0)
   284  						// give the length
   285  						length := uint16(len(bytes))
   286  						m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   287  					}
   288  					// append the ture value bytes
   289  					m.keys[i] = append(m.keys[i], bytes...)
   290  				}
   291  			} else {
   292  				if isNull {
   293  					m.zValues[i] = 0
   294  					continue
   295  				}
   296  				bytes := data[(i+start)*sz : (i+start+1)*sz]
   297  				if lenCols > 1 {
   298  					// for "a","bc" and "ab","c", we need to distinct
   299  					// this is not null value
   300  					m.keys[i] = append(m.keys[i], 0)
   301  					// give the length
   302  					length := uint16(len(bytes))
   303  					m.keys[i] = append(m.keys[i], unsafe.Slice((*byte)(unsafe.Pointer(&length)), 2)...)
   304  				}
   305  				// append the ture value bytes
   306  				m.keys[i] = append(m.keys[i], bytes...)
   307  			}
   308  		}
   309  	}
   310  }