github.com/matrixorigin/matrixone@v0.7.0/pkg/container/index/dict/dict.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package dict
    16  
import (
	"math"

	"github.com/matrixorigin/matrixone/pkg/common/mpool"
	"github.com/matrixorigin/matrixone/pkg/container/types"
	"github.com/matrixorigin/matrixone/pkg/container/vector"
)
    22  
    23  type Dict struct {
    24  	typ types.Type
    25  
    26  	m      *mpool.MPool
    27  	idx    reverseIndex
    28  	unique *vector.Vector
    29  
    30  	ref int
    31  }
    32  
    33  func New(typ types.Type, m *mpool.MPool) (*Dict, error) {
    34  	d := &Dict{
    35  		typ: typ,
    36  		m:   m,
    37  	}
    38  
    39  	var idx reverseIndex
    40  	var err error
    41  
    42  	if d.fixed() { // check whether the type of data is fixed-length or variable-length
    43  		if idx, err = newFixedReverseIndex(m); err != nil {
    44  			return nil, err
    45  		}
    46  		d.unique = vector.New(types.T_uint64.ToType())
    47  	} else {
    48  		if idx, err = newVarReverseIndex(m); err != nil {
    49  			return nil, err
    50  		}
    51  		d.unique = vector.New(types.T_varchar.ToType())
    52  	}
    53  
    54  	d.idx = idx
    55  	d.ref = 1
    56  	return d, nil
    57  }
    58  
    59  func (d *Dict) GetUnique() *vector.Vector {
    60  	return d.unique
    61  }
    62  
    63  func (d *Dict) Cardinality() uint64 {
    64  	return uint64(d.unique.Length())
    65  }
    66  
    67  func (d *Dict) Dup() *Dict {
    68  	d.ref++
    69  	return d
    70  }
    71  
    72  func (d *Dict) InsertBatch(data *vector.Vector) ([]uint16, error) {
    73  	var ks any
    74  	if d.fixed() {
    75  		ks = d.encodeFixedData(data)
    76  	} else {
    77  		ks = d.encodeVarData(data)
    78  	}
    79  
    80  	values, err := d.idx.insert(ks)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	ips /* insertion points */ := make([]uint16, len(values))
    85  	for i, v := range values {
    86  		if int(v) > d.unique.Length() {
    87  			if d.fixed() {
    88  				err = d.unique.Append(ks.([]uint64)[i], false, d.m)
    89  			} else {
    90  				err = d.unique.Append(ks.([][]byte)[i], false, d.m)
    91  			}
    92  			if err != nil {
    93  				return nil, err
    94  			}
    95  		}
    96  		ips[i] = uint16(v)
    97  	}
    98  	return ips, nil
    99  }
   100  
   101  func (d *Dict) FindBatch(data *vector.Vector) []uint16 {
   102  	var ks any
   103  	if d.fixed() {
   104  		ks = d.encodeFixedData(data)
   105  	} else {
   106  		ks = d.encodeVarData(data)
   107  	}
   108  	values := d.idx.find(ks)
   109  
   110  	poses := make([]uint16, len(values))
   111  	for i, v := range values {
   112  		poses[i] = uint16(v)
   113  	}
   114  	return poses
   115  }
   116  
   117  func (d *Dict) FindData(pos uint16) *vector.Vector {
   118  	if d.fixed() {
   119  		return d.findFixedData(int(pos))
   120  	} else {
   121  		return d.findVarData(int(pos))
   122  	}
   123  }
   124  
   125  func (d *Dict) Free() {
   126  	if d.ref == 0 {
   127  		return
   128  	}
   129  	d.ref--
   130  	if d.ref > 0 {
   131  		return
   132  	}
   133  
   134  	if d.unique != nil {
   135  		d.unique.Free(d.m)
   136  	}
   137  	if d.idx != nil {
   138  		d.idx.free()
   139  	}
   140  }
   141  
   142  func (d *Dict) fixed() bool { return !d.typ.IsString() }
   143  
   144  func (d *Dict) encodeFixedData(data *vector.Vector) []uint64 {
   145  	us := make([]uint64, data.Length())
   146  	switch d.typ.Oid {
   147  	case types.T_bool:
   148  		col := vector.MustTCols[bool](data)
   149  		for i, v := range col {
   150  			if v {
   151  				us[i] = 1
   152  			}
   153  		}
   154  	case types.T_int32:
   155  		col := vector.MustTCols[int32](data)
   156  		for i, v := range col {
   157  			us[i] = uint64(v)
   158  		}
   159  	case types.T_int64:
   160  		col := vector.MustTCols[int64](data)
   161  		for i, v := range col {
   162  			us[i] = uint64(v)
   163  		}
   164  	case types.T_uint32:
   165  		col := vector.MustTCols[uint32](data)
   166  		for i, v := range col {
   167  			us[i] = uint64(v)
   168  		}
   169  	case types.T_uint64:
   170  		copy(us, vector.MustTCols[uint64](data))
   171  	case types.T_float32:
   172  		col := vector.MustTCols[float32](data)
   173  		for i, v := range col {
   174  			us[i] = uint64(v)
   175  		}
   176  	case types.T_float64:
   177  		col := vector.MustTCols[float64](data)
   178  		for i, v := range col {
   179  			us[i] = uint64(v)
   180  		}
   181  	case types.T_decimal64:
   182  		col := vector.MustTCols[types.Decimal64](data)
   183  		for i, v := range col {
   184  			us[i] = types.DecodeUint64(types.EncodeDecimal64(&v))
   185  		}
   186  	case types.T_date:
   187  		col := vector.MustTCols[types.Date](data)
   188  		for i, v := range col {
   189  			us[i] = uint64(v)
   190  		}
   191  	case types.T_datetime:
   192  		col := vector.MustTCols[types.Datetime](data)
   193  		for i, v := range col {
   194  			us[i] = uint64(v)
   195  		}
   196  	case types.T_timestamp:
   197  		col := vector.MustTCols[types.Timestamp](data)
   198  		for i, v := range col {
   199  			us[i] = uint64(v)
   200  		}
   201  	}
   202  	return us
   203  }
   204  
   205  func (d *Dict) encodeVarData(data *vector.Vector) [][]byte {
   206  	return vector.GetBytesVectorValues(data)
   207  }
   208  
   209  func (d *Dict) findFixedData(pos int) *vector.Vector {
   210  	v := vector.NewConst(d.typ, 1)
   211  	data := d.getFixedData(pos)
   212  	switch d.typ.Oid {
   213  	case types.T_bool:
   214  		val := false
   215  		if data == 1 {
   216  			val = true
   217  		}
   218  		vector.MustTCols[bool](v)[0] = val
   219  	case types.T_int32:
   220  		vector.MustTCols[int32](v)[0] = int32(data)
   221  	case types.T_int64:
   222  		vector.MustTCols[int64](v)[0] = int64(data)
   223  	case types.T_uint32:
   224  		vector.MustTCols[uint32](v)[0] = uint32(data)
   225  	case types.T_uint64:
   226  		vector.MustTCols[uint64](v)[0] = uint64(data)
   227  	case types.T_float32:
   228  		vector.MustTCols[float32](v)[0] = float32(data)
   229  	case types.T_float64:
   230  		vector.MustTCols[float64](v)[0] = float64(data)
   231  	case types.T_decimal64:
   232  		val := types.DecodeDecimal64(types.EncodeUint64(&data))
   233  		vector.MustTCols[types.Decimal64](v)[0] = val
   234  	case types.T_date:
   235  		vector.MustTCols[types.Date](v)[0] = types.Date(data)
   236  	case types.T_datetime:
   237  		vector.MustTCols[types.Datetime](v)[0] = types.Datetime(data)
   238  	case types.T_timestamp:
   239  		vector.MustTCols[types.Timestamp](v)[0] = types.Timestamp(data)
   240  	}
   241  	return v
   242  }
   243  
   244  func (d *Dict) findVarData(pos int) *vector.Vector {
   245  	return vector.NewConstBytes(d.typ, 1, d.getVarData(pos), d.m)
   246  }
   247  
   248  func (d *Dict) getFixedData(n int) uint64 {
   249  	return vector.MustTCols[uint64](d.unique)[n-1]
   250  }
   251  
   252  func (d *Dict) getVarData(n int) []byte {
   253  	return d.unique.GetBytes(int64(n - 1))
   254  }