github.com/matrixorigin/matrixone@v0.7.0/pkg/container/index/low_cardinality_index.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package index
    16  
    17  import (
    18  	"math"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    21  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    22  	"github.com/matrixorigin/matrixone/pkg/container/index/dict"
    23  	"github.com/matrixorigin/matrixone/pkg/container/nulls"
    24  	"github.com/matrixorigin/matrixone/pkg/container/types"
    25  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    26  )
    27  
    28  const (
    29  	MaxLowCardinality = math.MaxUint16 + 1
    30  )
    31  
    32  var (
    33  	ErrNotSupported = moerr.NewNotSupportedNoCtx("the type is not supported for low cardinality index")
    34  )
    35  
    36  type LowCardinalityIndex struct {
    37  	typ types.Type
    38  
    39  	m    *mpool.MPool
    40  	dict *dict.Dict
    41  	// poses is the positions of original data in the dictionary.
    42  	// Currently, the type of poses[i] is `T_uint16` which means
    43  	// the max cardinality of LowCardinalityIndex is 65536.
    44  	// The position of `null` value is 0.
    45  	poses *vector.Vector
    46  
    47  	ref int
    48  }
    49  
    50  func New(typ types.Type, m *mpool.MPool) (*LowCardinalityIndex, error) {
    51  	if typ.Oid == types.T_decimal128 || typ.Oid == types.T_json {
    52  		return nil, ErrNotSupported
    53  	}
    54  
    55  	d, err := dict.New(typ, m)
    56  	if err != nil {
    57  		return nil, err
    58  	}
    59  	return &LowCardinalityIndex{
    60  		typ:   typ,
    61  		m:     m,
    62  		dict:  d,
    63  		poses: vector.New(types.T_uint16.ToType()),
    64  		ref:   1,
    65  	}, nil
    66  }
    67  
    68  func (idx *LowCardinalityIndex) GetPoses() *vector.Vector {
    69  	return idx.poses
    70  }
    71  
    72  func (idx *LowCardinalityIndex) GetDict() *dict.Dict {
    73  	return idx.dict
    74  }
    75  
    76  func (idx *LowCardinalityIndex) Dup() *LowCardinalityIndex {
    77  	idx.ref++
    78  	return idx
    79  }
    80  
    81  func (idx *LowCardinalityIndex) DupEmpty() *LowCardinalityIndex {
    82  	return &LowCardinalityIndex{
    83  		typ:   idx.typ,
    84  		m:     idx.m,
    85  		dict:  idx.dict.Dup(),
    86  		poses: vector.New(types.T_uint16.ToType()),
    87  		ref:   1,
    88  	}
    89  }
    90  
    91  func (idx *LowCardinalityIndex) InsertBatch(data *vector.Vector) error {
    92  	originalLen := data.Length()
    93  	var sels []int64
    94  	if nulls.Any(data.Nsp) {
    95  		sels = make([]int64, 0, originalLen)
    96  		for i := 0; i < originalLen; i++ {
    97  			if !nulls.Contains(data.Nsp, uint64(i)) {
    98  				sels = append(sels, int64(i))
    99  			}
   100  		}
   101  	}
   102  
   103  	var ips []uint16
   104  	var err error
   105  	if sels != nil {
   106  		if err = vector.Shuffle(data, sels, idx.m); err != nil {
   107  			return err
   108  		}
   109  
   110  		values, err := idx.dict.InsertBatch(data)
   111  		if err != nil {
   112  			return err
   113  		}
   114  
   115  		i := 0
   116  		ips = make([]uint16, originalLen)
   117  		for j := 0; j < originalLen; j++ {
   118  			if i < len(sels) && int64(j) == sels[i] {
   119  				ips[j] = values[i]
   120  				i++
   121  			} else {
   122  				ips[j] = 0
   123  			}
   124  		}
   125  	} else {
   126  		if ips, err = idx.dict.InsertBatch(data); err != nil {
   127  			return err
   128  		}
   129  	}
   130  
   131  	return vector.AppendFixed(idx.poses, ips, idx.m)
   132  }
   133  
   134  // Encode uses the dictionary of the current index to encode the original data.
   135  func (idx *LowCardinalityIndex) Encode(dst, src *vector.Vector) error {
   136  	poses := idx.dict.FindBatch(src)
   137  	return vector.AppendFixed(dst, poses, idx.m)
   138  }
   139  
   140  func (idx *LowCardinalityIndex) Free() {
   141  	if idx.ref == 0 {
   142  		return
   143  	}
   144  	idx.ref--
   145  	if idx.ref > 0 {
   146  		return
   147  	}
   148  
   149  	idx.poses.Free(idx.m)
   150  	idx.dict.Free()
   151  }