github.com/matrixorigin/matrixone@v0.7.0/pkg/vm/engine/tae/txn/txnimpl/index.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package txnimpl
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/txn/txnbase"
    19  	"io"
    20  	"sync"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    23  	"github.com/matrixorigin/matrixone/pkg/container/types"
    24  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    26  )
    27  
    28  type TableIndex interface {
    29  	io.Closer
    30  	BatchDedup(string, containers.Vector) error
    31  	BatchInsert(string, containers.Vector, int, int, uint32, bool) error
    32  	Insert(any, uint32) error
    33  	Delete(any) error
    34  	Search(any) (uint32, error)
    35  	Name() string
    36  	Count() int
    37  	KeyToVector(types.Type) containers.Vector
    38  	KeyToVectors(types.Type) []containers.Vector
    39  }
    40  
    41  type simpleTableIndex struct {
    42  	sync.RWMutex
    43  	tree map[any]uint32
    44  }
    45  
    46  func NewSimpleTableIndex() *simpleTableIndex {
    47  	return &simpleTableIndex{
    48  		tree: make(map[any]uint32),
    49  	}
    50  }
    51  
    52  func DedupOp[T comparable](
    53  	t types.Type,
    54  	attr string,
    55  	vs any,
    56  	tree map[any]uint32) (err error) {
    57  	vals := vs.([]T)
    58  	for _, v := range vals {
    59  		if _, ok := tree[v]; ok {
    60  			entry := common.TypeStringValue(t, v)
    61  			return moerr.NewDuplicateEntryNoCtx(entry, attr)
    62  		}
    63  	}
    64  	return
    65  }
    66  
    67  func InsertOp[T comparable](
    68  	t types.Type,
    69  	attr string,
    70  	input any,
    71  	start, count int,
    72  	fromRow uint32,
    73  	dedupInput bool,
    74  	tree map[any]uint32) (err error) {
    75  	vals := input.([]T)
    76  	if dedupInput {
    77  		set := make(map[T]bool)
    78  		for _, v := range vals[start : start+count] {
    79  			if _, ok := set[v]; ok {
    80  				entry := common.TypeStringValue(t, v)
    81  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
    82  			}
    83  			set[v] = true
    84  		}
    85  		return
    86  	}
    87  	for _, v := range vals[start : start+count] {
    88  		if _, ok := tree[v]; ok {
    89  			entry := common.TypeStringValue(t, v)
    90  			return moerr.NewDuplicateEntryNoCtx(entry, attr)
    91  		}
    92  		tree[v] = fromRow
    93  		fromRow++
    94  	}
    95  	return
    96  }
    97  
    98  func (idx *simpleTableIndex) KeyToVector(kType types.Type) containers.Vector {
    99  	vec := containers.MakeVector(kType, false)
   100  	switch kType.Oid {
   101  	case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text:
   102  		for k := range idx.tree {
   103  			vec.Append([]byte(k.(string)))
   104  		}
   105  	default:
   106  		for k := range idx.tree {
   107  			vec.Append(k)
   108  		}
   109  	}
   110  	return vec
   111  }
   112  
   113  func (idx *simpleTableIndex) KeyToVectors(kType types.Type) []containers.Vector {
   114  	vec := containers.MakeVector(kType, false)
   115  	var vecs []containers.Vector
   116  	switch kType.Oid {
   117  	case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text:
   118  		for k := range idx.tree {
   119  			if vec.Length() > int(txnbase.MaxNodeRows) {
   120  				vecs = append(vecs, vec)
   121  				vec = containers.MakeVector(kType, false)
   122  			}
   123  			vec.Append([]byte(k.(string)))
   124  		}
   125  	default:
   126  		for k := range idx.tree {
   127  			if vec.Length() > int(txnbase.MaxNodeRows) {
   128  				vecs = append(vecs, vec)
   129  				vec = containers.MakeVector(kType, false)
   130  			}
   131  			vec.Append(k)
   132  		}
   133  	}
   134  	if vec.Length() > 0 {
   135  		vecs = append(vecs, vec)
   136  	}
   137  	return vecs
   138  }
   139  
   140  func (idx *simpleTableIndex) Close() error {
   141  	idx.tree = nil
   142  	return nil
   143  }
   144  func (idx *simpleTableIndex) Name() string { return "SimpleIndex" }
   145  func (idx *simpleTableIndex) Count() int {
   146  	idx.RLock()
   147  	cnt := len(idx.tree)
   148  	idx.RUnlock()
   149  	return cnt
   150  }
   151  
   152  func (idx *simpleTableIndex) Insert(v any, row uint32) error {
   153  	idx.Lock()
   154  	defer idx.Unlock()
   155  	_, ok := idx.tree[v]
   156  	if ok {
   157  		return moerr.GetOkExpectedDup()
   158  	}
   159  	idx.tree[v] = row
   160  	return nil
   161  }
   162  func (idx *simpleTableIndex) Delete(vv any) error {
   163  	idx.Lock()
   164  	defer idx.Unlock()
   165  	var v any
   166  	switch vv := vv.(type) {
   167  	case []uint8:
   168  		v = string(vv)
   169  	default:
   170  		v = vv
   171  	}
   172  	_, ok := idx.tree[v]
   173  	if !ok {
   174  		return moerr.GetOkExpectedDup()
   175  	}
   176  	delete(idx.tree, v)
   177  	return nil
   178  }
   179  
   180  func (idx *simpleTableIndex) Search(v any) (uint32, error) {
   181  	idx.RLock()
   182  	defer idx.RUnlock()
   183  	row, ok := idx.tree[v]
   184  	if !ok {
   185  		return 0, moerr.NewNotFoundNoCtx()
   186  	}
   187  	return uint32(row), nil
   188  }
   189  
   190  func (idx *simpleTableIndex) BatchInsert(
   191  	attr string,
   192  	col containers.Vector,
   193  	start, count int,
   194  	row uint32,
   195  	dedupInput bool) error {
   196  	idx.Lock()
   197  	defer idx.Unlock()
   198  	colType := col.GetType()
   199  	switch colType.Oid {
   200  	case types.T_bool:
   201  		return InsertOp[bool](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   202  	case types.T_int8:
   203  		return InsertOp[int8](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   204  	case types.T_int16:
   205  		return InsertOp[int16](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   206  	case types.T_int32:
   207  		return InsertOp[int32](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   208  	case types.T_int64:
   209  		return InsertOp[int64](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   210  	case types.T_uint8:
   211  		return InsertOp[uint8](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   212  	case types.T_uint16:
   213  		return InsertOp[uint16](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   214  	case types.T_uint32:
   215  		return InsertOp[uint32](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   216  	case types.T_uint64:
   217  		return InsertOp[uint64](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   218  	case types.T_decimal64:
   219  		return InsertOp[types.Decimal64](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   220  	case types.T_decimal128:
   221  		return InsertOp[types.Decimal128](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   222  	case types.T_uuid:
   223  		return InsertOp[types.Uuid](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   224  	case types.T_float32:
   225  		return InsertOp[float32](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   226  	case types.T_float64:
   227  		return InsertOp[float64](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   228  	case types.T_date:
   229  		return InsertOp[types.Date](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   230  	case types.T_timestamp:
   231  		return InsertOp[types.Timestamp](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   232  	case types.T_time:
   233  		return InsertOp[types.Time](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   234  	case types.T_datetime:
   235  		return InsertOp[types.Datetime](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   236  	case types.T_TS:
   237  		return InsertOp[types.TS](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   238  	case types.T_Rowid:
   239  		return InsertOp[types.Rowid](colType, attr, col.Slice(), start, count, row, dedupInput, idx.tree)
   240  	case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text:
   241  		vs := col.Bytes()
   242  		if dedupInput {
   243  			set := make(map[string]bool)
   244  			for i := start; i < start+count; i++ {
   245  				v := string(vs.GetVarValueAt(i))
   246  				if _, ok := set[v]; ok {
   247  					entry := common.TypeStringValue(colType, []byte(v))
   248  					return moerr.NewDuplicateEntryNoCtx(entry, attr)
   249  				}
   250  				set[v] = true
   251  			}
   252  			break
   253  		}
   254  		for i := start; i < start+count; i++ {
   255  			v := string(vs.GetVarValueAt(i))
   256  			if _, ok := idx.tree[v]; ok {
   257  				entry := common.TypeStringValue(colType, []byte(v))
   258  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   259  			}
   260  			idx.tree[v] = row
   261  			row++
   262  		}
   263  	default:
   264  		panic(moerr.NewInternalErrorNoCtx("%s not supported", col.GetType().String()))
   265  	}
   266  	return nil
   267  }
   268  
   269  // TODO: rewrite
   270  func (idx *simpleTableIndex) BatchDedup(attr string, col containers.Vector) error {
   271  	idx.RLock()
   272  	defer idx.RUnlock()
   273  	colType := col.GetType()
   274  	switch colType.Oid {
   275  	case types.T_bool:
   276  		vals := col.Slice()
   277  		return DedupOp[bool](colType, attr, vals, idx.tree)
   278  	case types.T_int8:
   279  		vals := col.Slice()
   280  		return DedupOp[int8](colType, attr, vals, idx.tree)
   281  	case types.T_int16:
   282  		vals := col.Slice()
   283  		return DedupOp[int16](colType, attr, vals, idx.tree)
   284  	case types.T_int32:
   285  		vals := col.Slice()
   286  		return DedupOp[int32](colType, attr, vals, idx.tree)
   287  	case types.T_int64:
   288  		vals := col.Slice()
   289  		return DedupOp[int64](colType, attr, vals, idx.tree)
   290  	case types.T_uint8:
   291  		vals := col.Slice()
   292  		return DedupOp[uint8](colType, attr, vals, idx.tree)
   293  	case types.T_uint16:
   294  		vals := col.Slice()
   295  		return DedupOp[uint16](colType, attr, vals, idx.tree)
   296  	case types.T_uint32:
   297  		vals := col.Slice()
   298  		return DedupOp[uint32](colType, attr, vals, idx.tree)
   299  	case types.T_uint64:
   300  		vals := col.Slice()
   301  		return DedupOp[uint64](colType, attr, vals, idx.tree)
   302  	case types.T_decimal64:
   303  		vals := col.Slice()
   304  		return DedupOp[types.Decimal64](colType, attr, vals, idx.tree)
   305  	case types.T_decimal128:
   306  		vals := col.Slice()
   307  		return DedupOp[types.Decimal128](colType, attr, vals, idx.tree)
   308  	case types.T_float32:
   309  		vals := col.Slice()
   310  		return DedupOp[float32](colType, attr, vals, idx.tree)
   311  	case types.T_float64:
   312  		vals := col.Slice()
   313  		return DedupOp[float64](colType, attr, vals, idx.tree)
   314  	case types.T_date:
   315  		vals := col.Slice()
   316  		return DedupOp[types.Date](colType, attr, vals, idx.tree)
   317  	case types.T_time:
   318  		vals := col.Slice()
   319  		return DedupOp[types.Time](colType, attr, vals, idx.tree)
   320  	case types.T_datetime:
   321  		vals := col.Slice()
   322  		return DedupOp[types.Datetime](colType, attr, vals, idx.tree)
   323  	case types.T_timestamp:
   324  		vals := col.Slice()
   325  		return DedupOp[types.Timestamp](colType, attr, vals, idx.tree)
   326  	case types.T_TS:
   327  		vals := col.Slice()
   328  		return DedupOp[types.TS](colType, attr, vals, idx.tree)
   329  	case types.T_Rowid:
   330  		vals := col.Slice()
   331  		return DedupOp[types.Rowid](colType, attr, vals, idx.tree)
   332  	case types.T_char, types.T_varchar, types.T_json, types.T_blob, types.T_text:
   333  		bs := col.Bytes()
   334  		for i := 0; i < col.Length(); i++ {
   335  			v := string(bs.GetVarValueAt(i))
   336  			if _, ok := idx.tree[v]; ok {
   337  				entry := common.TypeStringValue(colType, []byte(v))
   338  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   339  			}
   340  		}
   341  	default:
   342  		panic(moerr.NewInternalErrorNoCtx("%s not supported", col.GetType().String()))
   343  	}
   344  	return nil
   345  }