github.com/matrixorigin/matrixone@v1.2.0/pkg/vm/engine/tae/txn/txnimpl/index.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package txnimpl
    16  
    17  import (
    18  	"io"
    19  	"sync"
    20  
    21  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    22  	"github.com/matrixorigin/matrixone/pkg/common/util"
    23  	"github.com/matrixorigin/matrixone/pkg/container/types"
    24  	"github.com/matrixorigin/matrixone/pkg/container/vector"
    25  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/common"
    26  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/containers"
    27  )
    28  
// TableIndex is the contract of a key -> row index. simpleTableIndex in this
// file provides an implementation of every method listed here; the per-method
// notes below describe what that implementation does.
type TableIndex interface {
	io.Closer
	// BatchDedup checks the values of a column against the index and returns
	// a duplicate-entry error if one is already present. The string argument
	// is the attribute name reported in that error.
	BatchDedup(string, containers.Vector) error
	// BatchInsert works on a window of a column. The arguments are, in order:
	// attribute name, column, start offset, number of values, row assigned to
	// the first value, and a flag selecting "only check the input window for
	// duplicates" instead of inserting.
	BatchInsert(string, containers.Vector, int, int, uint32, bool) error
	// Insert maps a single key to a row.
	Insert(any, uint32) error
	// Delete removes a single key.
	Delete(any) error
	// Search returns the row mapped to a key.
	Search(any) (uint32, error)
	// Name returns the name of the index implementation.
	Name() string
	// Count returns the number of keys held by the index.
	Count() int
	// KeyToVector returns the keys as one vector of the given type.
	KeyToVector(types.Type) containers.Vector
	// KeyToVectors returns the keys split across several vectors of the
	// given type.
	KeyToVectors(types.Type) []containers.Vector
}
    41  
// simpleTableIndex is a TableIndex backed by a plain Go map. The embedded
// RWMutex is taken by most of its methods around accesses to tree.
type simpleTableIndex struct {
	sync.RWMutex
	// tree maps a key to its uint32 row. Fixed-width keys are stored as their
	// Go value; variable-length keys are stored as strings by BatchInsert.
	tree map[any]uint32
}
    46  
    47  func NewSimpleTableIndex() *simpleTableIndex {
    48  	return &simpleTableIndex{
    49  		tree: make(map[any]uint32),
    50  	}
    51  }
    52  
    53  func DedupOp[T comparable](
    54  	t *types.Type,
    55  	attr string,
    56  	vs []T,
    57  	tree map[any]uint32) (err error) {
    58  	for _, v := range vs {
    59  		if _, ok := tree[v]; ok {
    60  			entry := common.TypeStringValue(*t, v, false)
    61  			return moerr.NewDuplicateEntryNoCtx(entry, attr)
    62  		}
    63  	}
    64  	return
    65  }
    66  
    67  func InsertOp[T comparable](
    68  	t *types.Type,
    69  	attr string,
    70  	vals []T,
    71  	start, count int,
    72  	fromRow uint32,
    73  	dedupInput bool,
    74  	tree map[any]uint32) (err error) {
    75  	if dedupInput {
    76  		set := make(map[T]bool)
    77  		for _, v := range vals[start : start+count] {
    78  			if _, ok := set[v]; ok {
    79  				entry := common.TypeStringValue(*t, v, false)
    80  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
    81  			}
    82  			set[v] = true
    83  		}
    84  		return
    85  	}
    86  	for _, v := range vals[start : start+count] {
    87  		if _, ok := tree[v]; ok {
    88  			entry := common.TypeStringValue(*t, v, false)
    89  			return moerr.NewDuplicateEntryNoCtx(entry, attr)
    90  		}
    91  		tree[v] = fromRow
    92  		fromRow++
    93  	}
    94  	return
    95  }
    96  
    97  func (idx *simpleTableIndex) KeyToVector(kType types.Type) containers.Vector {
    98  	vec := makeWorkspaceVector(kType)
    99  	switch kType.Oid {
   100  	case types.T_char, types.T_varchar, types.T_json,
   101  		types.T_binary, types.T_varbinary, types.T_blob, types.T_text:
   102  		for k := range idx.tree {
   103  			vec.Append([]byte(k.(string)), false)
   104  		}
   105  	case types.T_array_float32, types.T_array_float64:
   106  		// No usage for this func.
   107  		for k := range idx.tree {
   108  			vec.Append(k.([]byte), false)
   109  		}
   110  	default:
   111  		for k := range idx.tree {
   112  			vec.Append(k, false)
   113  		}
   114  	}
   115  	return vec
   116  }
   117  
   118  func (idx *simpleTableIndex) KeyToVectors(kType types.Type) []containers.Vector {
   119  	vec := makeWorkspaceVector(kType)
   120  	var vecs []containers.Vector
   121  	switch kType.Oid {
   122  	case types.T_char, types.T_varchar, types.T_json,
   123  		types.T_binary, types.T_varbinary, types.T_blob, types.T_text:
   124  		for k := range idx.tree {
   125  			if vec.Length() > int(MaxNodeRows) {
   126  				vecs = append(vecs, vec)
   127  				vec = makeWorkspaceVector(kType)
   128  			}
   129  			vec.Append([]byte(k.(string)), false)
   130  		}
   131  	case types.T_array_float32:
   132  		// No usage for this func.
   133  		for k := range idx.tree {
   134  			if vec.Length() > int(MaxNodeRows) {
   135  				vecs = append(vecs, vec)
   136  				vec = makeWorkspaceVector(kType)
   137  			}
   138  			vec.Append(types.BytesToArrayToString[float32](k.([]byte)), false)
   139  		}
   140  	case types.T_array_float64:
   141  		for k := range idx.tree {
   142  			if vec.Length() > int(MaxNodeRows) {
   143  				vecs = append(vecs, vec)
   144  				vec = makeWorkspaceVector(kType)
   145  			}
   146  			vec.Append(types.BytesToArrayToString[float64](k.([]byte)), false)
   147  		}
   148  	default:
   149  		for k := range idx.tree {
   150  			if vec.Length() > int(MaxNodeRows) {
   151  				vecs = append(vecs, vec)
   152  				vec = makeWorkspaceVector(kType)
   153  			}
   154  			vec.Append(k, false)
   155  		}
   156  	}
   157  	if vec.Length() > 0 {
   158  		vecs = append(vecs, vec)
   159  	}
   160  	return vecs
   161  }
   162  
// Close drops the key map by setting it to nil and always returns nil.
// The index lock is not taken here.
func (idx *simpleTableIndex) Close() error {
	idx.tree = nil
	return nil
}
   167  func (idx *simpleTableIndex) Name() string { return "SimpleIndex" }
   168  func (idx *simpleTableIndex) Count() int {
   169  	idx.RLock()
   170  	cnt := len(idx.tree)
   171  	idx.RUnlock()
   172  	return cnt
   173  }
   174  
   175  func (idx *simpleTableIndex) Insert(v any, row uint32) error {
   176  	idx.Lock()
   177  	defer idx.Unlock()
   178  	_, ok := idx.tree[v]
   179  	if ok {
   180  		return moerr.GetOkExpectedDup()
   181  	}
   182  	idx.tree[v] = row
   183  	return nil
   184  }
   185  func (idx *simpleTableIndex) Delete(vv any) error {
   186  	idx.Lock()
   187  	defer idx.Unlock()
   188  	var v any
   189  	switch vv := vv.(type) {
   190  	case []uint8:
   191  		v = string(vv)
   192  	default:
   193  		v = vv
   194  	}
   195  	_, ok := idx.tree[v]
   196  	if !ok {
   197  		return moerr.GetOkExpectedDup()
   198  	}
   199  	delete(idx.tree, v)
   200  	return nil
   201  }
   202  
   203  func (idx *simpleTableIndex) Search(v any) (uint32, error) {
   204  	idx.RLock()
   205  	defer idx.RUnlock()
   206  	row, ok := idx.tree[v]
   207  	if !ok {
   208  		return 0, moerr.NewNotFoundNoCtx()
   209  	}
   210  	return uint32(row), nil
   211  }
   212  
   213  func (idx *simpleTableIndex) BatchInsert(
   214  	attr string,
   215  	col containers.Vector,
   216  	start, count int,
   217  	row uint32,
   218  	dedupInput bool) error {
   219  	idx.Lock()
   220  	defer idx.Unlock()
   221  	colType := col.GetType()
   222  	switch colType.Oid {
   223  	case types.T_bool:
   224  		vs := vector.MustFixedCol[bool](col.GetDownstreamVector())
   225  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   226  	case types.T_bit:
   227  		vs := vector.MustFixedCol[uint64](col.GetDownstreamVector())
   228  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   229  	case types.T_int8:
   230  		vs := vector.MustFixedCol[int8](col.GetDownstreamVector())
   231  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   232  	case types.T_int16:
   233  		vs := vector.MustFixedCol[int16](col.GetDownstreamVector())
   234  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   235  	case types.T_int32:
   236  		vs := vector.MustFixedCol[int32](col.GetDownstreamVector())
   237  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   238  	case types.T_int64:
   239  		vs := vector.MustFixedCol[int64](col.GetDownstreamVector())
   240  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   241  	case types.T_uint8:
   242  		vs := vector.MustFixedCol[uint8](col.GetDownstreamVector())
   243  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   244  	case types.T_uint16:
   245  		vs := vector.MustFixedCol[uint16](col.GetDownstreamVector())
   246  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   247  	case types.T_uint32:
   248  		vs := vector.MustFixedCol[uint32](col.GetDownstreamVector())
   249  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   250  	case types.T_uint64:
   251  		vs := vector.MustFixedCol[uint64](col.GetDownstreamVector())
   252  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   253  	case types.T_decimal64:
   254  		vs := vector.MustFixedCol[types.Decimal64](col.GetDownstreamVector())
   255  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   256  	case types.T_decimal128:
   257  		vs := vector.MustFixedCol[types.Decimal128](col.GetDownstreamVector())
   258  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   259  	case types.T_uuid:
   260  		vs := vector.MustFixedCol[types.Uuid](col.GetDownstreamVector())
   261  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   262  	case types.T_float32:
   263  		vs := vector.MustFixedCol[float32](col.GetDownstreamVector())
   264  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   265  	case types.T_float64:
   266  		vs := vector.MustFixedCol[float64](col.GetDownstreamVector())
   267  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   268  	case types.T_date:
   269  		vs := vector.MustFixedCol[types.Date](col.GetDownstreamVector())
   270  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   271  	case types.T_timestamp:
   272  		vs := vector.MustFixedCol[types.Timestamp](col.GetDownstreamVector())
   273  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   274  	case types.T_time:
   275  		vs := vector.MustFixedCol[types.Time](col.GetDownstreamVector())
   276  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   277  	case types.T_datetime:
   278  		vs := vector.MustFixedCol[types.Datetime](col.GetDownstreamVector())
   279  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   280  	case types.T_enum:
   281  		vs := vector.MustFixedCol[types.Enum](col.GetDownstreamVector())
   282  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   283  	case types.T_TS:
   284  		vs := vector.MustFixedCol[types.TS](col.GetDownstreamVector())
   285  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   286  	case types.T_Rowid:
   287  		vs := vector.MustFixedCol[types.Rowid](col.GetDownstreamVector())
   288  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   289  	case types.T_Blockid:
   290  		vs := vector.MustFixedCol[types.Blockid](col.GetDownstreamVector())
   291  		return InsertOp(colType, attr, vs, start, count, row, dedupInput, idx.tree)
   292  	case types.T_char, types.T_varchar, types.T_json,
   293  		types.T_binary, types.T_varbinary, types.T_blob, types.T_text:
   294  		vec := col.GetDownstreamVector()
   295  		if dedupInput {
   296  			set := make(map[string]bool)
   297  			for i := start; i < start+count; i++ {
   298  				v := vec.GetStringAt(i)
   299  				if _, ok := set[v]; ok {
   300  					entry := common.TypeStringValue(*colType, []byte(v), false)
   301  					return moerr.NewDuplicateEntryNoCtx(entry, attr)
   302  				}
   303  				set[v] = true
   304  			}
   305  			break
   306  		}
   307  		for i := start; i < start+count; i++ {
   308  			v := vec.GetStringAt(i)
   309  			if _, ok := idx.tree[v]; ok {
   310  				entry := common.TypeStringValue(*colType, []byte(v), false)
   311  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   312  			}
   313  			idx.tree[v] = row
   314  			row++
   315  		}
   316  	case types.T_array_float32:
   317  		vec := col.GetDownstreamVector()
   318  		if dedupInput {
   319  			set := make(map[string]bool)
   320  			for i := start; i < start+count; i++ {
   321  				v := types.ArrayToString[float32](vector.GetArrayAt[float32](vec, i))
   322  				if _, ok := set[v]; ok {
   323  					entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false)
   324  					return moerr.NewDuplicateEntryNoCtx(entry, attr)
   325  				}
   326  				set[v] = true
   327  			}
   328  			break
   329  		}
   330  		for i := start; i < start+count; i++ {
   331  			v := types.ArrayToString[float32](vector.GetArrayAt[float32](vec, i))
   332  			if _, ok := idx.tree[v]; ok {
   333  				entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false)
   334  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   335  			}
   336  			idx.tree[v] = row
   337  			row++
   338  		}
   339  	case types.T_array_float64:
   340  		vec := col.GetDownstreamVector()
   341  		if dedupInput {
   342  			set := make(map[string]bool)
   343  			for i := start; i < start+count; i++ {
   344  				v := types.ArrayToString[float64](vector.GetArrayAt[float64](vec, i))
   345  				if _, ok := set[v]; ok {
   346  					entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false)
   347  					return moerr.NewDuplicateEntryNoCtx(entry, attr)
   348  				}
   349  				set[v] = true
   350  			}
   351  			break
   352  		}
   353  		for i := start; i < start+count; i++ {
   354  			v := types.ArrayToString[float64](vector.GetArrayAt[float64](vec, i))
   355  			if _, ok := idx.tree[v]; ok {
   356  				entry := common.TypeStringValue(*colType, vec.GetBytesAt(i), false)
   357  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   358  			}
   359  			idx.tree[v] = row
   360  			row++
   361  		}
   362  	default:
   363  		panic(moerr.NewInternalErrorNoCtx("%s not supported", col.GetType().String()))
   364  	}
   365  	return nil
   366  }
   367  
   368  // TODO: rewrite
   369  func (idx *simpleTableIndex) BatchDedup(attr string, col containers.Vector) error {
   370  	idx.RLock()
   371  	defer idx.RUnlock()
   372  	colType := col.GetType()
   373  	switch colType.Oid {
   374  	case types.T_bool:
   375  		vals := vector.MustFixedCol[bool](col.GetDownstreamVector())
   376  		return DedupOp(colType, attr, vals, idx.tree)
   377  	case types.T_bit:
   378  		vals := vector.MustFixedCol[uint64](col.GetDownstreamVector())
   379  		return DedupOp(colType, attr, vals, idx.tree)
   380  	case types.T_int8:
   381  		vals := vector.MustFixedCol[int8](col.GetDownstreamVector())
   382  		return DedupOp(colType, attr, vals, idx.tree)
   383  	case types.T_int16:
   384  		vals := vector.MustFixedCol[int16](col.GetDownstreamVector())
   385  		return DedupOp(colType, attr, vals, idx.tree)
   386  	case types.T_int32:
   387  		vals := vector.MustFixedCol[int32](col.GetDownstreamVector())
   388  		return DedupOp(colType, attr, vals, idx.tree)
   389  	case types.T_int64:
   390  		vals := vector.MustFixedCol[int64](col.GetDownstreamVector())
   391  		return DedupOp(colType, attr, vals, idx.tree)
   392  	case types.T_uint8:
   393  		vals := vector.MustFixedCol[uint8](col.GetDownstreamVector())
   394  		return DedupOp(colType, attr, vals, idx.tree)
   395  	case types.T_uint16:
   396  		vals := vector.MustFixedCol[uint16](col.GetDownstreamVector())
   397  		return DedupOp(colType, attr, vals, idx.tree)
   398  	case types.T_uint32:
   399  		vals := vector.MustFixedCol[uint32](col.GetDownstreamVector())
   400  		return DedupOp(colType, attr, vals, idx.tree)
   401  	case types.T_uint64:
   402  		vals := vector.MustFixedCol[uint64](col.GetDownstreamVector())
   403  		return DedupOp(colType, attr, vals, idx.tree)
   404  	case types.T_decimal64:
   405  		vals := vector.MustFixedCol[types.Decimal64](col.GetDownstreamVector())
   406  		return DedupOp(colType, attr, vals, idx.tree)
   407  	case types.T_decimal128:
   408  		vals := vector.MustFixedCol[types.Decimal128](col.GetDownstreamVector())
   409  		return DedupOp(colType, attr, vals, idx.tree)
   410  	case types.T_float32:
   411  		vals := vector.MustFixedCol[float32](col.GetDownstreamVector())
   412  		return DedupOp(colType, attr, vals, idx.tree)
   413  	case types.T_float64:
   414  		vals := vector.MustFixedCol[float64](col.GetDownstreamVector())
   415  		return DedupOp(colType, attr, vals, idx.tree)
   416  	case types.T_date:
   417  		vals := vector.MustFixedCol[types.Date](col.GetDownstreamVector())
   418  		return DedupOp(colType, attr, vals, idx.tree)
   419  	case types.T_time:
   420  		vals := vector.MustFixedCol[types.Time](col.GetDownstreamVector())
   421  		return DedupOp(colType, attr, vals, idx.tree)
   422  	case types.T_datetime:
   423  		vals := vector.MustFixedCol[types.Datetime](col.GetDownstreamVector())
   424  		return DedupOp(colType, attr, vals, idx.tree)
   425  	case types.T_timestamp:
   426  		vals := vector.MustFixedCol[types.Timestamp](col.GetDownstreamVector())
   427  		return DedupOp(colType, attr, vals, idx.tree)
   428  	case types.T_enum:
   429  		vals := vector.MustFixedCol[types.Enum](col.GetDownstreamVector())
   430  		return DedupOp(colType, attr, vals, idx.tree)
   431  	case types.T_TS:
   432  		vals := vector.MustFixedCol[types.TS](col.GetDownstreamVector())
   433  		return DedupOp(colType, attr, vals, idx.tree)
   434  	case types.T_Rowid:
   435  		vals := vector.MustFixedCol[types.Rowid](col.GetDownstreamVector())
   436  		return DedupOp(colType, attr, vals, idx.tree)
   437  	case types.T_Blockid:
   438  		vals := vector.MustFixedCol[types.Blockid](col.GetDownstreamVector())
   439  		return DedupOp(colType, attr, vals, idx.tree)
   440  	case types.T_char, types.T_varchar, types.T_json,
   441  		types.T_binary, types.T_varbinary, types.T_blob, types.T_text:
   442  		vec := col.GetDownstreamVector()
   443  		for i := 0; i < col.Length(); i++ {
   444  			bs := vec.GetBytesAt(i)
   445  			v := util.UnsafeBytesToString(bs)
   446  			if _, ok := idx.tree[v]; ok {
   447  				entry := common.TypeStringValue(*colType, bs, false)
   448  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   449  			}
   450  		}
   451  	case types.T_array_float32:
   452  		vec := col.GetDownstreamVector()
   453  		for i := 0; i < col.Length(); i++ {
   454  			bs := vec.GetBytesAt(i)
   455  			v := types.BytesToArrayToString[float32](bs)
   456  			if _, ok := idx.tree[v]; ok {
   457  				entry := common.TypeStringValue(*colType, bs, false)
   458  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   459  			}
   460  		}
   461  	case types.T_array_float64:
   462  		vec := col.GetDownstreamVector()
   463  		for i := 0; i < col.Length(); i++ {
   464  			bs := vec.GetBytesAt(i)
   465  			v := types.BytesToArrayToString[float64](bs)
   466  			if _, ok := idx.tree[v]; ok {
   467  				entry := common.TypeStringValue(*colType, bs, false)
   468  				return moerr.NewDuplicateEntryNoCtx(entry, attr)
   469  			}
   470  		}
   471  	default:
   472  		panic(moerr.NewInternalErrorNoCtx("%s not supported", col.GetType().String()))
   473  	}
   474  	return nil
   475  }