github.com/matrixorigin/matrixone@v1.2.0/pkg/catalog/secondary_index_utils.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package catalog
    16  
    17  import (
    18  	"encoding/json"
    19  	"fmt"
    20  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    21  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    22  	"strconv"
    23  	"strings"
    24  )
    25  
    26  // Index Algorithm names
    27  const (
    28  	MoIndexDefaultAlgo = tree.INDEX_TYPE_INVALID // used by UniqueIndex or default SecondaryIndex
    29  	MoIndexBTreeAlgo   = tree.INDEX_TYPE_BTREE   // used for Mocking MySQL behaviour.
    30  	MoIndexIvfFlatAlgo = tree.INDEX_TYPE_IVFFLAT // used for IVF flat index on Vector/Array columns
    31  	MOIndexMasterAlgo  = tree.INDEX_TYPE_MASTER  // used for Master Index on VARCHAR columns
    32  )
    33  
    34  // ToLower is used for before comparing AlgoType and IndexAlgoParamOpType. Reason why they are strings
    35  //  1. Changing AlgoType from string to Enum will break the backward compatibility.
    36  //     "panic: Unable to find target column from predefined table columns"
    37  //  2. IndexAlgoParamOpType is serialized and stored in the mo_indexes as JSON string.
    38  func ToLower(str string) string {
    39  	return strings.ToLower(strings.TrimSpace(str))
    40  }
    41  
    42  // IsNullIndexAlgo is used to skip printing the default "" index algo in the restoreDDL and buildShowCreateTable
    43  func IsNullIndexAlgo(algo string) bool {
    44  	_algo := ToLower(algo)
    45  	return _algo == MoIndexDefaultAlgo.ToString()
    46  }
    47  
    48  // IsRegularIndexAlgo are indexes which will be handled by regular index flow, ie the one where
    49  // we have one hidden table.
    50  func IsRegularIndexAlgo(algo string) bool {
    51  	_algo := ToLower(algo)
    52  	return _algo == MoIndexDefaultAlgo.ToString() || _algo == MoIndexBTreeAlgo.ToString()
    53  }
    54  
    55  func IsIvfIndexAlgo(algo string) bool {
    56  	_algo := ToLower(algo)
    57  	return _algo == MoIndexIvfFlatAlgo.ToString()
    58  }
    59  
    60  func IsMasterIndexAlgo(algo string) bool {
    61  	_algo := ToLower(algo)
    62  	return _algo == MOIndexMasterAlgo.ToString()
    63  }
    64  
    65  // ------------------------[START] IndexAlgoParams------------------------
    66  const (
    67  	IndexAlgoParamLists     = "lists"
    68  	IndexAlgoParamOpType    = "op_type"
    69  	IndexAlgoParamOpType_l2 = "vector_l2_ops"
    70  	//IndexAlgoParamOpType_ip  = "vector_ip_ops"
    71  	//IndexAlgoParamOpType_cos = "vector_cosine_ops"
    72  )
    73  
    74  const (
    75  	KmeansSamplePerList = 50
    76  	MaxSampleCount      = 10_000
    77  )
    78  
    79  // CalcSampleCount is used to calculate the sample count for Kmeans index.
    80  func CalcSampleCount(lists, totalCnt int64) (sampleCnt int64) {
    81  
    82  	if totalCnt > lists*KmeansSamplePerList {
    83  		sampleCnt = lists * KmeansSamplePerList
    84  	} else {
    85  		sampleCnt = totalCnt
    86  	}
    87  
    88  	if totalCnt > MaxSampleCount && sampleCnt < MaxSampleCount {
    89  		sampleCnt = MaxSampleCount
    90  	}
    91  
    92  	if sampleCnt > MaxSampleCount {
    93  		sampleCnt = MaxSampleCount
    94  	}
    95  
    96  	return sampleCnt
    97  }
    98  
    99  /* 1. ToString Functions */
   100  
   101  // IndexParamsToStringList used by buildShowCreateTable and restoreDDL
   102  // Eg:- "LIST = 10 op_type 'vector_l2_ops'"
   103  // NOTE: don't set default values here as it is used by SHOW and RESTORE DDL.
   104  func IndexParamsToStringList(indexParams string) (string, error) {
   105  	result, err := IndexParamsStringToMap(indexParams)
   106  	if err != nil {
   107  		return "", err
   108  	}
   109  
   110  	res := ""
   111  	if val, ok := result[IndexAlgoParamLists]; ok {
   112  		res += fmt.Sprintf(" %s = %s ", IndexAlgoParamLists, val)
   113  	}
   114  
   115  	if opType, ok := result[IndexAlgoParamOpType]; ok {
   116  		opType = ToLower(opType)
   117  		if opType != IndexAlgoParamOpType_l2 {
   118  			//	opType != IndexAlgoParamOpType_ip &&
   119  			//	opType != IndexAlgoParamOpType_cos
   120  			return "", moerr.NewInternalErrorNoCtx("invalid op_type. not of type '%s'", IndexAlgoParamOpType_l2)
   121  			//IndexAlgoParamOpType_ip, , IndexAlgoParamOpType_cos)
   122  
   123  		}
   124  
   125  		res += fmt.Sprintf(" %s '%s' ", IndexAlgoParamOpType, opType)
   126  	}
   127  
   128  	return res, nil
   129  }
   130  
   131  // IndexParamsToJsonString used by buildSecondaryIndexDef
   132  // Eg:- {"lists":"10","op_type":"vector_l2_ops"}
   133  func IndexParamsToJsonString(def *tree.Index) (string, error) {
   134  
   135  	res, err := indexParamsToMap(def)
   136  	if err != nil {
   137  		return "", err
   138  	}
   139  
   140  	if len(res) == 0 {
   141  		return "", nil // don't return empty json "{}" string
   142  	}
   143  
   144  	return IndexParamsMapToJsonString(res)
   145  }
   146  
   147  // IndexParamsMapToJsonString used by AlterTableInPlace and CreateIndexDef
   148  func IndexParamsMapToJsonString(res map[string]string) (string, error) {
   149  	str, err := json.Marshal(res)
   150  	if err != nil {
   151  		return "", err
   152  	}
   153  	return string(str), nil
   154  }
   155  
   156  /* 2. ToMap Functions */
   157  
   158  // IndexParamsStringToMap used by buildShowCreateTable and restoreDDL
   159  func IndexParamsStringToMap(indexParams string) (map[string]string, error) {
   160  	var result map[string]string
   161  	err := json.Unmarshal([]byte(indexParams), &result)
   162  	if err != nil {
   163  		return nil, err
   164  	}
   165  	return result, nil
   166  }
   167  
   168  func indexParamsToMap(def *tree.Index) (map[string]string, error) {
   169  	res := make(map[string]string)
   170  
   171  	switch def.KeyType {
   172  	case tree.INDEX_TYPE_BTREE, tree.INDEX_TYPE_INVALID:
   173  		// do nothing
   174  	case tree.INDEX_TYPE_MASTER:
   175  		// do nothing
   176  	case tree.INDEX_TYPE_IVFFLAT:
   177  		if def.IndexOption.AlgoParamList == 0 {
   178  			// NOTE:
   179  			// 1. In the parser, we added the failure check for list=0 scenario. So if user tries to explicit
   180  			// set list=0, it will fail.
   181  			// 2. However, if user didn't use the list option (we will get it as 0 here), then we will
   182  			// set the default value as 1.
   183  			res[IndexAlgoParamLists] = strconv.FormatInt(1, 10)
   184  		} else if def.IndexOption.AlgoParamList > 0 {
   185  			res[IndexAlgoParamLists] = strconv.FormatInt(def.IndexOption.AlgoParamList, 10)
   186  		} else {
   187  			return nil, moerr.NewInternalErrorNoCtx("invalid list. list must be > 0")
   188  		}
   189  
   190  		if len(def.IndexOption.AlgoParamVectorOpType) > 0 {
   191  			opType := ToLower(def.IndexOption.AlgoParamVectorOpType)
   192  			if opType != IndexAlgoParamOpType_l2 {
   193  				//opType != IndexAlgoParamOpType_ip &&
   194  				//opType != IndexAlgoParamOpType_cos &&
   195  
   196  				return nil, moerr.NewInternalErrorNoCtx("invalid op_type. not of type '%s'",
   197  					IndexAlgoParamOpType_l2,
   198  					//IndexAlgoParamOpType_ip, IndexAlgoParamOpType_cos,
   199  				)
   200  			}
   201  			res[IndexAlgoParamOpType] = def.IndexOption.AlgoParamVectorOpType
   202  		} else {
   203  			res[IndexAlgoParamOpType] = IndexAlgoParamOpType_l2 // set l2 as default
   204  		}
   205  	default:
   206  		return nil, moerr.NewInternalErrorNoCtx("invalid index type")
   207  	}
   208  	return res, nil
   209  }
   210  
   211  func DefaultIvfIndexAlgoOptions() map[string]string {
   212  	res := make(map[string]string)
   213  	res[IndexAlgoParamLists] = "1"                      // set lists = 1 as default
   214  	res[IndexAlgoParamOpType] = IndexAlgoParamOpType_l2 // set l2 as default
   215  	return res
   216  }
   217  
   218  //------------------------[END] IndexAlgoParams------------------------
   219  
   220  // ------------------------[START] Aliaser------------------------
   221  
   222  // This code is used by "secondary index" to resolve the "programmatically generated PK" appended to the
   223  // end of the index key "__mo_index_idx_col".
   224  
   225  const (
   226  	AliasPrefix = "__mo_alias_"
   227  )
   228  
   229  func CreateAlias(column string) string {
   230  	return fmt.Sprintf("%s%s", AliasPrefix, column)
   231  }
   232  
   233  func ResolveAlias(alias string) string {
   234  	return strings.TrimPrefix(alias, AliasPrefix)
   235  }
   236  
   237  func IsAlias(column string) bool {
   238  	return strings.HasPrefix(column, AliasPrefix)
   239  }
   240  
   241  // ------------------------[END] Aliaser------------------------