github.com/matrixorigin/matrixone@v1.2.0/pkg/catalog/secondary_index_utils.go (about) 1 // Copyright 2023 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package catalog 16 17 import ( 18 "encoding/json" 19 "fmt" 20 "github.com/matrixorigin/matrixone/pkg/common/moerr" 21 "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" 22 "strconv" 23 "strings" 24 ) 25 26 // Index Algorithm names 27 const ( 28 MoIndexDefaultAlgo = tree.INDEX_TYPE_INVALID // used by UniqueIndex or default SecondaryIndex 29 MoIndexBTreeAlgo = tree.INDEX_TYPE_BTREE // used for Mocking MySQL behaviour. 30 MoIndexIvfFlatAlgo = tree.INDEX_TYPE_IVFFLAT // used for IVF flat index on Vector/Array columns 31 MOIndexMasterAlgo = tree.INDEX_TYPE_MASTER // used for Master Index on VARCHAR columns 32 ) 33 34 // ToLower is used for before comparing AlgoType and IndexAlgoParamOpType. Reason why they are strings 35 // 1. Changing AlgoType from string to Enum will break the backward compatibility. 36 // "panic: Unable to find target column from predefined table columns" 37 // 2. IndexAlgoParamOpType is serialized and stored in the mo_indexes as JSON string. 38 func ToLower(str string) string { 39 return strings.ToLower(strings.TrimSpace(str)) 40 } 41 42 // IsNullIndexAlgo is used to skip printing the default "" index algo in the restoreDDL and buildShowCreateTable 43 func IsNullIndexAlgo(algo string) bool { 44 _algo := ToLower(algo) 45 return _algo == MoIndexDefaultAlgo.ToString() 46 } 47 48 // IsRegularIndexAlgo are indexes which will be handled by regular index flow, ie the one where 49 // we have one hidden table. 50 func IsRegularIndexAlgo(algo string) bool { 51 _algo := ToLower(algo) 52 return _algo == MoIndexDefaultAlgo.ToString() || _algo == MoIndexBTreeAlgo.ToString() 53 } 54 55 func IsIvfIndexAlgo(algo string) bool { 56 _algo := ToLower(algo) 57 return _algo == MoIndexIvfFlatAlgo.ToString() 58 } 59 60 func IsMasterIndexAlgo(algo string) bool { 61 _algo := ToLower(algo) 62 return _algo == MOIndexMasterAlgo.ToString() 63 } 64 65 // ------------------------[START] IndexAlgoParams------------------------ 66 const ( 67 IndexAlgoParamLists = "lists" 68 IndexAlgoParamOpType = "op_type" 69 IndexAlgoParamOpType_l2 = "vector_l2_ops" 70 //IndexAlgoParamOpType_ip = "vector_ip_ops" 71 //IndexAlgoParamOpType_cos = "vector_cosine_ops" 72 ) 73 74 const ( 75 KmeansSamplePerList = 50 76 MaxSampleCount = 10_000 77 ) 78 79 // CalcSampleCount is used to calculate the sample count for Kmeans index. 80 func CalcSampleCount(lists, totalCnt int64) (sampleCnt int64) { 81 82 if totalCnt > lists*KmeansSamplePerList { 83 sampleCnt = lists * KmeansSamplePerList 84 } else { 85 sampleCnt = totalCnt 86 } 87 88 if totalCnt > MaxSampleCount && sampleCnt < MaxSampleCount { 89 sampleCnt = MaxSampleCount 90 } 91 92 if sampleCnt > MaxSampleCount { 93 sampleCnt = MaxSampleCount 94 } 95 96 return sampleCnt 97 } 98 99 /* 1. ToString Functions */ 100 101 // IndexParamsToStringList used by buildShowCreateTable and restoreDDL 102 // Eg:- "LIST = 10 op_type 'vector_l2_ops'" 103 // NOTE: don't set default values here as it is used by SHOW and RESTORE DDL. 104 func IndexParamsToStringList(indexParams string) (string, error) { 105 result, err := IndexParamsStringToMap(indexParams) 106 if err != nil { 107 return "", err 108 } 109 110 res := "" 111 if val, ok := result[IndexAlgoParamLists]; ok { 112 res += fmt.Sprintf(" %s = %s ", IndexAlgoParamLists, val) 113 } 114 115 if opType, ok := result[IndexAlgoParamOpType]; ok { 116 opType = ToLower(opType) 117 if opType != IndexAlgoParamOpType_l2 { 118 // opType != IndexAlgoParamOpType_ip && 119 // opType != IndexAlgoParamOpType_cos 120 return "", moerr.NewInternalErrorNoCtx("invalid op_type. not of type '%s'", IndexAlgoParamOpType_l2) 121 //IndexAlgoParamOpType_ip, , IndexAlgoParamOpType_cos) 122 123 } 124 125 res += fmt.Sprintf(" %s '%s' ", IndexAlgoParamOpType, opType) 126 } 127 128 return res, nil 129 } 130 131 // IndexParamsToJsonString used by buildSecondaryIndexDef 132 // Eg:- {"lists":"10","op_type":"vector_l2_ops"} 133 func IndexParamsToJsonString(def *tree.Index) (string, error) { 134 135 res, err := indexParamsToMap(def) 136 if err != nil { 137 return "", err 138 } 139 140 if len(res) == 0 { 141 return "", nil // don't return empty json "{}" string 142 } 143 144 return IndexParamsMapToJsonString(res) 145 } 146 147 // IndexParamsMapToJsonString used by AlterTableInPlace and CreateIndexDef 148 func IndexParamsMapToJsonString(res map[string]string) (string, error) { 149 str, err := json.Marshal(res) 150 if err != nil { 151 return "", err 152 } 153 return string(str), nil 154 } 155 156 /* 2. ToMap Functions */ 157 158 // IndexParamsStringToMap used by buildShowCreateTable and restoreDDL 159 func IndexParamsStringToMap(indexParams string) (map[string]string, error) { 160 var result map[string]string 161 err := json.Unmarshal([]byte(indexParams), &result) 162 if err != nil { 163 return nil, err 164 } 165 return result, nil 166 } 167 168 func indexParamsToMap(def *tree.Index) (map[string]string, error) { 169 res := make(map[string]string) 170 171 switch def.KeyType { 172 case tree.INDEX_TYPE_BTREE, tree.INDEX_TYPE_INVALID: 173 // do nothing 174 case tree.INDEX_TYPE_MASTER: 175 // do nothing 176 case tree.INDEX_TYPE_IVFFLAT: 177 if def.IndexOption.AlgoParamList == 0 { 178 // NOTE: 179 // 1. In the parser, we added the failure check for list=0 scenario. So if user tries to explicit 180 // set list=0, it will fail. 181 // 2. However, if user didn't use the list option (we will get it as 0 here), then we will 182 // set the default value as 1. 183 res[IndexAlgoParamLists] = strconv.FormatInt(1, 10) 184 } else if def.IndexOption.AlgoParamList > 0 { 185 res[IndexAlgoParamLists] = strconv.FormatInt(def.IndexOption.AlgoParamList, 10) 186 } else { 187 return nil, moerr.NewInternalErrorNoCtx("invalid list. list must be > 0") 188 } 189 190 if len(def.IndexOption.AlgoParamVectorOpType) > 0 { 191 opType := ToLower(def.IndexOption.AlgoParamVectorOpType) 192 if opType != IndexAlgoParamOpType_l2 { 193 //opType != IndexAlgoParamOpType_ip && 194 //opType != IndexAlgoParamOpType_cos && 195 196 return nil, moerr.NewInternalErrorNoCtx("invalid op_type. not of type '%s'", 197 IndexAlgoParamOpType_l2, 198 //IndexAlgoParamOpType_ip, IndexAlgoParamOpType_cos, 199 ) 200 } 201 res[IndexAlgoParamOpType] = def.IndexOption.AlgoParamVectorOpType 202 } else { 203 res[IndexAlgoParamOpType] = IndexAlgoParamOpType_l2 // set l2 as default 204 } 205 default: 206 return nil, moerr.NewInternalErrorNoCtx("invalid index type") 207 } 208 return res, nil 209 } 210 211 func DefaultIvfIndexAlgoOptions() map[string]string { 212 res := make(map[string]string) 213 res[IndexAlgoParamLists] = "1" // set lists = 1 as default 214 res[IndexAlgoParamOpType] = IndexAlgoParamOpType_l2 // set l2 as default 215 return res 216 } 217 218 //------------------------[END] IndexAlgoParams------------------------ 219 220 // ------------------------[START] Aliaser------------------------ 221 222 // This code is used by "secondary index" to resolve the "programmatically generated PK" appended to the 223 // end of the index key "__mo_index_idx_col". 224 225 const ( 226 AliasPrefix = "__mo_alias_" 227 ) 228 229 func CreateAlias(column string) string { 230 return fmt.Sprintf("%s%s", AliasPrefix, column) 231 } 232 233 func ResolveAlias(alias string) string { 234 return strings.TrimPrefix(alias, AliasPrefix) 235 } 236 237 func IsAlias(column string) bool { 238 return strings.HasPrefix(column, AliasPrefix) 239 } 240 241 // ------------------------[END] Aliaser------------------------