// github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/index/key_builder.go

// Copyright 2023 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package index

import (
	"context"
	"fmt"

	"github.com/dolthub/go-mysql-server/memory"
	"github.com/dolthub/go-mysql-server/sql"
	"github.com/dolthub/go-mysql-server/sql/analyzer"
	"github.com/dolthub/go-mysql-server/sql/expression"
	"github.com/dolthub/go-mysql-server/sql/plan"
	"github.com/dolthub/go-mysql-server/sql/planbuilder"
	"github.com/dolthub/go-mysql-server/sql/transform"

	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlfmt"
	"github.com/dolthub/dolt/go/store/pool"
	"github.com/dolthub/dolt/go/store/prolly"
	"github.com/dolthub/dolt/go/store/prolly/tree"
	"github.com/dolthub/dolt/go/store/val"
)

// NewSecondaryKeyBuilder creates a new SecondaryKeyBuilder instance that can build keys for the secondary index |def|.
// The schema of the source table is defined in |sch|, and |idxDesc| describes the tuple layout for the index's keys
// (index value tuples are not used).
40 func NewSecondaryKeyBuilder(ctx *sql.Context, tableName string, sch schema.Schema, def schema.Index, idxDesc val.TupleDesc, p pool.BuffPool, nodeStore tree.NodeStore) (SecondaryKeyBuilder, error) { 41 b := SecondaryKeyBuilder{ 42 builder: val.NewTupleBuilder(idxDesc), 43 pool: p, 44 nodeStore: nodeStore, 45 sch: sch, 46 indexDef: def, 47 } 48 49 keyless := schema.IsKeyless(sch) 50 if keyless { 51 // the only key is the hash of the values 52 b.split = 1 53 } else { 54 b.split = sch.GetPKCols().Size() 55 } 56 57 b.mapping = make(val.OrdinalMapping, len(def.AllTags())) 58 var virtualExpressions []sql.Expression 59 for i, tag := range def.AllTags() { 60 j, ok := sch.GetPKCols().TagToIdx[tag] 61 if !ok { 62 col := sch.GetNonPKCols().TagToCol[tag] 63 if col.Virtual { 64 if len(virtualExpressions) == 0 { 65 virtualExpressions = make([]sql.Expression, len(def.AllTags())) 66 } 67 68 expr, err := ResolveDefaultExpression(ctx, tableName, sch, col) 69 if err != nil { 70 return SecondaryKeyBuilder{}, err 71 } 72 73 virtualExpressions[i] = expr 74 j = -1 75 } else if keyless { 76 // Skip cardinality column 77 j = b.split + 1 + sch.GetNonPKCols().TagToIdx[tag] 78 } else { 79 j = b.split + sch.GetNonPKCols().TagToIdx[tag] 80 } 81 } 82 b.mapping[i] = j 83 } 84 85 b.virtualExpressions = virtualExpressions 86 87 if keyless { 88 // last key in index is hash which is the only column in the key 89 b.mapping = append(b.mapping, 0) 90 } 91 return b, nil 92 } 93 94 // ResolveDefaultExpression returns a sql.Expression for the column default or generated expression for the 95 // column provided 96 func ResolveDefaultExpression(ctx *sql.Context, tableName string, sch schema.Schema, col schema.Column) (sql.Expression, error) { 97 ct, err := parseCreateTable(ctx, tableName, sch) 98 if err != nil { 99 return nil, err 100 } 101 102 colIdx := ct.PkSchema().Schema.IndexOfColName(col.Name) 103 if colIdx < 0 { 104 return nil, fmt.Errorf("unable to find column %s in analyzed query", col.Name) 105 } 
106 107 sqlCol := ct.PkSchema().Schema[colIdx] 108 expr := sqlCol.Default 109 if expr == nil || expr.Expr == nil { 110 expr = sqlCol.Generated 111 } 112 113 if expr == nil || expr.Expr == nil { 114 return nil, fmt.Errorf("unable to find default or generated expression") 115 } 116 117 return expr.Expr, nil 118 } 119 120 // ResolveCheckExpression returns a sql.Expression for the check provided 121 func ResolveCheckExpression(ctx *sql.Context, tableName string, sch schema.Schema, checkExpr string) (sql.Expression, error) { 122 ct, err := parseCreateTable(ctx, tableName, sch) 123 if err != nil { 124 return nil, err 125 } 126 127 for _, check := range ct.Checks() { 128 if stripTableNamesFromExpression(check.Expr).String() == checkExpr { 129 return check.Expr, nil 130 } 131 } 132 133 return nil, fmt.Errorf("unable to find check expression") 134 } 135 136 func stripTableNamesFromExpression(expr sql.Expression) sql.Expression { 137 e, _, _ := transform.Expr(expr, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) { 138 if col, ok := e.(*expression.GetField); ok { 139 return col.WithTable(""), transform.NewTree, nil 140 } 141 return e, transform.SameTree, nil 142 }) 143 return e 144 } 145 146 func parseCreateTable(ctx *sql.Context, tableName string, sch schema.Schema) (*plan.CreateTable, error) { 147 createTable, err := sqlfmt.GenerateCreateTableStatement(tableName, sch, nil, nil) 148 if err != nil { 149 return nil, err 150 } 151 152 query := createTable 153 154 mockDatabase := memory.NewDatabase("mydb") 155 mockProvider := memory.NewDBProvider(mockDatabase) 156 catalog := analyzer.NewCatalog(mockProvider) 157 parseCtx := sql.NewEmptyContext() 158 parseCtx.SetCurrentDatabase("mydb") 159 160 b := planbuilder.New(parseCtx, catalog, sql.NewMysqlParser()) 161 pseudoAnalyzedQuery, _, _, err := b.Parse(query, false) 162 if err != nil { 163 return nil, err 164 } 165 166 ct, ok := pseudoAnalyzedQuery.(*plan.CreateTable) 167 if !ok { 168 return nil, 
fmt.Errorf("expected a *plan.CreateTable node, but got %T", pseudoAnalyzedQuery) 169 } 170 return ct, nil 171 } 172 173 type SecondaryKeyBuilder struct { 174 // sch holds the schema of the table on which the secondary index is created 175 sch schema.Schema 176 // indexDef holds the definition of the secondary index 177 indexDef schema.Index 178 // mapping defines how to map fields from the source table's schema to this index's tuple layout 179 mapping val.OrdinalMapping 180 // virtualExpressions holds the expressions for virtual columns in the index, nil for non-virtual indexes 181 virtualExpressions []sql.Expression 182 // split marks the index in the secondary index's key tuple that splits the main table's 183 // key fields from the main table's value fields. 184 split int 185 builder *val.TupleBuilder 186 pool pool.BuffPool 187 nodeStore tree.NodeStore 188 } 189 190 // SecondaryKeyFromRow builds a secondary index key from a clustered index row. 191 func (b SecondaryKeyBuilder) SecondaryKeyFromRow(ctx context.Context, k, v val.Tuple) (val.Tuple, error) { 192 for to := range b.mapping { 193 from := b.mapping.MapOrdinal(to) 194 if from == -1 { 195 // the "from" field is a virtual column 196 expr := b.virtualExpressions[to] 197 sqlCtx, ok := ctx.(*sql.Context) 198 if !ok { 199 sqlCtx = sql.NewContext(ctx) 200 } 201 202 sqlRow, err := BuildRow(sqlCtx, k, v, b.sch, b.nodeStore) 203 if err != nil { 204 return nil, err 205 } 206 207 value, err := expr.Eval(sqlCtx, sqlRow) 208 if err != nil { 209 return nil, err 210 } 211 212 // TODO: type conversion 213 err = tree.PutField(ctx, b.nodeStore, b.builder, to, value) 214 if err != nil { 215 return nil, err 216 } 217 } else if from < b.split { 218 // the "from" field comes from the key tuple fields 219 // NOTE: Because we are using Tuple.GetField and TupleBuilder.PutRaw, we are not 220 // interpreting the tuple data at all and just copying the bytes. 
This should work 221 // for primary keys since they are always represented in the secondary index exactly 222 // as they are in the primary index, but for the value tuple, we need to interpret the 223 // data so that we can transform StringAddrEnc fields from pointers to strings (i.e. for 224 // prefix indexes) as well as custom handling for ZCell geometry fields. 225 b.builder.PutRaw(to, k.GetField(from)) 226 } else { 227 // the "from" field comes from the value tuple fields 228 from -= b.split 229 230 if b.canCopyRawBytes(to) { 231 b.builder.PutRaw(to, v.GetField(from)) 232 } else { 233 value, err := tree.GetField(ctx, b.sch.GetValueDescriptor(), from, v, b.nodeStore) 234 if err != nil { 235 return nil, err 236 } 237 238 if len(b.indexDef.PrefixLengths()) > to { 239 value = val.TrimValueToPrefixLength(value, b.indexDef.PrefixLengths()[to]) 240 } 241 242 err = tree.PutField(ctx, b.nodeStore, b.builder, to, value) 243 if err != nil { 244 return nil, err 245 } 246 } 247 } 248 } 249 return b.builder.Build(b.pool), nil 250 } 251 252 // BuildRow returns a sql.Row for the given key/value tuple pair 253 func BuildRow(ctx *sql.Context, key, value val.Tuple, sch schema.Schema, ns tree.NodeStore) (sql.Row, error) { 254 prollyIter := prolly.NewPointLookup(key, value) 255 rowIter := NewProllyRowIterForSchema(sch, prollyIter, sch.GetKeyDescriptor(), sch.GetValueDescriptor(), sch.GetAllCols().Tags, ns) 256 return rowIter.Next(ctx) 257 } 258 259 // canCopyRawBytes returns true if the bytes for |idxField| can 260 // be copied directly. This is a faster way to populate an index 261 // but requires that no data transformation is needed. For example, 262 // prefix indexes have to manipulate the data to extract a prefix 263 // before the data can be populated in the index, so if an index 264 // field is a prefix index, this function will return false. 
265 func (b SecondaryKeyBuilder) canCopyRawBytes(idxField int) bool { 266 if b.builder.Desc.Types[idxField].Enc == val.CellEnc { 267 return false 268 } else if len(b.indexDef.PrefixLengths()) > idxField && b.indexDef.PrefixLengths()[idxField] > 0 { 269 return false 270 } 271 272 return true 273 } 274 275 func NewClusteredKeyBuilder(def schema.Index, sch schema.Schema, keyDesc val.TupleDesc, p pool.BuffPool) (b ClusteredKeyBuilder) { 276 b.pool = p 277 if schema.IsKeyless(sch) { 278 // [16]byte hash key is always final key field 279 b.mapping = val.OrdinalMapping{def.Count()} 280 b.builder = val.NewTupleBuilder(val.KeylessTupleDesc) 281 return 282 } 283 284 // secondary indexes contain all clustered key cols, in some order 285 tagToOrdinal := make(map[uint64]int, len(def.AllTags())) 286 for ord, tag := range def.AllTags() { 287 tagToOrdinal[tag] = ord 288 } 289 290 b.builder = val.NewTupleBuilder(keyDesc) 291 b.mapping = make(val.OrdinalMapping, keyDesc.Count()) 292 for i, col := range sch.GetPKCols().GetColumns() { 293 b.mapping[i] = tagToOrdinal[col.Tag] 294 } 295 return 296 } 297 298 type ClusteredKeyBuilder struct { 299 mapping val.OrdinalMapping 300 builder *val.TupleBuilder 301 pool pool.BuffPool 302 } 303 304 // ClusteredKeyFromIndexKey builds a clustered index key from a secondary index key. 305 func (b ClusteredKeyBuilder) ClusteredKeyFromIndexKey(k val.Tuple) val.Tuple { 306 for to, from := range b.mapping { 307 b.builder.PutRaw(to, k.GetField(from)) 308 } 309 return b.builder.Build(b.pool) 310 }