github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/sqle/index/key_builder.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package index
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  
    21  	"github.com/dolthub/go-mysql-server/memory"
    22  	"github.com/dolthub/go-mysql-server/sql"
    23  	"github.com/dolthub/go-mysql-server/sql/analyzer"
    24  	"github.com/dolthub/go-mysql-server/sql/expression"
    25  	"github.com/dolthub/go-mysql-server/sql/plan"
    26  	"github.com/dolthub/go-mysql-server/sql/planbuilder"
    27  	"github.com/dolthub/go-mysql-server/sql/transform"
    28  
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/sqlfmt"
    31  	"github.com/dolthub/dolt/go/store/pool"
    32  	"github.com/dolthub/dolt/go/store/prolly"
    33  	"github.com/dolthub/dolt/go/store/prolly/tree"
    34  	"github.com/dolthub/dolt/go/store/val"
    35  )
    36  
    37  // NewSecondaryKeyBuilder creates a new SecondaryKeyBuilder instance that can build keys for the secondary index |def|.
    38  // The schema of the source table is defined in |sch|, and |idxDesc| describes the tuple layout for the index's keys
    39  // (index value tuples are not used).
    40  func NewSecondaryKeyBuilder(ctx *sql.Context, tableName string, sch schema.Schema, def schema.Index, idxDesc val.TupleDesc, p pool.BuffPool, nodeStore tree.NodeStore) (SecondaryKeyBuilder, error) {
    41  	b := SecondaryKeyBuilder{
    42  		builder:   val.NewTupleBuilder(idxDesc),
    43  		pool:      p,
    44  		nodeStore: nodeStore,
    45  		sch:       sch,
    46  		indexDef:  def,
    47  	}
    48  
    49  	keyless := schema.IsKeyless(sch)
    50  	if keyless {
    51  		// the only key is the hash of the values
    52  		b.split = 1
    53  	} else {
    54  		b.split = sch.GetPKCols().Size()
    55  	}
    56  
    57  	b.mapping = make(val.OrdinalMapping, len(def.AllTags()))
    58  	var virtualExpressions []sql.Expression
    59  	for i, tag := range def.AllTags() {
    60  		j, ok := sch.GetPKCols().TagToIdx[tag]
    61  		if !ok {
    62  			col := sch.GetNonPKCols().TagToCol[tag]
    63  			if col.Virtual {
    64  				if len(virtualExpressions) == 0 {
    65  					virtualExpressions = make([]sql.Expression, len(def.AllTags()))
    66  				}
    67  
    68  				expr, err := ResolveDefaultExpression(ctx, tableName, sch, col)
    69  				if err != nil {
    70  					return SecondaryKeyBuilder{}, err
    71  				}
    72  
    73  				virtualExpressions[i] = expr
    74  				j = -1
    75  			} else if keyless {
    76  				// Skip cardinality column
    77  				j = b.split + 1 + sch.GetNonPKCols().TagToIdx[tag]
    78  			} else {
    79  				j = b.split + sch.GetNonPKCols().TagToIdx[tag]
    80  			}
    81  		}
    82  		b.mapping[i] = j
    83  	}
    84  
    85  	b.virtualExpressions = virtualExpressions
    86  
    87  	if keyless {
    88  		// last key in index is hash which is the only column in the key
    89  		b.mapping = append(b.mapping, 0)
    90  	}
    91  	return b, nil
    92  }
    93  
    94  // ResolveDefaultExpression returns a sql.Expression for the column default or generated expression for the
    95  // column provided
    96  func ResolveDefaultExpression(ctx *sql.Context, tableName string, sch schema.Schema, col schema.Column) (sql.Expression, error) {
    97  	ct, err := parseCreateTable(ctx, tableName, sch)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	colIdx := ct.PkSchema().Schema.IndexOfColName(col.Name)
   103  	if colIdx < 0 {
   104  		return nil, fmt.Errorf("unable to find column %s in analyzed query", col.Name)
   105  	}
   106  
   107  	sqlCol := ct.PkSchema().Schema[colIdx]
   108  	expr := sqlCol.Default
   109  	if expr == nil || expr.Expr == nil {
   110  		expr = sqlCol.Generated
   111  	}
   112  
   113  	if expr == nil || expr.Expr == nil {
   114  		return nil, fmt.Errorf("unable to find default or generated expression")
   115  	}
   116  
   117  	return expr.Expr, nil
   118  }
   119  
   120  // ResolveCheckExpression returns a sql.Expression for the check provided
   121  func ResolveCheckExpression(ctx *sql.Context, tableName string, sch schema.Schema, checkExpr string) (sql.Expression, error) {
   122  	ct, err := parseCreateTable(ctx, tableName, sch)
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  
   127  	for _, check := range ct.Checks() {
   128  		if stripTableNamesFromExpression(check.Expr).String() == checkExpr {
   129  			return check.Expr, nil
   130  		}
   131  	}
   132  
   133  	return nil, fmt.Errorf("unable to find check expression")
   134  }
   135  
   136  func stripTableNamesFromExpression(expr sql.Expression) sql.Expression {
   137  	e, _, _ := transform.Expr(expr, func(e sql.Expression) (sql.Expression, transform.TreeIdentity, error) {
   138  		if col, ok := e.(*expression.GetField); ok {
   139  			return col.WithTable(""), transform.NewTree, nil
   140  		}
   141  		return e, transform.SameTree, nil
   142  	})
   143  	return e
   144  }
   145  
   146  func parseCreateTable(ctx *sql.Context, tableName string, sch schema.Schema) (*plan.CreateTable, error) {
   147  	createTable, err := sqlfmt.GenerateCreateTableStatement(tableName, sch, nil, nil)
   148  	if err != nil {
   149  		return nil, err
   150  	}
   151  
   152  	query := createTable
   153  
   154  	mockDatabase := memory.NewDatabase("mydb")
   155  	mockProvider := memory.NewDBProvider(mockDatabase)
   156  	catalog := analyzer.NewCatalog(mockProvider)
   157  	parseCtx := sql.NewEmptyContext()
   158  	parseCtx.SetCurrentDatabase("mydb")
   159  
   160  	b := planbuilder.New(parseCtx, catalog, sql.NewMysqlParser())
   161  	pseudoAnalyzedQuery, _, _, err := b.Parse(query, false)
   162  	if err != nil {
   163  		return nil, err
   164  	}
   165  
   166  	ct, ok := pseudoAnalyzedQuery.(*plan.CreateTable)
   167  	if !ok {
   168  		return nil, fmt.Errorf("expected a *plan.CreateTable node, but got %T", pseudoAnalyzedQuery)
   169  	}
   170  	return ct, nil
   171  }
   172  
// SecondaryKeyBuilder builds secondary index key tuples from clustered index rows. Instances are created
// with NewSecondaryKeyBuilder. The |builder| field is the tuple builder for the index's key layout, |pool|
// is the buffer pool handed to the builder when a tuple is built, and |nodeStore| is passed to the tree
// field accessors and to BuildRow when field values have to be interpreted rather than copied.
type SecondaryKeyBuilder struct {
	// sch holds the schema of the table on which the secondary index is created
	sch schema.Schema
	// indexDef holds the definition of the secondary index
	indexDef schema.Index
	// mapping defines how to map fields from the source table's schema to this index's tuple layout.
	// An entry of -1 marks a virtual column whose value is computed from virtualExpressions.
	mapping val.OrdinalMapping
	// virtualExpressions holds the expressions for virtual columns in the index, nil for non-virtual indexes
	virtualExpressions []sql.Expression
	// split marks the index in the secondary index's key tuple that splits the main table's
	// key fields from the main table's value fields.
	split     int
	builder   *val.TupleBuilder
	pool      pool.BuffPool
	nodeStore tree.NodeStore
}
   189  
   190  // SecondaryKeyFromRow builds a secondary index key from a clustered index row.
   191  func (b SecondaryKeyBuilder) SecondaryKeyFromRow(ctx context.Context, k, v val.Tuple) (val.Tuple, error) {
   192  	for to := range b.mapping {
   193  		from := b.mapping.MapOrdinal(to)
   194  		if from == -1 {
   195  			// the "from" field is a virtual column
   196  			expr := b.virtualExpressions[to]
   197  			sqlCtx, ok := ctx.(*sql.Context)
   198  			if !ok {
   199  				sqlCtx = sql.NewContext(ctx)
   200  			}
   201  
   202  			sqlRow, err := BuildRow(sqlCtx, k, v, b.sch, b.nodeStore)
   203  			if err != nil {
   204  				return nil, err
   205  			}
   206  
   207  			value, err := expr.Eval(sqlCtx, sqlRow)
   208  			if err != nil {
   209  				return nil, err
   210  			}
   211  
   212  			// TODO: type conversion
   213  			err = tree.PutField(ctx, b.nodeStore, b.builder, to, value)
   214  			if err != nil {
   215  				return nil, err
   216  			}
   217  		} else if from < b.split {
   218  			// the "from" field comes from the key tuple fields
   219  			// NOTE: Because we are using Tuple.GetField and TupleBuilder.PutRaw, we are not
   220  			//       interpreting the tuple data at all and just copying the bytes. This should work
   221  			//       for primary keys since they are always represented in the secondary index exactly
   222  			//       as they are in the primary index, but for the value tuple, we need to interpret the
   223  			//       data so that we can transform StringAddrEnc fields from pointers to strings (i.e. for
   224  			//       prefix indexes) as well as custom handling for ZCell geometry fields.
   225  			b.builder.PutRaw(to, k.GetField(from))
   226  		} else {
   227  			// the "from" field comes from the value tuple fields
   228  			from -= b.split
   229  
   230  			if b.canCopyRawBytes(to) {
   231  				b.builder.PutRaw(to, v.GetField(from))
   232  			} else {
   233  				value, err := tree.GetField(ctx, b.sch.GetValueDescriptor(), from, v, b.nodeStore)
   234  				if err != nil {
   235  					return nil, err
   236  				}
   237  
   238  				if len(b.indexDef.PrefixLengths()) > to {
   239  					value = val.TrimValueToPrefixLength(value, b.indexDef.PrefixLengths()[to])
   240  				}
   241  
   242  				err = tree.PutField(ctx, b.nodeStore, b.builder, to, value)
   243  				if err != nil {
   244  					return nil, err
   245  				}
   246  			}
   247  		}
   248  	}
   249  	return b.builder.Build(b.pool), nil
   250  }
   251  
   252  // BuildRow returns a sql.Row for the given key/value tuple pair
   253  func BuildRow(ctx *sql.Context, key, value val.Tuple, sch schema.Schema, ns tree.NodeStore) (sql.Row, error) {
   254  	prollyIter := prolly.NewPointLookup(key, value)
   255  	rowIter := NewProllyRowIterForSchema(sch, prollyIter, sch.GetKeyDescriptor(), sch.GetValueDescriptor(), sch.GetAllCols().Tags, ns)
   256  	return rowIter.Next(ctx)
   257  }
   258  
   259  // canCopyRawBytes returns true if the bytes for |idxField| can
   260  // be copied directly. This is a faster way to populate an index
   261  // but requires that no data transformation is needed. For example,
   262  // prefix indexes have to manipulate the data to extract a prefix
   263  // before the data can be populated in the index, so if an index
   264  // field is a prefix index, this function will return false.
   265  func (b SecondaryKeyBuilder) canCopyRawBytes(idxField int) bool {
   266  	if b.builder.Desc.Types[idxField].Enc == val.CellEnc {
   267  		return false
   268  	} else if len(b.indexDef.PrefixLengths()) > idxField && b.indexDef.PrefixLengths()[idxField] > 0 {
   269  		return false
   270  	}
   271  
   272  	return true
   273  }
   274  
   275  func NewClusteredKeyBuilder(def schema.Index, sch schema.Schema, keyDesc val.TupleDesc, p pool.BuffPool) (b ClusteredKeyBuilder) {
   276  	b.pool = p
   277  	if schema.IsKeyless(sch) {
   278  		// [16]byte hash key is always final key field
   279  		b.mapping = val.OrdinalMapping{def.Count()}
   280  		b.builder = val.NewTupleBuilder(val.KeylessTupleDesc)
   281  		return
   282  	}
   283  
   284  	// secondary indexes contain all clustered key cols, in some order
   285  	tagToOrdinal := make(map[uint64]int, len(def.AllTags()))
   286  	for ord, tag := range def.AllTags() {
   287  		tagToOrdinal[tag] = ord
   288  	}
   289  
   290  	b.builder = val.NewTupleBuilder(keyDesc)
   291  	b.mapping = make(val.OrdinalMapping, keyDesc.Count())
   292  	for i, col := range sch.GetPKCols().GetColumns() {
   293  		b.mapping[i] = tagToOrdinal[col.Tag]
   294  	}
   295  	return
   296  }
   297  
// ClusteredKeyBuilder builds clustered index key tuples from secondary index key tuples. Instances are
// created with NewClusteredKeyBuilder. |mapping| gives, for each field of the clustered key, the ordinal
// of the secondary index key field to copy it from; |builder| assembles the clustered key tuple and
// |pool| is the buffer pool handed to the builder when a tuple is built.
type ClusteredKeyBuilder struct {
	mapping val.OrdinalMapping
	builder *val.TupleBuilder
	pool    pool.BuffPool
}
   303  
   304  // ClusteredKeyFromIndexKey builds a clustered index key from a secondary index key.
   305  func (b ClusteredKeyBuilder) ClusteredKeyFromIndexKey(k val.Tuple) val.Tuple {
   306  	for to, from := range b.mapping {
   307  		b.builder.PutRaw(to, k.GetField(from))
   308  	}
   309  	return b.builder.Build(b.pool)
   310  }