vitess.io/vitess@v0.16.2/go/vt/vtgate/planbuilder/dml_planner.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package planbuilder
    18  
    19  import (
    20  	"fmt"
    21  
    22  	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
    23  	"vitess.io/vitess/go/vt/sqlparser"
    24  	"vitess.io/vitess/go/vt/vterrors"
    25  	"vitess.io/vitess/go/vt/vtgate/engine"
    26  	"vitess.io/vitess/go/vt/vtgate/evalengine"
    27  	"vitess.io/vitess/go/vt/vtgate/planbuilder/plancontext"
    28  
    29  	"vitess.io/vitess/go/vt/vtgate/semantics"
    30  	"vitess.io/vitess/go/vt/vtgate/vindexes"
    31  )
    32  
    33  type (
    34  	// costDML is used to compare the cost of vindexOptionDML
    35  	costDML struct {
    36  		vindexCost int
    37  		isUnique   bool
    38  		opCode     engine.Opcode
    39  	}
    40  
    41  	// vindexPlusPredicatesDML is a struct used to store all the predicates that the vindex can be used to query
    42  	vindexPlusPredicatesDML struct {
    43  		colVindex *vindexes.ColumnVindex
    44  
    45  		// during planning, we store the alternatives found for this DML in this slice
    46  		options []*vindexOptionDML
    47  	}
    48  
    49  	// vindexOptionDML stores the information needed to know if we have all the information needed to use a vindex
    50  	vindexOptionDML struct {
    51  		ready  bool
    52  		values []evalengine.Expr
    53  		// columns that we have seen so far. Used only for multi-column vindexes so that we can track how many columns part of the vindex we have seen
    54  		colsSeen    map[string]any
    55  		opcode      engine.Opcode
    56  		foundVindex vindexes.Vindex
    57  		cost        costDML
    58  	}
    59  )
    60  
    61  // getDMLRouting returns the vindex and values for the DML,
    62  // If it cannot find a unique vindex match, it returns an error.
    63  func getDMLRouting(where *sqlparser.Where, table *vindexes.Table) (
    64  	engine.Opcode,
    65  	*vindexes.ColumnVindex,
    66  	vindexes.Vindex,
    67  	[]evalengine.Expr,
    68  	error,
    69  ) {
    70  	// Check that we have a primary vindex which is valid
    71  	if len(table.ColumnVindexes) == 0 || !table.ColumnVindexes[0].IsUnique() {
    72  		return engine.Scatter, nil, nil, nil, vterrors.VT09001(table.Name)
    73  	}
    74  	// ksidVindex is the primary vindex
    75  	ksidVindex := table.ColumnVindexes[0]
    76  	if where == nil {
    77  		return engine.Scatter, ksidVindex, nil, nil, nil
    78  	}
    79  
    80  	filters := sqlparser.SplitAndExpression(nil, where.Expr)
    81  	// go over the vindexes in the order of increasing cost
    82  	for _, colVindex := range table.Ordered {
    83  		if lu, isLu := colVindex.Vindex.(vindexes.LookupBackfill); isLu && lu.IsBackfilling() {
    84  			// Checking if the Vindex is currently backfilling or not, if it isn't we can read from the vindex table
    85  			// and we will be able to do a delete equal. Otherwise, we continue to look for next best vindex.
    86  			continue
    87  		}
    88  		// get the best vindex option that can be used for this vindexes.ColumnVindex
    89  		if vindexOption := getBestVindexOption(filters, colVindex); vindexOption != nil {
    90  			return vindexOption.opcode, ksidVindex, colVindex.Vindex, vindexOption.values, nil
    91  		}
    92  	}
    93  	return engine.Scatter, ksidVindex, nil, nil, nil
    94  }
    95  
    96  // getBestVindexOption returns the best vindex option that can be used for this vindexes.ColumnVindex
    97  // It returns nil if there is no suitable way to use the ColumnVindex
    98  func getBestVindexOption(exprs []sqlparser.Expr, index *vindexes.ColumnVindex) *vindexOptionDML {
    99  	vindexPlusPredicates := &vindexPlusPredicatesDML{
   100  		colVindex: index,
   101  	}
   102  	for _, filter := range exprs {
   103  		comparison, ok := filter.(*sqlparser.ComparisonExpr)
   104  		if !ok {
   105  			continue
   106  		}
   107  		var colName *sqlparser.ColName
   108  		var valExpr sqlparser.Expr
   109  		if col, ok := comparison.Left.(*sqlparser.ColName); ok {
   110  			colName = col
   111  			valExpr = comparison.Right
   112  		} else if col, ok := comparison.Right.(*sqlparser.ColName); ok {
   113  			colName = col
   114  			valExpr = comparison.Left
   115  		} else {
   116  			continue
   117  		}
   118  
   119  		var opcode engine.Opcode
   120  		switch comparison.Operator {
   121  		case sqlparser.EqualOp:
   122  			if !sqlparser.IsValue(valExpr) {
   123  				continue
   124  			}
   125  			opcode = engine.Equal
   126  		case sqlparser.InOp:
   127  			if !sqlparser.IsSimpleTuple(valExpr) {
   128  				continue
   129  			}
   130  			opcode = engine.IN
   131  		default:
   132  			continue
   133  		}
   134  		expr, err := evalengine.Translate(comparison.Right, semantics.EmptySemTable())
   135  		if err != nil {
   136  			continue
   137  		}
   138  		addVindexOptions(colName, expr, opcode, vindexPlusPredicates)
   139  	}
   140  	return vindexPlusPredicates.bestOption()
   141  }
   142  
   143  // bestOption returns the option which is ready and has the lowest associated cost
   144  func (vpp *vindexPlusPredicatesDML) bestOption() *vindexOptionDML {
   145  	var best *vindexOptionDML
   146  	for _, option := range vpp.options {
   147  		if option.ready {
   148  			if best == nil || lessCostDML(option.cost, best.cost) {
   149  				best = option
   150  			}
   151  		}
   152  	}
   153  	return best
   154  }
   155  
   156  // lessCostDML compares two costDML and returns true if the first cost is cheaper than the second
   157  func lessCostDML(c1, c2 costDML) bool {
   158  	switch {
   159  	case c1.opCode != c2.opCode:
   160  		return c1.opCode < c2.opCode
   161  	case c1.isUnique == c2.isUnique:
   162  		return c1.vindexCost <= c2.vindexCost
   163  	default:
   164  		return c1.isUnique
   165  	}
   166  }
   167  
   168  // addVindexOptions adds new vindexOptionDML if it matches any column of the vindexes.ColumnVindex
   169  func addVindexOptions(column *sqlparser.ColName, value evalengine.Expr, opcode engine.Opcode, v *vindexPlusPredicatesDML) {
   170  	switch v.colVindex.Vindex.(type) {
   171  	case vindexes.SingleColumn:
   172  		col := v.colVindex.Columns[0]
   173  		if column.Name.Equal(col) {
   174  			// single column vindex - just add the option
   175  			vindex := v.colVindex
   176  			v.options = append(v.options, &vindexOptionDML{
   177  				values:      []evalengine.Expr{value},
   178  				opcode:      opcode,
   179  				foundVindex: vindex.Vindex,
   180  				cost:        costForDML(v.colVindex, opcode),
   181  				ready:       true,
   182  			})
   183  		}
   184  	case vindexes.MultiColumn:
   185  		colLoweredName := ""
   186  		indexOfCol := -1
   187  		for idx, col := range v.colVindex.Columns {
   188  			if column.Name.Equal(col) {
   189  				colLoweredName = column.Name.Lowered()
   190  				indexOfCol = idx
   191  				break
   192  			}
   193  		}
   194  		if colLoweredName == "" {
   195  			break
   196  		}
   197  
   198  		var newOption []*vindexOptionDML
   199  		for _, op := range v.options {
   200  			if op.ready {
   201  				continue
   202  			}
   203  			_, isPresent := op.colsSeen[colLoweredName]
   204  			if isPresent {
   205  				continue
   206  			}
   207  			option := copyOptionDML(op)
   208  			option.updateWithNewColumn(colLoweredName, indexOfCol, value, v.colVindex, opcode)
   209  			newOption = append(newOption, option)
   210  		}
   211  		v.options = append(v.options, newOption...)
   212  
   213  		// multi column vindex - just always add as new option
   214  		option := createOptionDML(v.colVindex)
   215  		option.updateWithNewColumn(colLoweredName, indexOfCol, value, v.colVindex, opcode)
   216  		v.options = append(v.options, option)
   217  	}
   218  }
   219  
   220  // copyOptionDML is used to copy vindexOptionDML
   221  func copyOptionDML(orig *vindexOptionDML) *vindexOptionDML {
   222  	colsSeen := make(map[string]any, len(orig.colsSeen))
   223  	values := make([]evalengine.Expr, len(orig.values))
   224  
   225  	copy(values, orig.values)
   226  	for k, v := range orig.colsSeen {
   227  		colsSeen[k] = v
   228  	}
   229  	vo := &vindexOptionDML{
   230  		values:      values,
   231  		colsSeen:    colsSeen,
   232  		opcode:      orig.opcode,
   233  		foundVindex: orig.foundVindex,
   234  		cost:        orig.cost,
   235  	}
   236  	return vo
   237  }
   238  
   239  // updateWithNewColumn is used to update vindexOptionDML with a new column that matches one of its unseen columns
   240  func (option *vindexOptionDML) updateWithNewColumn(colLoweredName string, indexOfCol int, value evalengine.Expr, colVindex *vindexes.ColumnVindex, opcode engine.Opcode) {
   241  	option.colsSeen[colLoweredName] = true
   242  	option.values[indexOfCol] = value
   243  	option.ready = len(option.colsSeen) == len(colVindex.Columns)
   244  	if option.opcode < opcode {
   245  		option.opcode = opcode
   246  		option.cost = costForDML(colVindex, opcode)
   247  	}
   248  }
   249  
   250  // createOptionDML is used to create a vindexOptionDML
   251  func createOptionDML(
   252  	colVindex *vindexes.ColumnVindex,
   253  ) *vindexOptionDML {
   254  	values := make([]evalengine.Expr, len(colVindex.Columns))
   255  	vindex := colVindex.Vindex
   256  
   257  	return &vindexOptionDML{
   258  		values:      values,
   259  		colsSeen:    map[string]any{},
   260  		foundVindex: vindex,
   261  	}
   262  }
   263  
   264  // costForDML returns a cost struct to make route choices easier to compare
   265  func costForDML(foundVindex *vindexes.ColumnVindex, opcode engine.Opcode) costDML {
   266  	switch opcode {
   267  	// For these opcodes, we should not have a vindex, so we just return the opcode as the cost
   268  	case engine.Unsharded, engine.Scatter:
   269  		return costDML{
   270  			opCode: opcode,
   271  		}
   272  	}
   273  
   274  	return costDML{
   275  		vindexCost: foundVindex.Cost(),
   276  		isUnique:   foundVindex.IsUnique(),
   277  		opCode:     opcode,
   278  	}
   279  }
   280  
   281  func buildDMLPlan(
   282  	vschema plancontext.VSchema,
   283  	dmlType string,
   284  	stmt sqlparser.Statement,
   285  	reservedVars *sqlparser.ReservedVars,
   286  	tableExprs sqlparser.TableExprs,
   287  	where *sqlparser.Where,
   288  	orderBy sqlparser.OrderBy,
   289  	limit *sqlparser.Limit,
   290  	comments *sqlparser.ParsedComments,
   291  	nodes ...sqlparser.SQLNode,
   292  ) (*engine.DML, []string, *vindexes.ColumnVindex, error) {
   293  	edml := engine.NewDML()
   294  	pb := newPrimitiveBuilder(vschema, newJointab(reservedVars))
   295  	rb, err := pb.processDMLTable(tableExprs, reservedVars, nil)
   296  	if err != nil {
   297  		return nil, nil, nil, err
   298  	}
   299  	edml.Keyspace = rb.eroute.Keyspace
   300  	tc := &tableCollector{}
   301  	for _, tval := range pb.st.tables {
   302  		tc.addVindexTable(tval.vschemaTable)
   303  	}
   304  
   305  	edml.Table, err = pb.st.AllVschemaTableNames()
   306  	if err != nil {
   307  		return nil, nil, nil, err
   308  	}
   309  	if !edml.Keyspace.Sharded {
   310  		// We only validate non-table subexpressions because the previous analysis has already validated them.
   311  		var subqueryArgs []sqlparser.SQLNode
   312  		subqueryArgs = append(subqueryArgs, nodes...)
   313  		subqueryArgs = append(subqueryArgs, where, orderBy, limit)
   314  		subqueryIsUnsharded, subqueryTables := pb.finalizeUnshardedDMLSubqueries(reservedVars, subqueryArgs...)
   315  		if subqueryIsUnsharded {
   316  			vschema.WarnUnshardedOnly("subqueries can't be sharded in DML")
   317  		} else {
   318  			return nil, nil, nil, vterrors.VT12001("sharded subqueries in DML")
   319  		}
   320  		edml.Opcode = engine.Unsharded
   321  		// Generate query after all the analysis. Otherwise table name substitutions for
   322  		// routed tables won't happen.
   323  		edml.Query = generateQuery(stmt)
   324  		edml.Table = append(edml.Table, subqueryTables...)
   325  		return edml, tc.getTables(), nil, nil
   326  	}
   327  
   328  	if hasSubquery(stmt) {
   329  		return nil, nil, nil, vterrors.VT12001("sharded subqueries in DML")
   330  	}
   331  
   332  	// Generate query after all the analysis. Otherwise table name substitutions for
   333  	// routed tables won't happen.
   334  	edml.Query = generateQuery(stmt)
   335  
   336  	directives := comments.Directives()
   337  	if directives.IsSet(sqlparser.DirectiveMultiShardAutocommit) {
   338  		edml.MultiShardAutocommit = true
   339  	}
   340  
   341  	edml.QueryTimeout = queryTimeout(directives)
   342  
   343  	if len(pb.st.tables) != 1 {
   344  		return nil, nil, nil, vterrors.VT12001(fmt.Sprintf("multi-table %s statement in a sharded keyspace", dmlType))
   345  	}
   346  	edmlTable, err := edml.GetSingleTable()
   347  	if err != nil {
   348  		return nil, nil, nil, err
   349  	}
   350  	routingType, ksidVindex, vindex, values, err := getDMLRouting(where, edmlTable)
   351  	if err != nil {
   352  		return nil, nil, nil, err
   353  	}
   354  
   355  	if rb.eroute.TargetDestination != nil {
   356  		if rb.eroute.TargetTabletType != topodatapb.TabletType_PRIMARY {
   357  			return nil, nil, nil, vterrors.VT09002(dmlType)
   358  		}
   359  		edml.Opcode = engine.ByDestination
   360  		edml.TargetDestination = rb.eroute.TargetDestination
   361  		return edml, tc.getTables(), ksidVindex, nil
   362  	}
   363  
   364  	edml.Opcode = routingType
   365  	if routingType == engine.Scatter {
   366  		if limit != nil {
   367  			return nil, nil, nil, vterrors.VT12001(fmt.Sprintf("multi-shard %s with LIMIT", dmlType))
   368  		}
   369  	} else {
   370  		edml.Vindex = vindex
   371  		edml.Values = values
   372  	}
   373  
   374  	return edml, tc.getTables(), ksidVindex, nil
   375  }
   376  
   377  func generateDMLSubquery(tblExpr sqlparser.TableExpr, where *sqlparser.Where, orderBy sqlparser.OrderBy, limit *sqlparser.Limit, table *vindexes.Table, ksidCols []sqlparser.IdentifierCI) string {
   378  	buf := sqlparser.NewTrackedBuffer(nil)
   379  	for idx, col := range ksidCols {
   380  		if idx == 0 {
   381  			buf.Myprintf("select %v", col)
   382  		} else {
   383  			buf.Myprintf(", %v", col)
   384  		}
   385  	}
   386  	for _, cv := range table.Owned {
   387  		for _, column := range cv.Columns {
   388  			buf.Myprintf(", %v", column)
   389  		}
   390  	}
   391  	buf.Myprintf(" from %v%v%v%v for update", tblExpr, where, orderBy, limit)
   392  	return buf.String()
   393  }
   394  
   395  func generateQuery(statement sqlparser.Statement) string {
   396  	buf := sqlparser.NewTrackedBuffer(dmlFormatter)
   397  	statement.Format(buf)
   398  	return buf.String()
   399  }
   400  
   401  // dmlFormatter strips out keyspace name from dmls.
   402  func dmlFormatter(buf *sqlparser.TrackedBuffer, node sqlparser.SQLNode) {
   403  	switch node := node.(type) {
   404  	case sqlparser.TableName:
   405  		node.Name.Format(buf)
   406  		return
   407  	}
   408  	node.Format(buf)
   409  }