github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/exec/execbuilder/cascades.go (about)

     1  // Copyright 2020 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package execbuilder
    12  
    13  import (
    14  	"context"
    15  
    16  	"github.com/cockroachdb/cockroach/pkg/sql/opt"
    17  	"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
    18  	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
    19  	"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/opt/xform"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    23  	"github.com/cockroachdb/errors"
    24  )
    25  
    26  // cascadeBuilder is a helper that fills in exec.Cascade metadata; it also
    27  // contains the implementation of exec.Cascade.PlanFn.
    28  //
    29  // We walk through a simple example of a cascade to illustrate the flow around
    30  // executing cascades:
    31  //
    32  //   CREATE TABLE parent (p INT PRIMARY KEY);
    33  //   CREATE TABLE child (
    34  //     c INT PRIMARY KEY,
    35  //     p INT NOT NULL REFERENCES parent(p) ON DELETE CASCADE
    36  //   );
    37  //
    38  //   DELETE FROM parent WHERE p > 1;
    39  //
    40  // The optimizer expression for this query is:
    41  //
    42  //   delete parent
    43  //    ├── columns: <none>
    44  //    ├── fetch columns: p:2
    45  //    ├── input binding: &1
    46  //    ├── cascades
    47  //    │    └── fk_p_ref_parent
    48  //    └── select
    49  //         ├── columns: p:2!null
    50  //         ├── scan parent
    51  //         │    └── columns: p:2!null
    52  //         └── filters
    53  //              └── p:2 > 1
    54  //
    55  // Note that at this time, the cascading query in the child table was not built.
    56  // The expression above does contain a reference to a memo.CascadeBuilder which
    57  // will be invoked to build the query at a later time.
    58  //
    59  // When execbuilding the query above, a BufferNode is constructed for the
    60  // mutation input (binding &1 above) and a cascadeBuilder object is constructed
    61  // for the cascade.
    62  //
    63  // The setupCascade method is called to fill in an exec.Cascade which is passed
    64  // to ConstructPlan. Note that we still did not build the cascading query; all
    65  // we did was provide some plumbing and an entry point (through PlanFn) for that
    66  // to happen later.
    67  //
    68  // The plan is constructed and processed by the execution engine. After the
    69  // plans for the subqueries and the main query are executed, the cascades are
    70  // processed (in a queue). At this time the PlanFn method is called and the
    71  // following happens:
    72  //
    73  //  1. We set up a new empty memo and add metadata for the columns of the
    74  //     BufferNode (binding &1).
    75  //
    76  //  2. We invoke the memo.CascadeBuilder to optbuild the cascading query. At this
    77  //     point, the new memo will contain the following expression:
    78  //
    79  //      delete child
    80  //       ├── columns: <none>
    81  //       ├── fetch columns: c:4 child.p:5
    82  //       └── semi-join (hash)
    83  //            ├── columns: c:4!null child.p:5!null
    84  //            ├── scan child
    85  //            │    └── columns: c:4!null child.p:5!null
    86  //            ├── with-scan &1
    87  //            │    ├── columns: p:6!null
    88  //            │    └── mapping:
    89  //            │         └──  parent.p:1 => p:6
    90  //            └── filters
    91  //                  └── child.p:5 = p:6
    92  //
    93  //    Notes:
    94  //     - normally, a WithScan can only refer to an ancestor mutation or With
    95  //       operator. In this case we are creating a reference "out of the void".
    96  //       This works just fine; we can consider adding a special dummy root
    97  //       operator but so far it hasn't been necessary;
    98  //     - the binding &1 column ID has changed: it used to be 2, it is now 1.
    99  //       This is because we are starting with a fresh memo. We need to take into
   100  //       account this remapping when referring to the foreign key columns.
   101  //
   102  //  3. We optimize the newly built expression.
   103  //
   104  //  4. We execbuild the optimizer expression. We have to be careful to set up
   105  //     the "With" reference before starting.
   106  //
   107  // After PlanFn is called, the resulting plan is executed. Note that this plan
   108  // could itself have more exec.Cascades; these are queued and handled in the
   109  // same way.
   110  //
type cascadeBuilder struct {
	// b is the Builder that was handed to makeCascadeBuilder. Its memo
	// supplies the original column metadata and its catalog is reused when
	// the cascade is planned.
	b              *Builder
	// mutationBuffer is the buffer node of the mutation input's built With
	// expression; setupCascade exposes it as exec.Cascade.Buffer.
	mutationBuffer exec.BufferNode
	// mutationBufferCols maps With column IDs from the original memo to buffer
	// node column ordinals; see builtWithExpr.outputCols.
	mutationBufferCols opt.ColMap

	// colMeta remembers the metadata of the With columns from the original memo.
	// It has one entry per key of mutationBufferCols and is used to recreate
	// the columns in the fresh memo built by planCascade.
	colMeta []opt.ColumnMeta
}
   121  
// cascadeInputWithID is a special WithID that we use to refer to a cascade
// input. It should be large enough to never clash with "regular" WithIDs (which
// are generated sequentially).
//
// planCascade passes this ID to the memo.CascadeBuilder when building the
// cascading query, and registers the mutation buffer under the same ID (via
// addBuiltWithExpr) before execbuilding the optimized expression.
const cascadeInputWithID opt.WithID = 1000000
   126  
   127  func makeCascadeBuilder(b *Builder, mutationWithID opt.WithID) (*cascadeBuilder, error) {
   128  	withExpr := b.findBuiltWithExpr(mutationWithID)
   129  	if withExpr == nil {
   130  		return nil, errors.AssertionFailedf("cannot find mutation input withExpr")
   131  	}
   132  	cb := &cascadeBuilder{
   133  		b:                  b,
   134  		mutationBuffer:     withExpr.bufferNode,
   135  		mutationBufferCols: withExpr.outputCols,
   136  	}
   137  
   138  	// Remember the column metadata, as we will need to recreate it in the new
   139  	// memo.
   140  	md := b.mem.Metadata()
   141  	cb.colMeta = make([]opt.ColumnMeta, 0, cb.mutationBufferCols.Len())
   142  	cb.mutationBufferCols.ForEach(func(key, val int) {
   143  		id := opt.ColumnID(key)
   144  		cb.colMeta = append(cb.colMeta, *md.ColumnMeta(id))
   145  	})
   146  
   147  	return cb, nil
   148  }
   149  
   150  // setupCascade fills in an exec.Cascade struct for the given cascade.
   151  func (cb *cascadeBuilder) setupCascade(cascade *memo.FKCascade) exec.Cascade {
   152  	return exec.Cascade{
   153  		FKName: cascade.FKName,
   154  		Buffer: cb.mutationBuffer,
   155  		PlanFn: func(
   156  			ctx context.Context,
   157  			semaCtx *tree.SemaContext,
   158  			evalCtx *tree.EvalContext,
   159  			execFactory exec.Factory,
   160  			bufferRef exec.BufferNode,
   161  			numBufferedRows int,
   162  		) (exec.Plan, error) {
   163  			return cb.planCascade(ctx, semaCtx, evalCtx, execFactory, cascade, bufferRef, numBufferedRows)
   164  		},
   165  	}
   166  }
   167  
   168  // planCascade is used to plan a cascade query. It is NOT run while
   169  // planning the query; it is run by the execution logic (through
   170  // exec.Cascade.PlanFn) after the main query was executed.
   171  //
   172  // See the comment for cascadeBuilder for a detailed explanation of the
   173  // process.
   174  func (cb *cascadeBuilder) planCascade(
   175  	ctx context.Context,
   176  	semaCtx *tree.SemaContext,
   177  	evalCtx *tree.EvalContext,
   178  	execFactory exec.Factory,
   179  	cascade *memo.FKCascade,
   180  	bufferRef exec.BufferNode,
   181  	numBufferedRows int,
   182  ) (exec.Plan, error) {
   183  	// 1. Set up a brand new memo in which to plan the cascading query.
   184  	var o xform.Optimizer
   185  	o.Init(evalCtx, cb.b.catalog)
   186  	factory := o.Factory()
   187  	md := factory.Metadata()
   188  
   189  	// Set up metadata for the buffer columns.
   190  
   191  	// withColRemap is the mapping between the With column IDs in the original
   192  	// memo and the corresponding column IDs in the new memo.
   193  	var withColRemap opt.ColMap
   194  	// bufferColMap is the mapping between the column IDs in the new memo and
   195  	// the column ordinal in the buffer node.
   196  	var bufferColMap opt.ColMap
   197  	var withCols opt.ColSet
   198  	for i := range cb.colMeta {
   199  		id := md.AddColumn(cb.colMeta[i].Alias, cb.colMeta[i].Type)
   200  		withCols.Add(id)
   201  		ordinal, _ := cb.mutationBufferCols.Get(int(cb.colMeta[i].MetaID))
   202  		bufferColMap.Set(int(id), ordinal)
   203  		withColRemap.Set(int(cb.colMeta[i].MetaID), int(id))
   204  	}
   205  
   206  	// Create relational properties for the special WithID input.
   207  	// TODO(radu): save some more information from the original binding props
   208  	// (like not-null columns, FDs) and remap them to the new columns.
   209  	var bindingProps props.Relational
   210  	bindingProps.Populated = true
   211  	bindingProps.OutputCols = withCols
   212  	bindingProps.Cardinality = props.Cardinality{
   213  		Min: uint32(numBufferedRows),
   214  		Max: uint32(numBufferedRows),
   215  	}
   216  	bindingProps.Stats = props.Statistics{
   217  		Available: true,
   218  		RowCount:  float64(numBufferedRows),
   219  	}
   220  
   221  	// Remap the cascade columns.
   222  	oldVals, err := remapColumns(cascade.OldValues, withColRemap)
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  	newVals, err := remapColumns(cascade.NewValues, withColRemap)
   227  	if err != nil {
   228  		return nil, err
   229  	}
   230  
   231  	// 2. Invoke the memo.CascadeBuilder to build the cascade.
   232  	relExpr, err := cascade.Builder.Build(
   233  		ctx,
   234  		semaCtx,
   235  		evalCtx,
   236  		cb.b.catalog,
   237  		factory,
   238  		cascadeInputWithID,
   239  		&bindingProps,
   240  		oldVals,
   241  		newVals,
   242  	)
   243  	if err != nil {
   244  		return nil, errors.Wrap(err, "while building cascade expression")
   245  	}
   246  
   247  	o.Memo().SetRoot(relExpr, &physical.Required{})
   248  
   249  	// 3. Optimize the expression.
   250  	optimizedExpr, err := o.Optimize()
   251  	if err != nil {
   252  		return nil, errors.Wrap(err, "while optimizing cascade expression")
   253  	}
   254  
   255  	// 4. Execbuild the optimized expression.
   256  	eb := New(execFactory, factory.Memo(), cb.b.catalog, optimizedExpr, evalCtx)
   257  	// Set up the With binding.
   258  	eb.addBuiltWithExpr(cascadeInputWithID, bufferColMap, bufferRef)
   259  	plan, err := eb.Build()
   260  	if err != nil {
   261  		return nil, errors.Wrap(err, "while building cascade plan")
   262  	}
   263  	return plan, nil
   264  }
   265  
   266  // Remap columns according to a ColMap.
   267  func remapColumns(cols opt.ColList, m opt.ColMap) (opt.ColList, error) {
   268  	res := make(opt.ColList, len(cols))
   269  	for i := range cols {
   270  		val, ok := m.Get(int(cols[i]))
   271  		if !ok {
   272  			return nil, errors.AssertionFailedf("column %d not in mapping %s\n", cols[i], m.String())
   273  		}
   274  		res[i] = opt.ColumnID(val)
   275  	}
   276  	return res, nil
   277  }