github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/exec/execbuilder/cascades.go

// Copyright 2020 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package execbuilder

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/sql/opt"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/exec"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/memo"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/props"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical"
	"github.com/cockroachdb/cockroach/pkg/sql/opt/xform"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/errors"
)

// cascadeBuilder is a helper that fills in exec.Cascade metadata; it also
// contains the implementation of exec.Cascade.PlanFn.
//
// We walk through a simple example to illustrate the flow of executing a
// cascade:
//
//   CREATE TABLE parent (p INT PRIMARY KEY);
//   CREATE TABLE child (
//     c INT PRIMARY KEY,
//     p INT NOT NULL REFERENCES parent(p) ON DELETE CASCADE
//   );
//
//   DELETE FROM parent WHERE p > 1;
//
// The optimizer expression for this query is:
//
//   delete parent
//    ├── columns: <none>
//    ├── fetch columns: p:2
//    ├── input binding: &1
//    ├── cascades
//    │    └── fk_p_ref_parent
//    └── select
//         ├── columns: p:2!null
//         ├── scan parent
//         │    └── columns: p:2!null
//         └── filters
//              └── p:2 > 1
//
// Note that at this point the cascading query against the child table has not
// been built. The expression above does, however, contain a reference to a
// memo.CascadeBuilder which will be invoked to build that query at a later
// time.
//
// When execbuilding the query above, a BufferNode is constructed for the
// mutation input (binding &1 above) and a cascadeBuilder object is constructed
// for the cascade.
//
// The setupCascade method is called to fill in an exec.Cascade which is passed
// to ConstructPlan. Note that we still have not built the cascading query; all
// we have done is provide some plumbing and an entry point (through PlanFn)
// for that to happen later.
//
// The plan is constructed and processed by the execution engine. After the
// plans for the subqueries and the main query are executed, the cascades are
// processed (in a queue). At this time the PlanFn method is called and the
// following happens:
//
//  1. We set up a new empty memo and add metadata for the columns of the
//     BufferNode (binding &1).
//
//  2. We invoke the memo.CascadeBuilder to optbuild the cascading query. At
//     this point, the new memo will contain the following expression:
//
//       delete child
//        ├── columns: <none>
//        ├── fetch columns: c:4 child.p:5
//        └── semi-join (hash)
//             ├── columns: c:4!null child.p:5!null
//             ├── scan child
//             │    └── columns: c:4!null child.p:5!null
//             ├── with-scan &1
//             │    ├── columns: p:6!null
//             │    └── mapping:
//             │         └── parent.p:1 => p:6
//             └── filters
//                  └── child.p:5 = p:6
//
//     Notes:
//      - normally, a WithScan can only refer to an ancestor mutation or With
//        operator. In this case we are creating a reference "out of the void".
//        This works just fine; we can consider adding a special dummy root
//        operator but so far it hasn't been necessary;
//      - the binding &1 column ID has changed: it used to be 2, it is now 1.
//        This is because we are starting with a fresh memo. We need to take
//        this remapping into account when referring to the foreign key columns.
//
//  3. We optimize the newly built expression.
//
//  4. We execbuild the optimized expression. We have to be careful to set up
//     the "With" reference before starting.
//
// After PlanFn is called, the resulting plan is executed. Note that this plan
// could itself have more exec.Cascades; these are queued and handled in the
// same way.
type cascadeBuilder struct {
	b              *Builder
	mutationBuffer exec.BufferNode
	// mutationBufferCols maps With column IDs from the original memo to buffer
	// node column ordinals; see builtWithExpr.outputCols.
	mutationBufferCols opt.ColMap

	// colMeta remembers the metadata of the With columns from the original memo.
	colMeta []opt.ColumnMeta
}

// cascadeInputWithID is a special WithID that we use to refer to a cascade
// input. It should be large enough to never clash with "regular" WithIDs
// (which are generated sequentially).
const cascadeInputWithID opt.WithID = 1000000
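
// A rough, hedged sketch of how the mutation execbuilding code is expected to
// use this helper; names such as fkCascades and mutationWithID are
// placeholders rather than the exact memo/execbuilder API:
//
//   cascades := make([]exec.Cascade, len(fkCascades))
//   for i := range fkCascades {
//       cb, err := makeCascadeBuilder(b, mutationWithID)
//       if err != nil {
//           return err
//       }
//       cascades[i] = cb.setupCascade(&fkCascades[i])
//   }
//
// The resulting exec.Cascade slice is then handed to ConstructPlan together
// with the main query and any subqueries.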

func makeCascadeBuilder(b *Builder, mutationWithID opt.WithID) (*cascadeBuilder, error) {
	withExpr := b.findBuiltWithExpr(mutationWithID)
	if withExpr == nil {
		return nil, errors.AssertionFailedf("cannot find mutation input withExpr")
	}
	cb := &cascadeBuilder{
		b:                  b,
		mutationBuffer:     withExpr.bufferNode,
		mutationBufferCols: withExpr.outputCols,
	}

	// Remember the column metadata, as we will need to recreate it in the new
	// memo.
	md := b.mem.Metadata()
	cb.colMeta = make([]opt.ColumnMeta, 0, cb.mutationBufferCols.Len())
	cb.mutationBufferCols.ForEach(func(key, val int) {
		id := opt.ColumnID(key)
		cb.colMeta = append(cb.colMeta, *md.ColumnMeta(id))
	})

	return cb, nil
}

// setupCascade fills in an exec.Cascade struct for the given cascade.
func (cb *cascadeBuilder) setupCascade(cascade *memo.FKCascade) exec.Cascade {
	return exec.Cascade{
		FKName: cascade.FKName,
		Buffer: cb.mutationBuffer,
		PlanFn: func(
			ctx context.Context,
			semaCtx *tree.SemaContext,
			evalCtx *tree.EvalContext,
			execFactory exec.Factory,
			bufferRef exec.BufferNode,
			numBufferedRows int,
		) (exec.Plan, error) {
			return cb.planCascade(ctx, semaCtx, evalCtx, execFactory, cascade, bufferRef, numBufferedRows)
		},
	}
}
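
// For reference, the execution engine consumes each queued exec.Cascade
// roughly as follows (a simplified sketch; the real scheduling code lives in
// the sql package and also handles FK check queries and error propagation;
// cascadeQueue, numBufferedRows, and runPlan are illustrative names only):
//
//   for len(cascadeQueue) > 0 {
//       c := cascadeQueue[0]
//       cascadeQueue = cascadeQueue[1:]
//       plan, err := c.PlanFn(ctx, semaCtx, evalCtx, execFactory, c.Buffer, numBufferedRows)
//       if err != nil {
//           return err
//       }
//       // Running the plan can enqueue further cascades.
//       cascadeQueue = append(cascadeQueue, runPlan(plan)...)
//   }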

// planCascade is used to plan a cascade query. It is NOT run while
// planning the query; it is run by the execution logic (through
// exec.Cascade.PlanFn) after the main query has been executed.
//
// See the comment for cascadeBuilder for a detailed explanation of the
// process.
func (cb *cascadeBuilder) planCascade(
	ctx context.Context,
	semaCtx *tree.SemaContext,
	evalCtx *tree.EvalContext,
	execFactory exec.Factory,
	cascade *memo.FKCascade,
	bufferRef exec.BufferNode,
	numBufferedRows int,
) (exec.Plan, error) {
	// 1. Set up a brand new memo in which to plan the cascading query.
	var o xform.Optimizer
	o.Init(evalCtx, cb.b.catalog)
	factory := o.Factory()
	md := factory.Metadata()

	// Set up metadata for the buffer columns.

	// withColRemap is the mapping between the With column IDs in the original
	// memo and the corresponding column IDs in the new memo.
	var withColRemap opt.ColMap
	// bufferColMap is the mapping between the column IDs in the new memo and
	// the column ordinal in the buffer node.
	var bufferColMap opt.ColMap
	var withCols opt.ColSet
	for i := range cb.colMeta {
		id := md.AddColumn(cb.colMeta[i].Alias, cb.colMeta[i].Type)
		withCols.Add(id)
		ordinal, _ := cb.mutationBufferCols.Get(int(cb.colMeta[i].MetaID))
		bufferColMap.Set(int(id), ordinal)
		withColRemap.Set(int(cb.colMeta[i].MetaID), int(id))
	}

	// Create relational properties for the special WithID input.
	// TODO(radu): save some more information from the original binding props
	// (like not-null columns, FDs) and remap them to the new columns.
	var bindingProps props.Relational
	bindingProps.Populated = true
	bindingProps.OutputCols = withCols
	bindingProps.Cardinality = props.Cardinality{
		Min: uint32(numBufferedRows),
		Max: uint32(numBufferedRows),
	}
	bindingProps.Stats = props.Statistics{
		Available: true,
		RowCount:  float64(numBufferedRows),
	}

	// Remap the cascade columns.
	oldVals, err := remapColumns(cascade.OldValues, withColRemap)
	if err != nil {
		return nil, err
	}
	newVals, err := remapColumns(cascade.NewValues, withColRemap)
	if err != nil {
		return nil, err
	}

	// 2. Invoke the memo.CascadeBuilder to build the cascade.
	relExpr, err := cascade.Builder.Build(
		ctx,
		semaCtx,
		evalCtx,
		cb.b.catalog,
		factory,
		cascadeInputWithID,
		&bindingProps,
		oldVals,
		newVals,
	)
	if err != nil {
		return nil, errors.Wrap(err, "while building cascade expression")
	}

	o.Memo().SetRoot(relExpr, &physical.Required{})

	// 3. Optimize the expression.
	optimizedExpr, err := o.Optimize()
	if err != nil {
		return nil, errors.Wrap(err, "while optimizing cascade expression")
	}

	// 4. Execbuild the optimized expression.
	eb := New(execFactory, factory.Memo(), cb.b.catalog, optimizedExpr, evalCtx)
	// Set up the With binding.
	eb.addBuiltWithExpr(cascadeInputWithID, bufferColMap, bufferRef)
	plan, err := eb.Build()
	if err != nil {
		return nil, errors.Wrap(err, "while building cascade plan")
	}
	return plan, nil
}

// remapColumns remaps a list of columns according to a ColMap.
func remapColumns(cols opt.ColList, m opt.ColMap) (opt.ColList, error) {
	res := make(opt.ColList, len(cols))
	for i := range cols {
		val, ok := m.Get(int(cols[i]))
		if !ok {
			return nil, errors.AssertionFailedf("column %d not in mapping %s", cols[i], m.String())
		}
		res[i] = opt.ColumnID(val)
	}
	return res, nil
}
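
// To make the column remapping in planCascade concrete: if the buffer columns
// had IDs 2 and 3 in the original memo and were assigned IDs 1 and 2 in the
// new memo, withColRemap would contain {2->1, 3->2} and remapColumns would
// translate the cascade's column lists accordingly (illustrative values only):
//
//   var m opt.ColMap
//   m.Set(2, 1)
//   m.Set(3, 2)
//   remapped, _ := remapColumns(opt.ColList{2, 3}, m) // remapped is {1, 2}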