github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/memo/memo.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package memo 12 13 import ( 14 "context" 15 16 "github.com/cockroachdb/cockroach/pkg/sql/opt" 17 "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" 18 "github.com/cockroachdb/cockroach/pkg/sql/opt/props" 19 "github.com/cockroachdb/cockroach/pkg/sql/opt/props/physical" 20 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 21 "github.com/cockroachdb/cockroach/pkg/sql/sessiondata" 22 "github.com/cockroachdb/cockroach/pkg/util/log" 23 "github.com/cockroachdb/errors" 24 ) 25 26 // Memo is a data structure for efficiently storing a forest of query plans. 27 // Conceptually, the memo is composed of a numbered set of equivalency classes 28 // called groups where each group contains a set of logically equivalent 29 // expressions. Two expressions are considered logically equivalent if: 30 // 31 // 1. They return the same number and data type of columns. However, order and 32 // naming of columns doesn't matter. 33 // 2. They return the same number of rows, with the same values in each row. 34 // However, order of rows doesn't matter. 35 // 36 // The different expressions in a single group are called memo expressions 37 // (memo-ized expressions). The children of a memo expression can themselves be 38 // part of memo groups. Therefore, the memo forest is composed of every possible 39 // combination of parent expression with its possible child expressions, 40 // recursively applied. 41 // 42 // Memo expressions can be relational (e.g. join) or scalar (e.g. <). 
// Operators are always both logical (specify results) and physical (specify
// results and a particular implementation). This means that even a "raw"
// unoptimized expression tree can be executed (naively). Both relational and
// scalar operators are uniformly represented as nodes in memo expression
// trees, which facilitates tree pattern matching and replacement. However,
// because scalar expression memo groups never have more than one expression,
// scalar expressions can use a simpler representation.
//
// Because memo groups contain logically equivalent expressions, all the memo
// expressions in a group share the same logical properties. However, it's
// possible for two logically equivalent expressions to be placed in different
// memo groups. This occurs because determining logical equivalency of two
// relational expressions is too complex to perform 100% correctly. A
// correctness failure (i.e. considering two expressions logically equivalent
// when they are not) results in invalid transformations and invalid plans.
// But placing two logically equivalent expressions in different groups has a
// much gentler failure mode: the memo and transformations are less efficient.
// Expressions within the memo may have different physical properties. For
// example, a memo group might contain both hash join and merge join
// expressions which produce the same set of output rows, but produce them in
// different orders.
//
// Expressions are inserted into the memo by the factory, which ensures that
// expressions have been fully normalized before insertion (see the comment in
// factory.go for more details). A new group is created only when unique
// normalized expressions are created by the factory during construction or
// rewrite of the tree.
Uniqueness is determined by "interning" each expression, 70 // which means that multiple equivalent expressions are mapped to a single 71 // in-memory instance. This allows interned expressions to be checked for 72 // equivalence by simple pointer comparison. For example: 73 // 74 // SELECT * FROM a, b WHERE a.x = b.x 75 // 76 // After insertion into the memo, the memo would contain these six groups, with 77 // numbers substituted for pointers to the normalized expression in each group: 78 // 79 // G6: [inner-join [G1 G2 G5]] 80 // G5: [eq [G3 G4]] 81 // G4: [variable b.x] 82 // G3: [variable a.x] 83 // G2: [scan b] 84 // G1: [scan a] 85 // 86 // Each leaf expressions is interned by hashing its operator type and any 87 // private field values. Expressions higher in the tree can then rely on the 88 // fact that all children have been interned, and include their pointer values 89 // in its hash value. Therefore, the memo need only hash the expression's fields 90 // in order to determine whether the expression already exists in the memo. 91 // Walking the subtree is not necessary. 92 // 93 // The normalizing factory will never add more than one expression to a memo 94 // group. But the explorer does add denormalized expressions to existing memo 95 // groups, since oftentimes one of these equivalent, but denormalized 96 // expressions will have a lower cost than the initial normalized expression 97 // added by the factory. For example, the join commutativity transformation 98 // expands the memo like this: 99 // 100 // G6: [inner-join [G1 G2 G5]] [inner-join [G2 G1 G5]] 101 // G5: [eq [G3 G4]] 102 // G4: [variable b.x] 103 // G3: [variable a.x] 104 // G2: [scan b] 105 // G1: [scan a] 106 // 107 // See the comments in explorer.go for more details. 108 type Memo struct { 109 // metadata provides information about the columns and tables used in this 110 // particular query. 
111 metadata opt.Metadata 112 113 // interner interns all expressions in the memo, ensuring that there is at 114 // most one instance of each expression in the memo. 115 interner interner 116 117 // logPropsBuilder is inlined in the memo so that it can be reused each time 118 // scalar or relational properties need to be built. 119 logPropsBuilder logicalPropsBuilder 120 121 // rootExpr is the root expression of the memo expression forest. It is set 122 // via a call to SetRoot. After optimization, it is set to be the root of the 123 // lowest cost tree in the forest. 124 rootExpr opt.Expr 125 126 // rootProps are the physical properties required of the root memo expression. 127 // It is set via a call to SetRoot. 128 rootProps *physical.Required 129 130 // memEstimate is the approximate memory usage of the memo, in bytes. 131 memEstimate int64 132 133 // The following are selected fields from SessionData which can affect 134 // planning. We need to cross-check these before reusing a cached memo. 135 dataConversion sessiondata.DataConversionConfig 136 reorderJoinsLimit int 137 zigzagJoinEnabled bool 138 optimizerFKChecks bool 139 optimizerFKCascades bool 140 useHistograms bool 141 useMultiColStats bool 142 safeUpdates bool 143 saveTablesPrefix string 144 insertFastPath bool 145 146 // curID is the highest currently in-use scalar expression ID. 147 curID opt.ScalarID 148 149 // curWithID is the highest currently in-use WITH ID. 150 curWithID opt.WithID 151 152 newGroupFn func(opt.Expr) 153 154 // WARNING: if you add more members, add initialization code in Init. 155 } 156 157 // Init initializes a new empty memo instance, or resets existing state so it 158 // can be reused. It must be called before use (or reuse). The memo collects 159 // information about the context in which it is compiled from the evalContext 160 // argument. If any of that changes, then the memo must be invalidated (see the 161 // IsStale method for more details). 
162 func (m *Memo) Init(evalCtx *tree.EvalContext) { 163 m.metadata.Init() 164 m.interner.Clear() 165 m.logPropsBuilder.init(evalCtx, m) 166 167 m.rootExpr = nil 168 m.rootProps = nil 169 m.memEstimate = 0 170 171 m.dataConversion = evalCtx.SessionData.DataConversion 172 m.reorderJoinsLimit = evalCtx.SessionData.ReorderJoinsLimit 173 m.zigzagJoinEnabled = evalCtx.SessionData.ZigzagJoinEnabled 174 m.optimizerFKChecks = evalCtx.SessionData.OptimizerFKChecks 175 m.optimizerFKCascades = evalCtx.SessionData.OptimizerFKCascades 176 m.useHistograms = evalCtx.SessionData.OptimizerUseHistograms 177 m.useMultiColStats = evalCtx.SessionData.OptimizerUseMultiColStats 178 m.safeUpdates = evalCtx.SessionData.SafeUpdates 179 m.saveTablesPrefix = evalCtx.SessionData.SaveTablesPrefix 180 m.insertFastPath = evalCtx.SessionData.InsertFastPath 181 182 m.curID = 0 183 m.curWithID = 0 184 } 185 186 // NotifyOnNewGroup sets a callback function which is invoked each time we 187 // create a new memo group. 188 func (m *Memo) NotifyOnNewGroup(fn func(opt.Expr)) { 189 m.newGroupFn = fn 190 } 191 192 // IsEmpty returns true if there are no expressions in the memo. 193 func (m *Memo) IsEmpty() bool { 194 // Root expression can be nil before optimization and interner is empty after 195 // exploration, so check both. 196 return m.interner.Count() == 0 && m.rootExpr == nil 197 } 198 199 // MemoryEstimate returns a rough estimate of the memo's memory usage, in bytes. 200 // It only includes memory usage that is proportional to the size and complexity 201 // of the query, rather than constant overhead bytes. 202 func (m *Memo) MemoryEstimate() int64 { 203 // Multiply by 2 to take rough account of allocation fragmentation, private 204 // data, list overhead, properties, etc. 205 return m.memEstimate * 2 206 } 207 208 // Metadata returns the metadata instance associated with the memo. 
209 func (m *Memo) Metadata() *opt.Metadata { 210 return &m.metadata 211 } 212 213 // RootExpr returns the root memo expression previously set via a call to 214 // SetRoot. 215 func (m *Memo) RootExpr() opt.Expr { 216 return m.rootExpr 217 } 218 219 // RootProps returns the physical properties required of the root memo group, 220 // previously set via a call to SetRoot. 221 func (m *Memo) RootProps() *physical.Required { 222 return m.rootProps 223 } 224 225 // SetRoot stores the root memo expression when it is a relational expression, 226 // and also stores the physical properties required of the root group. 227 func (m *Memo) SetRoot(e RelExpr, phys *physical.Required) { 228 m.rootExpr = e 229 if m.rootProps != phys { 230 m.rootProps = m.InternPhysicalProps(phys) 231 } 232 233 // Once memo is optimized, release reference to the eval context and free up 234 // the memory used by the interner. 235 if m.IsOptimized() { 236 m.logPropsBuilder.clear() 237 m.interner.Clear() 238 } 239 } 240 241 // SetScalarRoot stores the root memo expression when it is a scalar expression. 242 // Used only for testing. 243 func (m *Memo) SetScalarRoot(scalar opt.ScalarExpr) { 244 m.rootExpr = scalar 245 } 246 247 // HasPlaceholders returns true if the memo contains at least one placeholder 248 // operator. 249 func (m *Memo) HasPlaceholders() bool { 250 rel, ok := m.rootExpr.(RelExpr) 251 if !ok { 252 panic(errors.AssertionFailedf("placeholders only supported when memo root is relational")) 253 } 254 255 return rel.Relational().HasPlaceholder 256 } 257 258 // IsStale returns true if the memo has been invalidated by changes to any of 259 // its dependencies. Once a memo is known to be stale, it must be ejected from 260 // any query cache or prepared statement and replaced with a recompiled memo 261 // that takes into account the changes. IsStale checks the following 262 // dependencies: 263 // 264 // 1. Current database: this can change name resolution. 265 // 2. 
Current search path: this can change name resolution. 266 // 3. Current location: this determines time zone, and can change how time- 267 // related types are constructed and compared. 268 // 4. Data source schema: this determines most aspects of how the query is 269 // compiled. 270 // 5. Data source privileges: current user may no longer have access to one or 271 // more data sources. 272 // 273 // This function cannot swallow errors and return only a boolean, as it may 274 // perform KV operations on behalf of the transaction associated with the 275 // provided catalog, and those errors are required to be propagated. 276 func (m *Memo) IsStale( 277 ctx context.Context, evalCtx *tree.EvalContext, catalog cat.Catalog, 278 ) (bool, error) { 279 // Memo is stale if fields from SessionData that can affect planning have 280 // changed. 281 if !m.dataConversion.Equals(&evalCtx.SessionData.DataConversion) || 282 m.reorderJoinsLimit != evalCtx.SessionData.ReorderJoinsLimit || 283 m.zigzagJoinEnabled != evalCtx.SessionData.ZigzagJoinEnabled || 284 m.optimizerFKChecks != evalCtx.SessionData.OptimizerFKChecks || 285 m.optimizerFKCascades != evalCtx.SessionData.OptimizerFKCascades || 286 m.useHistograms != evalCtx.SessionData.OptimizerUseHistograms || 287 m.useMultiColStats != evalCtx.SessionData.OptimizerUseMultiColStats || 288 m.safeUpdates != evalCtx.SessionData.SafeUpdates || 289 m.saveTablesPrefix != evalCtx.SessionData.SaveTablesPrefix || 290 m.insertFastPath != evalCtx.SessionData.InsertFastPath { 291 return true, nil 292 } 293 294 // Memo is stale if the fingerprint of any object in the memo's metadata has 295 // changed, or if the current user no longer has sufficient privilege to 296 // access the object. 
297 if depsUpToDate, err := m.Metadata().CheckDependencies(ctx, catalog); err != nil { 298 return true, err 299 } else if !depsUpToDate { 300 return true, nil 301 } 302 return false, nil 303 } 304 305 // InternPhysicalProps adds the given physical props to the memo if they haven't 306 // yet been added. If the same props was added previously, then return a pointer 307 // to the previously added props. This allows interned physical props to be 308 // compared for equality using simple pointer comparison. 309 func (m *Memo) InternPhysicalProps(phys *physical.Required) *physical.Required { 310 // Special case physical properties that require nothing of operator. 311 if !phys.Defined() { 312 return physical.MinRequired 313 } 314 return m.interner.InternPhysicalProps(phys) 315 } 316 317 // SetBestProps updates the physical properties, provided ordering, and cost of 318 // a relational expression's memo group (see the relevant methods of RelExpr). 319 // It is called by the optimizer once it determines the expression in the group 320 // that is part of the lowest cost tree (for the overall query). 321 func (m *Memo) SetBestProps( 322 e RelExpr, required *physical.Required, provided *physical.Provided, cost Cost, 323 ) { 324 if e.RequiredPhysical() != nil { 325 if e.RequiredPhysical() != required || 326 !e.ProvidedPhysical().Equals(provided) || 327 e.Cost() != cost { 328 panic(errors.AssertionFailedf( 329 "cannot overwrite %s / %s (%.9g) with %s / %s (%.9g)", 330 e.RequiredPhysical(), 331 e.ProvidedPhysical(), 332 log.Safe(e.Cost()), 333 required.String(), 334 provided.String(), // Call String() so provided doesn't escape. 335 cost, 336 )) 337 } 338 return 339 } 340 bp := e.bestProps() 341 bp.required = required 342 bp.provided = *provided 343 bp.cost = cost 344 } 345 346 // ResetCost updates the cost of a relational expression's memo group. It 347 // should *only* be called by Optimizer.RecomputeCost() for testing purposes. 
348 func (m *Memo) ResetCost(e RelExpr, cost Cost) { 349 e.bestProps().cost = cost 350 } 351 352 // IsOptimized returns true if the memo has been fully optimized. 353 func (m *Memo) IsOptimized() bool { 354 // The memo is optimized once the root expression has its physical properties 355 // assigned. 356 rel, ok := m.rootExpr.(RelExpr) 357 return ok && rel.RequiredPhysical() != nil 358 } 359 360 // NextID returns a new unique ScalarID to number expressions with. 361 func (m *Memo) NextID() opt.ScalarID { 362 m.curID++ 363 return m.curID 364 } 365 366 // RequestColStat calculates and returns the column statistic calculated on the 367 // relational expression. 368 func (m *Memo) RequestColStat( 369 expr RelExpr, cols opt.ColSet, 370 ) (colStat *props.ColumnStatistic, ok bool) { 371 // When SetRoot is called, the statistics builder may have been cleared. 372 // If this happens, we can't serve the request anymore. 373 if m.logPropsBuilder.sb.md != nil { 374 return m.logPropsBuilder.sb.colStat(cols, expr), true 375 } 376 return nil, false 377 } 378 379 // RowsProcessed calculates and returns the number of rows processed by the 380 // relational expression. It is currently only supported for joins. 381 func (m *Memo) RowsProcessed(expr RelExpr) (_ float64, ok bool) { 382 // When SetRoot is called, the statistics builder may have been cleared. 383 // If this happens, we can't serve the request anymore. 384 if m.logPropsBuilder.sb.md != nil { 385 return m.logPropsBuilder.sb.rowsProcessed(expr), true 386 } 387 return 0, false 388 } 389 390 // NextWithID returns a not-yet-assigned identifier for a WITH expression. 391 func (m *Memo) NextWithID() opt.WithID { 392 m.curWithID++ 393 return m.curWithID 394 } 395 396 // ClearColStats clears all column statistics from every relational expression 397 // in the memo. This is used to free up the potentially large amount of memory 398 // used by histograms. 
399 func (m *Memo) ClearColStats(parent opt.Expr) { 400 for i, n := 0, parent.ChildCount(); i < n; i++ { 401 child := parent.Child(i) 402 m.ClearColStats(child) 403 } 404 405 switch t := parent.(type) { 406 case RelExpr: 407 t.Relational().Stats.ColStats = props.ColStatsMap{} 408 } 409 }