github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/metadata.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package opt 12 13 import ( 14 "context" 15 "fmt" 16 "math/bits" 17 "strings" 18 19 "github.com/cockroachdb/cockroach/pkg/sql/opt/cat" 20 "github.com/cockroachdb/cockroach/pkg/sql/privilege" 21 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 22 "github.com/cockroachdb/cockroach/pkg/sql/types" 23 "github.com/cockroachdb/errors" 24 ) 25 26 // SchemaID uniquely identifies the usage of a schema within the scope of a 27 // query. SchemaID 0 is reserved to mean "unknown schema". Internally, the 28 // SchemaID consists of an index into the Metadata.schemas slice. 29 // 30 // See the comment for Metadata for more details on identifiers. 31 type SchemaID int32 32 33 // privilegeBitmap stores a union of zero or more privileges. Each privilege 34 // that is present in the bitmap is represented by a bit that is shifted by 35 // 1 << privilege.Kind, so that multiple privileges can be stored. 36 type privilegeBitmap uint32 37 38 // Metadata assigns unique ids to the columns, tables, and other metadata used 39 // for global identification within the scope of a particular query. These ids 40 // tend to be small integers that can be efficiently stored and manipulated. 41 // 42 // Within a query, every unique column and every projection should be assigned a 43 // unique column id. Additionally, every separate reference to a table in the 44 // query should get a new set of output column ids. 45 // 46 // For example, consider the query: 47 // 48 // SELECT x FROM a WHERE y > 0 49 // 50 // There are 2 columns in the above query: x and y. During name resolution, the 51 // above query becomes: 52 // 53 // SELECT [0] FROM a WHERE [1] > 0 54 // -- [0] -> x 55 // -- [1] -> y 56 // 57 // Reusing column ids is dangerous and should be avoided in most cases. From the 58 // optimizer's perspective, any column with the same id is the same column. 59 // Therefore, using the same column id to represent two different columns can 60 // produce inaccurate plan costs, plans that are semantically inequivalent to 61 // the original plan, or errors. Columns of different types must never use the 62 // same id. Additionally, column ids cannot be overloaded within two relational 63 // expressions that interact with each other. 64 // 65 // Consider the query: 66 // 67 // SELECT * FROM a AS l JOIN a AS r ON (l.x = r.y) 68 // 69 // In this query, `l.x` is not equivalent to `r.x` and `l.y` is not equivalent 70 // to `r.y`. Therefore, we need to give these columns different ids. 71 // 72 // There are a handful of exceptional cases in which column ids are reused. This 73 // is safe only in cases where a column is passed-through to the parent 74 // expression without being operated on or mutated. In these cases, the reduced 75 // overhead of not generating new column and table ids outweighs the risks of 76 // using non-unique ids. 77 // 78 // The known places where column ids are reused are: 79 // 80 // - Aggregation functions 81 // 82 // This is safe when columns are passed-through, like in ConstAgg and 83 // FirstAgg. 84 // 85 // - Project 86 // 87 // This is safe for pass-through columns. 88 // 89 // - Select 90 // 91 // This is safe because select only filters rows and does not mutate columns. 92 // 93 // - SplitDisjunction and SplitDisjunctionAddKey 94 // 95 // Column ids in the left and output of the generated UnionAll are reused 96 // from the original input expression. This is safe because the columns from 97 // the left side of the union are essentially passed-through to the parent 98 // expression. 99 // 100 // - Uncorrelated sub-expressions 101 // 102 // This is safe because columns within uncorrelated sub-expressions cannot 103 // interact with outer columns. 104 // 105 // NOTE: Please add new rules that reuse column ids to this list. 106 type Metadata struct { 107 // schemas stores each schema used in the query, indexed by SchemaID. 108 schemas []cat.Schema 109 110 // cols stores information about each metadata column, indexed by 111 // ColumnID.index(). 112 cols []ColumnMeta 113 114 // tables stores information about each metadata table, indexed by 115 // TableID.index(). 116 tables []TableMeta 117 118 // sequences stores information about each metadata sequence, indexed by SequenceID. 119 sequences []cat.Sequence 120 121 // deps stores information about all data source objects depended on by the 122 // query, as well as the privileges required to access them. The objects are 123 // deduplicated: any name/object pair shows up at most once. 124 // Note: the same data source object can appear multiple times if different 125 // names were used. For example, in the query `SELECT * from t, db.t` the two 126 // tables might resolve to the same object now but to different objects later; 127 // we want to verify the resolution of both names. 128 deps []mdDep 129 130 // views stores the list of referenced views. This information is only 131 // needed for EXPLAIN (opt, env). 132 views []cat.View 133 134 // currUniqueID is the highest UniqueID that has been assigned. 135 currUniqueID UniqueID 136 137 // NOTE! When adding fields here, update Init, CopyFrom and TestMetadata. 138 } 139 140 type mdDep struct { 141 ds cat.DataSource 142 143 name MDDepName 144 145 // privileges is the union of all required privileges. 146 privileges privilegeBitmap 147 } 148 149 // MDDepName stores either the unresolved DataSourceName or the StableID from 150 // the query that was used to resolve a data source. 151 type MDDepName struct { 152 // byID is non-zero if and only if the data source was looked up using the 153 // StableID. 154 byID cat.StableID 155 156 // byName is non-zero if and only if the data source was looked up using a 157 // name. 158 byName cat.DataSourceName 159 } 160 161 func (n *MDDepName) equals(other *MDDepName) bool { 162 return n.byID == other.byID && n.byName.Equals(&other.byName) 163 } 164 165 // Init prepares the metadata for use (or reuse). 166 func (md *Metadata) Init() { 167 // Clear the metadata objects to release memory (this clearing pattern is 168 // optimized by Go). 169 for i := range md.schemas { 170 md.schemas[i] = nil 171 } 172 md.schemas = md.schemas[:0] 173 174 for i := range md.cols { 175 md.cols[i] = ColumnMeta{} 176 } 177 md.cols = md.cols[:0] 178 179 for i := range md.tables { 180 md.tables[i] = TableMeta{} 181 } 182 md.tables = md.tables[:0] 183 184 for i := range md.sequences { 185 md.sequences[i] = nil 186 } 187 md.sequences = md.sequences[:0] 188 189 for i := range md.deps { 190 md.deps[i] = mdDep{} 191 } 192 md.deps = md.deps[:0] 193 194 for i := range md.views { 195 md.views[i] = nil 196 } 197 md.views = md.views[:0] 198 199 md.currUniqueID = 0 200 } 201 202 // CopyFrom initializes the metadata with a copy of the provided metadata. 203 // This metadata can then be modified independent of the copied metadata. 204 // 205 // Table annotations are not transferred over; all annotations are unset on 206 // the copy. 207 func (md *Metadata) CopyFrom(from *Metadata) { 208 if len(md.schemas) != 0 || len(md.cols) != 0 || len(md.tables) != 0 || 209 len(md.sequences) != 0 || len(md.deps) != 0 || len(md.views) != 0 { 210 panic(errors.AssertionFailedf("CopyFrom requires empty destination")) 211 } 212 md.schemas = append(md.schemas, from.schemas...) 213 md.cols = append(md.cols, from.cols...) 214 md.tables = append(md.tables, from.tables...) 215 216 // Clear table annotations. These objects can be mutable and can't be safely 217 // shared between different metadata instances. 218 for i := range md.tables { 219 md.tables[i].clearAnnotations() 220 } 221 // TODO(radu): we aren't copying the scalar expressions in Constraints and 222 // ComputedCols.. 223 224 md.sequences = append(md.sequences, from.sequences...) 225 md.deps = append(md.deps, from.deps...) 226 md.views = append(md.views, from.views...) 227 md.currUniqueID = from.currUniqueID 228 } 229 230 // DepByName is used with AddDependency when the data source was looked up using a 231 // data source name. 232 func DepByName(name *cat.DataSourceName) MDDepName { 233 return MDDepName{byName: *name} 234 } 235 236 // DepByID is used with AddDependency when the data source was looked up by ID. 237 func DepByID(id cat.StableID) MDDepName { 238 return MDDepName{byID: id} 239 } 240 241 // AddDependency tracks one of the catalog data sources on which the query 242 // depends, as well as the privilege required to access that data source. If 243 // the Memo using this metadata is cached, then a call to CheckDependencies can 244 // detect if the name resolves to a different data source now, or if changes to 245 // schema or permissions on the data source has invalidated the cached metadata. 246 func (md *Metadata) AddDependency(name MDDepName, ds cat.DataSource, priv privilege.Kind) { 247 // Search for the same name / object pair. 248 for i := range md.deps { 249 if md.deps[i].ds == ds && md.deps[i].name.equals(&name) { 250 md.deps[i].privileges |= (1 << priv) 251 return 252 } 253 } 254 md.deps = append(md.deps, mdDep{ 255 ds: ds, 256 name: name, 257 privileges: (1 << priv), 258 }) 259 } 260 261 // CheckDependencies resolves (again) each data source on which this metadata 262 // depends, in order to check that all data source names resolve to the same 263 // objects, and that the user still has sufficient privileges to access the 264 // objects. If the dependencies are no longer up-to-date, then CheckDependencies 265 // returns false. 266 // 267 // This function cannot swallow errors and return only a boolean, as it may 268 // perform KV operations on behalf of the transaction associated with the 269 // provided catalog, and those errors are required to be propagated. 270 func (md *Metadata) CheckDependencies( 271 ctx context.Context, catalog cat.Catalog, 272 ) (upToDate bool, err error) { 273 for i := range md.deps { 274 name := &md.deps[i].name 275 var toCheck cat.DataSource 276 var err error 277 if name.byID != 0 { 278 toCheck, _, err = catalog.ResolveDataSourceByID(ctx, cat.Flags{}, name.byID) 279 } else { 280 // Resolve data source object. 281 toCheck, _, err = catalog.ResolveDataSource(ctx, cat.Flags{}, &name.byName) 282 } 283 if err != nil { 284 return false, err 285 } 286 287 // Ensure that it's the same object, and there were no schema or table 288 // statistics changes. 289 if !toCheck.Equals(md.deps[i].ds) { 290 return false, nil 291 } 292 293 for privs := md.deps[i].privileges; privs != 0; { 294 // Strip off each privilege bit and make call to CheckPrivilege for it. 295 // Note that priv == 0 can occur when a dependency was added with 296 // privilege.Kind = 0 (e.g. for a table within a view, where the table 297 // privileges do not need to be checked). Ignore the "zero privilege". 298 priv := privilege.Kind(bits.TrailingZeros32(uint32(privs))) 299 if priv != 0 { 300 if err := catalog.CheckPrivilege(ctx, toCheck, priv); err != nil { 301 return false, err 302 } 303 } 304 305 // Set the just-handled privilege bit to zero and look for next. 306 privs &= ^(1 << priv) 307 } 308 } 309 return true, nil 310 } 311 312 // AddSchema indexes a new reference to a schema used by the query. 313 func (md *Metadata) AddSchema(sch cat.Schema) SchemaID { 314 md.schemas = append(md.schemas, sch) 315 return SchemaID(len(md.schemas)) 316 } 317 318 // Schema looks up the metadata for the schema associated with the given schema 319 // id. 320 func (md *Metadata) Schema(schID SchemaID) cat.Schema { 321 return md.schemas[schID-1] 322 } 323 324 // AddTable indexes a new reference to a table within the query. Separate 325 // references to the same table are assigned different table ids (e.g. in a 326 // self-join query). All columns are added to the metadata. If mutation columns 327 // are present, they are added after active columns. 328 // 329 // The ExplicitCatalog/ExplicitSchema fields of the table's alias are honored so 330 // that its original formatting is preserved for error messages, 331 // pretty-printing, etc. 332 func (md *Metadata) AddTable(tab cat.Table, alias *tree.TableName) TableID { 333 tabID := makeTableID(len(md.tables), ColumnID(len(md.cols)+1)) 334 if md.tables == nil { 335 md.tables = make([]TableMeta, 0, 4) 336 } 337 md.tables = append(md.tables, TableMeta{MetaID: tabID, Table: tab, Alias: *alias}) 338 339 colCount := tab.DeletableColumnCount() 340 if md.cols == nil { 341 md.cols = make([]ColumnMeta, 0, colCount) 342 } 343 344 for i := 0; i < colCount; i++ { 345 col := tab.Column(i) 346 colID := md.AddColumn(string(col.ColName()), col.DatumType()) 347 md.ColumnMeta(colID).Table = tabID 348 } 349 350 return tabID 351 } 352 353 // TableMeta looks up the metadata for the table associated with the given table 354 // id. The same table can be added multiple times to the query metadata and 355 // associated with multiple table ids. 356 func (md *Metadata) TableMeta(tabID TableID) *TableMeta { 357 return &md.tables[tabID.index()] 358 } 359 360 // Table looks up the catalog table associated with the given metadata id. The 361 // same table can be associated with multiple metadata ids. 362 func (md *Metadata) Table(tabID TableID) cat.Table { 363 return md.TableMeta(tabID).Table 364 } 365 366 // AllTables returns the metadata for all tables. The result must not be 367 // modified. 368 func (md *Metadata) AllTables() []TableMeta { 369 return md.tables 370 } 371 372 // AddColumn assigns a new unique id to a column within the query and records 373 // its alias and type. If the alias is empty, a "column<ID>" alias is created. 374 func (md *Metadata) AddColumn(alias string, typ *types.T) ColumnID { 375 if alias == "" { 376 alias = fmt.Sprintf("column%d", len(md.cols)+1) 377 } 378 colID := ColumnID(len(md.cols) + 1) 379 md.cols = append(md.cols, ColumnMeta{MetaID: colID, Alias: alias, Type: typ}) 380 return colID 381 } 382 383 // NumColumns returns the count of columns tracked by this Metadata instance. 384 func (md *Metadata) NumColumns() int { 385 return len(md.cols) 386 } 387 388 // ColumnMeta looks up the metadata for the column associated with the given 389 // column id. The same column can be added multiple times to the query metadata 390 // and associated with multiple column ids. 391 func (md *Metadata) ColumnMeta(colID ColumnID) *ColumnMeta { 392 return &md.cols[colID.index()] 393 } 394 395 // QualifiedAlias returns the column alias, possibly qualified with the table, 396 // schema, or database name: 397 // 398 // 1. If fullyQualify is true, then the returned alias is prefixed by the 399 // original, fully qualified name of the table: tab.Name().FQString(). 400 // 401 // 2. If there's another column in the metadata with the same column alias but 402 // a different table name, then prefix the column alias with the table 403 // name: "tabName.columnAlias". 404 // 405 func (md *Metadata) QualifiedAlias(colID ColumnID, fullyQualify bool, catalog cat.Catalog) string { 406 cm := md.ColumnMeta(colID) 407 if cm.Table == 0 { 408 // Column doesn't belong to a table, so no need to qualify it further. 409 return cm.Alias 410 } 411 412 // If a fully qualified alias has not been requested, then only qualify it if 413 // it would otherwise be ambiguous. 414 var tabAlias tree.TableName 415 qualify := fullyQualify 416 if !fullyQualify { 417 for i := range md.cols { 418 if i == int(cm.MetaID-1) { 419 continue 420 } 421 422 // If there are two columns with same alias, then column is ambiguous. 423 cm2 := &md.cols[i] 424 if cm2.Alias == cm.Alias { 425 tabAlias = md.TableMeta(cm.Table).Alias 426 if cm2.Table == 0 { 427 qualify = true 428 } else { 429 // Only qualify if the qualified names are actually different. 430 tabAlias2 := md.TableMeta(cm2.Table).Alias 431 if tabAlias.String() != tabAlias2.String() { 432 qualify = true 433 } 434 } 435 } 436 } 437 } 438 439 // If the column name should not even be partly qualified, then no more to do. 440 if !qualify { 441 return cm.Alias 442 } 443 444 var sb strings.Builder 445 if fullyQualify { 446 tn, err := catalog.FullyQualifiedName(context.TODO(), md.TableMeta(cm.Table).Table) 447 if err != nil { 448 panic(err) 449 } 450 sb.WriteString(tn.FQString()) 451 } else { 452 sb.WriteString(tabAlias.String()) 453 } 454 sb.WriteRune('.') 455 sb.WriteString(cm.Alias) 456 return sb.String() 457 } 458 459 // SequenceID uniquely identifies the usage of a sequence within the scope of a 460 // query. SequenceID 0 is reserved to mean "unknown sequence". 461 type SequenceID uint64 462 463 // index returns the index of the sequence in Metadata.sequences. It's biased by 1, so 464 // that SequenceID 0 can be be reserved to mean "unknown sequence". 465 func (s SequenceID) index() int { 466 return int(s - 1) 467 } 468 469 // makeSequenceID constructs a new SequenceID from its component parts. 470 func makeSequenceID(index int) SequenceID { 471 // Bias the sequence index by 1. 472 return SequenceID(index + 1) 473 } 474 475 // AddSequence adds the sequence to the metadata, returning a SequenceID that 476 // can be used to retrieve it. 477 func (md *Metadata) AddSequence(seq cat.Sequence) SequenceID { 478 seqID := makeSequenceID(len(md.sequences)) 479 if md.sequences == nil { 480 md.sequences = make([]cat.Sequence, 0, 4) 481 } 482 md.sequences = append(md.sequences, seq) 483 484 return seqID 485 } 486 487 // Sequence looks up the catalog sequence associated with the given metadata id. The 488 // same sequence can be associated with multiple metadata ids. 489 func (md *Metadata) Sequence(seqID SequenceID) cat.Sequence { 490 return md.sequences[seqID.index()] 491 } 492 493 // UniqueID should be used to disambiguate multiple uses of an expression 494 // within the scope of a query. For example, a UniqueID field should be 495 // added to an expression type if two instances of that type might otherwise 496 // be indistinguishable based on the values of their other fields. 497 // 498 // See the comment for Metadata for more details on identifiers. 499 type UniqueID uint64 500 501 // NextUniqueID returns a fresh UniqueID which is guaranteed to never have been 502 // previously allocated in this memo. 503 func (md *Metadata) NextUniqueID() UniqueID { 504 md.currUniqueID++ 505 return md.currUniqueID 506 } 507 508 // AddView adds a new reference to a view used by the query. 509 func (md *Metadata) AddView(v cat.View) { 510 md.views = append(md.views, v) 511 } 512 513 // AllViews returns the metadata for all views. The result must not be 514 // modified. 515 func (md *Metadata) AllViews() []cat.View { 516 return md.views 517 } 518 519 // AllDataSourceNames returns the fully qualified names of all datasources 520 // referenced by the metadata. 521 func (md *Metadata) AllDataSourceNames( 522 fullyQualifiedName func(ds cat.DataSource) (cat.DataSourceName, error), 523 ) (tables, sequences, views []tree.TableName, _ error) { 524 // Catalog objects can show up multiple times in our lists, so deduplicate 525 // them. 526 seen := make(map[tree.TableName]struct{}) 527 528 getNames := func(count int, get func(int) cat.DataSource) ([]tree.TableName, error) { 529 result := make([]tree.TableName, 0, count) 530 for i := 0; i < count; i++ { 531 ds := get(i) 532 tn, err := fullyQualifiedName(ds) 533 if err != nil { 534 return nil, err 535 } 536 if _, ok := seen[tn]; !ok { 537 seen[tn] = struct{}{} 538 result = append(result, tn) 539 } 540 } 541 return result, nil 542 } 543 var err error 544 tables, err = getNames(len(md.tables), func(i int) cat.DataSource { 545 return md.tables[i].Table 546 }) 547 if err != nil { 548 return nil, nil, nil, err 549 } 550 sequences, err = getNames(len(md.sequences), func(i int) cat.DataSource { 551 return md.sequences[i] 552 }) 553 if err != nil { 554 return nil, nil, nil, err 555 } 556 views, err = getNames(len(md.views), func(i int) cat.DataSource { 557 return md.views[i] 558 }) 559 if err != nil { 560 return nil, nil, nil, err 561 } 562 return tables, sequences, views, nil 563 } 564 565 // WithID uniquely identifies a With expression within the scope of a query. 566 // WithID=0 is reserved to mean "unknown expression". 567 // See the comment for Metadata for more details on identifiers. 568 type WithID uint64