github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/metadata.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package opt
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math/bits"
    17  	"strings"
    18  
    19  	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/privilege"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  	"github.com/cockroachdb/errors"
    24  )
    25  
// SchemaID uniquely identifies the usage of a schema within the scope of a
// query. SchemaID 0 is reserved to mean "unknown schema". Internally, a
// SchemaID is a 1-based index into the Metadata.schemas slice: AddSchema
// returns the slice length after appending, and Schema subtracts one before
// indexing.
//
// See the comment for Metadata for more details on identifiers.
type SchemaID int32
    32  
// privilegeBitmap stores a union of zero or more privileges. A privilege is
// present in the bitmap when the bit (1 << privilege.Kind) is set, so that
// multiple privileges can be stored in a single value.
type privilegeBitmap uint32
    37  
    38  // Metadata assigns unique ids to the columns, tables, and other metadata used
    39  // for global identification within the scope of a particular query. These ids
    40  // tend to be small integers that can be efficiently stored and manipulated.
    41  //
    42  // Within a query, every unique column and every projection should be assigned a
    43  // unique column id. Additionally, every separate reference to a table in the
    44  // query should get a new set of output column ids.
    45  //
    46  // For example, consider the query:
    47  //
    48  //   SELECT x FROM a WHERE y > 0
    49  //
    50  // There are 2 columns in the above query: x and y. During name resolution, the
    51  // above query becomes:
    52  //
    53  //   SELECT [0] FROM a WHERE [1] > 0
    54  //   -- [0] -> x
    55  //   -- [1] -> y
    56  //
    57  // Reusing column ids is dangerous and should be avoided in most cases. From the
    58  // optimizer's perspective, any column with the same id is the same column.
    59  // Therefore, using the same column id to represent two different columns can
    60  // produce inaccurate plan costs, plans that are semantically inequivalent to
    61  // the original plan, or errors. Columns of different types must never use the
    62  // same id. Additionally, column ids cannot be overloaded within two relational
    63  // expressions that interact with each other.
    64  //
    65  // Consider the query:
    66  //
    67  //   SELECT * FROM a AS l JOIN a AS r ON (l.x = r.y)
    68  //
    69  // In this query, `l.x` is not equivalent to `r.x` and `l.y` is not equivalent
    70  // to `r.y`. Therefore, we need to give these columns different ids.
    71  //
    72  // There are a handful of exceptional cases in which column ids are reused. This
    73  // is safe only in cases where a column is passed-through to the parent
    74  // expression without being operated on or mutated. In these cases, the reduced
    75  // overhead of not generating new column and table ids outweighs the risks of
    76  // using non-unique ids.
    77  //
    78  // The known places where column ids are reused are:
    79  //
    80  //  - Aggregation functions
    81  //
    82  //    This is safe when columns are passed-through, like in ConstAgg and
    83  //    FirstAgg.
    84  //
    85  //  - Project
    86  //
    87  //    This is safe for pass-through columns.
    88  //
    89  //  - Select
    90  //
    91  //    This is safe because select only filters rows and does not mutate columns.
    92  //
    93  //  - SplitDisjunction and SplitDisjunctionAddKey
    94  //
    95  //    Column ids in the left and output of the generated UnionAll are reused
    96  //    from the original input expression. This is safe because the columns from
    97  //    the left side of the union are essentially passed-through to the parent
    98  //    expression.
    99  //
   100  //  - Uncorrelated sub-expressions
   101  //
   102  //    This is safe because columns within uncorrelated sub-expressions cannot
   103  //    interact with outer columns.
   104  //
   105  // NOTE: Please add new rules that reuse column ids to this list.
type Metadata struct {
	// schemas stores each schema used in the query. A SchemaID is a 1-based
	// index into this slice (SchemaID 0 means "unknown schema").
	schemas []cat.Schema

	// cols stores information about each metadata column, indexed by
	// ColumnID.index().
	cols []ColumnMeta

	// tables stores information about each metadata table, indexed by
	// TableID.index().
	tables []TableMeta

	// sequences stores information about each metadata sequence, indexed by
	// SequenceID.index().
	sequences []cat.Sequence

	// deps stores information about all data source objects depended on by the
	// query, as well as the privileges required to access them. The objects are
	// deduplicated: any name/object pair shows up at most once.
	// Note: the same data source object can appear multiple times if different
	// names were used. For example, in the query `SELECT * from t, db.t` the two
	// tables might resolve to the same object now but to different objects later;
	// we want to verify the resolution of both names.
	deps []mdDep

	// views stores the list of referenced views. This information is only
	// needed for EXPLAIN (opt, env).
	views []cat.View

	// currUniqueID is the highest UniqueID that has been assigned. It is zero
	// until NextUniqueID is called for the first time.
	currUniqueID UniqueID

	// NOTE! When adding fields here, update Init, CopyFrom and TestMetadata.
}
   139  
// mdDep records a single data source dependency of the query: the resolved
// object, the name or ID it was resolved from, and the privileges required to
// access it.
type mdDep struct {
	// ds is the data source object that was supplied when the dependency was
	// added; CheckDependencies compares a fresh resolution against it.
	ds cat.DataSource

	// name is the name or StableID that was used to look up ds.
	name MDDepName

	// privileges is the union of all required privileges.
	privileges privilegeBitmap
}
   148  
// MDDepName stores either the unresolved DataSourceName or the StableID from
// the query that was used to resolve a data source. Values are built with
// DepByName or DepByID, each of which populates exactly one of the fields.
type MDDepName struct {
	// byID is non-zero if and only if the data source was looked up using the
	// StableID.
	byID cat.StableID

	// byName is non-zero if and only if the data source was looked up using a
	// name.
	byName cat.DataSourceName
}
   160  
   161  func (n *MDDepName) equals(other *MDDepName) bool {
   162  	return n.byID == other.byID && n.byName.Equals(&other.byName)
   163  }
   164  
   165  // Init prepares the metadata for use (or reuse).
   166  func (md *Metadata) Init() {
   167  	// Clear the metadata objects to release memory (this clearing pattern is
   168  	// optimized by Go).
   169  	for i := range md.schemas {
   170  		md.schemas[i] = nil
   171  	}
   172  	md.schemas = md.schemas[:0]
   173  
   174  	for i := range md.cols {
   175  		md.cols[i] = ColumnMeta{}
   176  	}
   177  	md.cols = md.cols[:0]
   178  
   179  	for i := range md.tables {
   180  		md.tables[i] = TableMeta{}
   181  	}
   182  	md.tables = md.tables[:0]
   183  
   184  	for i := range md.sequences {
   185  		md.sequences[i] = nil
   186  	}
   187  	md.sequences = md.sequences[:0]
   188  
   189  	for i := range md.deps {
   190  		md.deps[i] = mdDep{}
   191  	}
   192  	md.deps = md.deps[:0]
   193  
   194  	for i := range md.views {
   195  		md.views[i] = nil
   196  	}
   197  	md.views = md.views[:0]
   198  
   199  	md.currUniqueID = 0
   200  }
   201  
   202  // CopyFrom initializes the metadata with a copy of the provided metadata.
   203  // This metadata can then be modified independent of the copied metadata.
   204  //
   205  // Table annotations are not transferred over; all annotations are unset on
   206  // the copy.
   207  func (md *Metadata) CopyFrom(from *Metadata) {
   208  	if len(md.schemas) != 0 || len(md.cols) != 0 || len(md.tables) != 0 ||
   209  		len(md.sequences) != 0 || len(md.deps) != 0 || len(md.views) != 0 {
   210  		panic(errors.AssertionFailedf("CopyFrom requires empty destination"))
   211  	}
   212  	md.schemas = append(md.schemas, from.schemas...)
   213  	md.cols = append(md.cols, from.cols...)
   214  	md.tables = append(md.tables, from.tables...)
   215  
   216  	// Clear table annotations. These objects can be mutable and can't be safely
   217  	// shared between different metadata instances.
   218  	for i := range md.tables {
   219  		md.tables[i].clearAnnotations()
   220  	}
   221  	// TODO(radu): we aren't copying the scalar expressions in Constraints and
   222  	// ComputedCols..
   223  
   224  	md.sequences = append(md.sequences, from.sequences...)
   225  	md.deps = append(md.deps, from.deps...)
   226  	md.views = append(md.views, from.views...)
   227  	md.currUniqueID = from.currUniqueID
   228  }
   229  
   230  // DepByName is used with AddDependency when the data source was looked up using a
   231  // data source name.
   232  func DepByName(name *cat.DataSourceName) MDDepName {
   233  	return MDDepName{byName: *name}
   234  }
   235  
   236  // DepByID is used with AddDependency when the data source was looked up by ID.
   237  func DepByID(id cat.StableID) MDDepName {
   238  	return MDDepName{byID: id}
   239  }
   240  
   241  // AddDependency tracks one of the catalog data sources on which the query
   242  // depends, as well as the privilege required to access that data source. If
   243  // the Memo using this metadata is cached, then a call to CheckDependencies can
   244  // detect if the name resolves to a different data source now, or if changes to
   245  // schema or permissions on the data source has invalidated the cached metadata.
   246  func (md *Metadata) AddDependency(name MDDepName, ds cat.DataSource, priv privilege.Kind) {
   247  	// Search for the same name / object pair.
   248  	for i := range md.deps {
   249  		if md.deps[i].ds == ds && md.deps[i].name.equals(&name) {
   250  			md.deps[i].privileges |= (1 << priv)
   251  			return
   252  		}
   253  	}
   254  	md.deps = append(md.deps, mdDep{
   255  		ds:         ds,
   256  		name:       name,
   257  		privileges: (1 << priv),
   258  	})
   259  }
   260  
   261  // CheckDependencies resolves (again) each data source on which this metadata
   262  // depends, in order to check that all data source names resolve to the same
   263  // objects, and that the user still has sufficient privileges to access the
   264  // objects. If the dependencies are no longer up-to-date, then CheckDependencies
   265  // returns false.
   266  //
   267  // This function cannot swallow errors and return only a boolean, as it may
   268  // perform KV operations on behalf of the transaction associated with the
   269  // provided catalog, and those errors are required to be propagated.
   270  func (md *Metadata) CheckDependencies(
   271  	ctx context.Context, catalog cat.Catalog,
   272  ) (upToDate bool, err error) {
   273  	for i := range md.deps {
   274  		name := &md.deps[i].name
   275  		var toCheck cat.DataSource
   276  		var err error
   277  		if name.byID != 0 {
   278  			toCheck, _, err = catalog.ResolveDataSourceByID(ctx, cat.Flags{}, name.byID)
   279  		} else {
   280  			// Resolve data source object.
   281  			toCheck, _, err = catalog.ResolveDataSource(ctx, cat.Flags{}, &name.byName)
   282  		}
   283  		if err != nil {
   284  			return false, err
   285  		}
   286  
   287  		// Ensure that it's the same object, and there were no schema or table
   288  		// statistics changes.
   289  		if !toCheck.Equals(md.deps[i].ds) {
   290  			return false, nil
   291  		}
   292  
   293  		for privs := md.deps[i].privileges; privs != 0; {
   294  			// Strip off each privilege bit and make call to CheckPrivilege for it.
   295  			// Note that priv == 0 can occur when a dependency was added with
   296  			// privilege.Kind = 0 (e.g. for a table within a view, where the table
   297  			// privileges do not need to be checked). Ignore the "zero privilege".
   298  			priv := privilege.Kind(bits.TrailingZeros32(uint32(privs)))
   299  			if priv != 0 {
   300  				if err := catalog.CheckPrivilege(ctx, toCheck, priv); err != nil {
   301  					return false, err
   302  				}
   303  			}
   304  
   305  			// Set the just-handled privilege bit to zero and look for next.
   306  			privs &= ^(1 << priv)
   307  		}
   308  	}
   309  	return true, nil
   310  }
   311  
   312  // AddSchema indexes a new reference to a schema used by the query.
   313  func (md *Metadata) AddSchema(sch cat.Schema) SchemaID {
   314  	md.schemas = append(md.schemas, sch)
   315  	return SchemaID(len(md.schemas))
   316  }
   317  
   318  // Schema looks up the metadata for the schema associated with the given schema
   319  // id.
   320  func (md *Metadata) Schema(schID SchemaID) cat.Schema {
   321  	return md.schemas[schID-1]
   322  }
   323  
   324  // AddTable indexes a new reference to a table within the query. Separate
   325  // references to the same table are assigned different table ids (e.g.  in a
   326  // self-join query). All columns are added to the metadata. If mutation columns
   327  // are present, they are added after active columns.
   328  //
   329  // The ExplicitCatalog/ExplicitSchema fields of the table's alias are honored so
   330  // that its original formatting is preserved for error messages,
   331  // pretty-printing, etc.
   332  func (md *Metadata) AddTable(tab cat.Table, alias *tree.TableName) TableID {
   333  	tabID := makeTableID(len(md.tables), ColumnID(len(md.cols)+1))
   334  	if md.tables == nil {
   335  		md.tables = make([]TableMeta, 0, 4)
   336  	}
   337  	md.tables = append(md.tables, TableMeta{MetaID: tabID, Table: tab, Alias: *alias})
   338  
   339  	colCount := tab.DeletableColumnCount()
   340  	if md.cols == nil {
   341  		md.cols = make([]ColumnMeta, 0, colCount)
   342  	}
   343  
   344  	for i := 0; i < colCount; i++ {
   345  		col := tab.Column(i)
   346  		colID := md.AddColumn(string(col.ColName()), col.DatumType())
   347  		md.ColumnMeta(colID).Table = tabID
   348  	}
   349  
   350  	return tabID
   351  }
   352  
   353  // TableMeta looks up the metadata for the table associated with the given table
   354  // id. The same table can be added multiple times to the query metadata and
   355  // associated with multiple table ids.
   356  func (md *Metadata) TableMeta(tabID TableID) *TableMeta {
   357  	return &md.tables[tabID.index()]
   358  }
   359  
   360  // Table looks up the catalog table associated with the given metadata id. The
   361  // same table can be associated with multiple metadata ids.
   362  func (md *Metadata) Table(tabID TableID) cat.Table {
   363  	return md.TableMeta(tabID).Table
   364  }
   365  
// AllTables returns the metadata for all tables. The returned slice is the
// metadata's own storage rather than a copy, so the result must not be
// modified.
func (md *Metadata) AllTables() []TableMeta {
	return md.tables
}
   371  
   372  // AddColumn assigns a new unique id to a column within the query and records
   373  // its alias and type. If the alias is empty, a "column<ID>" alias is created.
   374  func (md *Metadata) AddColumn(alias string, typ *types.T) ColumnID {
   375  	if alias == "" {
   376  		alias = fmt.Sprintf("column%d", len(md.cols)+1)
   377  	}
   378  	colID := ColumnID(len(md.cols) + 1)
   379  	md.cols = append(md.cols, ColumnMeta{MetaID: colID, Alias: alias, Type: typ})
   380  	return colID
   381  }
   382  
// NumColumns returns the count of columns tracked by this Metadata instance.
// Because AddColumn hands out ids sequentially starting at 1, this is also the
// most recently assigned column id.
func (md *Metadata) NumColumns() int {
	return len(md.cols)
}
   387  
   388  // ColumnMeta looks up the metadata for the column associated with the given
   389  // column id. The same column can be added multiple times to the query metadata
   390  // and associated with multiple column ids.
   391  func (md *Metadata) ColumnMeta(colID ColumnID) *ColumnMeta {
   392  	return &md.cols[colID.index()]
   393  }
   394  
   395  // QualifiedAlias returns the column alias, possibly qualified with the table,
   396  // schema, or database name:
   397  //
   398  //   1. If fullyQualify is true, then the returned alias is prefixed by the
   399  //      original, fully qualified name of the table: tab.Name().FQString().
   400  //
   401  //   2. If there's another column in the metadata with the same column alias but
   402  //      a different table name, then prefix the column alias with the table
   403  //      name: "tabName.columnAlias".
   404  //
   405  func (md *Metadata) QualifiedAlias(colID ColumnID, fullyQualify bool, catalog cat.Catalog) string {
   406  	cm := md.ColumnMeta(colID)
   407  	if cm.Table == 0 {
   408  		// Column doesn't belong to a table, so no need to qualify it further.
   409  		return cm.Alias
   410  	}
   411  
   412  	// If a fully qualified alias has not been requested, then only qualify it if
   413  	// it would otherwise be ambiguous.
   414  	var tabAlias tree.TableName
   415  	qualify := fullyQualify
   416  	if !fullyQualify {
   417  		for i := range md.cols {
   418  			if i == int(cm.MetaID-1) {
   419  				continue
   420  			}
   421  
   422  			// If there are two columns with same alias, then column is ambiguous.
   423  			cm2 := &md.cols[i]
   424  			if cm2.Alias == cm.Alias {
   425  				tabAlias = md.TableMeta(cm.Table).Alias
   426  				if cm2.Table == 0 {
   427  					qualify = true
   428  				} else {
   429  					// Only qualify if the qualified names are actually different.
   430  					tabAlias2 := md.TableMeta(cm2.Table).Alias
   431  					if tabAlias.String() != tabAlias2.String() {
   432  						qualify = true
   433  					}
   434  				}
   435  			}
   436  		}
   437  	}
   438  
   439  	// If the column name should not even be partly qualified, then no more to do.
   440  	if !qualify {
   441  		return cm.Alias
   442  	}
   443  
   444  	var sb strings.Builder
   445  	if fullyQualify {
   446  		tn, err := catalog.FullyQualifiedName(context.TODO(), md.TableMeta(cm.Table).Table)
   447  		if err != nil {
   448  			panic(err)
   449  		}
   450  		sb.WriteString(tn.FQString())
   451  	} else {
   452  		sb.WriteString(tabAlias.String())
   453  	}
   454  	sb.WriteRune('.')
   455  	sb.WriteString(cm.Alias)
   456  	return sb.String()
   457  }
   458  
   459  // SequenceID uniquely identifies the usage of a sequence within the scope of a
   460  // query. SequenceID 0 is reserved to mean "unknown sequence".
   461  type SequenceID uint64
   462  
   463  // index returns the index of the sequence in Metadata.sequences. It's biased by 1, so
   464  // that SequenceID 0 can be be reserved to mean "unknown sequence".
   465  func (s SequenceID) index() int {
   466  	return int(s - 1)
   467  }
   468  
   469  // makeSequenceID constructs a new SequenceID from its component parts.
   470  func makeSequenceID(index int) SequenceID {
   471  	// Bias the sequence index by 1.
   472  	return SequenceID(index + 1)
   473  }
   474  
   475  // AddSequence adds the sequence to the metadata, returning a SequenceID that
   476  // can be used to retrieve it.
   477  func (md *Metadata) AddSequence(seq cat.Sequence) SequenceID {
   478  	seqID := makeSequenceID(len(md.sequences))
   479  	if md.sequences == nil {
   480  		md.sequences = make([]cat.Sequence, 0, 4)
   481  	}
   482  	md.sequences = append(md.sequences, seq)
   483  
   484  	return seqID
   485  }
   486  
   487  // Sequence looks up the catalog sequence associated with the given metadata id. The
   488  // same sequence can be associated with multiple metadata ids.
   489  func (md *Metadata) Sequence(seqID SequenceID) cat.Sequence {
   490  	return md.sequences[seqID.index()]
   491  }
   492  
// UniqueID should be used to disambiguate multiple uses of an expression
// within the scope of a query. For example, a UniqueID field should be
// added to an expression type if two instances of that type might otherwise
// be indistinguishable based on the values of their other fields.
//
// Fresh values are handed out by Metadata.NextUniqueID.
//
// See the comment for Metadata for more details on identifiers.
type UniqueID uint64
   500  
   501  // NextUniqueID returns a fresh UniqueID which is guaranteed to never have been
   502  // previously allocated in this memo.
   503  func (md *Metadata) NextUniqueID() UniqueID {
   504  	md.currUniqueID++
   505  	return md.currUniqueID
   506  }
   507  
// AddView adds a new reference to a view used by the query. Views are appended
// without deduplication, so adding the same view twice records it twice.
func (md *Metadata) AddView(v cat.View) {
	md.views = append(md.views, v)
}
   512  
// AllViews returns the metadata for all views. The returned slice is the
// metadata's own storage rather than a copy, so the result must not be
// modified.
func (md *Metadata) AllViews() []cat.View {
	return md.views
}
   518  
   519  // AllDataSourceNames returns the fully qualified names of all datasources
   520  // referenced by the metadata.
   521  func (md *Metadata) AllDataSourceNames(
   522  	fullyQualifiedName func(ds cat.DataSource) (cat.DataSourceName, error),
   523  ) (tables, sequences, views []tree.TableName, _ error) {
   524  	// Catalog objects can show up multiple times in our lists, so deduplicate
   525  	// them.
   526  	seen := make(map[tree.TableName]struct{})
   527  
   528  	getNames := func(count int, get func(int) cat.DataSource) ([]tree.TableName, error) {
   529  		result := make([]tree.TableName, 0, count)
   530  		for i := 0; i < count; i++ {
   531  			ds := get(i)
   532  			tn, err := fullyQualifiedName(ds)
   533  			if err != nil {
   534  				return nil, err
   535  			}
   536  			if _, ok := seen[tn]; !ok {
   537  				seen[tn] = struct{}{}
   538  				result = append(result, tn)
   539  			}
   540  		}
   541  		return result, nil
   542  	}
   543  	var err error
   544  	tables, err = getNames(len(md.tables), func(i int) cat.DataSource {
   545  		return md.tables[i].Table
   546  	})
   547  	if err != nil {
   548  		return nil, nil, nil, err
   549  	}
   550  	sequences, err = getNames(len(md.sequences), func(i int) cat.DataSource {
   551  		return md.sequences[i]
   552  	})
   553  	if err != nil {
   554  		return nil, nil, nil, err
   555  	}
   556  	views, err = getNames(len(md.views), func(i int) cat.DataSource {
   557  		return md.views[i]
   558  	})
   559  	if err != nil {
   560  		return nil, nil, nil, err
   561  	}
   562  	return tables, sequences, views, nil
   563  }
   564  
// WithID uniquely identifies a With expression within the scope of a query.
// WithID=0 is reserved to mean "unknown expression".
//
// See the comment for Metadata for more details on identifiers.
type WithID uint64