github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sqlbase/data_source.go (about)

     1  // Copyright 2016 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package sqlbase
    12  
    13  import (
    14  	"bytes"
    15  	"fmt"
    16  
    17  	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
    18  )
    19  
    20  // To understand DataSourceInfo below it is crucial to understand the
    21  // meaning of a "data source" and its relationship to names/IndexedVars.
    22  //
    23  // A data source is an object that can deliver rows of column data,
    24  // where each row is implemented in CockroachDB as an array of values.
    25  // The defining property of a data source is that the columns in its
    26  // result row arrays are always 0-indexed.
    27  //
    28  // From the language perspective, data sources are defined indirectly by:
    29  // - the FROM clause in a SELECT statement;
    30  // - JOIN clauses within the FROM clause;
    31  // - the clause that follows INSERT INTO colName(Cols...);
    32  // - the clause that follows UPSERT ....;
    33  // - the invisible data source defined by the original table row during
    34  //   UPSERT, if it exists.
    35  //
    36  // Most expressions (tree.Expr trees) in CockroachDB refer to a
    37  // single data source. A notable exception is UPSERT, where expressions
    38  // can refer to two sources: one for the values being inserted, one for
    39  // the original row data in the table for the conflicting (already
    40  // existing) rows.
    41  //
    42  // Meanwhile, IndexedVars in CockroachDB provide the interface between
    43  // symbolic names in expressions (e.g. "f.x", called VarNames,
    44  // or names) and data sources. During evaluation, an IndexedVar must
    45  // resolve to a column value. For a given name there are thus two
    46  // subsequent questions that must be answered:
    47  //
    48  // - which data source is the name referring to? (when there is more than 1 source)
    49  // - which 0-indexed column in that data source is the name referring to?
    50  //
    51  // The IndexedVar must distinguish data sources because the same column index
    52  // may refer to different columns in different data sources. For
    53  // example in an UPSERT statement the IndexedVar for "excluded.x" could refer
    54  // to column 0 in the (already existing) table row, whereas "src.x" could
    55  // refer to column 0 in the valueNode that provides values to insert.
    56  //
    57  // Within this context, the infrastructure for data sources and IndexedVars
    58  // is implemented as follows:
    59  //
    60  // - DataSourceInfo provides column metadata for exactly one data source;
    61  // - the index in IndexedVars points to one of the columns in the DataSourceInfo.
    62  // - IndexedVarResolver (select_name_resolution.go) is tasked with
    63  //   linking back IndexedVars with their data source and column index.
    64  //
    65  // This being said, there is a misunderstanding one should be careful
    66  // to avoid: *there is no direct relationship between data sources and
    67  // table names* in SQL. In other words:
    68  //
    69  // - the same table name can be present in two or more data sources; for example
    70  //   with:
    71  //        INSERT INTO excluded VALUES (42) ON CONFLICT (x) DO UPDATE ...
    72  //   the name "excluded" can refer either to the data source for VALUES(42)
    73  //   or the implicit data source corresponding to the rows in the original table
    74  //   that conflict with the new values.
    75  //
    76  //   When this happens, a name of the form "excluded.x" must be
    77  //   resolved by considering all the data sources; if there is more
    78  //   than one data source providing the table name "excluded" (as in
    79  //   this case), the query is rejected with an ambiguity error.
    80  //
    81  // - a single data source may provide values for multiple table names; for
    82  //   example with:
    83  //         SELECT * FROM (f CROSS JOIN g) WHERE f.x = g.x
    84  //   there is a single data source corresponding to the results of the
    85  //   CROSS JOIN, providing a single 0-indexed array of values on each
    86  //   result row.
    87  //
    88  //   (multiple table names for a single data source happen in JOINed sources
    89  //   and JOINed sources only. Note that a FROM clause with a comma-separated
    90  //   list of sources is a CROSS JOIN in disguise.)
    91  //
    92  //   When this happens, names of the form "f.x" in either WHERE,
    93  //   SELECT renders, or other expressions which can refer to the data
    94  //   source do not refer to the "internal" data sources of the JOIN;
    95  //   they always refer to the final result rows of the JOIN source as
    96  //   a whole.
    97  //
    98  //   This implies that a single DataSourceInfo that provides metadata
    99  //   for a complex JOIN clause must "know" which table name is
   100  //   associated with each column in its result set.
   101  //
   102  
   103  // DataSourceInfo provides column metadata for exactly one data source.
   104  type DataSourceInfo struct {
   105  	// SourceColumns match the plan.Columns() 1-to-1. However the column
   106  	// names might be different if the statement renames them using AS.
   107  	SourceColumns ResultColumns
   108  
   109  	// SourceAlias indicates to which table the source columns belong.
   110  	// This often corresponds to the original table names for each column but
   111  	// might be different if the statement renames them using AS.
   112  	SourceAlias tree.TableName
   113  }
   114  
   115  func (src *DataSourceInfo) String() string {
   116  	var buf bytes.Buffer
   117  	for i := range src.SourceColumns {
   118  		if i > 0 {
   119  			buf.WriteByte('\t')
   120  		}
   121  		fmt.Fprintf(&buf, "%d", i)
   122  	}
   123  	buf.WriteString("\toutput column positions\n")
   124  	for i, c := range src.SourceColumns {
   125  		if i > 0 {
   126  			buf.WriteByte('\t')
   127  		}
   128  		if c.Hidden {
   129  			buf.WriteByte('*')
   130  		}
   131  		buf.WriteString(c.Name)
   132  	}
   133  	buf.WriteString("\toutput column names\n")
   134  	if src.SourceAlias == AnonymousTable {
   135  		buf.WriteString("\t<anonymous table>\n")
   136  	} else {
   137  		fmt.Fprintf(&buf, "\t'%s'\n", src.SourceAlias.String())
   138  	}
   139  	return buf.String()
   140  }
   141  
   142  // AnonymousTable is the empty table name, used when a data source
   143  // has no own name, e.g. VALUES, subqueries or the empty source.
   144  var AnonymousTable = tree.TableName{}
   145  
   146  // NewSourceInfoForSingleTable creates a simple DataSourceInfo
   147  // which maps the same tableAlias to all columns.
   148  func NewSourceInfoForSingleTable(tn tree.TableName, columns ResultColumns) *DataSourceInfo {
   149  	if tn.ObjectName != "" && tn.SchemaName != "" {
   150  		// When we're not looking at an unqualified table, we make sure that
   151  		// the table name in the data source struct is fully qualified. This
   152  		// ensures that queries like this are valid:
   153  		//
   154  		// select "".information_schema.schemata.schema_name from  "".information_schema.schemata
   155  		tn.ExplicitCatalog = true
   156  		tn.ExplicitSchema = true
   157  	}
   158  	return &DataSourceInfo{
   159  		SourceColumns: columns,
   160  		SourceAlias:   tn,
   161  	}
   162  }
   163  
   164  type varFormatter struct {
   165  	TableName  tree.TableName
   166  	ColumnName tree.Name
   167  }
   168  
   169  // Format implements the NodeFormatter interface.
   170  func (c *varFormatter) Format(ctx *tree.FmtCtx) {
   171  	if ctx.HasFlags(tree.FmtShowTableAliases) && c.TableName.ObjectName != "" {
   172  		// This logic is different from (*TableName).Format() with
   173  		// FmtAlwaysQualify, because FmtShowTableAliases only wants to
   174  		// prefixes the table names for vars in expressions, not table
   175  		// names in sub-queries.
   176  		if c.TableName.SchemaName != "" {
   177  			if c.TableName.CatalogName != "" {
   178  				ctx.FormatNode(&c.TableName.CatalogName)
   179  				ctx.WriteByte('.')
   180  			}
   181  			ctx.FormatNode(&c.TableName.SchemaName)
   182  			ctx.WriteByte('.')
   183  		}
   184  
   185  		ctx.FormatNode(&c.TableName.ObjectName)
   186  		ctx.WriteByte('.')
   187  	}
   188  	ctx.FormatNode(&c.ColumnName)
   189  }
   190  
   191  // NodeFormatter returns a tree.NodeFormatter that, when formatted,
   192  // represents the object at the input column index.
   193  func (src *DataSourceInfo) NodeFormatter(colIdx int) tree.NodeFormatter {
   194  	return &varFormatter{
   195  		TableName:  src.SourceAlias,
   196  		ColumnName: tree.Name(src.SourceColumns[colIdx].Name),
   197  	}
   198  }