github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sqlbase/data_source.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package sqlbase 12 13 import ( 14 "bytes" 15 "fmt" 16 17 "github.com/cockroachdb/cockroach/pkg/sql/sem/tree" 18 ) 19 20 // To understand DataSourceInfo below it is crucial to understand the 21 // meaning of a "data source" and its relationship to names/IndexedVars. 22 // 23 // A data source is an object that can deliver rows of column data, 24 // where each row is implemented in CockroachDB as an array of values. 25 // The defining property of a data source is that the columns in its 26 // result row arrays are always 0-indexed. 27 // 28 // From the language perspective, data sources are defined indirectly by: 29 // - the FROM clause in a SELECT statement; 30 // - JOIN clauses within the FROM clause; 31 // - the clause that follows INSERT INTO colName(Cols...); 32 // - the clause that follows UPSERT ....; 33 // - the invisible data source defined by the original table row during 34 // UPSERT, if it exists. 35 // 36 // Most expressions (tree.Expr trees) in CockroachDB refer to a 37 // single data source. A notable exception is UPSERT, where expressions 38 // can refer to two sources: one for the values being inserted, one for 39 // the original row data in the table for the conflicting (already 40 // existing) rows. 41 // 42 // Meanwhile, IndexedVars in CockroachDB provide the interface between 43 // symbolic names in expressions (e.g. "f.x", called VarNames, 44 // or names) and data sources. During evaluation, an IndexedVar must 45 // resolve to a column value. For a given name there are thus two 46 // subsequent questions that must be answered: 47 // 48 // - which data source is the name referring to? (when there is more than 1 source) 49 // - which 0-indexed column in that data source is the name referring to? 50 // 51 // The IndexedVar must distinguish data sources because the same column index 52 // may refer to different columns in different data sources. For 53 // example in an UPSERT statement the IndexedVar for "excluded.x" could refer 54 // to column 0 in the (already existing) table row, whereas "src.x" could 55 // refer to column 0 in the valueNode that provides values to insert. 56 // 57 // Within this context, the infrastructure for data sources and IndexedVars 58 // is implemented as follows: 59 // 60 // - DataSourceInfo provides column metadata for exactly one data source; 61 // - the index in IndexedVars points to one of the columns in the DataSourceInfo. 62 // - IndexedVarResolver (select_name_resolution.go) is tasked with 63 // linking back IndexedVars with their data source and column index. 64 // 65 // This being said, there is a misunderstanding one should be careful 66 // to avoid: *there is no direct relationship between data sources and 67 // table names* in SQL. In other words: 68 // 69 // - the same table name can be present in two or more data sources; for example 70 // with: 71 // INSERT INTO excluded VALUES (42) ON CONFLICT (x) DO UPDATE ... 72 // the name "excluded" can refer either to the data source for VALUES(42) 73 // or the implicit data source corresponding to the rows in the original table 74 // that conflict with the new values. 75 // 76 // When this happens, a name of the form "excluded.x" must be 77 // resolved by considering all the data sources; if there is more 78 // than one data source providing the table name "excluded" (as in 79 // this case), the query is rejected with an ambiguity error. 80 // 81 // - a single data source may provide values for multiple table names; for 82 // example with: 83 // SELECT * FROM (f CROSS JOIN g) WHERE f.x = g.x 84 // there is a single data source corresponding to the results of the 85 // CROSS JOIN, providing a single 0-indexed array of values on each 86 // result row. 87 // 88 // (multiple table names for a single data source happen in JOINed sources 89 // and JOINed sources only. Note that a FROM clause with a comma-separated 90 // list of sources is a CROSS JOIN in disguise.) 91 // 92 // When this happens, names of the form "f.x" in either WHERE, 93 // SELECT renders, or other expressions which can refer to the data 94 // source do not refer to the "internal" data sources of the JOIN; 95 // they always refer to the final result rows of the JOIN source as 96 // a whole. 97 // 98 // This implies that a single DataSourceInfo that provides metadata 99 // for a complex JOIN clause must "know" which table name is 100 // associated with each column in its result set. 101 // 102 103 // DataSourceInfo provides column metadata for exactly one data source. 104 type DataSourceInfo struct { 105 // SourceColumns match the plan.Columns() 1-to-1. However the column 106 // names might be different if the statement renames them using AS. 107 SourceColumns ResultColumns 108 109 // SourceAlias indicates to which table the source columns belong. 110 // This often corresponds to the original table names for each column but 111 // might be different if the statement renames them using AS. 112 SourceAlias tree.TableName 113 } 114 115 func (src *DataSourceInfo) String() string { 116 var buf bytes.Buffer 117 for i := range src.SourceColumns { 118 if i > 0 { 119 buf.WriteByte('\t') 120 } 121 fmt.Fprintf(&buf, "%d", i) 122 } 123 buf.WriteString("\toutput column positions\n") 124 for i, c := range src.SourceColumns { 125 if i > 0 { 126 buf.WriteByte('\t') 127 } 128 if c.Hidden { 129 buf.WriteByte('*') 130 } 131 buf.WriteString(c.Name) 132 } 133 buf.WriteString("\toutput column names\n") 134 if src.SourceAlias == AnonymousTable { 135 buf.WriteString("\t<anonymous table>\n") 136 } else { 137 fmt.Fprintf(&buf, "\t'%s'\n", src.SourceAlias.String()) 138 } 139 return buf.String() 140 } 141 142 // AnonymousTable is the empty table name, used when a data source 143 // has no own name, e.g. VALUES, subqueries or the empty source. 144 var AnonymousTable = tree.TableName{} 145 146 // NewSourceInfoForSingleTable creates a simple DataSourceInfo 147 // which maps the same tableAlias to all columns. 148 func NewSourceInfoForSingleTable(tn tree.TableName, columns ResultColumns) *DataSourceInfo { 149 if tn.ObjectName != "" && tn.SchemaName != "" { 150 // When we're not looking at an unqualified table, we make sure that 151 // the table name in the data source struct is fully qualified. This 152 // ensures that queries like this are valid: 153 // 154 // select "".information_schema.schemata.schema_name from "".information_schema.schemata 155 tn.ExplicitCatalog = true 156 tn.ExplicitSchema = true 157 } 158 return &DataSourceInfo{ 159 SourceColumns: columns, 160 SourceAlias: tn, 161 } 162 } 163 164 type varFormatter struct { 165 TableName tree.TableName 166 ColumnName tree.Name 167 } 168 169 // Format implements the NodeFormatter interface. 170 func (c *varFormatter) Format(ctx *tree.FmtCtx) { 171 if ctx.HasFlags(tree.FmtShowTableAliases) && c.TableName.ObjectName != "" { 172 // This logic is different from (*TableName).Format() with 173 // FmtAlwaysQualify, because FmtShowTableAliases only wants to 174 // prefixes the table names for vars in expressions, not table 175 // names in sub-queries. 176 if c.TableName.SchemaName != "" { 177 if c.TableName.CatalogName != "" { 178 ctx.FormatNode(&c.TableName.CatalogName) 179 ctx.WriteByte('.') 180 } 181 ctx.FormatNode(&c.TableName.SchemaName) 182 ctx.WriteByte('.') 183 } 184 185 ctx.FormatNode(&c.TableName.ObjectName) 186 ctx.WriteByte('.') 187 } 188 ctx.FormatNode(&c.ColumnName) 189 } 190 191 // NodeFormatter returns a tree.NodeFormatter that, when formatted, 192 // represents the object at the input column index. 193 func (src *DataSourceInfo) NodeFormatter(colIdx int) tree.NodeFormatter { 194 return &varFormatter{ 195 TableName: src.SourceAlias, 196 ColumnName: tree.Name(src.SourceColumns[colIdx].Name), 197 } 198 }