vitess.io/vitess@v0.16.2/go/vt/vtgate/planbuilder/symtab.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package planbuilder
    18  
    19  import (
    20  	"fmt"
    21  	"strconv"
    22  	"strings"
    23  
    24  	"vitess.io/vitess/go/vt/vterrors"
    25  
    26  	"vitess.io/vitess/go/vt/sqlparser"
    27  	"vitess.io/vitess/go/vt/vtgate/vindexes"
    28  
    29  	querypb "vitess.io/vitess/go/vt/proto/query"
    30  )
    31  
    32  // symtab represents the symbol table for a SELECT statement
    33  // or a subquery. The symtab evolves over time.
    34  // As a query is analyzed, multiple independent
    35  // symtabs are created, and they are later merged as each
    36  // sub-expression of a FROM clause is merged.
    37  //
    38  // A symtab maintains uniqueColumns, which is a list of unique
    39  // vindex column names. These names can be resolved without the
    40  // need to qualify them by their table names. If there are
    41  // duplicates during a merge, those columns are removed from
    42  // the unique list, thereby disallowing unqualified references
    43  // to such columns.
    44  //
    45  // After a select expression is analyzed, the
    46  // ResultColumns field is set. In the case of a subquery, the
    47  // Outer field points to the outer symtab. Any symbols that
    48  // are not resolved locally are added to the Externs field,
    49  // which is later used to determine if the subquery can be
    50  // merged with an outer route.
    51  type symtab struct {
    52  	tables     map[sqlparser.TableName]*table
    53  	tableNames []sqlparser.TableName
    54  
    55  	// uniqueColumns has the column name as key
    56  	// and points at the columns that tables contains.
    57  	uniqueColumns map[string]*column
    58  
    59  	// singleRoute is set only if all the symbols in
    60  	// the symbol table are part of the same route.
    61  	singleRoute *route
    62  
    63  	ResultColumns []*resultColumn
    64  	Outer         *symtab
    65  	Externs       []*sqlparser.ColName
    66  }
    67  
    68  // newSymtab creates a new symtab.
    69  func newSymtab() *symtab {
    70  	return &symtab{
    71  		tables:        make(map[sqlparser.TableName]*table),
    72  		uniqueColumns: make(map[string]*column),
    73  	}
    74  }
    75  
    76  // newSymtab creates a new symtab initialized
    77  // to contain just one route.
    78  func newSymtabWithRoute(rb *route) *symtab {
    79  	return &symtab{
    80  		tables:        make(map[sqlparser.TableName]*table),
    81  		uniqueColumns: make(map[string]*column),
    82  		singleRoute:   rb,
    83  	}
    84  }
    85  
    86  // AddVSchemaTable adds a vschema table to symtab.
    87  func (st *symtab) AddVSchemaTable(alias sqlparser.TableName, vschemaTable *vindexes.Table, rb *route) error {
    88  	t := &table{
    89  		alias:        alias,
    90  		origin:       rb,
    91  		vschemaTable: vschemaTable,
    92  	}
    93  
    94  	for _, col := range vschemaTable.Columns {
    95  		if _, err := t.mergeColumn(col.Name, &column{
    96  			origin: rb,
    97  			st:     st,
    98  			typ:    col.Type,
    99  		}); err != nil {
   100  			return err
   101  		}
   102  	}
   103  	if vschemaTable.ColumnListAuthoritative {
   104  		// This will prevent new columns from being added.
   105  		t.isAuthoritative = true
   106  	}
   107  
   108  	for _, cv := range vschemaTable.ColumnVindexes {
   109  		single, ok := cv.Vindex.(vindexes.SingleColumn)
   110  		if !ok {
   111  			continue
   112  		}
   113  		for i, cvcol := range cv.Columns {
   114  			col, err := t.mergeColumn(cvcol, &column{
   115  				origin: rb,
   116  				st:     st,
   117  			})
   118  			if err != nil {
   119  				return err
   120  			}
   121  			if i == 0 {
   122  				if col.vindex == nil || col.vindex.Cost() > single.Cost() {
   123  					col.vindex = single
   124  				}
   125  			}
   126  		}
   127  	}
   128  
   129  	if ai := vschemaTable.AutoIncrement; ai != nil {
   130  		if _, ok := t.columns[ai.Column.Lowered()]; !ok {
   131  			if _, err := t.mergeColumn(ai.Column, &column{
   132  				origin: rb,
   133  				st:     st,
   134  			}); err != nil {
   135  				return err
   136  			}
   137  		}
   138  	}
   139  	if err := st.AddTable(t); err != nil {
   140  		return err
   141  	}
   142  	return nil
   143  }
   144  
   145  // Merge merges the new symtab into the current one.
   146  // Duplicate table aliases return an error.
   147  // uniqueColumns is updated, but duplicates are removed.
   148  // Merges are only performed during the FROM clause analysis.
   149  // At this point, only tables and uniqueColumns are set.
   150  // All other fields are ignored.
   151  func (st *symtab) Merge(newsyms *symtab) error {
   152  	if st.tableNames == nil || newsyms.tableNames == nil {
   153  		// If any side of symtab has anonymous tables,
   154  		// we treat the merged symtab as having anonymous tables.
   155  		return nil
   156  	}
   157  	for _, t := range newsyms.tables {
   158  		if err := st.AddTable(t); err != nil {
   159  			return err
   160  		}
   161  	}
   162  	return nil
   163  }
   164  
   165  // AddTable adds a table to symtab.
   166  func (st *symtab) AddTable(t *table) error {
   167  	if rb, ok := t.origin.(*route); !ok || rb.Resolve() != st.singleRoute {
   168  		st.singleRoute = nil
   169  	}
   170  	if _, ok := st.tables[t.alias]; ok {
   171  		return vterrors.VT03013(t.alias.Name.String())
   172  	}
   173  	st.tables[t.alias] = t
   174  	st.tableNames = append(st.tableNames, t.alias)
   175  
   176  	// update the uniqueColumns list, and eliminate
   177  	// duplicate symbols if found.
   178  	for colname, c := range t.columns {
   179  		c.st = st
   180  		if _, ok := st.uniqueColumns[colname]; ok {
   181  			// Keep the entry, but make it nil. This will
   182  			// ensure that yet another column of the same name
   183  			// doesn't get added back in.
   184  			st.uniqueColumns[colname] = nil
   185  			continue
   186  		}
   187  		st.uniqueColumns[colname] = c
   188  	}
   189  	return nil
   190  }
   191  
   192  // AllTables returns an ordered list of all current tables.
   193  func (st *symtab) AllTables() []*table {
   194  	if len(st.tableNames) == 0 {
   195  		return nil
   196  	}
   197  	tables := make([]*table, 0, len(st.tableNames))
   198  	for _, tname := range st.tableNames {
   199  		tables = append(tables, st.tables[tname])
   200  	}
   201  	return tables
   202  }
   203  
   204  // AllVschemaTableNames returns an ordered list of all current vschema tables.
   205  func (st *symtab) AllVschemaTableNames() ([]*vindexes.Table, error) {
   206  	if len(st.tableNames) == 0 {
   207  		return nil, nil
   208  	}
   209  	tables := make([]*vindexes.Table, 0, len(st.tableNames))
   210  	for _, tname := range st.tableNames {
   211  		t, ok := st.tables[tname]
   212  		if !ok {
   213  			return nil, vterrors.VT05004(sqlparser.String(tname))
   214  		}
   215  		if t.vschemaTable != nil {
   216  			tables = append(tables, t.vschemaTable)
   217  		}
   218  	}
   219  	return tables, nil
   220  }
   221  
   222  // FindTable finds a table in symtab. This function is specifically used
   223  // for expanding 'select a.*' constructs. If you're in a subquery,
   224  // you're most likely referring to a table in the local 'from' clause.
   225  // For this reason, the search is only performed in the current scope.
   226  // This may be a deviation from the formal definition of SQL, but there
   227  // are currently no use cases that require the full support.
   228  func (st *symtab) FindTable(tname sqlparser.TableName) (*table, error) {
   229  	if st.tableNames == nil {
   230  		// Unreachable because current code path checks for this condition
   231  		// before invoking this function.
   232  		return nil, vterrors.VT05007()
   233  	}
   234  	t, ok := st.tables[tname]
   235  	if !ok {
   236  		return nil, vterrors.VT05004(sqlparser.String(tname))
   237  	}
   238  	return t, nil
   239  }
   240  
   241  // SetResultColumns sets the result columns.
   242  func (st *symtab) SetResultColumns(rcs []*resultColumn) {
   243  	for _, rc := range rcs {
   244  		rc.column.st = st
   245  	}
   246  	st.ResultColumns = rcs
   247  }
   248  
   249  // Find returns the logicalPlan for the symbol referenced by col.
   250  // If a reference is found, col.Metadata is set to point
   251  // to it. Subsequent searches will reuse this metadata.
   252  //
   253  // Unqualified columns are searched in the following order:
   254  // 1. ResultColumns
   255  // 2. uniqueColumns
   256  // 3. symtab has only one table. The column is presumed to
   257  // belong to that table.
   258  // 4. symtab has more than one table, but all tables belong
   259  // to the same route. An anonymous column is created against
   260  // the current route.
   261  // If all the above fail, an error is returned. This means
   262  // that an unqualified reference can only be locally resolved.
   263  //
   264  // For qualified columns, we first look for the table. If one
   265  // is found, we look for a column in the pre-existing list.
   266  // If one is not found, we optimistically create an entry
   267  // presuming that the table has such a column. If this is
   268  // not the case, the query will fail when sent to vttablet.
   269  // If the table is not found in the local scope, the search
   270  // is continued in the outer scope, but only if ResultColumns
   271  // is not set (this is MySQL behavior).
   272  //
   273  // For symbols that were found locally, isLocal is returned
   274  // as true. Otherwise, it's returned as false and the symbol
   275  // gets added to the Externs list, which can later be used
   276  // to decide where to push-down the subquery.
   277  func (st *symtab) Find(col *sqlparser.ColName) (origin logicalPlan, isLocal bool, err error) {
   278  	// Return previously cached info if present.
   279  	if column, ok := col.Metadata.(*column); ok {
   280  		return column.Origin(), column.st == st, nil
   281  	}
   282  
   283  	// Unqualified column case.
   284  	if col.Qualifier.IsEmpty() {
   285  		// Step 1. Search ResultColumns.
   286  		c, err := st.searchResultColumn(col)
   287  		if err != nil {
   288  			return nil, false, err
   289  		}
   290  		if c != nil {
   291  			col.Metadata = c
   292  			return c.Origin(), true, nil
   293  		}
   294  	}
   295  
   296  	// Steps 2-4 performed by searchTables.
   297  	c, err := st.searchTables(col)
   298  	if err != nil {
   299  		return nil, false, err
   300  	}
   301  	if c != nil {
   302  		col.Metadata = c
   303  		return c.Origin(), true, nil
   304  	}
   305  
   306  	if st.Outer == nil {
   307  		return nil, false, vterrors.VT03019(sqlparser.String(col))
   308  	}
   309  	// Search is not continued if ResultColumns already has values:
   310  	// select a ... having ... (select b ... having a...). In this case,
   311  	// a (in having) should not match the outer-most 'a'. This is to
   312  	// match MySQL's behavior.
   313  	if len(st.ResultColumns) != 0 {
   314  		return nil, false, vterrors.VT03020(sqlparser.String(col))
   315  	}
   316  
   317  	if origin, _, err = st.Outer.Find(col); err != nil {
   318  		return nil, false, err
   319  	}
   320  	st.Externs = append(st.Externs, col)
   321  	return origin, false, nil
   322  }
   323  
   324  // searchResultColumn looks for col in the results columns.
   325  func (st *symtab) searchResultColumn(col *sqlparser.ColName) (c *column, err error) {
   326  	var cursym *resultColumn
   327  	for _, rc := range st.ResultColumns {
   328  		if rc.alias.Equal(col.Name) {
   329  			if cursym != nil {
   330  				return nil, vterrors.VT03021(sqlparser.String(col))
   331  			}
   332  			cursym = rc
   333  		}
   334  	}
   335  	if cursym != nil {
   336  		return cursym.column, nil
   337  	}
   338  	return nil, nil
   339  }
   340  
   341  // searchTables looks for the column in the tables. The search order
   342  // is as described in Find.
   343  func (st *symtab) searchTables(col *sqlparser.ColName) (*column, error) {
   344  	var t *table
   345  	// @@ syntax is only allowed for dual tables, in which case there should be
   346  	// only one in the symtab. So, such expressions will be implicitly matched.
   347  	if col.Qualifier.IsEmpty() || strings.HasPrefix(col.Qualifier.Name.String(), "@@") {
   348  		// Search uniqueColumns first. If found, our job is done.
   349  		// Check for nil because there can be nil entries if there
   350  		// are duplicate columns across multiple tables.
   351  		if c := st.uniqueColumns[col.Name.Lowered()]; c != nil {
   352  			return c, nil
   353  		}
   354  
   355  		switch {
   356  		case len(st.tables) == 1:
   357  			// If there's only one table match against it.
   358  			// Loop executes once to match the only table.
   359  			for _, v := range st.tables {
   360  				t = v
   361  			}
   362  			// No return: break out.
   363  		case st.singleRoute != nil:
   364  			// If there's only one route, create an anonymous symbol.
   365  			return &column{origin: st.singleRoute, st: st}, nil
   366  		default:
   367  			// If none of the above, the symbol is unresolvable.
   368  			return nil, vterrors.VT03019(sqlparser.String(col))
   369  		}
   370  	} else {
   371  		var ok bool
   372  		t, ok = st.tables[col.Qualifier]
   373  		if !ok {
   374  			return nil, nil
   375  		}
   376  	}
   377  
   378  	// At this point, t should be set.
   379  	c, ok := t.columns[col.Name.Lowered()]
   380  	if !ok {
   381  		// We know all the column names of a subquery. Might as well return an error if it's not found.
   382  		if t.isAuthoritative {
   383  			return nil, vterrors.VT03019(sqlparser.String(col))
   384  		}
   385  		c = &column{
   386  			origin: t.Origin(),
   387  			st:     st,
   388  		}
   389  		t.addColumn(col.Name, c)
   390  	}
   391  	return c, nil
   392  }
   393  
   394  // ResultFromNumber returns the result column index based on the column
   395  // order expression.
   396  func ResultFromNumber(rcs []*resultColumn, val *sqlparser.Literal, caller string) (int, error) {
   397  	if val.Type != sqlparser.IntVal {
   398  		return 0, vterrors.VT13001("column number is not an INT")
   399  	}
   400  	num, err := strconv.ParseInt(val.Val, 0, 64)
   401  	if err != nil {
   402  		return 0, vterrors.VT13001(fmt.Sprintf("error parsing column number: %s", sqlparser.String(val)))
   403  	}
   404  	if num < 1 || num > int64(len(rcs)) {
   405  		return 0, vterrors.VT03014(num, caller)
   406  	}
   407  	return int(num - 1), nil
   408  }
   409  
   410  // Vindex returns the vindex if the expression is a plain column reference
   411  // that is part of the specified route, and has an associated vindex.
   412  func (st *symtab) Vindex(expr sqlparser.Expr, scope *route) vindexes.SingleColumn {
   413  	col, ok := expr.(*sqlparser.ColName)
   414  	if !ok {
   415  		return nil
   416  	}
   417  	if col.Metadata == nil {
   418  		// Find will set the Metadata.
   419  		if _, _, err := st.Find(col); err != nil {
   420  			return nil
   421  		}
   422  	}
   423  	c := col.Metadata.(*column)
   424  	if c.Origin() != scope {
   425  		return nil
   426  	}
   427  	return c.vindex
   428  }
   429  
   430  // BuildColName builds a *sqlparser.ColName for the resultColumn specified
   431  // by the index. The built ColName will correctly reference the resultColumn
   432  // it was built from.
   433  func BuildColName(rcs []*resultColumn, index int) (*sqlparser.ColName, error) {
   434  	alias := rcs[index].alias
   435  	if alias.IsEmpty() {
   436  		return nil, vterrors.VT12001("reference a complex expression")
   437  	}
   438  	for i, rc := range rcs {
   439  		if i == index {
   440  			continue
   441  		}
   442  		if rc.alias.Equal(alias) {
   443  			return nil, vterrors.VT03021(alias)
   444  		}
   445  	}
   446  	return &sqlparser.ColName{
   447  		Metadata: rcs[index].column,
   448  		Name:     alias,
   449  	}, nil
   450  }
   451  
   452  // ResolveSymbols resolves all column references against symtab.
   453  // This makes sure that they all have their Metadata initialized.
   454  // If a symbol cannot be resolved or if the expression contains
   455  // a subquery, an error is returned.
   456  func (st *symtab) ResolveSymbols(node sqlparser.SQLNode) error {
   457  	return sqlparser.Walk(func(currNode sqlparser.SQLNode) (kontinue bool, err error) {
   458  		switch currNode := currNode.(type) {
   459  		case *sqlparser.ColName:
   460  			if _, _, err := st.Find(currNode); err != nil {
   461  				return false, err
   462  			}
   463  		case *sqlparser.Subquery:
   464  			return false, vterrors.VT12001(fmt.Sprintf("subqueries disallowed in %T", node))
   465  		}
   466  		return true, nil
   467  	}, node)
   468  }
   469  
   470  // table is part of symtab.
   471  // It represents a table alias in a FROM clause. It points
   472  // to the logicalPlan that represents it.
   473  type table struct {
   474  	alias           sqlparser.TableName
   475  	columns         map[string]*column
   476  	columnNames     []sqlparser.IdentifierCI
   477  	isAuthoritative bool
   478  	origin          logicalPlan
   479  	vschemaTable    *vindexes.Table
   480  }
   481  
   482  func (t *table) addColumn(alias sqlparser.IdentifierCI, c *column) {
   483  	if t.columns == nil {
   484  		t.columns = make(map[string]*column)
   485  	}
   486  	lowered := alias.Lowered()
   487  	// Dups are allowed, but first one wins if referenced.
   488  	if _, ok := t.columns[lowered]; !ok {
   489  		c.colNumber = len(t.columnNames)
   490  		t.columns[lowered] = c
   491  	}
   492  	t.columnNames = append(t.columnNames, alias)
   493  }
   494  
   495  // mergeColumn merges or creates a new column for the table.
   496  // If the table is authoritative and the column doesn't already
   497  // exist, it returns an error. If the table is not authoritative,
   498  // the column is added if not already present.
   499  func (t *table) mergeColumn(alias sqlparser.IdentifierCI, c *column) (*column, error) {
   500  	if t.columns == nil {
   501  		t.columns = make(map[string]*column)
   502  	}
   503  	lowered := alias.Lowered()
   504  	if col, ok := t.columns[lowered]; ok {
   505  		return col, nil
   506  	}
   507  	if t.isAuthoritative {
   508  		return nil, vterrors.VT03022(sqlparser.String(alias), sqlparser.String(t.alias))
   509  	}
   510  	c.colNumber = len(t.columnNames)
   511  	t.columns[lowered] = c
   512  	t.columnNames = append(t.columnNames, alias)
   513  	return c, nil
   514  }
   515  
   516  // Origin returns the route that originates the table.
   517  func (t *table) Origin() logicalPlan {
   518  	// If it's a route, we have to resolve it.
   519  	if rb, ok := t.origin.(*route); ok {
   520  		return rb.Resolve()
   521  	}
   522  	return t.origin
   523  }
   524  
   525  // column represents a unique symbol in the query that other
   526  // parts can refer to.
   527  // Every column contains the logicalPlan it originates from.
   528  // If a column has associated vindexes, then the one with the
   529  // lowest cost is set.
   530  //
   531  // Two columns are equal if their pointer values match.
   532  //
   533  // For subquery and vindexFunc, the colNumber is also set because
   534  // the column order is known and unchangeable.
   535  type column struct {
   536  	origin    logicalPlan
   537  	st        *symtab
   538  	vindex    vindexes.SingleColumn
   539  	typ       querypb.Type
   540  	colNumber int
   541  }
   542  
   543  // Origin returns the route that originates the column.
   544  func (c *column) Origin() logicalPlan {
   545  	// If it's a route, we have to resolve it.
   546  	if rb, ok := c.origin.(*route); ok {
   547  		return rb.Resolve()
   548  	}
   549  	return c.origin
   550  }
   551  
   552  // resultColumn contains symbol info about a select expression. If the
   553  // expression represents an underlying column, then it points to it.
   554  // Otherwise, an anonymous column is created as place-holder.
   555  type resultColumn struct {
   556  	// alias will represent the unqualified symbol name for that expression.
   557  	// If the statement provides an explicit alias, that name will be used.
   558  	// If the expression is a simple column, then the base name of the
   559  	// column will be used as the alias. If the expression is non-trivial,
   560  	// alias will be empty, and cannot be referenced from other parts of
   561  	// the query.
   562  	alias  sqlparser.IdentifierCI
   563  	column *column
   564  }
   565  
   566  // NewResultColumn creates a new resultColumn based on the supplied expression.
   567  // The created symbol is not remembered until it is later set as ResultColumns
   568  // after all select expressions are analyzed.
   569  func newResultColumn(expr *sqlparser.AliasedExpr, origin logicalPlan) *resultColumn {
   570  	rc := &resultColumn{
   571  		alias: expr.As,
   572  	}
   573  	if col, ok := expr.Expr.(*sqlparser.ColName); ok {
   574  		// If no alias was specified, then the base name
   575  		// of the column becomes the alias.
   576  		if rc.alias.IsEmpty() {
   577  			rc.alias = col.Name
   578  		}
   579  		// If it's a col it should already have metadata.
   580  		rc.column = col.Metadata.(*column)
   581  	} else {
   582  		// We don't generate an alias if the expression is non-trivial.
   583  		// Just to be safe, generate an anonymous column for the expression.
   584  		typ, err := GetReturnType(expr.Expr)
   585  		rc.column = &column{
   586  			origin: origin,
   587  		}
   588  		if err == nil {
   589  			rc.column.typ = typ
   590  		}
   591  	}
   592  	return rc
   593  }
   594  
   595  // GetReturnType returns the type of the select expression that MySQL will return
   596  func GetReturnType(input sqlparser.Expr) (querypb.Type, error) {
   597  	switch node := input.(type) {
   598  	case *sqlparser.FuncExpr:
   599  		functionName := strings.ToUpper(node.Name.String())
   600  		switch functionName {
   601  		case "ABS":
   602  			// Returned value depends on the return type of the input
   603  			if len(node.Exprs) == 1 {
   604  				expr, isAliasedExpr := node.Exprs[0].(*sqlparser.AliasedExpr)
   605  				if isAliasedExpr {
   606  					return GetReturnType(expr.Expr)
   607  				}
   608  			}
   609  		}
   610  	case *sqlparser.ColName:
   611  		col := node.Metadata.(*column)
   612  		return col.typ, nil
   613  	case *sqlparser.Count, *sqlparser.CountStar:
   614  		return querypb.Type_INT64, nil
   615  	}
   616  	return 0, vterrors.VT12001(fmt.Sprintf("evaluate return type for %T", input))
   617  }