vitess.io/vitess@v0.16.2/go/vt/vtgate/planbuilder/symtab.go (about) 1 /* 2 Copyright 2019 The Vitess Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package planbuilder 18 19 import ( 20 "fmt" 21 "strconv" 22 "strings" 23 24 "vitess.io/vitess/go/vt/vterrors" 25 26 "vitess.io/vitess/go/vt/sqlparser" 27 "vitess.io/vitess/go/vt/vtgate/vindexes" 28 29 querypb "vitess.io/vitess/go/vt/proto/query" 30 ) 31 32 // symtab represents the symbol table for a SELECT statement 33 // or a subquery. The symtab evolves over time. 34 // As a query is analyzed, multiple independent 35 // symtabs are created, and they are later merged as each 36 // sub-expression of a FROM clause is merged. 37 // 38 // A symtab maintains uniqueColumns, which is a list of unique 39 // vindex column names. These names can be resolved without the 40 // need to qualify them by their table names. If there are 41 // duplicates during a merge, those columns are removed from 42 // the unique list, thereby disallowing unqualified references 43 // to such columns. 44 // 45 // After a select expression is analyzed, the 46 // ResultColumns field is set. In the case of a subquery, the 47 // Outer field points to the outer symtab. Any symbols that 48 // are not resolved locally are added to the Externs field, 49 // which is later used to determine if the subquery can be 50 // merged with an outer route. 51 type symtab struct { 52 tables map[sqlparser.TableName]*table 53 tableNames []sqlparser.TableName 54 55 // uniqueColumns has the column name as key 56 // and points at the columns that tables contains. 57 uniqueColumns map[string]*column 58 59 // singleRoute is set only if all the symbols in 60 // the symbol table are part of the same route. 61 singleRoute *route 62 63 ResultColumns []*resultColumn 64 Outer *symtab 65 Externs []*sqlparser.ColName 66 } 67 68 // newSymtab creates a new symtab. 69 func newSymtab() *symtab { 70 return &symtab{ 71 tables: make(map[sqlparser.TableName]*table), 72 uniqueColumns: make(map[string]*column), 73 } 74 } 75 76 // newSymtab creates a new symtab initialized 77 // to contain just one route. 78 func newSymtabWithRoute(rb *route) *symtab { 79 return &symtab{ 80 tables: make(map[sqlparser.TableName]*table), 81 uniqueColumns: make(map[string]*column), 82 singleRoute: rb, 83 } 84 } 85 86 // AddVSchemaTable adds a vschema table to symtab. 87 func (st *symtab) AddVSchemaTable(alias sqlparser.TableName, vschemaTable *vindexes.Table, rb *route) error { 88 t := &table{ 89 alias: alias, 90 origin: rb, 91 vschemaTable: vschemaTable, 92 } 93 94 for _, col := range vschemaTable.Columns { 95 if _, err := t.mergeColumn(col.Name, &column{ 96 origin: rb, 97 st: st, 98 typ: col.Type, 99 }); err != nil { 100 return err 101 } 102 } 103 if vschemaTable.ColumnListAuthoritative { 104 // This will prevent new columns from being added. 105 t.isAuthoritative = true 106 } 107 108 for _, cv := range vschemaTable.ColumnVindexes { 109 single, ok := cv.Vindex.(vindexes.SingleColumn) 110 if !ok { 111 continue 112 } 113 for i, cvcol := range cv.Columns { 114 col, err := t.mergeColumn(cvcol, &column{ 115 origin: rb, 116 st: st, 117 }) 118 if err != nil { 119 return err 120 } 121 if i == 0 { 122 if col.vindex == nil || col.vindex.Cost() > single.Cost() { 123 col.vindex = single 124 } 125 } 126 } 127 } 128 129 if ai := vschemaTable.AutoIncrement; ai != nil { 130 if _, ok := t.columns[ai.Column.Lowered()]; !ok { 131 if _, err := t.mergeColumn(ai.Column, &column{ 132 origin: rb, 133 st: st, 134 }); err != nil { 135 return err 136 } 137 } 138 } 139 if err := st.AddTable(t); err != nil { 140 return err 141 } 142 return nil 143 } 144 145 // Merge merges the new symtab into the current one. 146 // Duplicate table aliases return an error. 147 // uniqueColumns is updated, but duplicates are removed. 148 // Merges are only performed during the FROM clause analysis. 149 // At this point, only tables and uniqueColumns are set. 150 // All other fields are ignored. 151 func (st *symtab) Merge(newsyms *symtab) error { 152 if st.tableNames == nil || newsyms.tableNames == nil { 153 // If any side of symtab has anonymous tables, 154 // we treat the merged symtab as having anonymous tables. 155 return nil 156 } 157 for _, t := range newsyms.tables { 158 if err := st.AddTable(t); err != nil { 159 return err 160 } 161 } 162 return nil 163 } 164 165 // AddTable adds a table to symtab. 166 func (st *symtab) AddTable(t *table) error { 167 if rb, ok := t.origin.(*route); !ok || rb.Resolve() != st.singleRoute { 168 st.singleRoute = nil 169 } 170 if _, ok := st.tables[t.alias]; ok { 171 return vterrors.VT03013(t.alias.Name.String()) 172 } 173 st.tables[t.alias] = t 174 st.tableNames = append(st.tableNames, t.alias) 175 176 // update the uniqueColumns list, and eliminate 177 // duplicate symbols if found. 178 for colname, c := range t.columns { 179 c.st = st 180 if _, ok := st.uniqueColumns[colname]; ok { 181 // Keep the entry, but make it nil. This will 182 // ensure that yet another column of the same name 183 // doesn't get added back in. 184 st.uniqueColumns[colname] = nil 185 continue 186 } 187 st.uniqueColumns[colname] = c 188 } 189 return nil 190 } 191 192 // AllTables returns an ordered list of all current tables. 193 func (st *symtab) AllTables() []*table { 194 if len(st.tableNames) == 0 { 195 return nil 196 } 197 tables := make([]*table, 0, len(st.tableNames)) 198 for _, tname := range st.tableNames { 199 tables = append(tables, st.tables[tname]) 200 } 201 return tables 202 } 203 204 // AllVschemaTableNames returns an ordered list of all current vschema tables. 205 func (st *symtab) AllVschemaTableNames() ([]*vindexes.Table, error) { 206 if len(st.tableNames) == 0 { 207 return nil, nil 208 } 209 tables := make([]*vindexes.Table, 0, len(st.tableNames)) 210 for _, tname := range st.tableNames { 211 t, ok := st.tables[tname] 212 if !ok { 213 return nil, vterrors.VT05004(sqlparser.String(tname)) 214 } 215 if t.vschemaTable != nil { 216 tables = append(tables, t.vschemaTable) 217 } 218 } 219 return tables, nil 220 } 221 222 // FindTable finds a table in symtab. This function is specifically used 223 // for expanding 'select a.*' constructs. If you're in a subquery, 224 // you're most likely referring to a table in the local 'from' clause. 225 // For this reason, the search is only performed in the current scope. 226 // This may be a deviation from the formal definition of SQL, but there 227 // are currently no use cases that require the full support. 228 func (st *symtab) FindTable(tname sqlparser.TableName) (*table, error) { 229 if st.tableNames == nil { 230 // Unreachable because current code path checks for this condition 231 // before invoking this function. 232 return nil, vterrors.VT05007() 233 } 234 t, ok := st.tables[tname] 235 if !ok { 236 return nil, vterrors.VT05004(sqlparser.String(tname)) 237 } 238 return t, nil 239 } 240 241 // SetResultColumns sets the result columns. 242 func (st *symtab) SetResultColumns(rcs []*resultColumn) { 243 for _, rc := range rcs { 244 rc.column.st = st 245 } 246 st.ResultColumns = rcs 247 } 248 249 // Find returns the logicalPlan for the symbol referenced by col. 250 // If a reference is found, col.Metadata is set to point 251 // to it. Subsequent searches will reuse this metadata. 252 // 253 // Unqualified columns are searched in the following order: 254 // 1. ResultColumns 255 // 2. uniqueColumns 256 // 3. symtab has only one table. The column is presumed to 257 // belong to that table. 258 // 4. symtab has more than one table, but all tables belong 259 // to the same route. An anonymous column is created against 260 // the current route. 261 // If all the above fail, an error is returned. This means 262 // that an unqualified reference can only be locally resolved. 263 // 264 // For qualified columns, we first look for the table. If one 265 // is found, we look for a column in the pre-existing list. 266 // If one is not found, we optimistically create an entry 267 // presuming that the table has such a column. If this is 268 // not the case, the query will fail when sent to vttablet. 269 // If the table is not found in the local scope, the search 270 // is continued in the outer scope, but only if ResultColumns 271 // is not set (this is MySQL behavior). 272 // 273 // For symbols that were found locally, isLocal is returned 274 // as true. Otherwise, it's returned as false and the symbol 275 // gets added to the Externs list, which can later be used 276 // to decide where to push-down the subquery. 277 func (st *symtab) Find(col *sqlparser.ColName) (origin logicalPlan, isLocal bool, err error) { 278 // Return previously cached info if present. 279 if column, ok := col.Metadata.(*column); ok { 280 return column.Origin(), column.st == st, nil 281 } 282 283 // Unqualified column case. 284 if col.Qualifier.IsEmpty() { 285 // Step 1. Search ResultColumns. 286 c, err := st.searchResultColumn(col) 287 if err != nil { 288 return nil, false, err 289 } 290 if c != nil { 291 col.Metadata = c 292 return c.Origin(), true, nil 293 } 294 } 295 296 // Steps 2-4 performed by searchTables. 297 c, err := st.searchTables(col) 298 if err != nil { 299 return nil, false, err 300 } 301 if c != nil { 302 col.Metadata = c 303 return c.Origin(), true, nil 304 } 305 306 if st.Outer == nil { 307 return nil, false, vterrors.VT03019(sqlparser.String(col)) 308 } 309 // Search is not continued if ResultColumns already has values: 310 // select a ... having ... (select b ... having a...). In this case, 311 // a (in having) should not match the outer-most 'a'. This is to 312 // match MySQL's behavior. 313 if len(st.ResultColumns) != 0 { 314 return nil, false, vterrors.VT03020(sqlparser.String(col)) 315 } 316 317 if origin, _, err = st.Outer.Find(col); err != nil { 318 return nil, false, err 319 } 320 st.Externs = append(st.Externs, col) 321 return origin, false, nil 322 } 323 324 // searchResultColumn looks for col in the results columns. 325 func (st *symtab) searchResultColumn(col *sqlparser.ColName) (c *column, err error) { 326 var cursym *resultColumn 327 for _, rc := range st.ResultColumns { 328 if rc.alias.Equal(col.Name) { 329 if cursym != nil { 330 return nil, vterrors.VT03021(sqlparser.String(col)) 331 } 332 cursym = rc 333 } 334 } 335 if cursym != nil { 336 return cursym.column, nil 337 } 338 return nil, nil 339 } 340 341 // searchTables looks for the column in the tables. The search order 342 // is as described in Find. 343 func (st *symtab) searchTables(col *sqlparser.ColName) (*column, error) { 344 var t *table 345 // @@ syntax is only allowed for dual tables, in which case there should be 346 // only one in the symtab. So, such expressions will be implicitly matched. 347 if col.Qualifier.IsEmpty() || strings.HasPrefix(col.Qualifier.Name.String(), "@@") { 348 // Search uniqueColumns first. If found, our job is done. 349 // Check for nil because there can be nil entries if there 350 // are duplicate columns across multiple tables. 351 if c := st.uniqueColumns[col.Name.Lowered()]; c != nil { 352 return c, nil 353 } 354 355 switch { 356 case len(st.tables) == 1: 357 // If there's only one table match against it. 358 // Loop executes once to match the only table. 359 for _, v := range st.tables { 360 t = v 361 } 362 // No return: break out. 363 case st.singleRoute != nil: 364 // If there's only one route, create an anonymous symbol. 365 return &column{origin: st.singleRoute, st: st}, nil 366 default: 367 // If none of the above, the symbol is unresolvable. 368 return nil, vterrors.VT03019(sqlparser.String(col)) 369 } 370 } else { 371 var ok bool 372 t, ok = st.tables[col.Qualifier] 373 if !ok { 374 return nil, nil 375 } 376 } 377 378 // At this point, t should be set. 379 c, ok := t.columns[col.Name.Lowered()] 380 if !ok { 381 // We know all the column names of a subquery. Might as well return an error if it's not found. 382 if t.isAuthoritative { 383 return nil, vterrors.VT03019(sqlparser.String(col)) 384 } 385 c = &column{ 386 origin: t.Origin(), 387 st: st, 388 } 389 t.addColumn(col.Name, c) 390 } 391 return c, nil 392 } 393 394 // ResultFromNumber returns the result column index based on the column 395 // order expression. 396 func ResultFromNumber(rcs []*resultColumn, val *sqlparser.Literal, caller string) (int, error) { 397 if val.Type != sqlparser.IntVal { 398 return 0, vterrors.VT13001("column number is not an INT") 399 } 400 num, err := strconv.ParseInt(val.Val, 0, 64) 401 if err != nil { 402 return 0, vterrors.VT13001(fmt.Sprintf("error parsing column number: %s", sqlparser.String(val))) 403 } 404 if num < 1 || num > int64(len(rcs)) { 405 return 0, vterrors.VT03014(num, caller) 406 } 407 return int(num - 1), nil 408 } 409 410 // Vindex returns the vindex if the expression is a plain column reference 411 // that is part of the specified route, and has an associated vindex. 412 func (st *symtab) Vindex(expr sqlparser.Expr, scope *route) vindexes.SingleColumn { 413 col, ok := expr.(*sqlparser.ColName) 414 if !ok { 415 return nil 416 } 417 if col.Metadata == nil { 418 // Find will set the Metadata. 419 if _, _, err := st.Find(col); err != nil { 420 return nil 421 } 422 } 423 c := col.Metadata.(*column) 424 if c.Origin() != scope { 425 return nil 426 } 427 return c.vindex 428 } 429 430 // BuildColName builds a *sqlparser.ColName for the resultColumn specified 431 // by the index. The built ColName will correctly reference the resultColumn 432 // it was built from. 433 func BuildColName(rcs []*resultColumn, index int) (*sqlparser.ColName, error) { 434 alias := rcs[index].alias 435 if alias.IsEmpty() { 436 return nil, vterrors.VT12001("reference a complex expression") 437 } 438 for i, rc := range rcs { 439 if i == index { 440 continue 441 } 442 if rc.alias.Equal(alias) { 443 return nil, vterrors.VT03021(alias) 444 } 445 } 446 return &sqlparser.ColName{ 447 Metadata: rcs[index].column, 448 Name: alias, 449 }, nil 450 } 451 452 // ResolveSymbols resolves all column references against symtab. 453 // This makes sure that they all have their Metadata initialized. 454 // If a symbol cannot be resolved or if the expression contains 455 // a subquery, an error is returned. 456 func (st *symtab) ResolveSymbols(node sqlparser.SQLNode) error { 457 return sqlparser.Walk(func(currNode sqlparser.SQLNode) (kontinue bool, err error) { 458 switch currNode := currNode.(type) { 459 case *sqlparser.ColName: 460 if _, _, err := st.Find(currNode); err != nil { 461 return false, err 462 } 463 case *sqlparser.Subquery: 464 return false, vterrors.VT12001(fmt.Sprintf("subqueries disallowed in %T", node)) 465 } 466 return true, nil 467 }, node) 468 } 469 470 // table is part of symtab. 471 // It represents a table alias in a FROM clause. It points 472 // to the logicalPlan that represents it. 473 type table struct { 474 alias sqlparser.TableName 475 columns map[string]*column 476 columnNames []sqlparser.IdentifierCI 477 isAuthoritative bool 478 origin logicalPlan 479 vschemaTable *vindexes.Table 480 } 481 482 func (t *table) addColumn(alias sqlparser.IdentifierCI, c *column) { 483 if t.columns == nil { 484 t.columns = make(map[string]*column) 485 } 486 lowered := alias.Lowered() 487 // Dups are allowed, but first one wins if referenced. 488 if _, ok := t.columns[lowered]; !ok { 489 c.colNumber = len(t.columnNames) 490 t.columns[lowered] = c 491 } 492 t.columnNames = append(t.columnNames, alias) 493 } 494 495 // mergeColumn merges or creates a new column for the table. 496 // If the table is authoritative and the column doesn't already 497 // exist, it returns an error. If the table is not authoritative, 498 // the column is added if not already present. 499 func (t *table) mergeColumn(alias sqlparser.IdentifierCI, c *column) (*column, error) { 500 if t.columns == nil { 501 t.columns = make(map[string]*column) 502 } 503 lowered := alias.Lowered() 504 if col, ok := t.columns[lowered]; ok { 505 return col, nil 506 } 507 if t.isAuthoritative { 508 return nil, vterrors.VT03022(sqlparser.String(alias), sqlparser.String(t.alias)) 509 } 510 c.colNumber = len(t.columnNames) 511 t.columns[lowered] = c 512 t.columnNames = append(t.columnNames, alias) 513 return c, nil 514 } 515 516 // Origin returns the route that originates the table. 517 func (t *table) Origin() logicalPlan { 518 // If it's a route, we have to resolve it. 519 if rb, ok := t.origin.(*route); ok { 520 return rb.Resolve() 521 } 522 return t.origin 523 } 524 525 // column represents a unique symbol in the query that other 526 // parts can refer to. 527 // Every column contains the logicalPlan it originates from. 528 // If a column has associated vindexes, then the one with the 529 // lowest cost is set. 530 // 531 // Two columns are equal if their pointer values match. 532 // 533 // For subquery and vindexFunc, the colNumber is also set because 534 // the column order is known and unchangeable. 535 type column struct { 536 origin logicalPlan 537 st *symtab 538 vindex vindexes.SingleColumn 539 typ querypb.Type 540 colNumber int 541 } 542 543 // Origin returns the route that originates the column. 544 func (c *column) Origin() logicalPlan { 545 // If it's a route, we have to resolve it. 546 if rb, ok := c.origin.(*route); ok { 547 return rb.Resolve() 548 } 549 return c.origin 550 } 551 552 // resultColumn contains symbol info about a select expression. If the 553 // expression represents an underlying column, then it points to it. 554 // Otherwise, an anonymous column is created as place-holder. 555 type resultColumn struct { 556 // alias will represent the unqualified symbol name for that expression. 557 // If the statement provides an explicit alias, that name will be used. 558 // If the expression is a simple column, then the base name of the 559 // column will be used as the alias. If the expression is non-trivial, 560 // alias will be empty, and cannot be referenced from other parts of 561 // the query. 562 alias sqlparser.IdentifierCI 563 column *column 564 } 565 566 // NewResultColumn creates a new resultColumn based on the supplied expression. 567 // The created symbol is not remembered until it is later set as ResultColumns 568 // after all select expressions are analyzed. 569 func newResultColumn(expr *sqlparser.AliasedExpr, origin logicalPlan) *resultColumn { 570 rc := &resultColumn{ 571 alias: expr.As, 572 } 573 if col, ok := expr.Expr.(*sqlparser.ColName); ok { 574 // If no alias was specified, then the base name 575 // of the column becomes the alias. 576 if rc.alias.IsEmpty() { 577 rc.alias = col.Name 578 } 579 // If it's a col it should already have metadata. 580 rc.column = col.Metadata.(*column) 581 } else { 582 // We don't generate an alias if the expression is non-trivial. 583 // Just to be safe, generate an anonymous column for the expression. 584 typ, err := GetReturnType(expr.Expr) 585 rc.column = &column{ 586 origin: origin, 587 } 588 if err == nil { 589 rc.column.typ = typ 590 } 591 } 592 return rc 593 } 594 595 // GetReturnType returns the type of the select expression that MySQL will return 596 func GetReturnType(input sqlparser.Expr) (querypb.Type, error) { 597 switch node := input.(type) { 598 case *sqlparser.FuncExpr: 599 functionName := strings.ToUpper(node.Name.String()) 600 switch functionName { 601 case "ABS": 602 // Returned value depends on the return type of the input 603 if len(node.Exprs) == 1 { 604 expr, isAliasedExpr := node.Exprs[0].(*sqlparser.AliasedExpr) 605 if isAliasedExpr { 606 return GetReturnType(expr.Expr) 607 } 608 } 609 } 610 case *sqlparser.ColName: 611 col := node.Metadata.(*column) 612 return col.typ, nil 613 case *sqlparser.Count, *sqlparser.CountStar: 614 return querypb.Type_INT64, nil 615 } 616 return 0, vterrors.VT12001(fmt.Sprintf("evaluate return type for %T", input)) 617 }