// Copyright (c) 2017-2018 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package query

// #include "time_series_aggregate.h"
import "C"

import (
	"sort"
	"strings"
	"unsafe"

	"fmt"
	"github.com/uber/aresdb/memstore"
	memCom "github.com/uber/aresdb/memstore/common"
	metaCom "github.com/uber/aresdb/metastore/common"
	"github.com/uber/aresdb/query/common"
	"github.com/uber/aresdb/query/expr"
	"github.com/uber/aresdb/utils"
	"strconv"
)

// DataTypeToExprType maps data type from the column schema format to
// expression AST format.
//
// Several storage types share one expression type (all signed integers map to
// expr.Signed, unsigned integers and both enum widths map to expr.Unsigned).
// A data type that has no entry here yields the zero value of expr.Type on
// lookup, as with any Go map.
var DataTypeToExprType = map[memCom.DataType]expr.Type{
	memCom.Bool:      expr.Boolean,
	memCom.Int8:      expr.Signed,
	memCom.Int16:     expr.Signed,
	memCom.Int32:     expr.Signed,
	memCom.Int64:     expr.Signed,
	memCom.Uint8:     expr.Unsigned,
	memCom.Uint16:    expr.Unsigned,
	memCom.Uint32:    expr.Unsigned,
	memCom.Float32:   expr.Float,
	memCom.SmallEnum: expr.Unsigned,
	memCom.BigEnum:   expr.Unsigned,
	memCom.GeoPoint:  expr.GeoPoint,
	memCom.GeoShape:  expr.GeoShape,
}

// Shared message templates, aliases and limits for query compilation.
// NOTE(review): the users of these constants are outside this part of the
// file; the descriptions below are inferred from the names and values only.
const (
	// Format string taking two %s arguments (presumably the function name and
	// the offending input).
	unsupportedInputType = "unsupported input type for %s: %s"
	// Alias given to the timezone lookup table when it is joined implicitly.
	defaultTimezoneTableAlias = "__timezone_lookup"
	// Presumably the maximum number of geo shapes per query - TODO confirm.
	geoShapeLimit = 100
	// Presumably the row limit for non-aggregation queries - TODO confirm.
	nonAggregationQueryLimit = 1000
)

// constants for call names.
61 const ( 62 convertTzCallName = "convert_tz" 63 countCallName = "count" 64 dayOfWeekCallName = "dayofweek" 65 fromUnixTimeCallName = "from_unixtime" 66 geographyIntersectsCallName = "geography_intersects" 67 hexCallName = "hex" 68 // hll aggregation function applies to hll columns 69 hllCallName = "hll" 70 // countdistincthll aggregation function applies to all columns, hll value is computed on the fly 71 countDistinctHllCallName = "countdistincthll" 72 hourCallName = "hour" 73 listCallName = "" 74 maxCallName = "max" 75 minCallName = "min" 76 sumCallName = "sum" 77 avgCallName = "avg" 78 ) 79 80 // Compile returns the compiled AQLQueryContext for data feeding and query 81 // execution. Caller should check for AQLQueryContext.Error. 82 func (q *AQLQuery) Compile(store memstore.MemStore, returnHLL bool) *AQLQueryContext { 83 qc := &AQLQueryContext{Query: q, ReturnHLLData: returnHLL} 84 85 // processTimezone might append additional joins 86 qc.processTimezone() 87 if qc.Error != nil { 88 return qc 89 } 90 91 // Read schema for every table used. 92 qc.readSchema(store) 93 defer qc.releaseSchema() 94 if qc.Error != nil { 95 return qc 96 } 97 98 // Parse all other SQL expressions to ASTs. 99 qc.parseExprs() 100 if qc.Error != nil { 101 return qc 102 } 103 104 // Resolve data types in the ASTs against schema, also translate enum values. 105 qc.resolveTypes() 106 if qc.Error != nil { 107 return qc 108 } 109 110 // Process join conditions first to collect information about geo join. 111 qc.processJoinConditions() 112 if qc.Error != nil { 113 return qc 114 } 115 116 // Identify prefilters. 117 qc.matchPrefilters() 118 119 // Process filters. 120 qc.processFilters() 121 if qc.Error != nil { 122 return qc 123 } 124 125 // Process measure and dimensions. 
126 qc.processMeasure() 127 if qc.Error != nil { 128 return qc 129 } 130 qc.processDimensions() 131 if qc.Error != nil { 132 return qc 133 } 134 135 qc.sortUsedColumns() 136 137 qc.sortDimensionColumns() 138 if qc.Error != nil { 139 return qc 140 } 141 142 // TODO: VM instruction generation 143 return qc 144 } 145 146 // adjustFilterToTimeFilter try to find one rowfilter to be time filter if there is no timefilter for fact table query 147 func (qc *AQLQueryContext) adjustFilterToTimeFilter() { 148 toBeRemovedFilters := []int{} 149 timeFilter := TimeFilter{} 150 for i, filter := range qc.Query.filters { 151 if e, ok := filter.(*expr.BinaryExpr); ok { 152 lhs, isCol := e.LHS.(*expr.VarRef) 153 if !isCol { 154 continue 155 } 156 157 // check if this filter on main table event time column 158 tableID, columnID, err := qc.resolveColumn(lhs.Val) 159 if err != nil || tableID != 0 || columnID != 0 { 160 continue 161 } 162 163 val := "" 164 // only support number literal or string literal 165 switch rhs := e.RHS.(type) { 166 case *expr.NumberLiteral: 167 val = rhs.String() 168 case *expr.StringLiteral: 169 val = rhs.Val 170 } 171 if val == "" { 172 continue 173 } 174 175 switch e.Op { 176 case expr.LT: 177 if timeFilter.To == "" { 178 // only convert first LT 179 timeFilter.To = val 180 toBeRemovedFilters = append(toBeRemovedFilters, i) 181 } else { 182 qc.Error = utils.StackError(nil, "Only one '<' filter allowed for event time column") 183 return 184 } 185 case expr.GTE: 186 if timeFilter.From == "" { 187 // only convert first GTE 188 timeFilter.From = val 189 toBeRemovedFilters = append(toBeRemovedFilters, i) 190 } else { 191 qc.Error = utils.StackError(nil, "Only one '>=' filter allowed for event time column") 192 return 193 } 194 } 195 } 196 } 197 if timeFilter.From != "" || timeFilter.To != "" { 198 // processTimeFilter will handle the from is nil case 199 if qc.fromTime, qc.toTime, qc.Error = parseTimeFilter(timeFilter, qc.fixedTimezone, utils.Now()); qc.Error != nil 
{ 200 return 201 } 202 // remove from original query filter 203 for i := len(toBeRemovedFilters) - 1; i >= 0; i-- { 204 index := toBeRemovedFilters[i] 205 qc.Query.filters = append(qc.Query.filters[:index], qc.Query.filters[index+1:]...) 206 } 207 } 208 } 209 210 func (qc *AQLQueryContext) processJoinConditions() { 211 if len(qc.Query.Joins) > 8 { 212 qc.Error = utils.StackError(nil, "At most %d foreign tables allowed, got: %d", 8, len(qc.Query.Joins)) 213 return 214 } 215 216 qc.OOPK.foreignTables = make([]*foreignTable, len(qc.Query.Joins)) 217 mainTableSchema := qc.TableSchemaByName[qc.Query.Table] 218 for joinTableID, join := range qc.Query.Joins { 219 joinSchema := qc.TableSchemaByName[join.Table] 220 if isGeoJoin(join) { 221 if qc.OOPK.geoIntersection != nil { 222 qc.Error = utils.StackError(nil, "At most one geo join allowed") 223 return 224 } 225 qc.matchGeoJoin(joinTableID, mainTableSchema, joinSchema, join.conditions) 226 if qc.Error != nil { 227 return 228 } 229 } else { 230 // we will extract the geo join out of the join conditions since we are going to handle geo intersects 231 // as filter instead of an equal join. 232 qc.OOPK.foreignTables[joinTableID] = &foreignTable{} 233 qc.matchEqualJoin(joinTableID, joinSchema, join.conditions) 234 if qc.Error != nil { 235 return 236 } 237 } 238 } 239 } 240 241 // matchGeoJoin initializes the GeoIntersection struct for later query process use. For now only one geo join is 242 // allowed per query. If users want to intersect with multiple geo join conditions, they should specify multiple geo 243 // shapeLatLongs in the geo filter. 244 // There are following constrictions: 245 // 1. At most one geo join condition. 246 // 2. Geo table must be dimension table. 247 // 3. The join condition must include exactly one shape column and one point column. 248 // 4. Exactly one geo filter should be specified. 249 // 5. Geo filter column must be the primary key of the geo table. 250 // 6. Geo UUIDs must be string in query. 
251 // 7. Geo filter operator must be EQ or IN 252 // 8. Geo table's fields are not allowed in measures. 253 // 9. Only one geo dimension allowed. 254 func (qc *AQLQueryContext) matchGeoJoin(joinTableID int, mainTableSchema *memstore.TableSchema, 255 joinSchema *memstore.TableSchema, conditions []expr.Expr) { 256 if len(conditions) != 1 { 257 qc.Error = utils.StackError(nil, "At most one join condition allowed per geo join") 258 return 259 } 260 261 if joinSchema.Schema.IsFactTable { 262 qc.Error = utils.StackError(nil, "Only dimension table is allowed in geo join") 263 return 264 } 265 266 // one foreign table primary key columns only. 267 if len(joinSchema.Schema.PrimaryKeyColumns) > 1 { 268 qc.Error = utils.StackError(nil, "Composite primary key for geo table is not allowed") 269 return 270 } 271 272 c, _ := conditions[0].(*expr.Call) 273 274 // guaranteed by query rewrite. 275 shape, _ := c.Args[0].(*expr.VarRef) 276 point, _ := c.Args[1].(*expr.VarRef) 277 278 if shape.TableID != joinTableID+1 { 279 qc.Error = utils.StackError(nil, "Only shape in geo table can be referenced as join condition") 280 return 281 } 282 283 qc.OOPK.geoIntersection = &geoIntersection{ 284 shapeTableID: shape.TableID, 285 shapeColumnID: shape.ColumnID, 286 pointTableID: point.TableID, 287 pointColumnID: point.ColumnID, 288 dimIndex: -1, 289 inOrOut: true, 290 } 291 292 // Set column usage for geo points. 293 expr.Walk(columnUsageCollector{ 294 tableScanners: qc.TableScanners, 295 usages: columnUsedByAllBatches, 296 }, point) 297 } 298 299 func isGeoJoin(j Join) bool { 300 if len(j.conditions) >= 1 { 301 c, ok := j.conditions[0].(*expr.Call) 302 if !ok { 303 return false 304 } 305 return c.Name == geographyIntersectsCallName 306 } 307 return false 308 } 309 310 // list of join conditions enforced for now 311 // 1. equi-join only 312 // 2. many-to-one join only 313 // 3. foreign table must be a dimension table 314 // 4. one foreign table primary key columns only 315 // 5. 
foreign table primary key can have only one column 316 // 6. every foreign table must be joined directly to the main table, i.e. no bridges? 317 // 7. up to 8 foreign tables 318 func (qc *AQLQueryContext) matchEqualJoin(joinTableID int, joinSchema *memstore.TableSchema, conditions []expr.Expr) { 319 if len(conditions) != 1 { 320 qc.Error = utils.StackError(nil, "%d join conditions expected, got %d", 1, len(conditions)) 321 return 322 } 323 324 // foreign table must be a dimension table 325 if joinSchema.Schema.IsFactTable { 326 qc.Error = utils.StackError(nil, "join table %s is fact table, only dimension table supported", qc.Query.Table) 327 return 328 } 329 330 // one foreign table primary key columns only 331 if len(joinSchema.Schema.PrimaryKeyColumns) > 1 { 332 qc.Error = utils.StackError(nil, "composite key not supported") 333 return 334 } 335 336 // equi-join only 337 e, ok := conditions[0].(*expr.BinaryExpr) 338 if !ok { 339 qc.Error = utils.StackError(nil, "binary expression expected, got %s", conditions[0].String()) 340 return 341 } 342 if e.Op != expr.EQ { 343 qc.Error = utils.StackError(nil, "equal join expected, got %s", e.Op.String()) 344 return 345 } 346 347 left, ok := e.LHS.(*expr.VarRef) 348 if !ok { 349 qc.Error = utils.StackError(nil, "column in join condition expected, got %s", e.LHS.String()) 350 return 351 } 352 353 right, ok := e.RHS.(*expr.VarRef) 354 if !ok { 355 qc.Error = utils.StackError(nil, "column in join condition expected, got %s", e.RHS.String()) 356 return 357 } 358 359 // main table at left and foreign table at right 360 if left.TableID != 0 { 361 left, right = right, left 362 } 363 364 // every foreign table must be joined directly to the main table 365 if left.TableID != 0 || right.TableID != joinTableID+1 { 366 qc.Error = utils.StackError(nil, "foreign table must be joined directly to the main table, join condition: %s", e.String()) 367 return 368 } 369 370 // many-to-one join only (join with foreign table's primary key) 371 if 
joinSchema.Schema.PrimaryKeyColumns[0] != right.ColumnID { 372 qc.Error = utils.StackError(nil, "join column is not primary key of foreign table") 373 return 374 } 375 376 qc.OOPK.foreignTables[joinTableID].remoteJoinColumn = left 377 // set column usage for join column in main table 378 // no need to set usage for remote join column in foreign table since 379 // we only use primary key of foreign table to join 380 expr.Walk(columnUsageCollector{ 381 tableScanners: qc.TableScanners, 382 usages: columnUsedByAllBatches, 383 }, left) 384 } 385 386 func (qc *AQLQueryContext) parseExprs() { 387 var err error 388 389 // Join conditions. 390 for i, join := range qc.Query.Joins { 391 join.conditions = make([]expr.Expr, len(join.Conditions)) 392 for j, cond := range join.Conditions { 393 join.conditions[j], err = expr.ParseExpr(cond) 394 if err != nil { 395 qc.Error = utils.StackError(err, "Failed to parse join condition: %s", cond) 396 return 397 } 398 } 399 qc.Query.Joins[i] = join 400 } 401 402 qc.fromTime, qc.toTime, qc.Error = parseTimeFilter(qc.Query.TimeFilter, qc.fixedTimezone, utils.Now()) 403 if qc.Error != nil { 404 return 405 } 406 407 // Filters. 408 qc.Query.filters = make([]expr.Expr, len(qc.Query.Filters)) 409 for i, filter := range qc.Query.Filters { 410 qc.Query.filters[i], err = expr.ParseExpr(filter) 411 if err != nil { 412 qc.Error = utils.StackError(err, "Failed to parse filter %s", filter) 413 return 414 } 415 } 416 if qc.fromTime == nil && qc.toTime == nil && len(qc.TableScanners) > 0 && qc.TableScanners[0].Schema.Schema.IsFactTable { 417 qc.adjustFilterToTimeFilter() 418 if qc.Error != nil { 419 return 420 } 421 } 422 423 // Dimensions. 
424 rawDimensions := qc.Query.Dimensions 425 qc.Query.Dimensions = []Dimension{} 426 for _, dim := range rawDimensions { 427 dim.TimeBucketizer = strings.Trim(dim.TimeBucketizer, " ") 428 if dim.TimeBucketizer != "" { 429 // make sure time column is defined 430 if dim.Expr == "" { 431 qc.Error = utils.StackError(err, "Failed to parse TimeSeriesBucketizer '%s' since time column is empty ", dim.TimeBucketizer) 432 return 433 } 434 435 timeColumnExpr, err := expr.ParseExpr(dim.Expr) 436 if err != nil { 437 qc.Error = utils.StackError(err, "Failed to parse timeColumn '%s'", dim.Expr) 438 return 439 } 440 441 dim.expr, err = qc.buildTimeDimensionExpr(dim.TimeBucketizer, timeColumnExpr) 442 if err != nil { 443 qc.Error = utils.StackError(err, "Failed to parse dimension: %s", dim.TimeBucketizer) 444 return 445 } 446 qc.Query.Dimensions = append(qc.Query.Dimensions, dim) 447 } else { 448 // dimension is defined as sqlExpression 449 dim.expr, err = expr.ParseExpr(dim.Expr) 450 if err != nil { 451 qc.Error = utils.StackError(err, "Failed to parse dimension: %s", dim.Expr) 452 return 453 } 454 if _, ok := dim.expr.(*expr.Wildcard); ok { 455 qc.Query.Dimensions = append(qc.Query.Dimensions, qc.getAllColumnsDimension()...) 456 } else { 457 qc.Query.Dimensions = append(qc.Query.Dimensions, dim) 458 } 459 } 460 } 461 462 // Measures. 
463 for i, measure := range qc.Query.Measures { 464 measure.expr, err = expr.ParseExpr(measure.Expr) 465 if err != nil { 466 qc.Error = utils.StackError(err, "Failed to parse measure: %s", measure.Expr) 467 return 468 } 469 measure.filters = make([]expr.Expr, len(measure.Filters)) 470 for j, filter := range measure.Filters { 471 measure.filters[j], err = expr.ParseExpr(filter) 472 if err != nil { 473 qc.Error = utils.StackError(err, "Failed to parse measure filter %s", filter) 474 return 475 } 476 } 477 qc.Query.Measures[i] = measure 478 } 479 } 480 481 func (qc *AQLQueryContext) processTimezone() { 482 if timezoneColumn, joinKey, success := parseTimezoneColumnString(qc.Query.Timezone); success { 483 timezoneTable := utils.GetConfig().Query.TimezoneTable.TableName 484 qc.timezoneTable.tableColumn = timezoneColumn 485 for _, join := range qc.Query.Joins { 486 if join.Table == timezoneTable { 487 qc.timezoneTable.tableAlias = join.Alias 488 } 489 } 490 // append timezone table to joins 491 if qc.timezoneTable.tableAlias == "" { 492 qc.timezoneTable.tableAlias = defaultTimezoneTableAlias 493 qc.Query.Joins = append(qc.Query.Joins, Join{ 494 Table: timezoneTable, 495 Alias: defaultTimezoneTableAlias, 496 Conditions: []string{fmt.Sprintf("%s=%s.id", joinKey, defaultTimezoneTableAlias)}, 497 }) 498 } 499 } else { 500 loc, err := parseTimezone(qc.Query.Timezone) 501 if err != nil { 502 qc.Error = utils.StackError(err, "timezone Failed to parse: %s", qc.Query.Timezone) 503 return 504 } 505 qc.fixedTimezone = loc 506 } 507 } 508 509 func (qc *AQLQueryContext) readSchema(store memstore.MemStore) { 510 qc.TableScanners = make([]*TableScanner, 1+len(qc.Query.Joins)) 511 qc.TableIDByAlias = make(map[string]int) 512 qc.TableSchemaByName = make(map[string]*memstore.TableSchema) 513 514 store.RLock() 515 defer store.RUnlock() 516 517 // Main table. 
518 schema := store.GetSchemas()[qc.Query.Table] 519 if schema == nil { 520 qc.Error = utils.StackError(nil, "unknown main table %s", qc.Query.Table) 521 return 522 } 523 qc.TableSchemaByName[qc.Query.Table] = schema 524 schema.RLock() 525 qc.TableScanners[0] = &TableScanner{} 526 qc.TableScanners[0].Schema = schema 527 qc.TableScanners[0].Shards = []int{0} 528 qc.TableScanners[0].ColumnUsages = make(map[int]columnUsage) 529 if schema.Schema.IsFactTable { 530 // Archiving cutoff filter usage for fact table. 531 qc.TableScanners[0].ColumnUsages[0] = columnUsedByLiveBatches 532 } 533 qc.TableIDByAlias[qc.Query.Table] = 0 534 535 // Foreign tables. 536 for i, join := range qc.Query.Joins { 537 schema = store.GetSchemas()[join.Table] 538 if schema == nil { 539 qc.Error = utils.StackError(nil, "unknown join table %s", join.Table) 540 return 541 } 542 543 if qc.TableSchemaByName[join.Table] == nil { 544 qc.TableSchemaByName[join.Table] = schema 545 // Prevent double locking. 546 schema.RLock() 547 } 548 549 qc.TableScanners[1+i] = &TableScanner{} 550 qc.TableScanners[1+i].Schema = schema 551 qc.TableScanners[1+i].Shards = []int{0} 552 qc.TableScanners[1+i].ColumnUsages = make(map[int]columnUsage) 553 if schema.Schema.IsFactTable { 554 // Archiving cutoff filter usage for fact table. 555 qc.TableScanners[1+i].ColumnUsages[0] = columnUsedByLiveBatches 556 } 557 558 alias := join.Alias 559 if alias == "" { 560 alias = join.Table 561 } 562 _, exists := qc.TableIDByAlias[alias] 563 if exists { 564 qc.Error = utils.StackError(nil, "table alias %s is redefined", alias) 565 return 566 } 567 qc.TableIDByAlias[alias] = 1 + i 568 } 569 } 570 571 func (qc *AQLQueryContext) releaseSchema() { 572 for _, schema := range qc.TableSchemaByName { 573 schema.RUnlock() 574 } 575 } 576 577 // resolveColumn resolves the VarRef identifier against the schema, 578 // and returns the matched tableID (query scoped) and columnID (schema scoped). 
579 func (qc *AQLQueryContext) resolveColumn(identifier string) (int, int, error) { 580 tableAlias := qc.Query.Table 581 column := identifier 582 segments := strings.SplitN(identifier, ".", 2) 583 if len(segments) == 2 { 584 tableAlias = segments[0] 585 column = segments[1] 586 } 587 588 tableID, exists := qc.TableIDByAlias[tableAlias] 589 if !exists { 590 return 0, 0, utils.StackError(nil, "unknown table alias %s", tableAlias) 591 } 592 593 columnID, exists := qc.TableScanners[tableID].Schema.ColumnIDs[column] 594 if !exists { 595 return 0, 0, utils.StackError(nil, "unknown column %s for table alias %s", 596 column, tableAlias) 597 } 598 599 return tableID, columnID, nil 600 } 601 602 // cast returns an expression that casts the input to the desired type. 603 // The returned expression AST will be used directly for VM instruction 604 // generation of the desired types. 605 func cast(e expr.Expr, t expr.Type) expr.Expr { 606 // Input type is already desired. 607 if e.Type() == t { 608 return e 609 } 610 // Type casting is only required if at least one side is float. 611 // We do not cast (or check for overflow) among boolean, signed and unsigned. 612 if e.Type() != expr.Float && t != expr.Float { 613 return e 614 } 615 // Data type for NumberLiteral can be changed directly. 616 l, _ := e.(*expr.NumberLiteral) 617 if l != nil { 618 l.ExprType = t 619 return l 620 } 621 // Use ParenExpr to respresent a VM type cast. 
622 return &expr.ParenExpr{Expr: e, ExprType: t} 623 } 624 625 func blockNumericOpsForColumnOverFourBytes(token expr.Token, expressions ...expr.Expr) error { 626 if token == expr.UNARY_MINUS || token == expr.BITWISE_NOT || 627 (token >= expr.ADD && token <= expr.BITWISE_LEFT_SHIFT) { 628 for _, expression := range expressions { 629 if varRef, isVarRef := expression.(*expr.VarRef); isVarRef && memCom.DataTypeBytes(varRef.DataType) > 4 { 630 return utils.StackError(nil, "numeric operations not supported for column over 4 bytes length, got %s", expression.String()) 631 } 632 } 633 } 634 return nil 635 } 636 637 func isUUIDColumn(expression expr.Expr) bool { 638 if varRef, ok := expression.(*expr.VarRef); ok { 639 return varRef.DataType == memCom.UUID 640 } 641 return false 642 } 643 644 // Rewrite walks the expresison AST and resolves data types bottom up. 645 // In addition it also translates enum strings and rewrites their predicates. 646 func (qc *AQLQueryContext) Rewrite(expression expr.Expr) expr.Expr { 647 switch e := expression.(type) { 648 case *expr.ParenExpr: 649 // Strip parenthesis from the input 650 return e.Expr 651 case *expr.VarRef: 652 tableID, columnID, err := qc.resolveColumn(e.Val) 653 if err != nil { 654 qc.Error = err 655 return expression 656 } 657 column := qc.TableScanners[tableID].Schema.Schema.Columns[columnID] 658 if column.Deleted { 659 qc.Error = utils.StackError(nil, "column %s of table %s has been deleted", 660 column.Name, qc.TableScanners[tableID].Schema.Schema.Name) 661 return expression 662 } 663 dataType := qc.TableScanners[tableID].Schema.ValueTypeByColumn[columnID] 664 e.ExprType = DataTypeToExprType[dataType] 665 e.TableID = tableID 666 e.ColumnID = columnID 667 dict := qc.TableScanners[tableID].Schema.EnumDicts[column.Name] 668 e.EnumDict = dict.Dict 669 e.EnumReverseDict = dict.ReverseDict 670 e.DataType = dataType 671 e.IsHLLColumn = column.HLLConfig.IsHLLColumn 672 case *expr.UnaryExpr: 673 if isUUIDColumn(e.Expr) && e.Op != 
expr.GET_HLL_VALUE { 674 qc.Error = utils.StackError(nil, "uuid column type only supports countdistincthll unary expression") 675 return expression 676 } 677 678 if err := blockNumericOpsForColumnOverFourBytes(e.Op, e.Expr); err != nil { 679 qc.Error = err 680 return expression 681 } 682 683 e.ExprType = e.Expr.Type() 684 switch e.Op { 685 case expr.EXCLAMATION, expr.NOT, expr.IS_FALSE: 686 e.ExprType = expr.Boolean 687 // Normalize the operator. 688 e.Op = expr.NOT 689 e.Expr = cast(e.Expr, expr.Boolean) 690 childExpr := e.Expr 691 callRef, isCallRef := childExpr.(*expr.Call) 692 if isCallRef && callRef.Name == geographyIntersectsCallName { 693 qc.Error = utils.StackError(nil, "Not %s condition is not allowed", geographyIntersectsCallName) 694 break 695 } 696 case expr.UNARY_MINUS: 697 // Upgrade to signed. 698 if e.ExprType < expr.Signed { 699 e.ExprType = expr.Signed 700 } 701 case expr.IS_NULL, expr.IS_NOT_NULL: 702 e.ExprType = expr.Boolean 703 case expr.IS_TRUE: 704 // Strip IS_TRUE if child is already boolean. 705 if e.Expr.Type() == expr.Boolean { 706 return e.Expr 707 } 708 // Rewrite to NOT(NOT(child)). 709 e.ExprType = expr.Boolean 710 e.Op = expr.NOT 711 e.Expr = cast(e.Expr, expr.Boolean) 712 return &expr.UnaryExpr{Expr: e, Op: expr.NOT, ExprType: expr.Boolean} 713 case expr.BITWISE_NOT: 714 // Cast child to unsigned. 715 e.ExprType = expr.Unsigned 716 e.Expr = cast(e.Expr, expr.Unsigned) 717 case expr.GET_MONTH_START, expr.GET_QUARTER_START, expr.GET_YEAR_START, expr.GET_WEEK_START: 718 // Cast child to unsigned. 719 e.ExprType = expr.Unsigned 720 e.Expr = cast(e.Expr, expr.Unsigned) 721 case expr.GET_DAY_OF_MONTH, expr.GET_DAY_OF_YEAR, expr.GET_MONTH_OF_YEAR, expr.GET_QUARTER_OF_YEAR: 722 // Cast child to unsigned. 
723 e.ExprType = expr.Unsigned 724 e.Expr = cast(e.Expr, expr.Unsigned) 725 case expr.GET_HLL_VALUE: 726 e.ExprType = expr.Unsigned 727 e.Expr = cast(e.Expr, expr.Unsigned) 728 default: 729 qc.Error = utils.StackError(nil, "unsupported unary expression %s", 730 e.String()) 731 } 732 case *expr.BinaryExpr: 733 if err := blockNumericOpsForColumnOverFourBytes(e.Op, e.LHS, e.RHS); err != nil { 734 qc.Error = err 735 return expression 736 } 737 738 if e.Op != expr.EQ && e.Op != expr.NEQ { 739 _, isRHSStr := e.RHS.(*expr.StringLiteral) 740 _, isLHSStr := e.LHS.(*expr.StringLiteral) 741 if isRHSStr || isLHSStr { 742 qc.Error = utils.StackError(nil, "string type only support EQ and NEQ operators") 743 return expression 744 } 745 } 746 highestType := e.LHS.Type() 747 if e.RHS.Type() > highestType { 748 highestType = e.RHS.Type() 749 } 750 switch e.Op { 751 case expr.ADD, expr.SUB: 752 // Upgrade and cast to highestType. 753 e.ExprType = highestType 754 if highestType == expr.Float { 755 e.LHS = cast(e.LHS, expr.Float) 756 e.RHS = cast(e.RHS, expr.Float) 757 } else if e.Op == expr.SUB { 758 // For lhs - rhs, upgrade to signed at least. 759 e.ExprType = expr.Signed 760 } 761 case expr.MUL, expr.MOD: 762 // Upgrade and cast to highestType. 763 e.ExprType = highestType 764 e.LHS = cast(e.LHS, highestType) 765 e.RHS = cast(e.RHS, highestType) 766 case expr.DIV: 767 // Upgrade and cast to float. 768 e.ExprType = expr.Float 769 e.LHS = cast(e.LHS, expr.Float) 770 e.RHS = cast(e.RHS, expr.Float) 771 case expr.BITWISE_AND, expr.BITWISE_OR, expr.BITWISE_XOR, 772 expr.BITWISE_LEFT_SHIFT, expr.BITWISE_RIGHT_SHIFT, expr.FLOOR, expr.CONVERT_TZ: 773 // Cast to unsigned. 774 e.ExprType = expr.Unsigned 775 e.LHS = cast(e.LHS, expr.Unsigned) 776 e.RHS = cast(e.RHS, expr.Unsigned) 777 case expr.AND, expr.OR: 778 // Cast to boolean. 
779 e.ExprType = expr.Boolean 780 e.LHS = cast(e.LHS, expr.Boolean) 781 e.RHS = cast(e.RHS, expr.Boolean) 782 case expr.LT, expr.LTE, expr.GT, expr.GTE: 783 // Cast to boolean. 784 e.ExprType = expr.Boolean 785 e.LHS = cast(e.LHS, highestType) 786 e.RHS = cast(e.RHS, highestType) 787 case expr.NEQ, expr.EQ: 788 // swap lhs and rhs if rhs is VarRef but lhs is not. 789 if _, lhsVarRef := e.LHS.(*expr.VarRef); !lhsVarRef { 790 if _, rhsVarRef := e.RHS.(*expr.VarRef); rhsVarRef { 791 e.LHS, e.RHS = e.RHS, e.LHS 792 } 793 } 794 795 e.ExprType = expr.Boolean 796 // Match enum = 'case' and enum != 'case'. 797 798 lhs, _ := e.LHS.(*expr.VarRef) 799 // rhs is bool 800 rhsBool, _ := e.RHS.(*expr.BooleanLiteral) 801 if lhs != nil && rhsBool != nil { 802 if (e.Op == expr.EQ && rhsBool.Val) || (e.Op == expr.NEQ && !rhsBool.Val) { 803 return &expr.UnaryExpr{Expr: lhs, Op: expr.IS_TRUE, ExprType: expr.Boolean} 804 } 805 return &expr.UnaryExpr{Expr: lhs, Op: expr.NOT, ExprType: expr.Boolean} 806 } 807 808 // rhs is string enum 809 rhs, _ := e.RHS.(*expr.StringLiteral) 810 if lhs != nil && rhs != nil && lhs.EnumDict != nil { 811 // Enum dictionary translation 812 value, exists := lhs.EnumDict[rhs.Val] 813 if !exists { 814 // Combination of nullable data with not/and/or operators on top makes 815 // short circuiting hard. 816 // To play it safe we match against an invalid value. 817 value = -1 818 } 819 e.RHS = &expr.NumberLiteral{Int: value, ExprType: expr.Unsigned} 820 } else { 821 // Cast to highestType. 
822 e.LHS = cast(e.LHS, highestType) 823 e.RHS = cast(e.RHS, highestType) 824 } 825 826 if rhs != nil && lhs.DataType == memCom.GeoPoint { 827 if val, err := memCom.GeoPointFromString(rhs.Val); err != nil { 828 qc.Error = err 829 } else { 830 e.RHS = &expr.GeopointLiteral{ 831 Val: val, 832 } 833 } 834 } 835 case expr.IN: 836 return qc.expandINop(e) 837 case expr.NOT_IN: 838 return &expr.UnaryExpr{ 839 Op: expr.NOT, 840 Expr: qc.expandINop(e), 841 } 842 default: 843 qc.Error = utils.StackError(nil, "unsupported binary expression %s", 844 e.String()) 845 } 846 case *expr.Call: 847 e.Name = strings.ToLower(e.Name) 848 switch e.Name { 849 case convertTzCallName: 850 if len(e.Args) != 3 { 851 qc.Error = utils.StackError( 852 nil, "convert_tz must have 3 arguments", 853 ) 854 break 855 } 856 fromTzStringExpr, isStrLiteral := e.Args[1].(*expr.StringLiteral) 857 if !isStrLiteral { 858 qc.Error = utils.StackError(nil, "2nd argument of convert_tz must be a string") 859 break 860 } 861 toTzStringExpr, isStrLiteral := e.Args[2].(*expr.StringLiteral) 862 if !isStrLiteral { 863 qc.Error = utils.StackError(nil, "3rd argument of convert_tz must be a string") 864 break 865 } 866 fromTz, err := parseTimezone(fromTzStringExpr.Val) 867 if err != nil { 868 qc.Error = utils.StackError(err, "failed to rewrite convert_tz") 869 break 870 } 871 toTz, err := parseTimezone(toTzStringExpr.Val) 872 if err != nil { 873 qc.Error = utils.StackError(err, "failed to rewrite convert_tz") 874 break 875 } 876 _, fromOffsetInSeconds := utils.Now().In(fromTz).Zone() 877 _, toOffsetInSeconds := utils.Now().In(toTz).Zone() 878 offsetInSeconds := toOffsetInSeconds - fromOffsetInSeconds 879 return &expr.BinaryExpr{ 880 Op: expr.ADD, 881 LHS: e.Args[0], 882 RHS: &expr.NumberLiteral{ 883 Int: offsetInSeconds, 884 Expr: strconv.Itoa(offsetInSeconds), 885 ExprType: expr.Unsigned, 886 }, 887 ExprType: expr.Unsigned, 888 } 889 case countCallName: 890 e.ExprType = expr.Unsigned 891 case dayOfWeekCallName: 892 // 
dayofweek from ts: (ts / secondsInDay + 4) % 7 + 1 893 // ref: https://dev.mysql.com/doc/refman/5.5/en/date-and-time-functions.html#function_dayofweek 894 if len(e.Args) != 1 { 895 qc.Error = utils.StackError(nil, "dayofweek takes exactly 1 argument") 896 break 897 } 898 tsExpr := e.Args[0] 899 return &expr.BinaryExpr{ 900 Op: expr.ADD, 901 ExprType: expr.Unsigned, 902 RHS: &expr.NumberLiteral{ 903 Int: 1, 904 Expr: "1", 905 ExprType: expr.Unsigned, 906 }, 907 LHS: &expr.BinaryExpr{ 908 Op: expr.MOD, 909 ExprType: expr.Unsigned, 910 RHS: &expr.NumberLiteral{ 911 Int: common.DaysPerWeek, 912 Expr: strconv.Itoa(common.DaysPerWeek), 913 ExprType: expr.Unsigned, 914 }, 915 LHS: &expr.BinaryExpr{ 916 Op: expr.ADD, 917 ExprType: expr.Unsigned, 918 RHS: &expr.NumberLiteral{ 919 // offset for 920 Int: common.WeekdayOffset, 921 Expr: strconv.Itoa(common.WeekdayOffset), 922 ExprType: expr.Unsigned, 923 }, 924 LHS: &expr.BinaryExpr{ 925 Op: expr.DIV, 926 ExprType: expr.Unsigned, 927 RHS: &expr.NumberLiteral{ 928 Int: common.SecondsPerDay, 929 Expr: strconv.Itoa(common.SecondsPerDay), 930 ExprType: expr.Unsigned, 931 }, 932 LHS: tsExpr, 933 }, 934 }, 935 }, 936 } 937 // no-op, this will be over written 938 case fromUnixTimeCallName: 939 // for now, only the following format is allowed for backward compatibility 940 // from_unixtime(time_col / 1000) 941 timeColumnDivideErrMsg := "from_unixtime must be time column / 1000" 942 timeColDivide, isBinary := e.Args[0].(*expr.BinaryExpr) 943 if !isBinary || timeColDivide.Op != expr.DIV { 944 qc.Error = utils.StackError(nil, timeColumnDivideErrMsg) 945 break 946 } 947 divisor, isLiteral := timeColDivide.RHS.(*expr.NumberLiteral) 948 if !isLiteral || divisor.Int != 1000 { 949 qc.Error = utils.StackError(nil, timeColumnDivideErrMsg) 950 break 951 } 952 if par, isParen := timeColDivide.LHS.(*expr.ParenExpr); isParen { 953 timeColDivide.LHS = par.Expr 954 } 955 timeColExpr, isVarRef := timeColDivide.LHS.(*expr.VarRef) 956 if !isVarRef { 957 
qc.Error = utils.StackError(nil, timeColumnDivideErrMsg) 958 break 959 } 960 return timeColExpr 961 case hourCallName: 962 if len(e.Args) != 1 { 963 qc.Error = utils.StackError(nil, "hour takes exactly 1 argument") 964 break 965 } 966 // hour(ts) = (ts % secondsInDay) / secondsInHour 967 return &expr.BinaryExpr{ 968 Op: expr.DIV, 969 ExprType: expr.Unsigned, 970 LHS: &expr.BinaryExpr{ 971 Op: expr.MOD, 972 LHS: e.Args[0], 973 RHS: &expr.NumberLiteral{ 974 Expr: strconv.Itoa(common.SecondsPerDay), 975 Int: common.SecondsPerDay, 976 ExprType: expr.Unsigned, 977 }, 978 }, 979 RHS: &expr.NumberLiteral{ 980 Expr: strconv.Itoa(common.SecondsPerHour), 981 Int: common.SecondsPerHour, 982 ExprType: expr.Unsigned, 983 }, 984 } 985 // list of literals, no need to cast it for now. 986 case listCallName: 987 case geographyIntersectsCallName: 988 if len(e.Args) != 2 { 989 qc.Error = utils.StackError( 990 nil, "expect 2 argument for %s, but got %s", e.Name, e.String()) 991 break 992 } 993 994 lhsRef, isVarRef := e.Args[0].(*expr.VarRef) 995 if !isVarRef || (lhsRef.DataType != memCom.GeoShape && lhsRef.DataType != memCom.GeoPoint) { 996 qc.Error = utils.StackError( 997 nil, "expect argument to be a valid geo shape or geo point column for %s, but got %s of type %s", 998 e.Name, e.Args[0].String(), memCom.DataTypeName[lhsRef.DataType]) 999 break 1000 } 1001 1002 lhsGeoPoint := lhsRef.DataType == memCom.GeoPoint 1003 1004 rhsRef, isVarRef := e.Args[1].(*expr.VarRef) 1005 if !isVarRef || (rhsRef.DataType != memCom.GeoShape && rhsRef.DataType != memCom.GeoPoint) { 1006 qc.Error = utils.StackError( 1007 nil, "expect argument to be a valid geo shape or geo point column for %s, but got %s of type %s", 1008 e.Name, e.Args[1].String(), memCom.DataTypeName[rhsRef.DataType]) 1009 break 1010 } 1011 1012 rhsGeoPoint := rhsRef.DataType == memCom.GeoPoint 1013 1014 if lhsGeoPoint == rhsGeoPoint { 1015 qc.Error = utils.StackError( 1016 nil, "expect exactly one geo shape column and one geo point 
column for %s, got %s", 1017 e.Name, e.String()) 1018 break 1019 } 1020 1021 // Switch geo point so that lhs is geo shape and rhs is geo point 1022 if lhsGeoPoint { 1023 e.Args[0], e.Args[1] = e.Args[1], e.Args[0] 1024 } 1025 1026 e.ExprType = expr.Boolean 1027 case hexCallName: 1028 if len(e.Args) != 1 { 1029 qc.Error = utils.StackError( 1030 nil, "expect 1 argument for %s, but got %s", e.Name, e.String()) 1031 break 1032 } 1033 colRef, isVarRef := e.Args[0].(*expr.VarRef) 1034 if !isVarRef || colRef.DataType != memCom.UUID { 1035 qc.Error = utils.StackError( 1036 nil, "expect 1 argument to be a valid uuid column for %s, but got %s of type %s", 1037 e.Name, e.Args[0].String(), memCom.DataTypeName[colRef.DataType]) 1038 break 1039 } 1040 e.ExprType = e.Args[0].Type() 1041 case countDistinctHllCallName: 1042 if len(e.Args) != 1 { 1043 qc.Error = utils.StackError( 1044 nil, "expect 1 argument for %s, but got %s", e.Name, e.String()) 1045 break 1046 } 1047 colRef, isVarRef := e.Args[0].(*expr.VarRef) 1048 if !isVarRef { 1049 qc.Error = utils.StackError( 1050 nil, "expect 1 argument to be a column for %s", e.Name) 1051 break 1052 } 1053 1054 e.Name = hllCallName 1055 // 1. noop when column itself is hll column 1056 // 2. 
compute hll on the fly when column is not hll column 1057 if !colRef.IsHLLColumn { 1058 e.Args[0] = &expr.UnaryExpr{ 1059 Op: expr.GET_HLL_VALUE, 1060 Expr: colRef, 1061 ExprType: expr.Unsigned, 1062 } 1063 } 1064 e.ExprType = expr.Unsigned 1065 case hllCallName: 1066 if len(e.Args) != 1 { 1067 qc.Error = utils.StackError( 1068 nil, "expect 1 argument for %s, but got %s", e.Name, e.String()) 1069 break 1070 } 1071 colRef, isVarRef := e.Args[0].(*expr.VarRef) 1072 if !isVarRef || colRef.DataType != memCom.Uint32 { 1073 qc.Error = utils.StackError( 1074 nil, "expect 1 argument to be a valid hll column for %s, but got %s of type %s", 1075 e.Name, e.Args[0].String(), memCom.DataTypeName[colRef.DataType]) 1076 break 1077 } 1078 e.ExprType = e.Args[0].Type() 1079 case sumCallName, minCallName, maxCallName, avgCallName: 1080 if len(e.Args) != 1 { 1081 qc.Error = utils.StackError( 1082 nil, "expect 1 argument for %s, but got %s", e.Name, e.String()) 1083 break 1084 } 1085 // For avg, the expression type should always be float. 1086 if e.Name == avgCallName { 1087 e.Args[0] = cast(e.Args[0], expr.Float) 1088 } 1089 e.ExprType = e.Args[0].Type() 1090 default: 1091 qc.Error = utils.StackError(nil, "unknown function %s", e.Name) 1092 } 1093 case *expr.Case: 1094 highestType := e.Else.Type() 1095 for _, whenThen := range e.WhenThens { 1096 if whenThen.Then.Type() > highestType { 1097 highestType = whenThen.Then.Type() 1098 } 1099 } 1100 // Cast else and thens to highestType, cast whens to boolean. 1101 e.Else = cast(e.Else, highestType) 1102 for i, whenThen := range e.WhenThens { 1103 whenThen.When = cast(whenThen.When, expr.Boolean) 1104 whenThen.Then = cast(whenThen.Then, highestType) 1105 e.WhenThens[i] = whenThen 1106 } 1107 e.ExprType = highestType 1108 } 1109 return expression 1110 } 1111 1112 // normalizeAndFilters extracts top AND operators and flatten them out to the 1113 // filter slice. 
1114 func normalizeAndFilters(filters []expr.Expr) []expr.Expr { 1115 i := 0 1116 for i < len(filters) { 1117 f, _ := filters[i].(*expr.BinaryExpr) 1118 if f != nil && f.Op == expr.AND { 1119 filters[i] = f.LHS 1120 filters = append(filters, f.RHS) 1121 } else { 1122 i++ 1123 } 1124 } 1125 return filters 1126 } 1127 1128 // resolveTypes walks all expresison ASTs and resolves data types bottom up. 1129 // In addition it also translates enum strings and rewrites their predicates. 1130 func (qc *AQLQueryContext) resolveTypes() { 1131 // Join conditions. 1132 for i, join := range qc.Query.Joins { 1133 for j, cond := range join.conditions { 1134 join.conditions[j] = expr.Rewrite(qc, cond) 1135 if qc.Error != nil { 1136 return 1137 } 1138 } 1139 qc.Query.Joins[i] = join 1140 } 1141 1142 // Dimensions. 1143 for i, dim := range qc.Query.Dimensions { 1144 dim.expr = expr.Rewrite(qc, dim.expr) 1145 if qc.Error != nil { 1146 return 1147 } 1148 qc.Query.Dimensions[i] = dim 1149 } 1150 1151 // Measures. 1152 for i, measure := range qc.Query.Measures { 1153 measure.expr = expr.Rewrite(qc, measure.expr) 1154 if qc.Error != nil { 1155 return 1156 } 1157 for j, filter := range measure.filters { 1158 measure.filters[j] = expr.Rewrite(qc, filter) 1159 if qc.Error != nil { 1160 return 1161 } 1162 } 1163 measure.filters = normalizeAndFilters(measure.filters) 1164 qc.Query.Measures[i] = measure 1165 } 1166 1167 // Filters. 
1168 for i, filter := range qc.Query.filters { 1169 qc.Query.filters[i] = expr.Rewrite(qc, filter) 1170 if qc.Error != nil { 1171 return 1172 } 1173 } 1174 qc.Query.filters = normalizeAndFilters(qc.Query.filters) 1175 } 1176 1177 // extractFitler processes the specified query level filter and matches it 1178 // against the following formats: 1179 // column = value 1180 // column > value 1181 // column >= value 1182 // column < value 1183 // column <= value 1184 // column 1185 // not column 1186 // It returns the numeric constant value associated with the filter in a uint32 1187 // space (for all types including float32). 1188 // In addition it also returns the boundaryType for >, >=, <, <= operators. 1189 // Note that since the candidate filters have already been preselected against 1190 // some criterias, this function does not perform full format validation. 1191 func (qc *AQLQueryContext) extractFilter(filterID int) ( 1192 value uint32, boundary boundaryType, success bool) { 1193 switch f := qc.Query.filters[filterID].(type) { 1194 case *expr.VarRef: 1195 // Match `column` format 1196 value = 1 1197 success = true 1198 case *expr.UnaryExpr: 1199 // Match `not column` format 1200 success = true 1201 case *expr.BinaryExpr: 1202 // Match `column op value` format 1203 rhs, _ := f.RHS.(*expr.NumberLiteral) 1204 if rhs == nil { 1205 return 1206 } 1207 switch rhs.ExprType { 1208 case expr.Float: 1209 *(*float32)(unsafe.Pointer(&value)) = float32(rhs.Val) 1210 case expr.Signed: 1211 *(*int32)(unsafe.Pointer(&value)) = int32(rhs.Int) 1212 case expr.Unsigned: 1213 value = uint32(rhs.Int) 1214 default: 1215 return 1216 } 1217 switch f.Op { 1218 case expr.GTE, expr.LTE: 1219 boundary = inclusiveBoundary 1220 case expr.GT, expr.LT: 1221 boundary = exclusiveBoundary 1222 } 1223 success = true 1224 } 1225 return 1226 } 1227 1228 // matchPrefilters identifies all prefilters from query level filters, 1229 // stores them in AQLQueryContext.Prefilters, 1230 // and stores their 
values in TableScanner for future prefilter vector slicing. 1231 func (qc *AQLQueryContext) matchPrefilters() { 1232 // Format of candidateFilters: 1233 // [tableID]map[columnID]{filterIDs for lower bound, upper bound, equality} 1234 // tableID is query scoped, while columnID is schema scoped. 1235 candidateFilters := make([]map[int][3]int, len(qc.TableScanners)) 1236 for tableID := range qc.TableScanners { 1237 candidateFilters[tableID] = make(map[int][3]int) 1238 } 1239 1240 // Index candidate filters by table/column 1241 for filterID, filter := range qc.Query.filters { 1242 f, _ := filter.(*expr.BinaryExpr) 1243 if f == nil { 1244 switch f := filter.(type) { 1245 case *expr.VarRef: 1246 // Match `column` format 1247 if f.ExprType == expr.Boolean { 1248 candidateFilters[f.TableID][f.ColumnID] = [3]int{-1, -1, filterID} 1249 } 1250 case *expr.UnaryExpr: 1251 // Match `not column` format 1252 if f.Op == expr.NOT { 1253 f, _ := f.Expr.(*expr.VarRef) 1254 if f != nil && f.ExprType == expr.Boolean { 1255 candidateFilters[f.TableID][f.ColumnID] = [3]int{-1, -1, filterID} 1256 } 1257 } 1258 // TODO: IS_NULL can be matched as an equality filter. 1259 // TODO: IS_NOT_NULL can be matched as the final range filter. 1260 } 1261 continue 1262 } 1263 1264 // Match `column op value` format, where op can be =, <, <=, >, >=. 
1265 if f.Op < expr.EQ || f.Op > expr.GTE { 1266 continue 1267 } 1268 1269 lhs, _ := f.LHS.(*expr.VarRef) 1270 if lhs == nil { 1271 continue 1272 } 1273 1274 columnToFilterMap := candidateFilters[lhs.TableID] 1275 filters, exists := columnToFilterMap[lhs.ColumnID] 1276 if !exists { 1277 filters = [3]int{-1, -1, -1} 1278 } 1279 switch f.Op { 1280 case expr.GT, expr.GTE: 1281 filters[0] = filterID 1282 case expr.LT, expr.LTE: 1283 filters[1] = filterID 1284 case expr.EQ: 1285 filters[2] = filterID 1286 } 1287 columnToFilterMap[lhs.ColumnID] = filters 1288 } 1289 1290 // Prefilter matching 1291 for tableID, scanner := range qc.TableScanners { 1292 // Match in archiving sort column order 1293 for _, columnID := range scanner.Schema.Schema.ArchivingSortColumns { 1294 filterIndex, exists := candidateFilters[tableID][columnID] 1295 if !exists { 1296 // Stop on first missing column 1297 break 1298 } 1299 // Equality 1300 if filterIndex[2] >= 0 { 1301 value, _, success := qc.extractFilter(filterIndex[2]) 1302 if !success { 1303 // Stop if the value fails to be extracted 1304 break 1305 } 1306 scanner.EqualityPrefilterValues = append( 1307 scanner.EqualityPrefilterValues, value) 1308 qc.Prefilters = append(qc.Prefilters, filterIndex[2]) 1309 scanner.ColumnUsages[columnID] |= columnUsedByPrefilter 1310 // Continue matching the next column 1311 continue 1312 } 1313 // Lower bound 1314 if filterIndex[0] >= 0 { 1315 value, boundaryType, success := qc.extractFilter(filterIndex[0]) 1316 if success { 1317 scanner.RangePrefilterValues[0] = value 1318 scanner.RangePrefilterBoundaries[0] = boundaryType 1319 qc.Prefilters = append(qc.Prefilters, filterIndex[0]) 1320 scanner.ColumnUsages[columnID] |= columnUsedByPrefilter 1321 } 1322 } 1323 // Upper bound 1324 if filterIndex[1] >= 0 { 1325 value, boundaryType, success := qc.extractFilter(filterIndex[1]) 1326 if success { 1327 scanner.RangePrefilterValues[1] = value 1328 scanner.RangePrefilterBoundaries[1] = boundaryType 1329 
qc.Prefilters = append(qc.Prefilters, filterIndex[1]) 1330 scanner.ColumnUsages[columnID] |= columnUsedByPrefilter 1331 } 1332 } 1333 // Stop after the first range filter 1334 break 1335 } 1336 } 1337 1338 sort.Ints(qc.Prefilters) 1339 } 1340 1341 // columnUsageCollector is the visitor used to traverses an AST, finds VarRef columns 1342 // and sets the usage bits in tableScanners. The VarRef nodes must have already 1343 // been resolved and annotated with TableID and ColumnID. 1344 type columnUsageCollector struct { 1345 tableScanners []*TableScanner 1346 usages columnUsage 1347 } 1348 1349 func (c columnUsageCollector) Visit(expression expr.Expr) expr.Visitor { 1350 switch e := expression.(type) { 1351 case *expr.VarRef: 1352 c.tableScanners[e.TableID].ColumnUsages[e.ColumnID] |= c.usages 1353 } 1354 return c 1355 } 1356 1357 // foreignTableColumnDetector detects foreign table columns involved in AST 1358 type foreignTableColumnDetector struct { 1359 hasForeignTableColumn bool 1360 } 1361 1362 func (c *foreignTableColumnDetector) Visit(expression expr.Expr) expr.Visitor { 1363 switch e := expression.(type) { 1364 case *expr.VarRef: 1365 c.hasForeignTableColumn = c.hasForeignTableColumn || (e.TableID > 0) 1366 } 1367 return c 1368 } 1369 1370 // processFilters processes all filters and categorize them into common filters, 1371 // prefilters, and time filters. It also collect column usages from the filters. 1372 func (qc *AQLQueryContext) processFilters() { 1373 // OOPK engine only supports one measure per query. 1374 if len(qc.Query.Measures) != 1 { 1375 qc.Error = utils.StackError(nil, "expect one measure per query, but got %d", 1376 len(qc.Query.Measures)) 1377 return 1378 } 1379 1380 // Categorize common filters and prefilters based on matched prefilters. 
1381 commonFilters := qc.Query.Measures[0].filters 1382 prefilters := qc.Prefilters 1383 for index, filter := range qc.Query.filters { 1384 if len(prefilters) == 0 || prefilters[0] > index { 1385 // common filters 1386 commonFilters = append(commonFilters, filter) 1387 } else { 1388 qc.OOPK.Prefilters = append(qc.OOPK.Prefilters, filter) 1389 prefilters = prefilters[1:] 1390 } 1391 } 1392 1393 var geoFilterFound bool 1394 for _, filter := range commonFilters { 1395 foreignTableColumnDetector := foreignTableColumnDetector{} 1396 expr.Walk(&foreignTableColumnDetector, filter) 1397 if foreignTableColumnDetector.hasForeignTableColumn { 1398 var isGeoFilter bool 1399 if qc.OOPK.geoIntersection != nil { 1400 geoTableID := qc.OOPK.geoIntersection.shapeTableID 1401 joinSchema := qc.TableSchemaByName[qc.Query.Joins[geoTableID-1].Table] 1402 isGeoFilter = qc.matchGeoFilter(filter, geoTableID, joinSchema, geoFilterFound) 1403 if qc.Error != nil { 1404 return 1405 } 1406 } 1407 1408 if !isGeoFilter { 1409 qc.OOPK.ForeignTableCommonFilters = append(qc.OOPK.ForeignTableCommonFilters, filter) 1410 } else { 1411 geoFilterFound = true 1412 } 1413 } else { 1414 qc.OOPK.MainTableCommonFilters = append(qc.OOPK.MainTableCommonFilters, filter) 1415 } 1416 } 1417 1418 if qc.OOPK.geoIntersection != nil && !geoFilterFound { 1419 qc.Error = utils.StackError(nil, "Exact one geo filter is needed if geo intersection"+ 1420 " is used during join") 1421 return 1422 } 1423 1424 // Process time filter. 1425 qc.processTimeFilter() 1426 if qc.Error != nil { 1427 return 1428 } 1429 1430 // Collect column usages from the filters. 
1431 for _, f := range qc.OOPK.MainTableCommonFilters { 1432 expr.Walk(columnUsageCollector{ 1433 tableScanners: qc.TableScanners, 1434 usages: columnUsedByAllBatches, 1435 }, f) 1436 } 1437 1438 for _, f := range qc.OOPK.ForeignTableCommonFilters { 1439 expr.Walk(columnUsageCollector{ 1440 tableScanners: qc.TableScanners, 1441 usages: columnUsedByAllBatches, 1442 }, f) 1443 } 1444 1445 for _, f := range qc.OOPK.Prefilters { 1446 expr.Walk(columnUsageCollector{ 1447 tableScanners: qc.TableScanners, 1448 usages: columnUsedByLiveBatches, 1449 }, f) 1450 } 1451 1452 if qc.OOPK.TimeFilters[0] != nil { 1453 expr.Walk(columnUsageCollector{ 1454 tableScanners: qc.TableScanners, 1455 usages: columnUsedByFirstArchiveBatch | columnUsedByLiveBatches, 1456 }, qc.OOPK.TimeFilters[0]) 1457 } 1458 1459 if qc.OOPK.TimeFilters[1] != nil { 1460 expr.Walk(columnUsageCollector{ 1461 tableScanners: qc.TableScanners, 1462 usages: columnUsedByLastArchiveBatch | columnUsedByLiveBatches, 1463 }, qc.OOPK.TimeFilters[1]) 1464 } 1465 } 1466 1467 func getStrFromNumericalOrStrLiteral(e expr.Expr) (string, error) { 1468 var str string 1469 if strExpr, ok := e.(*expr.StringLiteral); ok { 1470 str = strExpr.Val 1471 } else { 1472 if numExpr, ok := e.(*expr.NumberLiteral); ok { 1473 str = numExpr.String() 1474 } else { 1475 return str, utils.StackError(nil, 1476 "Unable to extract string from %s", e.String()) 1477 } 1478 } 1479 return str, nil 1480 } 1481 1482 // matchGeoFilter tries to match the filter as a geo filter and prepare shapeUUIDs for aql processor. It returns whether 1483 // the filterExpr is a geo filter. 
1484 func (qc *AQLQueryContext) matchGeoFilter(filterExpr expr.Expr, joinTableID int, 1485 joinSchema *memstore.TableSchema, geoFilterFound bool) (geoFilterFoundInCurrentExpr bool) { 1486 var shapeUUIDs []string 1487 invalidOpsFound, geoFilterFoundInCurrentExpr := qc.matchGeoFilterHelper(filterExpr, joinTableID, joinSchema, &shapeUUIDs) 1488 if qc.Error != nil { 1489 return 1490 } 1491 if geoFilterFoundInCurrentExpr && invalidOpsFound { 1492 qc.Error = utils.StackError(nil, "Only EQ and IN allowed for geo filters") 1493 return 1494 } 1495 if geoFilterFoundInCurrentExpr && geoFilterFound { 1496 qc.Error = utils.StackError(nil, "Only one geo filter is allowed") 1497 return 1498 } 1499 1500 if len(shapeUUIDs) > geoShapeLimit { 1501 qc.Error = utils.StackError(nil, "At most %d gep shapes supported, got %d", geoShapeLimit, len(shapeUUIDs)) 1502 return 1503 } 1504 1505 if geoFilterFoundInCurrentExpr { 1506 qc.OOPK.geoIntersection.shapeUUIDs = shapeUUIDs 1507 } 1508 return 1509 } 1510 1511 func (qc *AQLQueryContext) matchGeoFilterHelper(filterExpr expr.Expr, joinTableID int, 1512 joinSchema *memstore.TableSchema, shapeUUIDs *[]string) (inValidOpFound, foundGeoFilter bool) { 1513 switch e := filterExpr.(type) { 1514 case *expr.BinaryExpr: 1515 if e.Op == expr.OR { 1516 inValidOpFoundL, foundGeoFilterL := qc.matchGeoFilterHelper(e.LHS, joinTableID, joinSchema, shapeUUIDs) 1517 inValidOpFoundR, foundGeoFilterR := qc.matchGeoFilterHelper(e.RHS, joinTableID, joinSchema, shapeUUIDs) 1518 inValidOpFound = inValidOpFoundL || inValidOpFoundR 1519 foundGeoFilter = foundGeoFilterL || foundGeoFilterR 1520 } else if e.Op == expr.EQ { 1521 columnExpr := e.LHS 1522 1523 if paren, ok := columnExpr.(*expr.ParenExpr); ok { 1524 columnExpr = paren.Expr 1525 } 1526 if column, ok := columnExpr.(*expr.VarRef); ok && column.TableID == joinTableID { 1527 // geo filter's column must be primary key. 
1528 if joinSchema.Schema.PrimaryKeyColumns[0] != column.ColumnID { 1529 qc.Error = utils.StackError(nil, "Geo filter column is not the primary key") 1530 return 1531 } 1532 uuidStr, err := getStrFromNumericalOrStrLiteral(e.RHS) 1533 if err != nil { 1534 qc.Error = utils.StackError(err, 1535 "Unable to extract uuid from expression %s", e.RHS.String()) 1536 return 1537 } 1538 normalizedUUID, err := utils.NormalizeUUIDString(uuidStr) 1539 if err != nil { 1540 qc.Error = err 1541 return 1542 } 1543 foundGeoFilter = true 1544 *shapeUUIDs = append(*shapeUUIDs, normalizedUUID) 1545 } 1546 } else { 1547 inValidOpFound = true 1548 // keep traversing to find geo fields 1549 _, foundGeoFilterL := qc.matchGeoFilterHelper(e.LHS, joinTableID, joinSchema, shapeUUIDs) 1550 _, foundGeoFilterR := qc.matchGeoFilterHelper(e.RHS, joinTableID, joinSchema, shapeUUIDs) 1551 foundGeoFilter = foundGeoFilterL || foundGeoFilterR 1552 } 1553 case *expr.UnaryExpr: 1554 inValidOpFound = true 1555 _, foundGeoFilter = qc.matchGeoFilterHelper(e.Expr, joinTableID, joinSchema, shapeUUIDs) 1556 } 1557 return 1558 } 1559 1560 // processTimeFilter processes the time filter by matching it against the time 1561 // column of the main fact table. The time filter will be identified as common 1562 // filter if it does not match with the designated time column. 1563 func (qc *AQLQueryContext) processTimeFilter() { 1564 from, to := qc.fromTime, qc.toTime 1565 1566 // Match against time column of the main fact table. 
1567 var timeColumnMatched bool 1568 1569 tableColumnPair := strings.SplitN(qc.Query.TimeFilter.Column, ".", 2) 1570 if len(tableColumnPair) < 2 { 1571 qc.Query.TimeFilter.Column = tableColumnPair[0] 1572 } else { 1573 qc.Query.TimeFilter.Column = tableColumnPair[1] 1574 if tableColumnPair[0] != qc.Query.Table { 1575 qc.Error = utils.StackError(nil, "timeFilter only supports main table: %s, got: %s", qc.Query.Table, tableColumnPair[0]) 1576 return 1577 } 1578 } 1579 1580 if qc.TableScanners[0].Schema.Schema.IsFactTable { 1581 if from == nil { 1582 qc.Error = utils.StackError(nil, "'from' of time filter is missing") 1583 return 1584 } 1585 1586 timeColumn := qc.TableScanners[0].Schema.Schema.Columns[0].Name 1587 if qc.Query.TimeFilter.Column == "" || qc.Query.TimeFilter.Column == timeColumn { 1588 timeColumnMatched = true 1589 qc.Query.TimeFilter.Column = timeColumn 1590 } 1591 } 1592 1593 // TODO: resolve time filter column against foreign tables. 1594 timeColumnID := 0 1595 found := false 1596 if qc.Query.TimeFilter.Column != "" { 1597 // Validate column existence and type. 
1598 timeColumnID, found = qc.TableScanners[0].Schema.ColumnIDs[qc.Query.TimeFilter.Column] 1599 if !found { 1600 qc.Error = utils.StackError(nil, "unknown time filter column %s", 1601 qc.Query.TimeFilter.Column) 1602 return 1603 } 1604 timeColumnType := qc.TableScanners[0].Schema.ValueTypeByColumn[timeColumnID] 1605 if timeColumnType != memCom.Uint32 { 1606 qc.Error = utils.StackError(nil, 1607 "expect time filter column %s of type Uint32, but got %s", 1608 qc.Query.TimeFilter.Column, memCom.DataTypeName[timeColumnType]) 1609 return 1610 } 1611 } 1612 fromExpr, toExpr := createTimeFilterExpr(&expr.VarRef{ 1613 Val: qc.Query.TimeFilter.Column, 1614 ExprType: expr.Unsigned, 1615 TableID: 0, 1616 ColumnID: timeColumnID, 1617 DataType: memCom.Uint32, 1618 }, from, to) 1619 1620 qc.TableScanners[0].ArchiveBatchIDEnd = int((utils.Now().Unix() + 86399) / 86400) 1621 if timeColumnMatched { 1622 qc.OOPK.TimeFilters[0] = fromExpr 1623 qc.OOPK.TimeFilters[1] = toExpr 1624 if from != nil { 1625 qc.TableScanners[0].ArchiveBatchIDStart = int(from.Time.Unix() / 86400) 1626 } 1627 if to != nil { 1628 qc.TableScanners[0].ArchiveBatchIDEnd = int((to.Time.Unix() + 86399) / 86400) 1629 } 1630 } else { 1631 if fromExpr != nil { 1632 qc.OOPK.MainTableCommonFilters = append(qc.OOPK.MainTableCommonFilters, fromExpr) 1633 } 1634 if toExpr != nil { 1635 qc.OOPK.MainTableCommonFilters = append(qc.OOPK.MainTableCommonFilters, toExpr) 1636 } 1637 } 1638 } 1639 1640 // matchAndRewriteGeoDimension tells whether a dimension matches geo join and whether it's a valid 1641 // geo join. It returns the rewritten geo dimension and error. If the err is non nil, it means it's a invalid geo join. 1642 // A valid geo dimension can only in one of the following format: 1643 // 1. UUID 1644 // 2. 
hex(UUID) 1645 func (qc *AQLQueryContext) matchAndRewriteGeoDimension(dimExpr expr.Expr) (expr.Expr, error) { 1646 gc := &geoTableUsageCollector{ 1647 geoIntersection: *qc.OOPK.geoIntersection, 1648 } 1649 1650 expr.Walk(gc, dimExpr) 1651 if !gc.useGeoTable { 1652 return nil, nil 1653 } 1654 1655 if callExpr, ok := dimExpr.(*expr.Call); ok { 1656 if callExpr.Name != hexCallName { 1657 return nil, utils.StackError(nil, 1658 "Only hex function is supported on UUID type, but got %s", callExpr.Name) 1659 } 1660 1661 if len(callExpr.Args) != 1 { 1662 return nil, utils.StackError(nil, 1663 "Exactly 1 argument allowed for hex, got %d", len(callExpr.Args)) 1664 } 1665 1666 dimExpr = callExpr.Args[0] 1667 } 1668 1669 joinSchema := qc.TableSchemaByName[qc.Query.Joins[gc.geoIntersection.shapeTableID-1].Table] 1670 if varRefExpr, ok := dimExpr.(*expr.VarRef); ok { 1671 var err error 1672 if varRefExpr.ColumnID != joinSchema.Schema.PrimaryKeyColumns[0] { 1673 err = utils.StackError(nil, "Only geo uuid is allowed in dimensions") 1674 } 1675 1676 varRefExpr.DataType = memCom.Uint8 1677 return varRefExpr, err 1678 } 1679 1680 return nil, utils.StackError(nil, "Only hex(uuid) or uuid supported, got %s", dimExpr.String()) 1681 } 1682 1683 // geoTableUsageCollector traverses an AST expression tree, finds VarRef columns 1684 // and check whether it uses any geo table columns. 1685 type geoTableUsageCollector struct { 1686 geoIntersection geoIntersection 1687 useGeoTable bool 1688 } 1689 1690 func (g *geoTableUsageCollector) Visit(expression expr.Expr) expr.Visitor { 1691 switch e := expression.(type) { 1692 case *expr.VarRef: 1693 g.useGeoTable = g.useGeoTable || e.TableID == g.geoIntersection.shapeTableID 1694 } 1695 return g 1696 } 1697 1698 func (qc *AQLQueryContext) processMeasure() { 1699 // OOPK engine only supports one measure per query. 
1700 if len(qc.Query.Measures) != 1 { 1701 qc.Error = utils.StackError(nil, "expect one measure per query, but got %d", 1702 len(qc.Query.Measures)) 1703 return 1704 } 1705 1706 if _, ok := qc.Query.Measures[0].expr.(*expr.NumberLiteral); ok { 1707 qc.isNonAggregationQuery = true 1708 if qc.Query.Limit <= 0 { 1709 qc.Query.Limit = nonAggregationQueryLimit 1710 } 1711 return 1712 } 1713 1714 // Match and strip the aggregate function. 1715 aggregate, ok := qc.Query.Measures[0].expr.(*expr.Call) 1716 if !ok { 1717 qc.Error = utils.StackError(nil, "expect aggregate function, but got %s", 1718 qc.Query.Measures[0].Expr) 1719 return 1720 } 1721 1722 if qc.ReturnHLLData && aggregate.Name != hllCallName { 1723 qc.Error = utils.StackError(nil, "expect hll aggregate function as client specify 'Accept' as "+ 1724 "'application/hll', but got %s", 1725 qc.Query.Measures[0].Expr) 1726 return 1727 } 1728 1729 if len(aggregate.Args) != 1 { 1730 qc.Error = utils.StackError(nil, 1731 "expect one parameter for aggregate function %s, but got %d", 1732 aggregate.Name, len(aggregate.Args)) 1733 return 1734 } 1735 qc.OOPK.Measure = aggregate.Args[0] 1736 // default is 4 bytes 1737 qc.OOPK.MeasureBytes = 4 1738 switch strings.ToLower(aggregate.Name) { 1739 case countCallName: 1740 qc.OOPK.Measure = &expr.NumberLiteral{ 1741 Int: 1, 1742 Expr: "1", 1743 ExprType: expr.Unsigned, 1744 } 1745 qc.OOPK.AggregateType = C.AGGR_SUM_UNSIGNED 1746 case sumCallName: 1747 qc.OOPK.MeasureBytes = 8 1748 switch qc.OOPK.Measure.Type() { 1749 case expr.Float: 1750 qc.OOPK.AggregateType = C.AGGR_SUM_FLOAT 1751 case expr.Signed: 1752 qc.OOPK.AggregateType = C.AGGR_SUM_SIGNED 1753 case expr.Unsigned: 1754 qc.OOPK.AggregateType = C.AGGR_SUM_UNSIGNED 1755 default: 1756 qc.Error = utils.StackError(nil, 1757 unsupportedInputType, sumCallName, qc.OOPK.Measure.String()) 1758 return 1759 } 1760 case avgCallName: 1761 // 4 bytes for storing average result and another 4 byte for count 1762 qc.OOPK.MeasureBytes = 8 
1763 // for average, we should always use float type as the agg type. 1764 qc.OOPK.AggregateType = C.AGGR_AVG_FLOAT 1765 case minCallName: 1766 switch qc.OOPK.Measure.Type() { 1767 case expr.Float: 1768 qc.OOPK.AggregateType = C.AGGR_MIN_FLOAT 1769 case expr.Signed: 1770 qc.OOPK.AggregateType = C.AGGR_MIN_SIGNED 1771 case expr.Unsigned: 1772 qc.OOPK.AggregateType = C.AGGR_MIN_UNSIGNED 1773 default: 1774 qc.Error = utils.StackError(nil, 1775 unsupportedInputType, minCallName, qc.OOPK.Measure.String()) 1776 return 1777 } 1778 case maxCallName: 1779 switch qc.OOPK.Measure.Type() { 1780 case expr.Float: 1781 qc.OOPK.AggregateType = C.AGGR_MAX_FLOAT 1782 case expr.Signed: 1783 qc.OOPK.AggregateType = C.AGGR_MAX_SIGNED 1784 case expr.Unsigned: 1785 qc.OOPK.AggregateType = C.AGGR_MAX_UNSIGNED 1786 default: 1787 qc.Error = utils.StackError(nil, 1788 unsupportedInputType, maxCallName, qc.OOPK.Measure.String()) 1789 return 1790 } 1791 case hllCallName: 1792 qc.OOPK.AggregateType = C.AGGR_HLL 1793 default: 1794 qc.Error = utils.StackError(nil, 1795 "unsupported aggregate function: %s", aggregate.Name) 1796 return 1797 } 1798 } 1799 1800 func (qc *AQLQueryContext) getAllColumnsDimension() (columns []Dimension) { 1801 // only main table columns wildcard match supported 1802 for _, column := range qc.TableScanners[0].Schema.Schema.Columns { 1803 if !column.Deleted && column.Type != metaCom.GeoShape { 1804 columns = append(columns, Dimension{ 1805 expr: &expr.VarRef{Val: column.Name}, 1806 Expr: column.Name, 1807 }) 1808 } 1809 } 1810 return 1811 } 1812 1813 func (qc *AQLQueryContext) processDimensions() { 1814 // Copy dimension ASTs. 1815 qc.OOPK.Dimensions = make([]expr.Expr, len(qc.Query.Dimensions)) 1816 for i, dim := range qc.Query.Dimensions { 1817 // TODO: support numeric bucketizer. 
1818 qc.OOPK.Dimensions[i] = dim.expr 1819 if dim.expr.Type() == expr.GeoShape { 1820 qc.Error = utils.StackError(nil, 1821 "GeoShape can not be used for dimension: %s", dim.Expr) 1822 return 1823 } 1824 } 1825 1826 if qc.OOPK.geoIntersection != nil { 1827 gc := &geoTableUsageCollector{ 1828 geoIntersection: *qc.OOPK.geoIntersection, 1829 } 1830 // Check whether measure and dimensions are referencing any geo table columns. 1831 expr.Walk(gc, qc.OOPK.Measure) 1832 1833 if gc.useGeoTable { 1834 qc.Error = utils.StackError(nil, 1835 "Geo table column is not allowed to be used in measure: %s", qc.OOPK.Measure.String()) 1836 return 1837 } 1838 1839 foundGeoJoin := false 1840 for i, dimExpr := range qc.OOPK.Dimensions { 1841 geoDimExpr, err := qc.matchAndRewriteGeoDimension(dimExpr) 1842 if err != nil { 1843 qc.Error = err 1844 return 1845 } 1846 1847 if geoDimExpr != nil { 1848 if foundGeoJoin { 1849 qc.Error = utils.StackError(nil, 1850 "Only one geo dimension allowed: %s", dimExpr.String()) 1851 return 1852 } 1853 foundGeoJoin = true 1854 qc.OOPK.Dimensions[i] = geoDimExpr 1855 qc.OOPK.geoIntersection.dimIndex = i 1856 } 1857 } 1858 } 1859 1860 // Collect column usage from measure and dimensions 1861 expr.Walk(columnUsageCollector{ 1862 tableScanners: qc.TableScanners, 1863 usages: columnUsedByAllBatches, 1864 }, qc.OOPK.Measure) 1865 1866 for _, dim := range qc.OOPK.Dimensions { 1867 expr.Walk(columnUsageCollector{ 1868 tableScanners: qc.TableScanners, 1869 usages: columnUsedByAllBatches, 1870 }, dim) 1871 } 1872 } 1873 1874 func getDimensionDataType(expression expr.Expr) memCom.DataType { 1875 if e, ok := expression.(*expr.VarRef); ok { 1876 return e.DataType 1877 } 1878 switch expression.Type() { 1879 case expr.Boolean: 1880 return memCom.Bool 1881 case expr.Unsigned: 1882 return memCom.Uint32 1883 case expr.Signed: 1884 return memCom.Int32 1885 case expr.Float: 1886 return memCom.Float32 1887 default: 1888 return memCom.Uint32 1889 } 1890 } 1891 1892 func 
getDimensionDataBytes(expression expr.Expr) int { 1893 return memCom.DataTypeBytes(getDimensionDataType(expression)) 1894 } 1895 1896 // Sort dimension columns based on the data width in bytes 1897 // dimension columns in OOPK will not be reordered, but a mapping 1898 // from original id to ordered offsets (value and validity) in 1899 // dimension vector will be stored. 1900 // GeoUUID dimension will be 1 bytes. VarRef expression will use column data length, 1901 // others will be default to 4 bytes. 1902 func (qc *AQLQueryContext) sortDimensionColumns() { 1903 orderedIndex := 0 1904 numDimensions := len(qc.OOPK.Dimensions) 1905 qc.OOPK.DimensionVectorIndex = make([]int, numDimensions) 1906 byteWidth := 1 << uint(len(qc.OOPK.NumDimsPerDimWidth)-1) 1907 for byteIndex := range qc.OOPK.NumDimsPerDimWidth { 1908 for originIndex, dim := range qc.OOPK.Dimensions { 1909 dataBytes := getDimensionDataBytes(dim) 1910 if dataBytes == byteWidth { 1911 // record value offset, null offset pair 1912 // null offsets will have to add total dim bytes later 1913 qc.OOPK.DimensionVectorIndex[originIndex] = orderedIndex 1914 qc.OOPK.NumDimsPerDimWidth[byteIndex]++ 1915 qc.OOPK.DimRowBytes += dataBytes 1916 orderedIndex++ 1917 } 1918 } 1919 byteWidth >>= 1 1920 } 1921 // plus one byte per dimension column for validity 1922 qc.OOPK.DimRowBytes += numDimensions 1923 1924 if !qc.isNonAggregationQuery { 1925 // no dimension size checking for non-aggregation query 1926 if qc.OOPK.DimRowBytes > C.MAX_DIMENSION_BYTES { 1927 qc.Error = utils.StackError(nil, "maximum dimension bytes: %d, got: %d", C.MAX_DIMENSION_BYTES, qc.OOPK.DimRowBytes) 1928 return 1929 } 1930 } 1931 } 1932 1933 func (qc *AQLQueryContext) sortUsedColumns() { 1934 for _, scanner := range qc.TableScanners { 1935 scanner.Columns = make([]int, 0, len(scanner.ColumnUsages)) 1936 scanner.ColumnsByIDs = make(map[int]int) 1937 // Unsorted/uncompressed columns 1938 for columnID := range scanner.ColumnUsages { 1939 if 
utils.IndexOfInt(scanner.Schema.Schema.ArchivingSortColumns, columnID) < 0 { 1940 scanner.ColumnsByIDs[columnID] = len(scanner.Columns) 1941 scanner.Columns = append(scanner.Columns, columnID) 1942 } 1943 } 1944 // Sorted/compressed columns 1945 for i := len(scanner.Schema.Schema.ArchivingSortColumns) - 1; i >= 0; i-- { 1946 columnID := scanner.Schema.Schema.ArchivingSortColumns[i] 1947 _, found := scanner.ColumnUsages[columnID] 1948 if found { 1949 scanner.ColumnsByIDs[columnID] = len(scanner.Columns) 1950 scanner.Columns = append(scanner.Columns, columnID) 1951 } 1952 } 1953 } 1954 } 1955 1956 func parseTimezoneColumnString(timezoneColumnString string) (column, joinKey string, success bool) { 1957 exp, err := expr.ParseExpr(timezoneColumnString) 1958 if err != nil { 1959 return 1960 } 1961 if c, ok := exp.(*expr.Call); ok { 1962 if len(c.Args) == 1 { 1963 return c.Name, c.Args[0].String(), true 1964 } 1965 } 1966 return 1967 } 1968 1969 func (qc *AQLQueryContext) expandINop(e *expr.BinaryExpr) (expandedExpr expr.Expr) { 1970 lhs, ok := e.LHS.(*expr.VarRef) 1971 if !ok { 1972 qc.Error = utils.StackError(nil, "lhs of IN or NOT_IN must be a valid column") 1973 } 1974 rhs := e.RHS 1975 switch rhsTyped := rhs.(type) { 1976 case *expr.Call: 1977 expandedExpr = &expr.BooleanLiteral{Val: false} 1978 for _, value := range rhsTyped.Args { 1979 switch expandedExpr.(type) { 1980 case *expr.BooleanLiteral: 1981 expandedExpr = qc.Rewrite(&expr.BinaryExpr{ 1982 Op: expr.EQ, 1983 LHS: lhs, 1984 RHS: value, 1985 }).(*expr.BinaryExpr) 1986 default: 1987 lastExpr := expandedExpr 1988 expandedExpr = &expr.BinaryExpr{ 1989 Op: expr.OR, 1990 LHS: lastExpr, 1991 RHS: qc.Rewrite(&expr.BinaryExpr{ 1992 Op: expr.EQ, 1993 LHS: lhs, 1994 RHS: value, 1995 }).(*expr.BinaryExpr), 1996 } 1997 } 1998 } 1999 break 2000 default: 2001 qc.Error = utils.StackError(nil, "only EQ and IN operators are supported for geo fields") 2002 } 2003 return 2004 }