github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/aql_compiler.go

     1  //  Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package query
    16  
    17  // #include "time_series_aggregate.h"
    18  import "C"
    19  
    20  import (
    21  	"sort"
    22  	"strings"
    23  	"unsafe"
    24  
    25  	"fmt"
    26  	"github.com/uber/aresdb/memstore"
    27  	memCom "github.com/uber/aresdb/memstore/common"
    28  	metaCom "github.com/uber/aresdb/metastore/common"
    29  	"github.com/uber/aresdb/query/common"
    30  	"github.com/uber/aresdb/query/expr"
    31  	"github.com/uber/aresdb/utils"
    32  	"strconv"
    33  )
    34  
    35  // DataTypeToExprType maps data type from the column schema format to
    36  // expression AST format.
    37  var DataTypeToExprType = map[memCom.DataType]expr.Type{
    38  	memCom.Bool:      expr.Boolean,
    39  	memCom.Int8:      expr.Signed,
    40  	memCom.Int16:     expr.Signed,
    41  	memCom.Int32:     expr.Signed,
    42  	memCom.Int64:     expr.Signed,
    43  	memCom.Uint8:     expr.Unsigned,
    44  	memCom.Uint16:    expr.Unsigned,
    45  	memCom.Uint32:    expr.Unsigned,
    46  	memCom.Float32:   expr.Float,
    47  	memCom.SmallEnum: expr.Unsigned,
    48  	memCom.BigEnum:   expr.Unsigned,
    49  	memCom.GeoPoint:  expr.GeoPoint,
    50  	memCom.GeoShape:  expr.GeoShape,
    51  }
    52  
    53  const (
    54  	unsupportedInputType      = "unsupported input type for %s: %s"
    55  	defaultTimezoneTableAlias = "__timezone_lookup"
    56  	geoShapeLimit             = 100
    57  	nonAggregationQueryLimit  = 1000
    58  )
    59  
    60  // constants for call names.
    61  const (
    62  	convertTzCallName           = "convert_tz"
    63  	countCallName               = "count"
    64  	dayOfWeekCallName           = "dayofweek"
    65  	fromUnixTimeCallName        = "from_unixtime"
    66  	geographyIntersectsCallName = "geography_intersects"
    67  	hexCallName                 = "hex"
    68  	// hll aggregation function applies to hll columns
    69  	hllCallName = "hll"
    70  	// countdistincthll aggregation function applies to all columns, hll value is computed on the fly
    71  	countDistinctHllCallName = "countdistincthll"
    72  	hourCallName             = "hour"
    73  	listCallName             = ""
    74  	maxCallName              = "max"
    75  	minCallName              = "min"
    76  	sumCallName              = "sum"
    77  	avgCallName              = "avg"
    78  )
    79  
    80  // Compile returns the compiled AQLQueryContext for data feeding and query
    81  // execution. Caller should check for AQLQueryContext.Error.
    82  func (q *AQLQuery) Compile(store memstore.MemStore, returnHLL bool) *AQLQueryContext {
    83  	qc := &AQLQueryContext{Query: q, ReturnHLLData: returnHLL}
    84  
    85  	// processTimezone might append additional joins
    86  	qc.processTimezone()
    87  	if qc.Error != nil {
    88  		return qc
    89  	}
    90  
    91  	// Read schema for every table used.
    92  	qc.readSchema(store)
    93  	defer qc.releaseSchema()
    94  	if qc.Error != nil {
    95  		return qc
    96  	}
    97  
    98  	// Parse all other SQL expressions to ASTs.
    99  	qc.parseExprs()
   100  	if qc.Error != nil {
   101  		return qc
   102  	}
   103  
   104  	// Resolve data types in the ASTs against schema, also translate enum values.
   105  	qc.resolveTypes()
   106  	if qc.Error != nil {
   107  		return qc
   108  	}
   109  
   110  	// Process join conditions first to collect information about geo join.
   111  	qc.processJoinConditions()
   112  	if qc.Error != nil {
   113  		return qc
   114  	}
   115  
   116  	// Identify prefilters.
   117  	qc.matchPrefilters()
   118  
   119  	// Process filters.
   120  	qc.processFilters()
   121  	if qc.Error != nil {
   122  		return qc
   123  	}
   124  
   125  	// Process measure and dimensions.
   126  	qc.processMeasure()
   127  	if qc.Error != nil {
   128  		return qc
   129  	}
   130  	qc.processDimensions()
   131  	if qc.Error != nil {
   132  		return qc
   133  	}
   134  
   135  	qc.sortUsedColumns()
   136  
   137  	qc.sortDimensionColumns()
   138  	if qc.Error != nil {
   139  		return qc
   140  	}
   141  
   142  	// TODO: VM instruction generation
   143  	return qc
   144  }
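
// compileQuerySketch is an illustrative, unused sketch of how a caller might drive Compile.
// The table and column names ("trips", "fare", "request_at") and the time filter values are
// hypothetical, and the struct literals assume the Measure/Dimension/TimeFilter field names
// used elsewhere in this package. Errors are surfaced through AQLQueryContext.Error rather
// than a separate return value.
func compileQuerySketch(store memstore.MemStore) *AQLQueryContext {
	q := &AQLQuery{
		Table:      "trips",
		Measures:   []Measure{{Expr: "sum(fare)"}},
		Dimensions: []Dimension{{Expr: "request_at", TimeBucketizer: "day"}},
		TimeFilter: TimeFilter{From: "-1d", To: "now"},
	}
	qc := q.Compile(store, false /* returnHLL */)
	if qc.Error != nil {
		// Compilation failed (unknown table, bad expression, unsupported join, ...).
		return qc
	}
	// qc now carries the parsed ASTs, matched prefilters and column usages.
	return qc
}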
   145  
   146  // adjustFilterToTimeFilter tries to promote one row filter into the time filter when a fact table query has no time filter
   147  func (qc *AQLQueryContext) adjustFilterToTimeFilter() {
   148  	toBeRemovedFilters := []int{}
   149  	timeFilter := TimeFilter{}
   150  	for i, filter := range qc.Query.filters {
   151  		if e, ok := filter.(*expr.BinaryExpr); ok {
   152  			lhs, isCol := e.LHS.(*expr.VarRef)
   153  			if !isCol {
   154  				continue
   155  			}
   156  
   157  			// check if this filter on main table event time column
   158  			tableID, columnID, err := qc.resolveColumn(lhs.Val)
   159  			if err != nil || tableID != 0 || columnID != 0 {
   160  				continue
   161  			}
   162  
   163  			val := ""
   164  			// only support number literal or string literal
   165  			switch rhs := e.RHS.(type) {
   166  			case *expr.NumberLiteral:
   167  				val = rhs.String()
   168  			case *expr.StringLiteral:
   169  				val = rhs.Val
   170  			}
   171  			if val == "" {
   172  				continue
   173  			}
   174  
   175  			switch e.Op {
   176  			case expr.LT:
   177  				if timeFilter.To == "" {
   178  					// only convert first LT
   179  					timeFilter.To = val
   180  					toBeRemovedFilters = append(toBeRemovedFilters, i)
   181  				} else {
   182  					qc.Error = utils.StackError(nil, "Only one '<' filter allowed for event time column")
   183  					return
   184  				}
   185  			case expr.GTE:
   186  				if timeFilter.From == "" {
   187  					// only convert first GTE
   188  					timeFilter.From = val
   189  					toBeRemovedFilters = append(toBeRemovedFilters, i)
   190  				} else {
   191  					qc.Error = utils.StackError(nil, "Only one '>=' filter allowed for event time column")
   192  					return
   193  				}
   194  			}
   195  		}
   196  	}
   197  	if timeFilter.From != "" || timeFilter.To != "" {
   198  		// processTimeFilter will handle the from is nil case
   199  		if qc.fromTime, qc.toTime, qc.Error = parseTimeFilter(timeFilter, qc.fixedTimezone, utils.Now()); qc.Error != nil {
   200  			return
   201  		}
   202  		// remove from original query filter
   203  		for i := len(toBeRemovedFilters) - 1; i >= 0; i-- {
   204  			index := toBeRemovedFilters[i]
   205  			qc.Query.filters = append(qc.Query.filters[:index], qc.Query.filters[index+1:]...)
   206  		}
   207  	}
   208  }
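
// Illustrative example (hypothetical column name): for a fact table whose event time column
// is "request_at" and a query without an explicit time filter, the row filters
// "request_at >= 1546300800" and "request_at < 1546387200" are converted by
// adjustFilterToTimeFilter into the query's from/to time range and removed from Query.filters.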
   209  
   210  func (qc *AQLQueryContext) processJoinConditions() {
   211  	if len(qc.Query.Joins) > 8 {
   212  		qc.Error = utils.StackError(nil, "At most %d foreign tables allowed, got: %d", 8, len(qc.Query.Joins))
   213  		return
   214  	}
   215  
   216  	qc.OOPK.foreignTables = make([]*foreignTable, len(qc.Query.Joins))
   217  	mainTableSchema := qc.TableSchemaByName[qc.Query.Table]
   218  	for joinTableID, join := range qc.Query.Joins {
   219  		joinSchema := qc.TableSchemaByName[join.Table]
   220  		if isGeoJoin(join) {
   221  			if qc.OOPK.geoIntersection != nil {
   222  				qc.Error = utils.StackError(nil, "At most one geo join allowed")
   223  				return
   224  			}
   225  			qc.matchGeoJoin(joinTableID, mainTableSchema, joinSchema, join.conditions)
   226  			if qc.Error != nil {
   227  				return
   228  			}
   229  		} else {
   230  			// We extract the geo join out of the join conditions since geo intersects is handled
   231  			// as a filter instead of an equi-join.
   232  			qc.OOPK.foreignTables[joinTableID] = &foreignTable{}
   233  			qc.matchEqualJoin(joinTableID, joinSchema, join.conditions)
   234  			if qc.Error != nil {
   235  				return
   236  			}
   237  		}
   238  	}
   239  }
   240  
   241  // matchGeoJoin initializes the geoIntersection struct for later query processing. For now only one geo join is
   242  // allowed per query. If users want to intersect with multiple geo join conditions, they should specify multiple geo
   243  // shapeLatLongs in the geo filter.
   244  // The following constraints apply:
   245  // 1. At most one geo join condition.
   246  // 2. The geo table must be a dimension table.
   247  // 3. The join condition must include exactly one shape column and one point column.
   248  // 4. Exactly one geo filter should be specified.
   249  // 5. The geo filter column must be the primary key of the geo table.
   250  // 6. Geo UUIDs must be strings in the query.
   251  // 7. The geo filter operator must be EQ or IN.
   252  // 8. The geo table's fields are not allowed in measures.
   253  // 9. Only one geo dimension is allowed.
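// Illustrative example (hypothetical table/column names): joining the main table to a geo
// dimension table "city_zones" uses a single condition such as
//   geography_intersects(city_zones.shape, request_point)
// where "shape" is a geo shape column of the geo table and "request_point" is a geo point
// column of the main table; both sides are recorded in qc.OOPK.geoIntersection.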
   254  func (qc *AQLQueryContext) matchGeoJoin(joinTableID int, mainTableSchema *memstore.TableSchema,
   255  	joinSchema *memstore.TableSchema, conditions []expr.Expr) {
   256  	if len(conditions) != 1 {
   257  		qc.Error = utils.StackError(nil, "At most one join condition allowed per geo join")
   258  		return
   259  	}
   260  
   261  	if joinSchema.Schema.IsFactTable {
   262  		qc.Error = utils.StackError(nil, "Only dimension table is allowed in geo join")
   263  		return
   264  	}
   265  
   266  	// the geo table's primary key must consist of exactly one column.
   267  	if len(joinSchema.Schema.PrimaryKeyColumns) > 1 {
   268  		qc.Error = utils.StackError(nil, "Composite primary key for geo table is not allowed")
   269  		return
   270  	}
   271  
   272  	c, _ := conditions[0].(*expr.Call)
   273  
   274  	// guaranteed by query rewrite.
   275  	shape, _ := c.Args[0].(*expr.VarRef)
   276  	point, _ := c.Args[1].(*expr.VarRef)
   277  
   278  	if shape.TableID != joinTableID+1 {
   279  		qc.Error = utils.StackError(nil, "Only shape in geo table can be referenced as join condition")
   280  		return
   281  	}
   282  
   283  	qc.OOPK.geoIntersection = &geoIntersection{
   284  		shapeTableID:  shape.TableID,
   285  		shapeColumnID: shape.ColumnID,
   286  		pointTableID:  point.TableID,
   287  		pointColumnID: point.ColumnID,
   288  		dimIndex:      -1,
   289  		inOrOut:       true,
   290  	}
   291  
   292  	// Set column usage for geo points.
   293  	expr.Walk(columnUsageCollector{
   294  		tableScanners: qc.TableScanners,
   295  		usages:        columnUsedByAllBatches,
   296  	}, point)
   297  }
   298  
   299  func isGeoJoin(j Join) bool {
   300  	if len(j.conditions) >= 1 {
   301  		c, ok := j.conditions[0].(*expr.Call)
   302  		if !ok {
   303  			return false
   304  		}
   305  		return c.Name == geographyIntersectsCallName
   306  	}
   307  	return false
   308  }
   309  
   310  // The following join constraints are enforced for now:
   311  // 1. equi-join only
   312  // 2. many-to-one join only
   313  // 3. the foreign table must be a dimension table
   314  // 4. the join must be on the foreign table's primary key
   315  // 5. the foreign table's primary key can have only one column
   316  // 6. every foreign table must be joined directly to the main table, i.e. no bridge tables
   317  // 7. up to 8 foreign tables
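// Illustrative example (hypothetical names): joining fact table "trips" to dimension table
// "cities" with the single condition
//   trips.city_id = cities.id
// satisfies these rules as long as "id" is the single-column primary key of "cities"; only
// trips.city_id is marked as used, since the foreign side is looked up via its primary key.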
   318  func (qc *AQLQueryContext) matchEqualJoin(joinTableID int, joinSchema *memstore.TableSchema, conditions []expr.Expr) {
   319  	if len(conditions) != 1 {
   320  		qc.Error = utils.StackError(nil, "%d join conditions expected, got %d", 1, len(conditions))
   321  		return
   322  	}
   323  
   324  	// foreign table must be a dimension table
   325  	if joinSchema.Schema.IsFactTable {
   326  		qc.Error = utils.StackError(nil, "join table %s is a fact table, only dimension tables are supported", joinSchema.Schema.Name)
   327  		return
   328  	}
   329  
   330  	// one foreign table primary key columns only
   331  	if len(joinSchema.Schema.PrimaryKeyColumns) > 1 {
   332  		qc.Error = utils.StackError(nil, "composite key not supported")
   333  		return
   334  	}
   335  
   336  	// equi-join only
   337  	e, ok := conditions[0].(*expr.BinaryExpr)
   338  	if !ok {
   339  		qc.Error = utils.StackError(nil, "binary expression expected, got %s", conditions[0].String())
   340  		return
   341  	}
   342  	if e.Op != expr.EQ {
   343  		qc.Error = utils.StackError(nil, "equal join expected, got %s", e.Op.String())
   344  		return
   345  	}
   346  
   347  	left, ok := e.LHS.(*expr.VarRef)
   348  	if !ok {
   349  		qc.Error = utils.StackError(nil, "column in join condition expected, got %s", e.LHS.String())
   350  		return
   351  	}
   352  
   353  	right, ok := e.RHS.(*expr.VarRef)
   354  	if !ok {
   355  		qc.Error = utils.StackError(nil, "column in join condition expected, got %s", e.RHS.String())
   356  		return
   357  	}
   358  
   359  	// main table at left and foreign table at right
   360  	if left.TableID != 0 {
   361  		left, right = right, left
   362  	}
   363  
   364  	// every foreign table must be joined directly to the main table
   365  	if left.TableID != 0 || right.TableID != joinTableID+1 {
   366  		qc.Error = utils.StackError(nil, "foreign table must be joined directly to the main table, join condition: %s", e.String())
   367  		return
   368  	}
   369  
   370  	// many-to-one join only (join with foreign table's primary key)
   371  	if joinSchema.Schema.PrimaryKeyColumns[0] != right.ColumnID {
   372  		qc.Error = utils.StackError(nil, "join column is not primary key of foreign table")
   373  		return
   374  	}
   375  
   376  	qc.OOPK.foreignTables[joinTableID].remoteJoinColumn = left
   377  	// set column usage for join column in main table
   378  	// no need to set usage for remote join column in foreign table since
   379  	// we only use primary key of foreign table to join
   380  	expr.Walk(columnUsageCollector{
   381  		tableScanners: qc.TableScanners,
   382  		usages:        columnUsedByAllBatches,
   383  	}, left)
   384  }
   385  
   386  func (qc *AQLQueryContext) parseExprs() {
   387  	var err error
   388  
   389  	// Join conditions.
   390  	for i, join := range qc.Query.Joins {
   391  		join.conditions = make([]expr.Expr, len(join.Conditions))
   392  		for j, cond := range join.Conditions {
   393  			join.conditions[j], err = expr.ParseExpr(cond)
   394  			if err != nil {
   395  				qc.Error = utils.StackError(err, "Failed to parse join condition: %s", cond)
   396  				return
   397  			}
   398  		}
   399  		qc.Query.Joins[i] = join
   400  	}
   401  
   402  	qc.fromTime, qc.toTime, qc.Error = parseTimeFilter(qc.Query.TimeFilter, qc.fixedTimezone, utils.Now())
   403  	if qc.Error != nil {
   404  		return
   405  	}
   406  
   407  	// Filters.
   408  	qc.Query.filters = make([]expr.Expr, len(qc.Query.Filters))
   409  	for i, filter := range qc.Query.Filters {
   410  		qc.Query.filters[i], err = expr.ParseExpr(filter)
   411  		if err != nil {
   412  			qc.Error = utils.StackError(err, "Failed to parse filter %s", filter)
   413  			return
   414  		}
   415  	}
   416  	if qc.fromTime == nil && qc.toTime == nil && len(qc.TableScanners) > 0 && qc.TableScanners[0].Schema.Schema.IsFactTable {
   417  		qc.adjustFilterToTimeFilter()
   418  		if qc.Error != nil {
   419  			return
   420  		}
   421  	}
   422  
   423  	// Dimensions.
   424  	rawDimensions := qc.Query.Dimensions
   425  	qc.Query.Dimensions = []Dimension{}
   426  	for _, dim := range rawDimensions {
   427  		dim.TimeBucketizer = strings.Trim(dim.TimeBucketizer, " ")
   428  		if dim.TimeBucketizer != "" {
   429  			// make sure time column is defined
   430  			if dim.Expr == "" {
   431  				qc.Error = utils.StackError(nil, "Failed to parse time bucketizer '%s': time column is empty", dim.TimeBucketizer)
   432  				return
   433  			}
   434  
   435  			timeColumnExpr, err := expr.ParseExpr(dim.Expr)
   436  			if err != nil {
   437  				qc.Error = utils.StackError(err, "Failed to parse timeColumn '%s'", dim.Expr)
   438  				return
   439  			}
   440  
   441  			dim.expr, err = qc.buildTimeDimensionExpr(dim.TimeBucketizer, timeColumnExpr)
   442  			if err != nil {
   443  				qc.Error = utils.StackError(err, "Failed to parse dimension: %s", dim.TimeBucketizer)
   444  				return
   445  			}
   446  			qc.Query.Dimensions = append(qc.Query.Dimensions, dim)
   447  		} else {
   448  			// dimension is defined as sqlExpression
   449  			dim.expr, err = expr.ParseExpr(dim.Expr)
   450  			if err != nil {
   451  				qc.Error = utils.StackError(err, "Failed to parse dimension: %s", dim.Expr)
   452  				return
   453  			}
   454  			if _, ok := dim.expr.(*expr.Wildcard); ok {
   455  				qc.Query.Dimensions = append(qc.Query.Dimensions, qc.getAllColumnsDimension()...)
   456  			} else {
   457  				qc.Query.Dimensions = append(qc.Query.Dimensions, dim)
   458  			}
   459  		}
   460  	}
   461  
   462  	// Measures.
   463  	for i, measure := range qc.Query.Measures {
   464  		measure.expr, err = expr.ParseExpr(measure.Expr)
   465  		if err != nil {
   466  			qc.Error = utils.StackError(err, "Failed to parse measure: %s", measure.Expr)
   467  			return
   468  		}
   469  		measure.filters = make([]expr.Expr, len(measure.Filters))
   470  		for j, filter := range measure.Filters {
   471  			measure.filters[j], err = expr.ParseExpr(filter)
   472  			if err != nil {
   473  				qc.Error = utils.StackError(err, "Failed to parse measure filter %s", filter)
   474  				return
   475  			}
   476  		}
   477  		qc.Query.Measures[i] = measure
   478  	}
   479  }
   480  
   481  func (qc *AQLQueryContext) processTimezone() {
   482  	if timezoneColumn, joinKey, success := parseTimezoneColumnString(qc.Query.Timezone); success {
   483  		timezoneTable := utils.GetConfig().Query.TimezoneTable.TableName
   484  		qc.timezoneTable.tableColumn = timezoneColumn
   485  		for _, join := range qc.Query.Joins {
   486  			if join.Table == timezoneTable {
   487  				qc.timezoneTable.tableAlias = join.Alias
   488  			}
   489  		}
   490  		// append timezone table to joins
   491  		if qc.timezoneTable.tableAlias == "" {
   492  			qc.timezoneTable.tableAlias = defaultTimezoneTableAlias
   493  			qc.Query.Joins = append(qc.Query.Joins, Join{
   494  				Table:      timezoneTable,
   495  				Alias:      defaultTimezoneTableAlias,
   496  				Conditions: []string{fmt.Sprintf("%s=%s.id", joinKey, defaultTimezoneTableAlias)},
   497  			})
   498  		}
   499  	} else {
   500  		loc, err := parseTimezone(qc.Query.Timezone)
   501  		if err != nil {
   502  			qc.Error = utils.StackError(err, "Failed to parse timezone: %s", qc.Query.Timezone)
   503  			return
   504  		}
   505  		qc.fixedTimezone = loc
   506  	}
   507  }
   508  
   509  func (qc *AQLQueryContext) readSchema(store memstore.MemStore) {
   510  	qc.TableScanners = make([]*TableScanner, 1+len(qc.Query.Joins))
   511  	qc.TableIDByAlias = make(map[string]int)
   512  	qc.TableSchemaByName = make(map[string]*memstore.TableSchema)
   513  
   514  	store.RLock()
   515  	defer store.RUnlock()
   516  
   517  	// Main table.
   518  	schema := store.GetSchemas()[qc.Query.Table]
   519  	if schema == nil {
   520  		qc.Error = utils.StackError(nil, "unknown main table %s", qc.Query.Table)
   521  		return
   522  	}
   523  	qc.TableSchemaByName[qc.Query.Table] = schema
   524  	schema.RLock()
   525  	qc.TableScanners[0] = &TableScanner{}
   526  	qc.TableScanners[0].Schema = schema
   527  	qc.TableScanners[0].Shards = []int{0}
   528  	qc.TableScanners[0].ColumnUsages = make(map[int]columnUsage)
   529  	if schema.Schema.IsFactTable {
   530  		// Archiving cutoff filter usage for fact table.
   531  		qc.TableScanners[0].ColumnUsages[0] = columnUsedByLiveBatches
   532  	}
   533  	qc.TableIDByAlias[qc.Query.Table] = 0
   534  
   535  	// Foreign tables.
   536  	for i, join := range qc.Query.Joins {
   537  		schema = store.GetSchemas()[join.Table]
   538  		if schema == nil {
   539  			qc.Error = utils.StackError(nil, "unknown join table %s", join.Table)
   540  			return
   541  		}
   542  
   543  		if qc.TableSchemaByName[join.Table] == nil {
   544  			qc.TableSchemaByName[join.Table] = schema
   545  			// Prevent double locking.
   546  			schema.RLock()
   547  		}
   548  
   549  		qc.TableScanners[1+i] = &TableScanner{}
   550  		qc.TableScanners[1+i].Schema = schema
   551  		qc.TableScanners[1+i].Shards = []int{0}
   552  		qc.TableScanners[1+i].ColumnUsages = make(map[int]columnUsage)
   553  		if schema.Schema.IsFactTable {
   554  			// Archiving cutoff filter usage for fact table.
   555  			qc.TableScanners[1+i].ColumnUsages[0] = columnUsedByLiveBatches
   556  		}
   557  
   558  		alias := join.Alias
   559  		if alias == "" {
   560  			alias = join.Table
   561  		}
   562  		_, exists := qc.TableIDByAlias[alias]
   563  		if exists {
   564  			qc.Error = utils.StackError(nil, "table alias %s is redefined", alias)
   565  			return
   566  		}
   567  		qc.TableIDByAlias[alias] = 1 + i
   568  	}
   569  }
   570  
   571  func (qc *AQLQueryContext) releaseSchema() {
   572  	for _, schema := range qc.TableSchemaByName {
   573  		schema.RUnlock()
   574  	}
   575  }
   576  
   577  // resolveColumn resolves the VarRef identifier against the schema,
   578  // and returns the matched tableID (query scoped) and columnID (schema scoped).
   579  func (qc *AQLQueryContext) resolveColumn(identifier string) (int, int, error) {
   580  	tableAlias := qc.Query.Table
   581  	column := identifier
   582  	segments := strings.SplitN(identifier, ".", 2)
   583  	if len(segments) == 2 {
   584  		tableAlias = segments[0]
   585  		column = segments[1]
   586  	}
   587  
   588  	tableID, exists := qc.TableIDByAlias[tableAlias]
   589  	if !exists {
   590  		return 0, 0, utils.StackError(nil, "unknown table alias %s", tableAlias)
   591  	}
   592  
   593  	columnID, exists := qc.TableScanners[tableID].Schema.ColumnIDs[column]
   594  	if !exists {
   595  		return 0, 0, utils.StackError(nil, "unknown column %s for table alias %s",
   596  			column, tableAlias)
   597  	}
   598  
   599  	return tableID, columnID, nil
   600  }
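
// Illustrative example (hypothetical names): with main table "trips" and a join aliased "c",
// resolveColumn("fare") resolves against the main table (tableID 0), while
// resolveColumn("c.name") resolves the "name" column of the table behind alias "c".
// An unknown alias or column name yields an error instead.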
   601  
   602  // cast returns an expression that casts the input to the desired type.
   603  // The returned expression AST will be used directly for VM instruction
   604  // generation for the desired type.
   605  func cast(e expr.Expr, t expr.Type) expr.Expr {
   606  	// Input type is already desired.
   607  	if e.Type() == t {
   608  		return e
   609  	}
   610  	// Type casting is only required if at least one side is float.
   611  	// We do not cast (or check for overflow) among boolean, signed and unsigned.
   612  	if e.Type() != expr.Float && t != expr.Float {
   613  		return e
   614  	}
   615  	// Data type for NumberLiteral can be changed directly.
   616  	l, _ := e.(*expr.NumberLiteral)
   617  	if l != nil {
   618  		l.ExprType = t
   619  		return l
   620  	}
   621  	// Use ParenExpr to represent a VM type cast.
   622  	return &expr.ParenExpr{Expr: e, ExprType: t}
   623  }
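
// castSketch is an illustrative, unused sketch of cast's behavior. The column name is
// hypothetical; the literal values only demonstrate the three code paths above.
func castSketch() {
	col := &expr.VarRef{Val: "fare_count", ExprType: expr.Unsigned}
	_ = cast(col, expr.Signed) // returned unchanged: no casting among bool/signed/unsigned
	_ = cast(col, expr.Float)  // wrapped as &expr.ParenExpr{Expr: col, ExprType: expr.Float}

	lit := &expr.NumberLiteral{Int: 3, Expr: "3", ExprType: expr.Unsigned}
	_ = cast(lit, expr.Float) // the literal's ExprType is changed to expr.Float in place
}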
   624  
   625  func blockNumericOpsForColumnOverFourBytes(token expr.Token, expressions ...expr.Expr) error {
   626  	if token == expr.UNARY_MINUS || token == expr.BITWISE_NOT ||
   627  		(token >= expr.ADD && token <= expr.BITWISE_LEFT_SHIFT) {
   628  		for _, expression := range expressions {
   629  			if varRef, isVarRef := expression.(*expr.VarRef); isVarRef && memCom.DataTypeBytes(varRef.DataType) > 4 {
   630  				return utils.StackError(nil, "numeric operations not supported for column over 4 bytes length, got %s", expression.String())
   631  			}
   632  		}
   633  	}
   634  	return nil
   635  }
   636  
   637  func isUUIDColumn(expression expr.Expr) bool {
   638  	if varRef, ok := expression.(*expr.VarRef); ok {
   639  		return varRef.DataType == memCom.UUID
   640  	}
   641  	return false
   642  }
   643  
   644  // Rewrite walks the expression AST and resolves data types bottom up.
   645  // In addition it also translates enum strings and rewrites their predicates.
   646  func (qc *AQLQueryContext) Rewrite(expression expr.Expr) expr.Expr {
   647  	switch e := expression.(type) {
   648  	case *expr.ParenExpr:
   649  		// Strip parenthesis from the input
   650  		return e.Expr
   651  	case *expr.VarRef:
   652  		tableID, columnID, err := qc.resolveColumn(e.Val)
   653  		if err != nil {
   654  			qc.Error = err
   655  			return expression
   656  		}
   657  		column := qc.TableScanners[tableID].Schema.Schema.Columns[columnID]
   658  		if column.Deleted {
   659  			qc.Error = utils.StackError(nil, "column %s of table %s has been deleted",
   660  				column.Name, qc.TableScanners[tableID].Schema.Schema.Name)
   661  			return expression
   662  		}
   663  		dataType := qc.TableScanners[tableID].Schema.ValueTypeByColumn[columnID]
   664  		e.ExprType = DataTypeToExprType[dataType]
   665  		e.TableID = tableID
   666  		e.ColumnID = columnID
   667  		dict := qc.TableScanners[tableID].Schema.EnumDicts[column.Name]
   668  		e.EnumDict = dict.Dict
   669  		e.EnumReverseDict = dict.ReverseDict
   670  		e.DataType = dataType
   671  		e.IsHLLColumn = column.HLLConfig.IsHLLColumn
   672  	case *expr.UnaryExpr:
   673  		if isUUIDColumn(e.Expr) && e.Op != expr.GET_HLL_VALUE {
   674  			qc.Error = utils.StackError(nil, "uuid column type only supports countdistincthll unary expression")
   675  			return expression
   676  		}
   677  
   678  		if err := blockNumericOpsForColumnOverFourBytes(e.Op, e.Expr); err != nil {
   679  			qc.Error = err
   680  			return expression
   681  		}
   682  
   683  		e.ExprType = e.Expr.Type()
   684  		switch e.Op {
   685  		case expr.EXCLAMATION, expr.NOT, expr.IS_FALSE:
   686  			e.ExprType = expr.Boolean
   687  			// Normalize the operator.
   688  			e.Op = expr.NOT
   689  			e.Expr = cast(e.Expr, expr.Boolean)
   690  			childExpr := e.Expr
   691  			callRef, isCallRef := childExpr.(*expr.Call)
   692  			if isCallRef && callRef.Name == geographyIntersectsCallName {
   693  				qc.Error = utils.StackError(nil, "Not %s condition is not allowed", geographyIntersectsCallName)
   694  				break
   695  			}
   696  		case expr.UNARY_MINUS:
   697  			// Upgrade to signed.
   698  			if e.ExprType < expr.Signed {
   699  				e.ExprType = expr.Signed
   700  			}
   701  		case expr.IS_NULL, expr.IS_NOT_NULL:
   702  			e.ExprType = expr.Boolean
   703  		case expr.IS_TRUE:
   704  			// Strip IS_TRUE if child is already boolean.
   705  			if e.Expr.Type() == expr.Boolean {
   706  				return e.Expr
   707  			}
   708  			// Rewrite to NOT(NOT(child)).
   709  			e.ExprType = expr.Boolean
   710  			e.Op = expr.NOT
   711  			e.Expr = cast(e.Expr, expr.Boolean)
   712  			return &expr.UnaryExpr{Expr: e, Op: expr.NOT, ExprType: expr.Boolean}
   713  		case expr.BITWISE_NOT:
   714  			// Cast child to unsigned.
   715  			e.ExprType = expr.Unsigned
   716  			e.Expr = cast(e.Expr, expr.Unsigned)
   717  		case expr.GET_MONTH_START, expr.GET_QUARTER_START, expr.GET_YEAR_START, expr.GET_WEEK_START:
   718  			// Cast child to unsigned.
   719  			e.ExprType = expr.Unsigned
   720  			e.Expr = cast(e.Expr, expr.Unsigned)
   721  		case expr.GET_DAY_OF_MONTH, expr.GET_DAY_OF_YEAR, expr.GET_MONTH_OF_YEAR, expr.GET_QUARTER_OF_YEAR:
   722  			// Cast child to unsigned.
   723  			e.ExprType = expr.Unsigned
   724  			e.Expr = cast(e.Expr, expr.Unsigned)
   725  		case expr.GET_HLL_VALUE:
   726  			e.ExprType = expr.Unsigned
   727  			e.Expr = cast(e.Expr, expr.Unsigned)
   728  		default:
   729  			qc.Error = utils.StackError(nil, "unsupported unary expression %s",
   730  				e.String())
   731  		}
   732  	case *expr.BinaryExpr:
   733  		if err := blockNumericOpsForColumnOverFourBytes(e.Op, e.LHS, e.RHS); err != nil {
   734  			qc.Error = err
   735  			return expression
   736  		}
   737  
   738  		if e.Op != expr.EQ && e.Op != expr.NEQ {
   739  			_, isRHSStr := e.RHS.(*expr.StringLiteral)
   740  			_, isLHSStr := e.LHS.(*expr.StringLiteral)
   741  			if isRHSStr || isLHSStr {
   742  				qc.Error = utils.StackError(nil, "string type only support EQ and NEQ operators")
   743  				return expression
   744  			}
   745  		}
   746  		highestType := e.LHS.Type()
   747  		if e.RHS.Type() > highestType {
   748  			highestType = e.RHS.Type()
   749  		}
   750  		switch e.Op {
   751  		case expr.ADD, expr.SUB:
   752  			// Upgrade and cast to highestType.
   753  			e.ExprType = highestType
   754  			if highestType == expr.Float {
   755  				e.LHS = cast(e.LHS, expr.Float)
   756  				e.RHS = cast(e.RHS, expr.Float)
   757  			} else if e.Op == expr.SUB {
   758  				// For lhs - rhs, upgrade to signed at least.
   759  				e.ExprType = expr.Signed
   760  			}
   761  		case expr.MUL, expr.MOD:
   762  			// Upgrade and cast to highestType.
   763  			e.ExprType = highestType
   764  			e.LHS = cast(e.LHS, highestType)
   765  			e.RHS = cast(e.RHS, highestType)
   766  		case expr.DIV:
   767  			// Upgrade and cast to float.
   768  			e.ExprType = expr.Float
   769  			e.LHS = cast(e.LHS, expr.Float)
   770  			e.RHS = cast(e.RHS, expr.Float)
   771  		case expr.BITWISE_AND, expr.BITWISE_OR, expr.BITWISE_XOR,
   772  			expr.BITWISE_LEFT_SHIFT, expr.BITWISE_RIGHT_SHIFT, expr.FLOOR, expr.CONVERT_TZ:
   773  			// Cast to unsigned.
   774  			e.ExprType = expr.Unsigned
   775  			e.LHS = cast(e.LHS, expr.Unsigned)
   776  			e.RHS = cast(e.RHS, expr.Unsigned)
   777  		case expr.AND, expr.OR:
   778  			// Cast to boolean.
   779  			e.ExprType = expr.Boolean
   780  			e.LHS = cast(e.LHS, expr.Boolean)
   781  			e.RHS = cast(e.RHS, expr.Boolean)
   782  		case expr.LT, expr.LTE, expr.GT, expr.GTE:
   783  			// Cast to boolean.
   784  			e.ExprType = expr.Boolean
   785  			e.LHS = cast(e.LHS, highestType)
   786  			e.RHS = cast(e.RHS, highestType)
   787  		case expr.NEQ, expr.EQ:
   788  			// swap lhs and rhs if rhs is VarRef but lhs is not.
   789  			if _, lhsVarRef := e.LHS.(*expr.VarRef); !lhsVarRef {
   790  				if _, rhsVarRef := e.RHS.(*expr.VarRef); rhsVarRef {
   791  					e.LHS, e.RHS = e.RHS, e.LHS
   792  				}
   793  			}
   794  
   795  			e.ExprType = expr.Boolean
   796  			// Match enum = 'case' and enum != 'case'.
   797  
   798  			lhs, _ := e.LHS.(*expr.VarRef)
   799  			// rhs is bool
   800  			rhsBool, _ := e.RHS.(*expr.BooleanLiteral)
   801  			if lhs != nil && rhsBool != nil {
   802  				if (e.Op == expr.EQ && rhsBool.Val) || (e.Op == expr.NEQ && !rhsBool.Val) {
   803  					return &expr.UnaryExpr{Expr: lhs, Op: expr.IS_TRUE, ExprType: expr.Boolean}
   804  				}
   805  				return &expr.UnaryExpr{Expr: lhs, Op: expr.NOT, ExprType: expr.Boolean}
   806  			}
   807  
   808  			// rhs is string enum
   809  			rhs, _ := e.RHS.(*expr.StringLiteral)
   810  			if lhs != nil && rhs != nil && lhs.EnumDict != nil {
   811  				// Enum dictionary translation
   812  				value, exists := lhs.EnumDict[rhs.Val]
   813  				if !exists {
   814  					// Combination of nullable data with not/and/or operators on top makes
   815  					// short circuiting hard.
   816  					// To play it safe we match against an invalid value.
   817  					value = -1
   818  				}
   819  				e.RHS = &expr.NumberLiteral{Int: value, ExprType: expr.Unsigned}
   820  			} else {
   821  				// Cast to highestType.
   822  				e.LHS = cast(e.LHS, highestType)
   823  				e.RHS = cast(e.RHS, highestType)
   824  			}
   825  
   826  			if rhs != nil && lhs.DataType == memCom.GeoPoint {
   827  				if val, err := memCom.GeoPointFromString(rhs.Val); err != nil {
   828  					qc.Error = err
   829  				} else {
   830  					e.RHS = &expr.GeopointLiteral{
   831  						Val: val,
   832  					}
   833  				}
   834  			}
   835  		case expr.IN:
   836  			return qc.expandINop(e)
   837  		case expr.NOT_IN:
   838  			return &expr.UnaryExpr{
   839  				Op:   expr.NOT,
   840  				Expr: qc.expandINop(e),
   841  			}
   842  		default:
   843  			qc.Error = utils.StackError(nil, "unsupported binary expression %s",
   844  				e.String())
   845  		}
   846  	case *expr.Call:
   847  		e.Name = strings.ToLower(e.Name)
   848  		switch e.Name {
   849  		case convertTzCallName:
   850  			if len(e.Args) != 3 {
   851  				qc.Error = utils.StackError(
   852  					nil, "convert_tz must have 3 arguments",
   853  				)
   854  				break
   855  			}
   856  			fromTzStringExpr, isStrLiteral := e.Args[1].(*expr.StringLiteral)
   857  			if !isStrLiteral {
   858  				qc.Error = utils.StackError(nil, "2nd argument of convert_tz must be a string")
   859  				break
   860  			}
   861  			toTzStringExpr, isStrLiteral := e.Args[2].(*expr.StringLiteral)
   862  			if !isStrLiteral {
   863  				qc.Error = utils.StackError(nil, "3rd argument of convert_tz must be a string")
   864  				break
   865  			}
   866  			fromTz, err := parseTimezone(fromTzStringExpr.Val)
   867  			if err != nil {
   868  				qc.Error = utils.StackError(err, "failed to rewrite convert_tz")
   869  				break
   870  			}
   871  			toTz, err := parseTimezone(toTzStringExpr.Val)
   872  			if err != nil {
   873  				qc.Error = utils.StackError(err, "failed to rewrite convert_tz")
   874  				break
   875  			}
   876  			_, fromOffsetInSeconds := utils.Now().In(fromTz).Zone()
   877  			_, toOffsetInSeconds := utils.Now().In(toTz).Zone()
   878  			offsetInSeconds := toOffsetInSeconds - fromOffsetInSeconds
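			// Example: convert_tz(ts, 'UTC', 'America/New_York') during standard time yields
			// offsetInSeconds = -18000, so the call is rewritten to ts + (-18000). The offset
			// is computed once from the current time, so DST transitions inside the queried
			// range are not reflected.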
   879  			return &expr.BinaryExpr{
   880  				Op:  expr.ADD,
   881  				LHS: e.Args[0],
   882  				RHS: &expr.NumberLiteral{
   883  					Int:      offsetInSeconds,
   884  					Expr:     strconv.Itoa(offsetInSeconds),
   885  					ExprType: expr.Unsigned,
   886  				},
   887  				ExprType: expr.Unsigned,
   888  			}
   889  		case countCallName:
   890  			e.ExprType = expr.Unsigned
   891  		case dayOfWeekCallName:
   892  			// dayofweek from ts: (ts / secondsInDay + 4) % 7 + 1
   893  			// ref: https://dev.mysql.com/doc/refman/5.5/en/date-and-time-functions.html#function_dayofweek
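			// Worked example: ts = 1546300800 (2019-01-01 00:00:00 UTC, a Tuesday) gives
			// 1546300800/86400 = 17897, (17897+4)%7 = 2, 2+1 = 3, matching MySQL's
			// numbering where 1 = Sunday and 3 = Tuesday.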
   894  			if len(e.Args) != 1 {
   895  				qc.Error = utils.StackError(nil, "dayofweek takes exactly 1 argument")
   896  				break
   897  			}
   898  			tsExpr := e.Args[0]
   899  			return &expr.BinaryExpr{
   900  				Op:       expr.ADD,
   901  				ExprType: expr.Unsigned,
   902  				RHS: &expr.NumberLiteral{
   903  					Int:      1,
   904  					Expr:     "1",
   905  					ExprType: expr.Unsigned,
   906  				},
   907  				LHS: &expr.BinaryExpr{
   908  					Op:       expr.MOD,
   909  					ExprType: expr.Unsigned,
   910  					RHS: &expr.NumberLiteral{
   911  						Int:      common.DaysPerWeek,
   912  						Expr:     strconv.Itoa(common.DaysPerWeek),
   913  						ExprType: expr.Unsigned,
   914  					},
   915  					LHS: &expr.BinaryExpr{
   916  						Op:       expr.ADD,
   917  						ExprType: expr.Unsigned,
   918  						RHS: &expr.NumberLiteral{
   919  							// offset to map epoch day 0 (a Thursday) onto MySQL's 1 = Sunday numbering
   920  							Int:      common.WeekdayOffset,
   921  							Expr:     strconv.Itoa(common.WeekdayOffset),
   922  							ExprType: expr.Unsigned,
   923  						},
   924  						LHS: &expr.BinaryExpr{
   925  							Op:       expr.DIV,
   926  							ExprType: expr.Unsigned,
   927  							RHS: &expr.NumberLiteral{
   928  								Int:      common.SecondsPerDay,
   929  								Expr:     strconv.Itoa(common.SecondsPerDay),
   930  								ExprType: expr.Unsigned,
   931  							},
   932  							LHS: tsExpr,
   933  						},
   934  					},
   935  				},
   936  			}
   937  			// no-op, this will be overwritten
   938  		case fromUnixTimeCallName:
   939  			// for now, only the following format is allowed for backward compatibility
   940  			// from_unixtime(time_col / 1000)
   941  			timeColumnDivideErrMsg := "from_unixtime must be time column / 1000"
   942  			timeColDivide, isBinary := e.Args[0].(*expr.BinaryExpr)
   943  			if !isBinary || timeColDivide.Op != expr.DIV {
   944  				qc.Error = utils.StackError(nil, timeColumnDivideErrMsg)
   945  				break
   946  			}
   947  			divisor, isLiteral := timeColDivide.RHS.(*expr.NumberLiteral)
   948  			if !isLiteral || divisor.Int != 1000 {
   949  				qc.Error = utils.StackError(nil, timeColumnDivideErrMsg)
   950  				break
   951  			}
   952  			if par, isParen := timeColDivide.LHS.(*expr.ParenExpr); isParen {
   953  				timeColDivide.LHS = par.Expr
   954  			}
   955  			timeColExpr, isVarRef := timeColDivide.LHS.(*expr.VarRef)
   956  			if !isVarRef {
   957  				qc.Error = utils.StackError(nil, timeColumnDivideErrMsg)
   958  				break
   959  			}
   960  			return timeColExpr
   961  		case hourCallName:
   962  			if len(e.Args) != 1 {
   963  				qc.Error = utils.StackError(nil, "hour takes exactly 1 argument")
   964  				break
   965  			}
   966  			// hour(ts) = (ts % secondsInDay) / secondsInHour
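			// Worked example: ts = 1546311600 (2019-01-01 03:00:00 UTC) gives
			// 1546311600 % 86400 = 10800 and 10800 / 3600 = 3.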
   967  			return &expr.BinaryExpr{
   968  				Op:       expr.DIV,
   969  				ExprType: expr.Unsigned,
   970  				LHS: &expr.BinaryExpr{
   971  					Op:  expr.MOD,
   972  					LHS: e.Args[0],
   973  					RHS: &expr.NumberLiteral{
   974  						Expr:     strconv.Itoa(common.SecondsPerDay),
   975  						Int:      common.SecondsPerDay,
   976  						ExprType: expr.Unsigned,
   977  					},
   978  				},
   979  				RHS: &expr.NumberLiteral{
   980  					Expr:     strconv.Itoa(common.SecondsPerHour),
   981  					Int:      common.SecondsPerHour,
   982  					ExprType: expr.Unsigned,
   983  				},
   984  			}
   985  			// list of literals, no need to cast it for now.
   986  		case listCallName:
   987  		case geographyIntersectsCallName:
   988  			if len(e.Args) != 2 {
   989  				qc.Error = utils.StackError(
   990  				nil, "expect 2 arguments for %s, but got %s", e.Name, e.String())
   991  				break
   992  			}
   993  
   994  			lhsRef, isVarRef := e.Args[0].(*expr.VarRef)
   995  			if !isVarRef || (lhsRef.DataType != memCom.GeoShape && lhsRef.DataType != memCom.GeoPoint) {
   996  				qc.Error = utils.StackError(
   997  					nil, "expect argument to be a valid geo shape or geo point column for %s, but got %s of type %s",
   998  					e.Name, e.Args[0].String(), memCom.DataTypeName[lhsRef.DataType])
   999  				break
  1000  			}
  1001  
  1002  			lhsGeoPoint := lhsRef.DataType == memCom.GeoPoint
  1003  
  1004  			rhsRef, isVarRef := e.Args[1].(*expr.VarRef)
  1005  			if !isVarRef || (rhsRef.DataType != memCom.GeoShape && rhsRef.DataType != memCom.GeoPoint) {
  1006  				qc.Error = utils.StackError(
  1007  					nil, "expect argument to be a valid geo shape or geo point column for %s, but got %s of type %s",
  1008  					e.Name, e.Args[1].String(), memCom.DataTypeName[rhsRef.DataType])
  1009  				break
  1010  			}
  1011  
  1012  			rhsGeoPoint := rhsRef.DataType == memCom.GeoPoint
  1013  
  1014  			if lhsGeoPoint == rhsGeoPoint {
  1015  				qc.Error = utils.StackError(
  1016  					nil, "expect exactly one geo shape column and one geo point column for %s, got %s",
  1017  					e.Name, e.String())
  1018  				break
  1019  			}
  1020  
  1021  			// Switch geo point so that lhs is geo shape and rhs is geo point
  1022  			if lhsGeoPoint {
  1023  				e.Args[0], e.Args[1] = e.Args[1], e.Args[0]
  1024  			}
  1025  
  1026  			e.ExprType = expr.Boolean
  1027  		case hexCallName:
  1028  			if len(e.Args) != 1 {
  1029  				qc.Error = utils.StackError(
  1030  					nil, "expect 1 argument for %s, but got %s", e.Name, e.String())
  1031  				break
  1032  			}
  1033  			colRef, isVarRef := e.Args[0].(*expr.VarRef)
  1034  			if !isVarRef || colRef.DataType != memCom.UUID {
  1035  				qc.Error = utils.StackError(
  1036  					nil, "expect 1 argument to be a valid uuid column for %s, but got %s of type %s",
  1037  					e.Name, e.Args[0].String(), memCom.DataTypeName[colRef.DataType])
  1038  				break
  1039  			}
  1040  			e.ExprType = e.Args[0].Type()
  1041  		case countDistinctHllCallName:
  1042  			if len(e.Args) != 1 {
  1043  				qc.Error = utils.StackError(
  1044  					nil, "expect 1 argument for %s, but got %s", e.Name, e.String())
  1045  				break
  1046  			}
  1047  			colRef, isVarRef := e.Args[0].(*expr.VarRef)
  1048  			if !isVarRef {
  1049  				qc.Error = utils.StackError(
  1050  					nil, "expect 1 argument to be a column for %s", e.Name)
  1051  				break
  1052  			}
  1053  
  1054  			e.Name = hllCallName
  1055  			// 1. noop when column itself is hll column
  1056  			// 2. compute hll on the fly when column is not hll column
  1057  			if !colRef.IsHLLColumn {
  1058  				e.Args[0] = &expr.UnaryExpr{
  1059  					Op:       expr.GET_HLL_VALUE,
  1060  					Expr:     colRef,
  1061  					ExprType: expr.Unsigned,
  1062  				}
  1063  			}
  1064  			e.ExprType = expr.Unsigned
  1065  		case hllCallName:
  1066  			if len(e.Args) != 1 {
  1067  				qc.Error = utils.StackError(
  1068  					nil, "expect 1 argument for %s, but got %s", e.Name, e.String())
  1069  				break
  1070  			}
  1071  			colRef, isVarRef := e.Args[0].(*expr.VarRef)
  1072  			if !isVarRef || colRef.DataType != memCom.Uint32 {
  1073  				qc.Error = utils.StackError(
  1074  					nil, "expect 1 argument to be a valid hll column for %s, but got %s of type %s",
  1075  					e.Name, e.Args[0].String(), memCom.DataTypeName[colRef.DataType])
  1076  				break
  1077  			}
  1078  			e.ExprType = e.Args[0].Type()
  1079  		case sumCallName, minCallName, maxCallName, avgCallName:
  1080  			if len(e.Args) != 1 {
  1081  				qc.Error = utils.StackError(
  1082  					nil, "expect 1 argument for %s, but got %s", e.Name, e.String())
  1083  				break
  1084  			}
  1085  			// For avg, the expression type should always be float.
  1086  			if e.Name == avgCallName {
  1087  				e.Args[0] = cast(e.Args[0], expr.Float)
  1088  			}
  1089  			e.ExprType = e.Args[0].Type()
  1090  		default:
  1091  			qc.Error = utils.StackError(nil, "unknown function %s", e.Name)
  1092  		}
  1093  	case *expr.Case:
  1094  		highestType := e.Else.Type()
  1095  		for _, whenThen := range e.WhenThens {
  1096  			if whenThen.Then.Type() > highestType {
  1097  				highestType = whenThen.Then.Type()
  1098  			}
  1099  		}
  1100  		// Cast else and thens to highestType, cast whens to boolean.
  1101  		e.Else = cast(e.Else, highestType)
  1102  		for i, whenThen := range e.WhenThens {
  1103  			whenThen.When = cast(whenThen.When, expr.Boolean)
  1104  			whenThen.Then = cast(whenThen.Then, highestType)
  1105  			e.WhenThens[i] = whenThen
  1106  		}
  1107  		e.ExprType = highestType
  1108  	}
  1109  	return expression
  1110  }
  1111  
  1112  // normalizeAndFilters extracts top-level AND operators and flattens them out into the
  1113  // filter slice.
  1114  func normalizeAndFilters(filters []expr.Expr) []expr.Expr {
  1115  	i := 0
  1116  	for i < len(filters) {
  1117  		f, _ := filters[i].(*expr.BinaryExpr)
  1118  		if f != nil && f.Op == expr.AND {
  1119  			filters[i] = f.LHS
  1120  			filters = append(filters, f.RHS)
  1121  		} else {
  1122  			i++
  1123  		}
  1124  	}
  1125  	return filters
  1126  }
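
// Illustrative example: a rewritten filter `a > 1 AND (b = 2 AND c)` is flattened into the
// three top-level filters `a > 1`, `b = 2` and `c`; OR expressions and anything nested below
// them are left untouched.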
  1127  
  1128  // resolveTypes walks all expression ASTs and resolves data types bottom up.
  1129  // In addition it also translates enum strings and rewrites their predicates.
  1130  func (qc *AQLQueryContext) resolveTypes() {
  1131  	// Join conditions.
  1132  	for i, join := range qc.Query.Joins {
  1133  		for j, cond := range join.conditions {
  1134  			join.conditions[j] = expr.Rewrite(qc, cond)
  1135  			if qc.Error != nil {
  1136  				return
  1137  			}
  1138  		}
  1139  		qc.Query.Joins[i] = join
  1140  	}
  1141  
  1142  	// Dimensions.
  1143  	for i, dim := range qc.Query.Dimensions {
  1144  		dim.expr = expr.Rewrite(qc, dim.expr)
  1145  		if qc.Error != nil {
  1146  			return
  1147  		}
  1148  		qc.Query.Dimensions[i] = dim
  1149  	}
  1150  
  1151  	// Measures.
  1152  	for i, measure := range qc.Query.Measures {
  1153  		measure.expr = expr.Rewrite(qc, measure.expr)
  1154  		if qc.Error != nil {
  1155  			return
  1156  		}
  1157  		for j, filter := range measure.filters {
  1158  			measure.filters[j] = expr.Rewrite(qc, filter)
  1159  			if qc.Error != nil {
  1160  				return
  1161  			}
  1162  		}
  1163  		measure.filters = normalizeAndFilters(measure.filters)
  1164  		qc.Query.Measures[i] = measure
  1165  	}
  1166  
  1167  	// Filters.
  1168  	for i, filter := range qc.Query.filters {
  1169  		qc.Query.filters[i] = expr.Rewrite(qc, filter)
  1170  		if qc.Error != nil {
  1171  			return
  1172  		}
  1173  	}
  1174  	qc.Query.filters = normalizeAndFilters(qc.Query.filters)
  1175  }
  1176  
  1177  // extractFilter processes the specified query level filter and matches it
  1178  // against the following formats:
  1179  //   column = value
  1180  //   column > value
  1181  //   column >= value
  1182  //   column < value
  1183  //   column <= value
  1184  //   column
  1185  //   not column
  1186  // It returns the numeric constant value associated with the filter in a uint32
  1187  // space (for all types including float32).
  1188  // In addition it also returns the boundaryType for >, >=, <, <= operators.
  1189  // Note that since the candidate filters have already been preselected against
  1190  // certain criteria, this function does not perform full format validation.
  1191  func (qc *AQLQueryContext) extractFilter(filterID int) (
  1192  	value uint32, boundary boundaryType, success bool) {
  1193  	switch f := qc.Query.filters[filterID].(type) {
  1194  	case *expr.VarRef:
  1195  		// Match `column` format
  1196  		value = 1
  1197  		success = true
  1198  	case *expr.UnaryExpr:
  1199  		// Match `not column` format
  1200  		success = true
  1201  	case *expr.BinaryExpr:
  1202  		// Match `column op value` format
  1203  		rhs, _ := f.RHS.(*expr.NumberLiteral)
  1204  		if rhs == nil {
  1205  			return
  1206  		}
  1207  		switch rhs.ExprType {
  1208  		case expr.Float:
  1209  			*(*float32)(unsafe.Pointer(&value)) = float32(rhs.Val)
  1210  		case expr.Signed:
  1211  			*(*int32)(unsafe.Pointer(&value)) = int32(rhs.Int)
  1212  		case expr.Unsigned:
  1213  			value = uint32(rhs.Int)
  1214  		default:
  1215  			return
  1216  		}
  1217  		switch f.Op {
  1218  		case expr.GTE, expr.LTE:
  1219  			boundary = inclusiveBoundary
  1220  		case expr.GT, expr.LT:
  1221  			boundary = exclusiveBoundary
  1222  		}
  1223  		success = true
  1224  	}
  1225  	return
  1226  }
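
// Illustrative example (hypothetical columns): the filter `status = 3` yields value 3;
// `fare >= 1.5` stores the float32 bit pattern of 1.5 into the uint32 value with an
// inclusive boundary; a bare boolean column filter `is_first` yields value 1.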
  1227  
  1228  // matchPrefilters identifies all prefilters from query level filters,
  1229  // stores them in AQLQueryContext.Prefilters,
  1230  // and stores their values in TableScanner for future prefilter vector slicing.
  1231  func (qc *AQLQueryContext) matchPrefilters() {
  1232  	// Format of candidateFilters:
  1233  	// [tableID]map[columnID]{filterIDs for lower bound, upper bound, equality}
  1234  	// tableID is query scoped, while columnID is schema scoped.
  1235  	candidateFilters := make([]map[int][3]int, len(qc.TableScanners))
  1236  	for tableID := range qc.TableScanners {
  1237  		candidateFilters[tableID] = make(map[int][3]int)
  1238  	}
  1239  
  1240  	// Index candidate filters by table/column
  1241  	for filterID, filter := range qc.Query.filters {
  1242  		f, _ := filter.(*expr.BinaryExpr)
  1243  		if f == nil {
  1244  			switch f := filter.(type) {
  1245  			case *expr.VarRef:
  1246  				// Match `column` format
  1247  				if f.ExprType == expr.Boolean {
  1248  					candidateFilters[f.TableID][f.ColumnID] = [3]int{-1, -1, filterID}
  1249  				}
  1250  			case *expr.UnaryExpr:
  1251  				// Match `not column` format
  1252  				if f.Op == expr.NOT {
  1253  					f, _ := f.Expr.(*expr.VarRef)
  1254  					if f != nil && f.ExprType == expr.Boolean {
  1255  						candidateFilters[f.TableID][f.ColumnID] = [3]int{-1, -1, filterID}
  1256  					}
  1257  				}
  1258  				// TODO: IS_NULL can be matched as an equality filter.
  1259  				// TODO: IS_NOT_NULL can be matched as the final range filter.
  1260  			}
  1261  			continue
  1262  		}
  1263  
  1264  		// Match `column op value` format, where op can be =, <, <=, >, >=.
  1265  		if f.Op < expr.EQ || f.Op > expr.GTE {
  1266  			continue
  1267  		}
  1268  
  1269  		lhs, _ := f.LHS.(*expr.VarRef)
  1270  		if lhs == nil {
  1271  			continue
  1272  		}
  1273  
  1274  		columnToFilterMap := candidateFilters[lhs.TableID]
  1275  		filters, exists := columnToFilterMap[lhs.ColumnID]
  1276  		if !exists {
  1277  			filters = [3]int{-1, -1, -1}
  1278  		}
  1279  		switch f.Op {
  1280  		case expr.GT, expr.GTE:
  1281  			filters[0] = filterID
  1282  		case expr.LT, expr.LTE:
  1283  			filters[1] = filterID
  1284  		case expr.EQ:
  1285  			filters[2] = filterID
  1286  		}
  1287  		columnToFilterMap[lhs.ColumnID] = filters
  1288  	}
  1289  
  1290  	// Prefilter matching
  1291  	for tableID, scanner := range qc.TableScanners {
  1292  		// Match in archiving sort column order
  1293  		for _, columnID := range scanner.Schema.Schema.ArchivingSortColumns {
  1294  			filterIndex, exists := candidateFilters[tableID][columnID]
  1295  			if !exists {
  1296  				// Stop on first missing column
  1297  				break
  1298  			}
  1299  			// Equality
  1300  			if filterIndex[2] >= 0 {
  1301  				value, _, success := qc.extractFilter(filterIndex[2])
  1302  				if !success {
  1303  					// Stop if the value fails to be extracted
  1304  					break
  1305  				}
  1306  				scanner.EqualityPrefilterValues = append(
  1307  					scanner.EqualityPrefilterValues, value)
  1308  				qc.Prefilters = append(qc.Prefilters, filterIndex[2])
  1309  				scanner.ColumnUsages[columnID] |= columnUsedByPrefilter
  1310  				// Continue matching the next column
  1311  				continue
  1312  			}
  1313  			// Lower bound
  1314  			if filterIndex[0] >= 0 {
  1315  				value, boundaryType, success := qc.extractFilter(filterIndex[0])
  1316  				if success {
  1317  					scanner.RangePrefilterValues[0] = value
  1318  					scanner.RangePrefilterBoundaries[0] = boundaryType
  1319  					qc.Prefilters = append(qc.Prefilters, filterIndex[0])
  1320  					scanner.ColumnUsages[columnID] |= columnUsedByPrefilter
  1321  				}
  1322  			}
  1323  			// Upper bound
  1324  			if filterIndex[1] >= 0 {
  1325  				value, boundaryType, success := qc.extractFilter(filterIndex[1])
  1326  				if success {
  1327  					scanner.RangePrefilterValues[1] = value
  1328  					scanner.RangePrefilterBoundaries[1] = boundaryType
  1329  					qc.Prefilters = append(qc.Prefilters, filterIndex[1])
  1330  					scanner.ColumnUsages[columnID] |= columnUsedByPrefilter
  1331  				}
  1332  			}
  1333  			// Stop after the first range filter
  1334  			break
  1335  		}
  1336  	}
  1337  
  1338  	sort.Ints(qc.Prefilters)
  1339  }
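
// Illustrative example (hypothetical schema): if a fact table's archiving sort columns are
// [city_id, status] and the query filters include `city_id = 12` and `status >= 2`, then
// `city_id = 12` is matched as an equality prefilter, `status >= 2` as a lower-bound range
// prefilter, and matching stops there; without the city_id filter no prefilter would match.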
  1340  
  1341  // columnUsageCollector is the visitor used to traverse an AST, find VarRef columns,
  1342  // and set the usage bits in tableScanners. The VarRef nodes must have already
  1343  // been resolved and annotated with TableID and ColumnID.
  1344  type columnUsageCollector struct {
  1345  	tableScanners []*TableScanner
  1346  	usages        columnUsage
  1347  }
  1348  
  1349  func (c columnUsageCollector) Visit(expression expr.Expr) expr.Visitor {
  1350  	switch e := expression.(type) {
  1351  	case *expr.VarRef:
  1352  		c.tableScanners[e.TableID].ColumnUsages[e.ColumnID] |= c.usages
  1353  	}
  1354  	return c
  1355  }
  1356  
  1357  // foreignTableColumnDetector detects foreign table columns involved in AST
  1358  type foreignTableColumnDetector struct {
  1359  	hasForeignTableColumn bool
  1360  }
  1361  
  1362  func (c *foreignTableColumnDetector) Visit(expression expr.Expr) expr.Visitor {
  1363  	switch e := expression.(type) {
  1364  	case *expr.VarRef:
  1365  		c.hasForeignTableColumn = c.hasForeignTableColumn || (e.TableID > 0)
  1366  	}
  1367  	return c
  1368  }
  1369  
  1370  // processFilters processes all filters and categorizes them into common filters,
  1371  // prefilters, and time filters. It also collects column usages from the filters.
  1372  func (qc *AQLQueryContext) processFilters() {
  1373  	// OOPK engine only supports one measure per query.
  1374  	if len(qc.Query.Measures) != 1 {
  1375  		qc.Error = utils.StackError(nil, "expect one measure per query, but got %d",
  1376  			len(qc.Query.Measures))
  1377  		return
  1378  	}
  1379  
  1380  	// Categorize common filters and prefilters based on matched prefilters.
  1381  	commonFilters := qc.Query.Measures[0].filters
  1382  	prefilters := qc.Prefilters
  1383  	for index, filter := range qc.Query.filters {
  1384  		if len(prefilters) == 0 || prefilters[0] > index {
  1385  			// common filters
  1386  			commonFilters = append(commonFilters, filter)
  1387  		} else {
  1388  			qc.OOPK.Prefilters = append(qc.OOPK.Prefilters, filter)
  1389  			prefilters = prefilters[1:]
  1390  		}
  1391  	}
  1392  
  1393  	var geoFilterFound bool
  1394  	for _, filter := range commonFilters {
  1395  		foreignTableColumnDetector := foreignTableColumnDetector{}
  1396  		expr.Walk(&foreignTableColumnDetector, filter)
  1397  		if foreignTableColumnDetector.hasForeignTableColumn {
  1398  			var isGeoFilter bool
  1399  			if qc.OOPK.geoIntersection != nil {
  1400  				geoTableID := qc.OOPK.geoIntersection.shapeTableID
  1401  				joinSchema := qc.TableSchemaByName[qc.Query.Joins[geoTableID-1].Table]
  1402  				isGeoFilter = qc.matchGeoFilter(filter, geoTableID, joinSchema, geoFilterFound)
  1403  				if qc.Error != nil {
  1404  					return
  1405  				}
  1406  			}
  1407  
  1408  			if !isGeoFilter {
  1409  				qc.OOPK.ForeignTableCommonFilters = append(qc.OOPK.ForeignTableCommonFilters, filter)
  1410  			} else {
  1411  				geoFilterFound = true
  1412  			}
  1413  		} else {
  1414  			qc.OOPK.MainTableCommonFilters = append(qc.OOPK.MainTableCommonFilters, filter)
  1415  		}
  1416  	}
  1417  
  1418  	if qc.OOPK.geoIntersection != nil && !geoFilterFound {
  1419  		qc.Error = utils.StackError(nil, "Exactly one geo filter is needed if geo intersection"+
  1420  			" is used during join")
  1421  		return
  1422  	}
  1423  
  1424  	// Process time filter.
  1425  	qc.processTimeFilter()
  1426  	if qc.Error != nil {
  1427  		return
  1428  	}
  1429  
  1430  	// Collect column usages from the filters.
  1431  	for _, f := range qc.OOPK.MainTableCommonFilters {
  1432  		expr.Walk(columnUsageCollector{
  1433  			tableScanners: qc.TableScanners,
  1434  			usages:        columnUsedByAllBatches,
  1435  		}, f)
  1436  	}
  1437  
  1438  	for _, f := range qc.OOPK.ForeignTableCommonFilters {
  1439  		expr.Walk(columnUsageCollector{
  1440  			tableScanners: qc.TableScanners,
  1441  			usages:        columnUsedByAllBatches,
  1442  		}, f)
  1443  	}
  1444  
  1445  	for _, f := range qc.OOPK.Prefilters {
  1446  		expr.Walk(columnUsageCollector{
  1447  			tableScanners: qc.TableScanners,
  1448  			usages:        columnUsedByLiveBatches,
  1449  		}, f)
  1450  	}
  1451  
  1452  	if qc.OOPK.TimeFilters[0] != nil {
  1453  		expr.Walk(columnUsageCollector{
  1454  			tableScanners: qc.TableScanners,
  1455  			usages:        columnUsedByFirstArchiveBatch | columnUsedByLiveBatches,
  1456  		}, qc.OOPK.TimeFilters[0])
  1457  	}
  1458  
  1459  	if qc.OOPK.TimeFilters[1] != nil {
  1460  		expr.Walk(columnUsageCollector{
  1461  			tableScanners: qc.TableScanners,
  1462  			usages:        columnUsedByLastArchiveBatch | columnUsedByLiveBatches,
  1463  		}, qc.OOPK.TimeFilters[1])
  1464  	}
  1465  }
  1466  
  1467  func getStrFromNumericalOrStrLiteral(e expr.Expr) (string, error) {
  1468  	var str string
  1469  	if strExpr, ok := e.(*expr.StringLiteral); ok {
  1470  		str = strExpr.Val
  1471  	} else {
  1472  		if numExpr, ok := e.(*expr.NumberLiteral); ok {
  1473  			str = numExpr.String()
  1474  		} else {
  1475  			return str, utils.StackError(nil,
  1476  				"Unable to extract string from %s", e.String())
  1477  		}
  1478  	}
  1479  	return str, nil
  1480  }
  1481  
  1482  // matchGeoFilter tries to match the filter as a geo filter and prepares shapeUUIDs for the AQL processor. It returns
  1483  // whether the filterExpr is a geo filter.
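        // An illustrative (hypothetical) geo filter, after IN expansion by expandINop, looks like:
        //   geo_table.geofence_uuid = '4c32...' OR geo_table.geofence_uuid = '5d43...'
        // where geofence_uuid stands for the primary key column of the joined geo table.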
  1484  func (qc *AQLQueryContext) matchGeoFilter(filterExpr expr.Expr, joinTableID int,
  1485  	joinSchema *memstore.TableSchema, geoFilterFound bool) (geoFilterFoundInCurrentExpr bool) {
  1486  	var shapeUUIDs []string
  1487  	invalidOpsFound, geoFilterFoundInCurrentExpr := qc.matchGeoFilterHelper(filterExpr, joinTableID, joinSchema, &shapeUUIDs)
  1488  	if qc.Error != nil {
  1489  		return
  1490  	}
  1491  	if geoFilterFoundInCurrentExpr && invalidOpsFound {
  1492  		qc.Error = utils.StackError(nil, "Only EQ and IN allowed for geo filters")
  1493  		return
  1494  	}
  1495  	if geoFilterFoundInCurrentExpr && geoFilterFound {
  1496  		qc.Error = utils.StackError(nil, "Only one geo filter is allowed")
  1497  		return
  1498  	}
  1499  
  1500  	if len(shapeUUIDs) > geoShapeLimit {
  1501  		qc.Error = utils.StackError(nil, "At most %d geo shapes supported, got %d", geoShapeLimit, len(shapeUUIDs))
  1502  		return
  1503  	}
  1504  
  1505  	if geoFilterFoundInCurrentExpr {
  1506  		qc.OOPK.geoIntersection.shapeUUIDs = shapeUUIDs
  1507  	}
  1508  	return
  1509  }
  1510  
  1511  func (qc *AQLQueryContext) matchGeoFilterHelper(filterExpr expr.Expr, joinTableID int,
  1512  	joinSchema *memstore.TableSchema, shapeUUIDs *[]string) (inValidOpFound, foundGeoFilter bool) {
  1513  	switch e := filterExpr.(type) {
  1514  	case *expr.BinaryExpr:
  1515  		if e.Op == expr.OR {
  1516  			inValidOpFoundL, foundGeoFilterL := qc.matchGeoFilterHelper(e.LHS, joinTableID, joinSchema, shapeUUIDs)
  1517  			inValidOpFoundR, foundGeoFilterR := qc.matchGeoFilterHelper(e.RHS, joinTableID, joinSchema, shapeUUIDs)
  1518  			inValidOpFound = inValidOpFoundL || inValidOpFoundR
  1519  			foundGeoFilter = foundGeoFilterL || foundGeoFilterR
  1520  		} else if e.Op == expr.EQ {
  1521  			columnExpr := e.LHS
  1522  
  1523  			if paren, ok := columnExpr.(*expr.ParenExpr); ok {
  1524  				columnExpr = paren.Expr
  1525  			}
  1526  			if column, ok := columnExpr.(*expr.VarRef); ok && column.TableID == joinTableID {
  1527  				// geo filter's column must be primary key.
  1528  				if joinSchema.Schema.PrimaryKeyColumns[0] != column.ColumnID {
  1529  					qc.Error = utils.StackError(nil, "Geo filter column is not the primary key")
  1530  					return
  1531  				}
  1532  				uuidStr, err := getStrFromNumericalOrStrLiteral(e.RHS)
  1533  				if err != nil {
  1534  					qc.Error = utils.StackError(err,
  1535  						"Unable to extract uuid from expression %s", e.RHS.String())
  1536  					return
  1537  				}
  1538  				normalizedUUID, err := utils.NormalizeUUIDString(uuidStr)
  1539  				if err != nil {
  1540  					qc.Error = err
  1541  					return
  1542  				}
  1543  				foundGeoFilter = true
  1544  				*shapeUUIDs = append(*shapeUUIDs, normalizedUUID)
  1545  			}
  1546  		} else {
  1547  			inValidOpFound = true
  1548  			// keep traversing to find geo fields
  1549  			_, foundGeoFilterL := qc.matchGeoFilterHelper(e.LHS, joinTableID, joinSchema, shapeUUIDs)
  1550  			_, foundGeoFilterR := qc.matchGeoFilterHelper(e.RHS, joinTableID, joinSchema, shapeUUIDs)
  1551  			foundGeoFilter = foundGeoFilterL || foundGeoFilterR
  1552  		}
  1553  	case *expr.UnaryExpr:
  1554  		inValidOpFound = true
  1555  		_, foundGeoFilter = qc.matchGeoFilterHelper(e.Expr, joinTableID, joinSchema, shapeUUIDs)
  1556  	}
  1557  	return
  1558  }
  1559  
  1560  // processTimeFilter processes the time filter by matching it against the time
  1561  // column of the main fact table. The time filter will be identified as common
  1562  // filter if it does not match with the designated time column.
  1563  func (qc *AQLQueryContext) processTimeFilter() {
  1564  	from, to := qc.fromTime, qc.toTime
  1565  
  1566  	// Match against time column of the main fact table.
  1567  	var timeColumnMatched bool
  1568  
  1569  	tableColumnPair := strings.SplitN(qc.Query.TimeFilter.Column, ".", 2)
  1570  	if len(tableColumnPair) < 2 {
  1571  		qc.Query.TimeFilter.Column = tableColumnPair[0]
  1572  	} else {
  1573  		qc.Query.TimeFilter.Column = tableColumnPair[1]
  1574  		if tableColumnPair[0] != qc.Query.Table {
  1575  			qc.Error = utils.StackError(nil, "timeFilter only supports main table: %s, got: %s", qc.Query.Table, tableColumnPair[0])
  1576  			return
  1577  		}
  1578  	}
  1579  
  1580  	if qc.TableScanners[0].Schema.Schema.IsFactTable {
  1581  		if from == nil {
  1582  			qc.Error = utils.StackError(nil, "'from' of time filter is missing")
  1583  			return
  1584  		}
  1585  
  1586  		timeColumn := qc.TableScanners[0].Schema.Schema.Columns[0].Name
  1587  		if qc.Query.TimeFilter.Column == "" || qc.Query.TimeFilter.Column == timeColumn {
  1588  			timeColumnMatched = true
  1589  			qc.Query.TimeFilter.Column = timeColumn
  1590  		}
  1591  	}
  1592  
  1593  	// TODO: resolve time filter column against foreign tables.
  1594  	timeColumnID := 0
  1595  	found := false
  1596  	if qc.Query.TimeFilter.Column != "" {
  1597  		// Validate column existence and type.
  1598  		timeColumnID, found = qc.TableScanners[0].Schema.ColumnIDs[qc.Query.TimeFilter.Column]
  1599  		if !found {
  1600  			qc.Error = utils.StackError(nil, "unknown time filter column %s",
  1601  				qc.Query.TimeFilter.Column)
  1602  			return
  1603  		}
  1604  		timeColumnType := qc.TableScanners[0].Schema.ValueTypeByColumn[timeColumnID]
  1605  		if timeColumnType != memCom.Uint32 {
  1606  			qc.Error = utils.StackError(nil,
  1607  				"expect time filter column %s of type Uint32, but got %s",
  1608  				qc.Query.TimeFilter.Column, memCom.DataTypeName[timeColumnType])
  1609  			return
  1610  		}
  1611  	}
  1612  	fromExpr, toExpr := createTimeFilterExpr(&expr.VarRef{
  1613  		Val:      qc.Query.TimeFilter.Column,
  1614  		ExprType: expr.Unsigned,
  1615  		TableID:  0,
  1616  		ColumnID: timeColumnID,
  1617  		DataType: memCom.Uint32,
  1618  	}, from, to)
  1619  
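        	// Archive batches are bucketed by day (86400 seconds); the +86399 makes this a
        	// ceiling division so a partially covered last day is still included in the scan.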
  1620  	qc.TableScanners[0].ArchiveBatchIDEnd = int((utils.Now().Unix() + 86399) / 86400)
  1621  	if timeColumnMatched {
  1622  		qc.OOPK.TimeFilters[0] = fromExpr
  1623  		qc.OOPK.TimeFilters[1] = toExpr
  1624  		if from != nil {
  1625  			qc.TableScanners[0].ArchiveBatchIDStart = int(from.Time.Unix() / 86400)
  1626  		}
  1627  		if to != nil {
  1628  			qc.TableScanners[0].ArchiveBatchIDEnd = int((to.Time.Unix() + 86399) / 86400)
  1629  		}
  1630  	} else {
  1631  		if fromExpr != nil {
  1632  			qc.OOPK.MainTableCommonFilters = append(qc.OOPK.MainTableCommonFilters, fromExpr)
  1633  		}
  1634  		if toExpr != nil {
  1635  			qc.OOPK.MainTableCommonFilters = append(qc.OOPK.MainTableCommonFilters, toExpr)
  1636  		}
  1637  	}
  1638  }
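
        // The following is a small sketch (not part of the compiler; the function name is
        // illustrative) of the batch-ID arithmetic used above: 'from' is floored to its day
        // bucket while 'to' is rounded up so that a partially covered last day is still
        // scanned. For example, fromUnix = 1556668800 (2019-05-01T00:00:00Z) gives start
        // 18017, and toUnix = 1556798400 (2019-05-02T12:00:00Z) gives end 18019.
        func exampleArchiveBatchIDRange(fromUnix, toUnix int64) (start, end int) {
        	start = int(fromUnix / 86400)       // day bucket containing 'from'
        	end = int((toUnix + 86399) / 86400) // rounded up past the day containing 'to'
        	return
        }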
  1639  
  1640  // matchAndRewriteGeoDimension tells whether a dimension matches the geo join and whether it's a valid
  1641  // geo join. It returns the rewritten geo dimension and an error. If the error is non-nil, it's an invalid geo join.
  1642  // A valid geo dimension can only be in one of the following formats:
  1643  // 	1. UUID
  1644  //  2. hex(UUID)
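        // In both cases the hex() wrapper, if present, is stripped and the matched UUID
        // column is re-typed to Uint8 before it is used as the dimension.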
  1645  func (qc *AQLQueryContext) matchAndRewriteGeoDimension(dimExpr expr.Expr) (expr.Expr, error) {
  1646  	gc := &geoTableUsageCollector{
  1647  		geoIntersection: *qc.OOPK.geoIntersection,
  1648  	}
  1649  
  1650  	expr.Walk(gc, dimExpr)
  1651  	if !gc.useGeoTable {
  1652  		return nil, nil
  1653  	}
  1654  
  1655  	if callExpr, ok := dimExpr.(*expr.Call); ok {
  1656  		if callExpr.Name != hexCallName {
  1657  			return nil, utils.StackError(nil,
  1658  				"Only hex function is supported on UUID type, but got %s", callExpr.Name)
  1659  		}
  1660  
  1661  		if len(callExpr.Args) != 1 {
  1662  			return nil, utils.StackError(nil,
  1663  				"Exactly 1 argument allowed for hex, got %d", len(callExpr.Args))
  1664  		}
  1665  
  1666  		dimExpr = callExpr.Args[0]
  1667  	}
  1668  
  1669  	joinSchema := qc.TableSchemaByName[qc.Query.Joins[gc.geoIntersection.shapeTableID-1].Table]
  1670  	if varRefExpr, ok := dimExpr.(*expr.VarRef); ok {
  1671  		var err error
  1672  		if varRefExpr.ColumnID != joinSchema.Schema.PrimaryKeyColumns[0] {
  1673  			err = utils.StackError(nil, "Only geo uuid is allowed in dimensions")
  1674  		}
  1675  
  1676  		varRefExpr.DataType = memCom.Uint8
  1677  		return varRefExpr, err
  1678  	}
  1679  
  1680  	return nil, utils.StackError(nil, "Only hex(uuid) or uuid supported, got %s", dimExpr.String())
  1681  }
  1682  
  1683  // geoTableUsageCollector traverses an AST expression tree, finds VarRef columns
  1684  // and checks whether any of them reference geo table columns.
  1685  type geoTableUsageCollector struct {
  1686  	geoIntersection geoIntersection
  1687  	useGeoTable     bool
  1688  }
  1689  
  1690  func (g *geoTableUsageCollector) Visit(expression expr.Expr) expr.Visitor {
  1691  	switch e := expression.(type) {
  1692  	case *expr.VarRef:
  1693  		g.useGeoTable = g.useGeoTable || e.TableID == g.geoIntersection.shapeTableID
  1694  	}
  1695  	return g
  1696  }
  1697  
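        // processMeasure validates the query's single measure, detects the non-aggregation
        // (numeric literal) case, and otherwise strips the aggregate call and maps it to the
        // corresponding C aggregation enum and measure byte width.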
  1698  func (qc *AQLQueryContext) processMeasure() {
  1699  	// OOPK engine only supports one measure per query.
  1700  	if len(qc.Query.Measures) != 1 {
  1701  		qc.Error = utils.StackError(nil, "expect one measure per query, but got %d",
  1702  			len(qc.Query.Measures))
  1703  		return
  1704  	}
  1705  
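        	// A bare numeric literal measure (e.g. "1") marks a non-aggregation query; cap the
        	// number of returned rows with nonAggregationQueryLimit when no explicit limit is set.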
  1706  	if _, ok := qc.Query.Measures[0].expr.(*expr.NumberLiteral); ok {
  1707  		qc.isNonAggregationQuery = true
  1708  		if qc.Query.Limit <= 0 {
  1709  			qc.Query.Limit = nonAggregationQueryLimit
  1710  		}
  1711  		return
  1712  	}
  1713  
  1714  	// Match and strip the aggregate function.
  1715  	aggregate, ok := qc.Query.Measures[0].expr.(*expr.Call)
  1716  	if !ok {
  1717  		qc.Error = utils.StackError(nil, "expect aggregate function, but got %s",
  1718  			qc.Query.Measures[0].Expr)
  1719  		return
  1720  	}
  1721  
  1722  	if qc.ReturnHLLData && aggregate.Name != hllCallName {
  1723  		qc.Error = utils.StackError(nil, "expect hll aggregate function since client specifies 'Accept' as "+
  1724  			"'application/hll', but got %s",
  1725  			qc.Query.Measures[0].Expr)
  1726  		return
  1727  	}
  1728  
  1729  	if len(aggregate.Args) != 1 {
  1730  		qc.Error = utils.StackError(nil,
  1731  			"expect one parameter for aggregate function %s, but got %d",
  1732  			aggregate.Name, len(aggregate.Args))
  1733  		return
  1734  	}
  1735  	qc.OOPK.Measure = aggregate.Args[0]
  1736  	// default is 4 bytes
  1737  	qc.OOPK.MeasureBytes = 4
  1738  	switch strings.ToLower(aggregate.Name) {
  1739  	case countCallName:
  1740  		qc.OOPK.Measure = &expr.NumberLiteral{
  1741  			Int:      1,
  1742  			Expr:     "1",
  1743  			ExprType: expr.Unsigned,
  1744  		}
  1745  		qc.OOPK.AggregateType = C.AGGR_SUM_UNSIGNED
  1746  	case sumCallName:
  1747  		qc.OOPK.MeasureBytes = 8
  1748  		switch qc.OOPK.Measure.Type() {
  1749  		case expr.Float:
  1750  			qc.OOPK.AggregateType = C.AGGR_SUM_FLOAT
  1751  		case expr.Signed:
  1752  			qc.OOPK.AggregateType = C.AGGR_SUM_SIGNED
  1753  		case expr.Unsigned:
  1754  			qc.OOPK.AggregateType = C.AGGR_SUM_UNSIGNED
  1755  		default:
  1756  			qc.Error = utils.StackError(nil,
  1757  				unsupportedInputType, sumCallName, qc.OOPK.Measure.String())
  1758  			return
  1759  		}
  1760  	case avgCallName:
  1761  		// 4 bytes for storing the average result and another 4 bytes for the count
  1762  		qc.OOPK.MeasureBytes = 8
  1763  		// for average, we should always use float type as the agg type.
  1764  		qc.OOPK.AggregateType = C.AGGR_AVG_FLOAT
  1765  	case minCallName:
  1766  		switch qc.OOPK.Measure.Type() {
  1767  		case expr.Float:
  1768  			qc.OOPK.AggregateType = C.AGGR_MIN_FLOAT
  1769  		case expr.Signed:
  1770  			qc.OOPK.AggregateType = C.AGGR_MIN_SIGNED
  1771  		case expr.Unsigned:
  1772  			qc.OOPK.AggregateType = C.AGGR_MIN_UNSIGNED
  1773  		default:
  1774  			qc.Error = utils.StackError(nil,
  1775  				unsupportedInputType, minCallName, qc.OOPK.Measure.String())
  1776  			return
  1777  		}
  1778  	case maxCallName:
  1779  		switch qc.OOPK.Measure.Type() {
  1780  		case expr.Float:
  1781  			qc.OOPK.AggregateType = C.AGGR_MAX_FLOAT
  1782  		case expr.Signed:
  1783  			qc.OOPK.AggregateType = C.AGGR_MAX_SIGNED
  1784  		case expr.Unsigned:
  1785  			qc.OOPK.AggregateType = C.AGGR_MAX_UNSIGNED
  1786  		default:
  1787  			qc.Error = utils.StackError(nil,
  1788  				unsupportedInputType, maxCallName, qc.OOPK.Measure.String())
  1789  			return
  1790  		}
  1791  	case hllCallName:
  1792  		qc.OOPK.AggregateType = C.AGGR_HLL
  1793  	default:
  1794  		qc.Error = utils.StackError(nil,
  1795  			"unsupported aggregate function: %s", aggregate.Name)
  1796  		return
  1797  	}
  1798  }
  1799  
  1800  func (qc *AQLQueryContext) getAllColumnsDimension() (columns []Dimension) {
  1801  	// only wildcard match on main table columns is supported
  1802  	for _, column := range qc.TableScanners[0].Schema.Schema.Columns {
  1803  		if !column.Deleted && column.Type != metaCom.GeoShape {
  1804  			columns = append(columns, Dimension{
  1805  				expr: &expr.VarRef{Val: column.Name},
  1806  				Expr: column.Name,
  1807  			})
  1808  		}
  1809  	}
  1810  	return
  1811  }
  1812  
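        // processDimensions copies the dimension ASTs into the OOPK context, rejects GeoShape
        // dimensions, rewrites the (at most one) geo dimension when a geo intersection is
        // present, and records column usage for the measure and all dimensions.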
  1813  func (qc *AQLQueryContext) processDimensions() {
  1814  	// Copy dimension ASTs.
  1815  	qc.OOPK.Dimensions = make([]expr.Expr, len(qc.Query.Dimensions))
  1816  	for i, dim := range qc.Query.Dimensions {
  1817  		// TODO: support numeric bucketizer.
  1818  		qc.OOPK.Dimensions[i] = dim.expr
  1819  		if dim.expr.Type() == expr.GeoShape {
  1820  			qc.Error = utils.StackError(nil,
  1821  				"GeoShape cannot be used as a dimension: %s", dim.Expr)
  1822  			return
  1823  		}
  1824  	}
  1825  
  1826  	if qc.OOPK.geoIntersection != nil {
  1827  		gc := &geoTableUsageCollector{
  1828  			geoIntersection: *qc.OOPK.geoIntersection,
  1829  		}
  1830  		// Check whether measure and dimensions are referencing any geo table columns.
  1831  		expr.Walk(gc, qc.OOPK.Measure)
  1832  
  1833  		if gc.useGeoTable {
  1834  			qc.Error = utils.StackError(nil,
  1835  				"Geo table column is not allowed to be used in measure: %s", qc.OOPK.Measure.String())
  1836  			return
  1837  		}
  1838  
  1839  		foundGeoJoin := false
  1840  		for i, dimExpr := range qc.OOPK.Dimensions {
  1841  			geoDimExpr, err := qc.matchAndRewriteGeoDimension(dimExpr)
  1842  			if err != nil {
  1843  				qc.Error = err
  1844  				return
  1845  			}
  1846  
  1847  			if geoDimExpr != nil {
  1848  				if foundGeoJoin {
  1849  					qc.Error = utils.StackError(nil,
  1850  						"Only one geo dimension allowed: %s", dimExpr.String())
  1851  					return
  1852  				}
  1853  				foundGeoJoin = true
  1854  				qc.OOPK.Dimensions[i] = geoDimExpr
  1855  				qc.OOPK.geoIntersection.dimIndex = i
  1856  			}
  1857  		}
  1858  	}
  1859  
  1860  	// Collect column usage from measure and dimensions
  1861  	expr.Walk(columnUsageCollector{
  1862  		tableScanners: qc.TableScanners,
  1863  		usages:        columnUsedByAllBatches,
  1864  	}, qc.OOPK.Measure)
  1865  
  1866  	for _, dim := range qc.OOPK.Dimensions {
  1867  		expr.Walk(columnUsageCollector{
  1868  			tableScanners: qc.TableScanners,
  1869  			usages:        columnUsedByAllBatches,
  1870  		}, dim)
  1871  	}
  1872  }
  1873  
  1874  func getDimensionDataType(expression expr.Expr) memCom.DataType {
  1875  	if e, ok := expression.(*expr.VarRef); ok {
  1876  		return e.DataType
  1877  	}
  1878  	switch expression.Type() {
  1879  	case expr.Boolean:
  1880  		return memCom.Bool
  1881  	case expr.Unsigned:
  1882  		return memCom.Uint32
  1883  	case expr.Signed:
  1884  		return memCom.Int32
  1885  	case expr.Float:
  1886  		return memCom.Float32
  1887  	default:
  1888  		return memCom.Uint32
  1889  	}
  1890  }
  1891  
  1892  func getDimensionDataBytes(expression expr.Expr) int {
  1893  	return memCom.DataTypeBytes(getDimensionDataType(expression))
  1894  }
  1895  
  1896  // sortDimensionColumns sorts dimension columns based on their data width in bytes.
  1897  // Dimension columns in OOPK will not be reordered, but a mapping
  1898  // from original ID to ordered offsets (value and validity) in the
  1899  // dimension vector will be stored.
  1900  // A GeoUUID dimension takes 1 byte, a VarRef expression uses the column's data length,
  1901  // and all others default to 4 bytes.
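        // For example (illustrative), dimensions with value widths [4, 1, 2, 4] bytes are laid
        // out as dim0, dim3, dim2, dim1; DimensionVectorIndex becomes [0, 3, 2, 1] and
        // DimRowBytes = 4+4+2+1 value bytes plus 4 validity bytes = 15.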
  1902  func (qc *AQLQueryContext) sortDimensionColumns() {
  1903  	orderedIndex := 0
  1904  	numDimensions := len(qc.OOPK.Dimensions)
  1905  	qc.OOPK.DimensionVectorIndex = make([]int, numDimensions)
  1906  	byteWidth := 1 << uint(len(qc.OOPK.NumDimsPerDimWidth)-1)
  1907  	for byteIndex := range qc.OOPK.NumDimsPerDimWidth {
  1908  		for originIndex, dim := range qc.OOPK.Dimensions {
  1909  			dataBytes := getDimensionDataBytes(dim)
  1910  			if dataBytes == byteWidth {
  1911  				// record the (value offset, null offset) pair;
  1912  				// null offsets will have the total dimension bytes added later
  1913  				qc.OOPK.DimensionVectorIndex[originIndex] = orderedIndex
  1914  				qc.OOPK.NumDimsPerDimWidth[byteIndex]++
  1915  				qc.OOPK.DimRowBytes += dataBytes
  1916  				orderedIndex++
  1917  			}
  1918  		}
  1919  		byteWidth >>= 1
  1920  	}
  1921  	// plus one byte per dimension column for validity
  1922  	qc.OOPK.DimRowBytes += numDimensions
  1923  
  1924  	if !qc.isNonAggregationQuery {
  1925  		// dimension size checking is skipped for non-aggregation queries
  1926  		if qc.OOPK.DimRowBytes > C.MAX_DIMENSION_BYTES {
  1927  			qc.Error = utils.StackError(nil, "maximum dimension bytes: %d, got: %d", C.MAX_DIMENSION_BYTES, qc.OOPK.DimRowBytes)
  1928  			return
  1929  		}
  1930  	}
  1931  }
  1932  
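        // sortUsedColumns orders each scanner's used columns so that unsorted (uncompressed)
        // columns come first, followed by the used archiving sort columns in reverse sort-key
        // order; ColumnsByIDs maps a column ID back to its position in that order.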
  1933  func (qc *AQLQueryContext) sortUsedColumns() {
  1934  	for _, scanner := range qc.TableScanners {
  1935  		scanner.Columns = make([]int, 0, len(scanner.ColumnUsages))
  1936  		scanner.ColumnsByIDs = make(map[int]int)
  1937  		// Unsorted/uncompressed columns
  1938  		for columnID := range scanner.ColumnUsages {
  1939  			if utils.IndexOfInt(scanner.Schema.Schema.ArchivingSortColumns, columnID) < 0 {
  1940  				scanner.ColumnsByIDs[columnID] = len(scanner.Columns)
  1941  				scanner.Columns = append(scanner.Columns, columnID)
  1942  			}
  1943  		}
  1944  		// Sorted/compressed columns
  1945  		for i := len(scanner.Schema.Schema.ArchivingSortColumns) - 1; i >= 0; i-- {
  1946  			columnID := scanner.Schema.Schema.ArchivingSortColumns[i]
  1947  			_, found := scanner.ColumnUsages[columnID]
  1948  			if found {
  1949  				scanner.ColumnsByIDs[columnID] = len(scanner.Columns)
  1950  				scanner.Columns = append(scanner.Columns, columnID)
  1951  			}
  1952  		}
  1953  	}
  1954  }
  1955  
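        // parseTimezoneColumnString splits a timezone column option of the form "column(joinKey)"
        // into its parts; e.g. (illustrative) "timezone(city_id)" yields column "timezone",
        // joinKey "city_id" and success true.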
  1956  func parseTimezoneColumnString(timezoneColumnString string) (column, joinKey string, success bool) {
  1957  	exp, err := expr.ParseExpr(timezoneColumnString)
  1958  	if err != nil {
  1959  		return
  1960  	}
  1961  	if c, ok := exp.(*expr.Call); ok {
  1962  		if len(c.Args) == 1 {
  1963  			return c.Name, c.Args[0].String(), true
  1964  		}
  1965  	}
  1966  	return
  1967  }
  1968  
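        // expandINop rewrites "x IN (a, b, c)" into the equivalent chain of equalities
        // "x = a OR x = b OR x = c", so geo filter matching only has to understand EQ and OR.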
  1969  func (qc *AQLQueryContext) expandINop(e *expr.BinaryExpr) (expandedExpr expr.Expr) {
  1970  	lhs, ok := e.LHS.(*expr.VarRef)
  1971  	if !ok {
  1972  		qc.Error = utils.StackError(nil, "lhs of IN or NOT_IN must be a valid column")
        		return
  1973  	}
  1974  	rhs := e.RHS
  1975  	switch rhsTyped := rhs.(type) {
  1976  	case *expr.Call:
  1977  		expandedExpr = &expr.BooleanLiteral{Val: false}
  1978  		for _, value := range rhsTyped.Args {
  1979  			switch expandedExpr.(type) {
  1980  			case *expr.BooleanLiteral:
  1981  				expandedExpr = qc.Rewrite(&expr.BinaryExpr{
  1982  					Op:  expr.EQ,
  1983  					LHS: lhs,
  1984  					RHS: value,
  1985  				}).(*expr.BinaryExpr)
  1986  			default:
  1987  				lastExpr := expandedExpr
  1988  				expandedExpr = &expr.BinaryExpr{
  1989  					Op:  expr.OR,
  1990  					LHS: lastExpr,
  1991  					RHS: qc.Rewrite(&expr.BinaryExpr{
  1992  						Op:  expr.EQ,
  1993  						LHS: lhs,
  1994  						RHS: value,
  1995  					}).(*expr.BinaryExpr),
  1996  				}
  1997  			}
  1998  		}
  1999  		break
  2000  	default:
  2001  		qc.Error = utils.StackError(nil, "only EQ and IN operators are supported for geo fields")
  2002  	}
  2003  	return
  2004  }