github.com/rohankumardubey/aresdb@v0.0.2-0.20190517170215-e54e3ca06b9c/query/expr/ast.go (about)

     1  // Modifications Copyright (c) 2017-2018 Uber Technologies, Inc.
     2  // Copyright (c) 2013-2016 Errplane Inc.
     3  //
     4  // Permission is hereby granted, free of charge, to any person obtaining a copy of
     5  // this software and associated documentation files (the "Software"), to deal in
     6  // the Software without restriction, including without limitation the rights to
     7  // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
     8  // the Software, and to permit persons to whom the Software is furnished to do so,
     9  // subject to the following conditions:
    10  //
    11  // The above copyright notice and this permission notice shall be included in all
    12  // copies or substantial portions of the Software.
    13  //
    14  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    15  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
    16  // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
    17  // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    18  // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    19  // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
    20  
    21  package expr
    22  
    23  import (
    24  	"bytes"
    25  	"fmt"
    26  	memCom "github.com/uber/aresdb/memstore/common"
    27  	"strconv"
    28  	"strings"
    29  )
    30  
    31  // Type defines data types for expression evaluation.
    32  // Expression types are determined at query compilation time, type castings are
    33  // generated when apprioperiate. Notice that word widths are not specified here.
    34  type Type int
    35  
    36  const (
    37  	UnknownType Type = iota
    38  	Boolean
    39  	Unsigned
    40  	Signed
    41  	Float
    42  	GeoPoint
    43  	GeoShape
    44  )
    45  
    46  var typeNames = map[Type]string{
    47  	UnknownType: "Unknown",
    48  	Boolean:     "Boolean",
    49  	Unsigned:    "Unsigned",
    50  	Signed:      "Signed",
    51  	Float:       "Float",
    52  	GeoPoint:    "GeoPoint",
    53  	GeoShape:    "GeoShape",
    54  }
    55  
    56  func (t Type) String() string {
    57  	return typeNames[t]
    58  }
    59  
    60  func (t Type) MarshalJSON() ([]byte, error) {
    61  	buffer := bytes.NewBufferString(`"`)
    62  	buffer.WriteString(typeNames[t])
    63  	buffer.WriteString(`"`)
    64  	return buffer.Bytes(), nil
    65  }
    66  
// Expr represents an expression that can be evaluated to a value.
// Every AST node type in this file implements it.
type Expr interface {
	// expr is an unexported, empty marker method that tags the AST node
	// types of this package.
	expr()
	// String returns a string representation of the expression.
	String() string
	// Type returns the data type of the expression.
	Type() Type
}
    73  
    74  func (*BinaryExpr) expr()      {}
    75  func (*BooleanLiteral) expr()  {}
    76  func (*Call) expr()            {}
    77  func (*Case) expr()            {}
    78  func (*Distinct) expr()        {}
    79  func (*NullLiteral) expr()     {}
    80  func (*NumberLiteral) expr()   {}
    81  func (*ParenExpr) expr()       {}
    82  func (*StringLiteral) expr()   {}
    83  func (*UnaryExpr) expr()       {}
    84  func (*UnknownLiteral) expr()  {}
    85  func (*VarRef) expr()          {}
    86  func (*Wildcard) expr()        {}
    87  func (*GeopointLiteral) expr() {}
    88  
    89  // walkNames will walk the Expr and return the database fields
    90  func walkNames(exp Expr) []string {
    91  	switch expr := exp.(type) {
    92  	case *VarRef:
    93  		return []string{expr.Val}
    94  	case *Call:
    95  		if len(expr.Args) == 0 {
    96  			return nil
    97  		}
    98  		lit, ok := expr.Args[0].(*VarRef)
    99  		if !ok {
   100  			return nil
   101  		}
   102  
   103  		return []string{lit.Val}
   104  	case *UnaryExpr:
   105  		return walkNames(expr.Expr)
   106  	case *BinaryExpr:
   107  		var ret []string
   108  		ret = append(ret, walkNames(expr.LHS)...)
   109  		ret = append(ret, walkNames(expr.RHS)...)
   110  		return ret
   111  	case *Case:
   112  		var ret []string
   113  		for _, cond := range expr.WhenThens {
   114  			ret = append(ret, walkNames(cond.When)...)
   115  			ret = append(ret, walkNames(cond.Then)...)
   116  		}
   117  		if expr.Else != nil {
   118  			ret = append(ret, walkNames(expr.Else)...)
   119  		}
   120  		return ret
   121  	case *ParenExpr:
   122  		return walkNames(expr.Expr)
   123  	}
   124  
   125  	return nil
   126  }
   127  
   128  // walkFunctionCalls walks the Field of a query for any function calls made
   129  func walkFunctionCalls(exp Expr) []*Call {
   130  	switch expr := exp.(type) {
   131  	case *Call:
   132  		return []*Call{expr}
   133  	case *UnaryExpr:
   134  		return walkFunctionCalls(expr.Expr)
   135  	case *BinaryExpr:
   136  		var ret []*Call
   137  		ret = append(ret, walkFunctionCalls(expr.LHS)...)
   138  		ret = append(ret, walkFunctionCalls(expr.RHS)...)
   139  		return ret
   140  	case *Case:
   141  		var ret []*Call
   142  		for _, cond := range expr.WhenThens {
   143  			ret = append(ret, walkFunctionCalls(cond.When)...)
   144  			ret = append(ret, walkFunctionCalls(cond.Then)...)
   145  		}
   146  		if expr.Else != nil {
   147  			ret = append(ret, walkFunctionCalls(expr.Else)...)
   148  		}
   149  		return ret
   150  	case *ParenExpr:
   151  		return walkFunctionCalls(expr.Expr)
   152  	}
   153  
   154  	return nil
   155  }
   156  
// VarRef represents a reference to a variable.
type VarRef struct {
	// Val is the referenced name; String returns it verbatim.
	Val string
	// ExprType is the expression type reported by Type.
	ExprType Type

	// The following fields are populated for convenience after the query is
	// validated against the schema.

	// ID of the table in the query scope (0 for the main table, 1+ for foreign
	// tables).
	TableID int
	// ID of the column in the schema.
	ColumnID int
	// Enum dictionary for enum typed column. Can only be accessed while holding
	// the schema lock. Excluded from JSON output.
	EnumDict map[string]int `json:"-"`
	// Setting enum reverse dict requires holding the schema lock,
	// while reading from it does not require holding the schema lock.
	// Excluded from JSON output.
	EnumReverseDict []string `json:"-"`

	// DataType is the memstore data type associated with the reference.
	// NOTE(review): presumably the column's storage type taken from the
	// schema; confirm against the code that populates it.
	DataType memCom.DataType

	// Whether this column is hll column (can run hll directly)
	IsHLLColumn bool
}

// Type returns the expression type stored in ExprType.
func (r *VarRef) Type() Type {
	return r.ExprType
}

// String returns a string representation of the variable reference, which is
// the referenced name (Val) without any quoting.
func (r *VarRef) String() string {
	return r.Val
}
   192  
   193  // Call represents a function call.
   194  type Call struct {
   195  	Name     string
   196  	Args     []Expr
   197  	ExprType Type
   198  }
   199  
   200  // Type returns the type.
   201  func (c *Call) Type() Type {
   202  	return c.ExprType
   203  }
   204  
   205  // String returns a string representation of the call.
   206  func (c *Call) String() string {
   207  	// Join arguments.
   208  	var strs []string
   209  	for _, arg := range c.Args {
   210  		if arg == nil {
   211  			strs = append(strs, "ERROR_ARGUMENT_NIL")
   212  		} else {
   213  			strs = append(strs, arg.String())
   214  		}
   215  	}
   216  
   217  	// Write function name and args.
   218  	return fmt.Sprintf("%s(%s)", c.Name, strings.Join(strs, ", "))
   219  }
   220  
// WhenThen represents a when-then conditional expression pair in a case expression.
type WhenThen struct {
	// When is the expression following the WHEN keyword.
	When Expr
	// Then is the expression following the THEN keyword.
	Then Expr
}
   226  
   227  // Case represents a CASE WHEN .. THEN .. ELSE .. THEN expression.
   228  type Case struct {
   229  	WhenThens []WhenThen
   230  	Else      Expr
   231  	ExprType  Type
   232  }
   233  
   234  // Type returns the type.
   235  func (c *Case) Type() Type {
   236  	return c.ExprType
   237  }
   238  
   239  // String returns a string representation of the expression.
   240  func (c *Case) String() string {
   241  	whenThens := make([]string, len(c.WhenThens))
   242  	for i, whenThen := range c.WhenThens {
   243  		whenThens[i] = fmt.Sprintf("WHEN %s THEN %s", whenThen.When.String(), whenThen.Then.String())
   244  	}
   245  	if c.Else == nil {
   246  		return fmt.Sprintf("CASE %s END", strings.Join(whenThens, " "))
   247  	}
   248  	return fmt.Sprintf("CASE %s ELSE %s END", strings.Join(whenThens, " "), c.Else.String())
   249  }
   250  
   251  // Distinct represents a DISTINCT expression.
   252  type Distinct struct {
   253  	// Identifier following DISTINCT
   254  	Val string
   255  }
   256  
   257  // Type returns the type.
   258  func (d *Distinct) Type() Type {
   259  	return UnknownType
   260  }
   261  
   262  // String returns a string representation of the expression.
   263  func (d *Distinct) String() string {
   264  	return fmt.Sprintf("DISTINCT %s", d.Val)
   265  }
   266  
   267  // NewCall returns a new call expression from this expressions.
   268  func (d *Distinct) NewCall() *Call {
   269  	return &Call{
   270  		Name: "distinct",
   271  		Args: []Expr{
   272  			&VarRef{Val: d.Val},
   273  		},
   274  	}
   275  }
   276  
   277  // NumberLiteral represents a numeric literal.
   278  type NumberLiteral struct {
   279  	Val      float64
   280  	Int      int
   281  	Expr     string
   282  	ExprType Type
   283  }
   284  
   285  // Type returns the type.
   286  func (l *NumberLiteral) Type() Type {
   287  	return l.ExprType
   288  }
   289  
   290  // String returns a string representation of the literal.
   291  func (l *NumberLiteral) String() string {
   292  	if l.Expr != "" {
   293  		return l.Expr
   294  	}
   295  	return strconv.FormatFloat(l.Val, 'f', 3, 64)
   296  }
   297  
   298  // BooleanLiteral represents a boolean literal.
   299  type BooleanLiteral struct {
   300  	Val bool
   301  }
   302  
   303  // Type returns the type.
   304  func (l *BooleanLiteral) Type() Type {
   305  	return Boolean
   306  }
   307  
   308  // String returns a string representation of the literal.
   309  func (l *BooleanLiteral) String() string {
   310  	if l.Val {
   311  		return "true"
   312  	}
   313  	return "false"
   314  }
   315  
   316  // isTrueLiteral returns true if the expression is a literal "true" value.
   317  func isTrueLiteral(expr Expr) bool {
   318  	if expr, ok := expr.(*BooleanLiteral); ok {
   319  		return expr.Val == true
   320  	}
   321  	return false
   322  }
   323  
   324  // isFalseLiteral returns true if the expression is a literal "false" value.
   325  func isFalseLiteral(expr Expr) bool {
   326  	if expr, ok := expr.(*BooleanLiteral); ok {
   327  		return expr.Val == false
   328  	}
   329  	return false
   330  }
   331  
   332  // StringLiteral represents a string literal.
   333  type StringLiteral struct {
   334  	Val string
   335  }
   336  
   337  // Type returns the type.
   338  func (l *StringLiteral) Type() Type {
   339  	return UnknownType
   340  }
   341  
   342  // String returns a string representation of the literal.
   343  func (l *StringLiteral) String() string { return QuoteString(l.Val) }
   344  
   345  // GeopointLiteral represents a literal for GeoPoint
   346  type GeopointLiteral struct {
   347  	Val [2]float32
   348  }
   349  
   350  // Type returns the type.
   351  func (l *GeopointLiteral) Type() Type {
   352  	return GeoPoint
   353  }
   354  
   355  // String returns a string representation of the literal.
   356  func (l *GeopointLiteral) String() string {
   357  	return fmt.Sprintf("point(%f, %f)", l.Val[0], l.Val[1])
   358  }
   359  
   360  // NullLiteral represents a NULL literal.
   361  type NullLiteral struct{}
   362  
   363  // Type returns the type.
   364  func (l *NullLiteral) Type() Type {
   365  	return UnknownType
   366  }
   367  
   368  // String returns "NULL".
   369  func (l *NullLiteral) String() string { return "NULL" }
   370  
   371  // UnknownLiteral represents an UNKNOWN literal.
   372  type UnknownLiteral struct{}
   373  
   374  // Type returns the type.
   375  func (l *UnknownLiteral) Type() Type {
   376  	return UnknownType
   377  }
   378  
   379  // String returns "UNKNOWN".
   380  func (l *UnknownLiteral) String() string { return "UNKNOWN" }
   381  
   382  // UnaryExpr represents an operation on a single expression.
   383  type UnaryExpr struct {
   384  	Op       Token
   385  	Expr     Expr
   386  	ExprType Type
   387  }
   388  
   389  // Type returns the type.
   390  func (e *UnaryExpr) Type() Type {
   391  	return e.ExprType
   392  }
   393  
   394  // String returns a string representation of the unary expression.
   395  func (e *UnaryExpr) String() string {
   396  	if e.Op.isDerivedUnaryOperator() {
   397  		return fmt.Sprintf("%s %s", e.Expr.String(), e.Op.String())
   398  	}
   399  	return fmt.Sprintf("%s(%s)", e.Op.String(), e.Expr.String())
   400  }
   401  
   402  // BinaryExpr represents an operation between two expressions.
   403  type BinaryExpr struct {
   404  	Op       Token
   405  	LHS      Expr
   406  	RHS      Expr
   407  	ExprType Type
   408  }
   409  
   410  // Type returns the type.
   411  func (e *BinaryExpr) Type() Type {
   412  	return e.ExprType
   413  }
   414  
   415  // String returns a string representation of the binary expression.
   416  func (e *BinaryExpr) String() string {
   417  	return fmt.Sprintf("%s %s %s", e.LHS.String(), e.Op.String(), e.RHS.String())
   418  }
   419  
   420  // ParenExpr represents a parenthesized expression.
   421  type ParenExpr struct {
   422  	Expr     Expr
   423  	ExprType Type // used for type casting
   424  }
   425  
   426  // Type returns the type.
   427  func (e *ParenExpr) Type() Type {
   428  	if e.ExprType != UnknownType {
   429  		return e.ExprType
   430  	}
   431  	return e.Expr.Type()
   432  }
   433  
   434  // String returns a string representation of the parenthesized expression.
   435  func (e *ParenExpr) String() string {
   436  	if e.Expr == nil {
   437  		return "(ERROR_PAREN_EXPRESSION_NIL)"
   438  	}
   439  	return fmt.Sprintf("(%s)", e.Expr.String())
   440  }
   441  
   442  // Wildcard represents a wild card expression.
   443  type Wildcard struct{}
   444  
   445  // Type returns the type.
   446  func (e *Wildcard) Type() Type {
   447  	return UnknownType
   448  }
   449  
   450  // String returns a string representation of the wildcard.
   451  func (e *Wildcard) String() string { return "*" }
   452  
   453  // CloneExpr returns a deep copy of the expression.
   454  func CloneExpr(expr Expr) Expr {
   455  	if expr == nil {
   456  		return nil
   457  	}
   458  	switch expr := expr.(type) {
   459  	case *UnaryExpr:
   460  		return &UnaryExpr{Op: expr.Op, Expr: CloneExpr(expr.Expr)}
   461  	case *BinaryExpr:
   462  		return &BinaryExpr{Op: expr.Op, LHS: CloneExpr(expr.LHS), RHS: CloneExpr(expr.RHS)}
   463  	case *BooleanLiteral:
   464  		return &BooleanLiteral{Val: expr.Val}
   465  	case *Call:
   466  		args := make([]Expr, len(expr.Args))
   467  		for i, arg := range expr.Args {
   468  			args[i] = CloneExpr(arg)
   469  		}
   470  		return &Call{Name: expr.Name, Args: args}
   471  	case *Case:
   472  		conds := make([]WhenThen, len(expr.WhenThens))
   473  		for i, cond := range expr.WhenThens {
   474  			conds[i].When = CloneExpr(cond.When)
   475  			conds[i].Then = CloneExpr(cond.Then)
   476  		}
   477  		var elce Expr
   478  		if expr.Else != nil {
   479  			elce = CloneExpr(expr.Else)
   480  		}
   481  		return &Case{WhenThens: conds, Else: elce}
   482  	case *Distinct:
   483  		return &Distinct{Val: expr.Val}
   484  	case *NumberLiteral:
   485  		return &NumberLiteral{Val: expr.Val, Expr: expr.Expr}
   486  	case *ParenExpr:
   487  		return &ParenExpr{Expr: CloneExpr(expr.Expr)}
   488  	case *StringLiteral:
   489  		return &StringLiteral{Val: expr.Val}
   490  	case *GeopointLiteral:
   491  		return &GeopointLiteral{Val: expr.Val}
   492  	case *VarRef:
   493  		return &VarRef{Val: expr.Val}
   494  	case *Wildcard:
   495  		return &Wildcard{}
   496  	}
   497  	panic("unreachable")
   498  }
   499  
// Visitor can be called by Walk to traverse an AST hierarchy.
// The Visit() function is called once per expression.
type Visitor interface {
	// Visit is invoked with the expression being traversed. Walk uses the
	// returned Visitor for that expression's children and does not descend
	// into them when the returned Visitor is nil.
	Visit(Expr) Visitor
}
   505  
   506  // Walk traverses an expression hierarchy in depth-first order.
   507  func Walk(v Visitor, expr Expr) {
   508  	if expr == nil {
   509  		return
   510  	}
   511  
   512  	if v = v.Visit(expr); v == nil {
   513  		return
   514  	}
   515  
   516  	switch e := expr.(type) {
   517  	case *UnaryExpr:
   518  		Walk(v, e.Expr)
   519  
   520  	case *BinaryExpr:
   521  		Walk(v, e.LHS)
   522  		Walk(v, e.RHS)
   523  
   524  	case *Case:
   525  		for _, cond := range e.WhenThens {
   526  			Walk(v, cond.When)
   527  			Walk(v, cond.Then)
   528  		}
   529  		if e.Else != nil {
   530  			Walk(v, e.Else)
   531  		}
   532  
   533  	case *Call:
   534  		for _, expr := range e.Args {
   535  			Walk(v, expr)
   536  		}
   537  
   538  	case *ParenExpr:
   539  		Walk(v, e.Expr)
   540  
   541  	}
   542  }
   543  
   544  // WalkFunc traverses an expression hierarchy in depth-first order.
   545  func WalkFunc(e Expr, fn func(Expr)) {
   546  	Walk(walkFuncVisitor(fn), e)
   547  }
   548  
   549  type walkFuncVisitor func(Expr)
   550  
   551  func (fn walkFuncVisitor) Visit(e Expr) Visitor { fn(e); return fn }
   552  
// Rewriter can be called by Rewrite to replace nodes in the AST hierarchy.
// The Rewrite() function is called once per expression.
type Rewriter interface {
	// Rewrite receives an expression whose children have already been
	// rewritten and returns the expression that should take its place.
	Rewrite(Expr) Expr
}
   558  
   559  // Rewrite recursively invokes the rewriter to replace each expression.
   560  // Nodes are traversed depth-first and rewritten from leaf to root.
   561  func Rewrite(r Rewriter, expr Expr) Expr {
   562  	switch e := expr.(type) {
   563  	case *UnaryExpr:
   564  		e.Expr = Rewrite(r, e.Expr)
   565  
   566  	case *BinaryExpr:
   567  		e.LHS = Rewrite(r, e.LHS)
   568  		e.RHS = Rewrite(r, e.RHS)
   569  
   570  	case *Case:
   571  		for i, cond := range e.WhenThens {
   572  			cond.When = Rewrite(r, cond.When)
   573  			cond.Then = Rewrite(r, cond.Then)
   574  			e.WhenThens[i] = cond
   575  		}
   576  		if e.Else != nil {
   577  			e.Else = Rewrite(r, e.Else)
   578  		}
   579  
   580  	case *ParenExpr:
   581  		e.Expr = Rewrite(r, e.Expr)
   582  
   583  	case *Call:
   584  		for i, expr := range e.Args {
   585  			e.Args[i] = Rewrite(r, expr)
   586  		}
   587  	}
   588  
   589  	return r.Rewrite(expr)
   590  }
   591  
   592  // RewriteFunc rewrites an expression hierarchy.
   593  func RewriteFunc(e Expr, fn func(Expr) Expr) Expr {
   594  	return Rewrite(rewriterFunc(fn), e)
   595  }
   596  
   597  type rewriterFunc func(Expr) Expr
   598  
   599  func (fn rewriterFunc) Rewrite(e Expr) Expr { return fn(e) }