github.com/CiscoM31/godata@v1.0.10/expression_parser.go (about)

     1  package godata
     2  
import (
	"context"
	"strconv"
	"strings"
)
     7  
// tokenDurationRe is a regex for a token of type duration.
// The token value is set to the ISO 8601 string inside the single quotes.
// For example, if the input data is duration'PT2H', then the token value is set to PT2H without quotes.
//
// As written, the pattern:
//   - makes the "duration" prefix optional,
//   - captures the quoted value in the named group "subtoken",
//   - allows an optional leading '-' sign before the 'P' designator,
//   - accepts a date part (Y, M, D designators) optionally followed by a
//     'T' time part, or a 'T' time part alone (H, M, S designators, with
//     optional fractional seconds).
const tokenDurationRe = `^(duration)?'(?P<subtoken>-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))))'`
    12  
    13  // Addressing properties.
    14  // Addressing items within a collection:
    15  //   ABNF: entityColNavigationProperty [ collectionNavigation ]
    16  //         collectionNavigation = [ "/" qualifiedEntityTypeName ] [ collectionNavPath ]
    17  //   Description: OData identifier, optionally followed by collection navigation.
    18  //
    19  // propertyPath = entityColNavigationProperty [ collectionNavigation ]
    20  //             / entityNavigationProperty    [ singleNavigation ]
    21  //             / complexColProperty          [ collectionPath ]
    22  //             / complexProperty             [ complexPath ]
    23  //             / primitiveColProperty        [ collectionPath ]
    24  //             / primitiveProperty           [ singlePath ]
    25  //             / streamProperty              [ boundOperation ]
    26  
    27  type ExpressionTokenType int
    28  
    29  func (e ExpressionTokenType) Value() int {
    30  	return (int)(e)
    31  }
    32  
// Token types produced when tokenizing an OData expression.
// The numeric values come from iota, so the order of the entries is significant;
// the bracketed numbers in the comments are the resulting values.
const (
	ExpressionTokenOpenParen        ExpressionTokenType = iota // Open parenthesis - parenthesis expression, list expression, or path segment selector.
	ExpressionTokenCloseParen                                  // Close parenthesis
	ExpressionTokenWhitespace                                  // white space token
	ExpressionTokenNav                                         // Property navigation
	ExpressionTokenColon                                       // Function arg separator for 'any(v:boolExpr)' and 'all(v:boolExpr)' lambda operators
	ExpressionTokenComma                                       // [5] List delimiter and function argument delimiter.
	ExpressionTokenLogical                                     // eq|ne|gt|ge|lt|le|and|or|not|has|in
	ExpressionTokenOp                                          // add|sub|mul|divby|div|mod
	ExpressionTokenFunc                                        // Function, e.g. contains, substring...
	ExpressionTokenLambdaNav                                   // "/" token when used in lambda expression, e.g. tags/any()
	ExpressionTokenLambda                                      // [10] any(), all() lambda functions
	ExpressionTokenCase                                        // A case() statement. See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#sec_case
	ExpressionTokenCasePair                                    // A case statement expression pair [ <boolean expression> : <value expression> ]
	ExpressionTokenNull                                        // The 'null' token
	ExpressionTokenIt                                          // The '$it' token
	ExpressionTokenRoot                                        // [15] The '$root' token
	ExpressionTokenFloat                                       // A floating point value.
	ExpressionTokenInteger                                     // An integer value
	ExpressionTokenString                                      // SQUOTE *( SQUOTE-in-string / pchar-no-SQUOTE ) SQUOTE
	ExpressionTokenDate                                        // A date value
	ExpressionTokenTime                                        // [20] A time value
	ExpressionTokenDateTime                                    // A date-time value
	ExpressionTokenBoolean                                     // A literal boolean value
	ExpressionTokenLiteral                                     // A literal non-boolean value
	ExpressionTokenDuration                                    // duration      = [ "duration" ] SQUOTE durationValue SQUOTE
	ExpressionTokenGuid                                        // [25] A 128-bit GUID
	ExpressionTokenAssignement                                 // The '=' assignment for function arguments.
	ExpressionTokenGeographyPolygon                            // A geography polygon literal, e.g. geography'SRID=0;Polygon((...))'
	ExpressionTokenGeometryPolygon                             // A geometry polygon literal, e.g. geometry'SRID=0;Polygon((...))'
	expressionTokenLast                                        // Marks the end of the enumeration; keep it as the last entry.
)
    65  
    66  func (e ExpressionTokenType) String() string {
    67  	return [...]string{
    68  		"ExpressionTokenOpenParen",
    69  		"ExpressionTokenCloseParen",
    70  		"ExpressionTokenWhitespace",
    71  		"ExpressionTokenNav",
    72  		"ExpressionTokenColon",
    73  		"ExpressionTokenComma",
    74  		"ExpressionTokenLogical",
    75  		"ExpressionTokenOp",
    76  		"ExpressionTokenFunc",
    77  		"ExpressionTokenLambdaNav",
    78  		"ExpressionTokenLambda",
    79  		"ExpressionTokenCase",
    80  		"ExpressionTokenCasePair",
    81  		"ExpressionTokenNull",
    82  		"ExpressionTokenIt",
    83  		"ExpressionTokenRoot",
    84  		"ExpressionTokenFloat",
    85  		"ExpressionTokenInteger",
    86  		"ExpressionTokenString",
    87  		"ExpressionTokenDate",
    88  		"ExpressionTokenTime",
    89  		"ExpressionTokenDateTime",
    90  		"ExpressionTokenBoolean",
    91  		"ExpressionTokenLiteral",
    92  		"ExpressionTokenDuration",
    93  		"ExpressionTokenGuid",
    94  		"ExpressionTokenAssignement",
    95  		"ExpressionTokenGeographyPolygon",
    96  		"ExpressionTokenGeometryPolygon",
    97  		"expressionTokenLast",
    98  	}[e]
    99  }
   100  
// ExpressionParser is an OData expression parser.
// It embeds the generic Parser and holds the Tokenizer that
// ParseExpressionString uses to split the input into tokens.
type ExpressionParser struct {
	*Parser
	ExpectBoolExpr bool       // When true, ParseExpressionString rejects expressions that are not boolean expressions.
	tokenizer      *Tokenizer // The expression tokenizer.
}
   107  
   108  // ParseExpressionString converts a ODATA expression input string into a parse
   109  // tree that can be used by providers to create a response.
   110  // Expressions can be used within $filter and $orderby query options.
   111  func (p *ExpressionParser) ParseExpressionString(ctx context.Context, expression string) (*GoDataExpression, error) {
   112  	tokens, err := p.tokenizer.Tokenize(ctx, expression)
   113  	if err != nil {
   114  		return nil, err
   115  	}
   116  	// TODO: can we do this in one fell swoop?
   117  	postfix, err := p.InfixToPostfix(ctx, tokens)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  	tree, err := p.PostfixToTree(ctx, postfix)
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  	if tree == nil || tree.Token == nil {
   126  		return nil, BadRequestError("Expression cannot be nil")
   127  	}
   128  	if p.ExpectBoolExpr && !p.isBooleanExpression(tree.Token) {
   129  		return nil, BadRequestError("Expression does not return a boolean value")
   130  	}
   131  	return &GoDataExpression{tree, expression}, nil
   132  }
   133  
   134  var GlobalExpressionTokenizer *Tokenizer
   135  var GlobalExpressionParser *ExpressionParser
   136  
   137  // init constructs single instances of Tokenizer and ExpressionParser and initializes their
   138  // respective packages variables.
   139  func init() {
   140  	p := NewExpressionParser()
   141  	t := p.tokenizer // use the Tokenizer instance created by
   142  
   143  	GlobalExpressionTokenizer = t
   144  	GlobalExpressionParser = p
   145  
   146  	GlobalFilterTokenizer = t
   147  	GlobalFilterParser = p
   148  }
   149  
   150  // ExpressionTokenizer creates a tokenizer capable of tokenizing ODATA expressions.
   151  // 4.01 Services MUST support case-insensitive operator names.
   152  // See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#_Toc31360955
   153  func NewExpressionTokenizer() *Tokenizer {
   154  	t := Tokenizer{}
   155  	// guidValue = 8HEXDIG "-" 4HEXDIG "-" 4HEXDIG "-" 4HEXDIG "-" 12HEXDIG
   156  	t.Add(`^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}`, ExpressionTokenGuid)
   157  	// duration      = [ "duration" ] SQUOTE durationValue SQUOTE
   158  	// durationValue = [ SIGN ] "P" [ 1*DIGIT "D" ] [ "T" [ 1*DIGIT "H" ] [ 1*DIGIT "M" ] [ 1*DIGIT [ "." 1*DIGIT ] "S" ] ]
   159  	// Duration literals in OData 4.0 required prefixing with “duration”.
   160  	// In OData 4.01, services MUST support duration and enumeration literals with or without the type prefix.
   161  	// OData clients that want to operate across OData 4.0 and OData 4.01 services should always include the prefix for duration and enumeration types.
   162  	t.Add(tokenDurationRe, ExpressionTokenDuration)
   163  	t.Add("^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}T[0-9]{2,2}:[0-9]{2,2}(:[0-9]{2,2}(.[0-9]+)?)?(Z|[+-][0-9]{2,2}:[0-9]{2,2})", ExpressionTokenDateTime)
   164  	t.Add("^-?[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}", ExpressionTokenDate)
   165  	t.Add("^[0-9]{2,2}:[0-9]{2,2}(:[0-9]{2,2}(.[0-9]+)?)?", ExpressionTokenTime)
   166  	t.Add("^\\(", ExpressionTokenOpenParen)
   167  	t.Add("^\\)", ExpressionTokenCloseParen)
   168  	t.Add("^(?P<token>/)(?i)(any|all)", ExpressionTokenLambdaNav)                              // '/' as a token between a collection expression and a lambda function any() or all()
   169  	t.Add("^/", ExpressionTokenNav)                                                            // '/' as a token for property navigation.
   170  	t.Add("^=", ExpressionTokenAssignement)                                                    // '=' as a token for function argument assignment.
   171  	t.AddWithSubstituteFunc("^:", ExpressionTokenColon, func(in string) string { return "," }) // Function arg separator for lambda functions (any, all)
   172  	t.Add("^,", ExpressionTokenComma)                                                          // Default arg separator for functions
   173  	// Per ODATA ABNF grammar, functions must be followed by a open parenthesis.
   174  	// This implementation is a bit more lenient and allows space character between
   175  	// the function name and the open parenthesis.
   176  	// TODO: If we remove the optional space character, the function token will be
   177  	// mistakenly interpreted as a literal.
   178  	// E.g. ABNF for 'geo.distance':
   179  	// distanceMethodCallExpr   = "geo.distance"   OPEN BWS commonExpr BWS COMMA BWS commonExpr BWS CLOSE
   180  	t.Add("(?i)^(?P<token>(geo.distance|geo.intersects|geo.length))[\\s(]", ExpressionTokenFunc)
   181  	// geographyPolygon   = geographyPrefix SQUOTE fullPolygonLiteral SQUOTE
   182  	// fullPolygonLiteral = sridLiteral polygonLiteral
   183  	// sridLiteral      = "SRID" EQ 1*5DIGIT SEMI
   184  	// polygonLiteral     = "Polygon" polygonData
   185  	// polygonData        = OPEN ringLiteral *( COMMA ringLiteral ) CLOSE
   186  	// Example: geography'SRID=0;Polygon((-122.031577 47.578581, -122.031577 47.678581, -122.131577 47.678581))'
   187  	t.Add(`^geography'SRID=[0-9]{1,5};Polygon\(\((-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)(,\s-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)*\)\)'`, ExpressionTokenGeographyPolygon)
   188  	// geometryPolygon    = geometryPrefix SQUOTE fullPolygonLiteral         SQUOTE
   189  	t.Add(`^geometry'SRID=[0-9]{1,5};Polygon\(\((-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)(,\s-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)*\)\)'`, ExpressionTokenGeometryPolygon)
   190  	// According to ODATA ABNF notation, functions must be followed by a open parenthesis with no space
   191  	// between the function name and the open parenthesis.
   192  	// However, we are leniently allowing space characters between the function and the open parenthesis.
   193  	// TODO make leniency configurable.
   194  	// E.g. ABNF for 'indexof':
   195  	// indexOfMethodCallExpr    = "indexof"    OPEN BWS commonExpr BWS COMMA BWS commonExpr BWS CLOSE
   196  	t.Add("(?i)^(?P<token>(substringof|substring|length|indexof|exists|"+
   197  		"contains|endswith|startswith|tolower|toupper|trim|concat|year|month|day|"+
   198  		"hour|minute|second|fractionalseconds|date|time|totaloffsetminutes|now|"+
   199  		"maxdatetime|mindatetime|totalseconds|round|floor|ceiling|isof|cast))[\\s(]", ExpressionTokenFunc)
   200  	// Logical operators must be followed by a space character.
   201  	// However, in practice user have written requests such as not(City eq 'Seattle')
   202  	// We are leniently allowing space characters between the operator name and the open parenthesis.
   203  	// TODO make leniency configurable.
   204  	// Example:
   205  	// notExpr = "not" RWS boolCommonExpr
   206  	t.Add("(?i)^(?P<token>(eq|ne|gt|ge|lt|le|and|or|not|has|in))[\\s(]", ExpressionTokenLogical)
   207  	// Arithmetic operators must be followed by a space character.
   208  	t.Add("(?i)^(?P<token>(add|sub|mul|divby|div|mod))\\s", ExpressionTokenOp)
   209  	// anyExpr = "any" OPEN BWS [ lambdaVariableExpr BWS COLON BWS lambdaPredicateExpr ] BWS CLOSE
   210  	// allExpr = "all" OPEN BWS   lambdaVariableExpr BWS COLON BWS lambdaPredicateExpr   BWS CLOSE
   211  	t.Add("(?i)^(?P<token>(any|all))[\\s(]", ExpressionTokenLambda)
   212  	t.Add("(?i)^(?P<token>(case))[\\s(]", ExpressionTokenCase)
   213  	t.Add("^null", ExpressionTokenNull)
   214  	t.Add("^\\$it", ExpressionTokenIt)
   215  	t.Add("^\\$root", ExpressionTokenRoot)
   216  	t.Add("^-?[0-9]+\\.[0-9]+", ExpressionTokenFloat)
   217  	t.Add("^-?[0-9]+", ExpressionTokenInteger)
   218  	t.AddWithSubstituteFunc("^'(''|[^'])*'", ExpressionTokenString, unescapeTokenString)
   219  	t.Add("^(true|false)", ExpressionTokenBoolean)
   220  	t.AddWithSubstituteFunc("^@*[a-zA-Z][a-zA-Z0-9_.]*",
   221  		ExpressionTokenLiteral, unescapeUtfEncoding) // The optional '@' character is used to identify parameter aliases
   222  	t.Ignore("^ ", ExpressionTokenWhitespace)
   223  
   224  	return &t
   225  }
   226  
   227  // unescapeTokenString unescapes the input string according to the ODATA ABNF rules
   228  // and returns the unescaped string.
   229  // In ODATA ABNF, strings are encoded according to the following rules:
   230  // string           = SQUOTE *( SQUOTE-in-string / pchar-no-SQUOTE ) SQUOTE
   231  // SQUOTE-in-string = SQUOTE SQUOTE ; two consecutive single quotes represent one within a string literal
   232  // pchar-no-SQUOTE       = unreserved / pct-encoded-no-SQUOTE / other-delims / "$" / "&" / "=" / ":" / "@"
   233  // pct-encoded-no-SQUOTE = "%" ( "0" / "1" /   "3" / "4" / "5" / "6" / "8" / "9" / A-to-F ) HEXDIG
   234  // / "%" "2" ( "0" / "1" / "2" / "3" / "4" / "5" / "6" /   "8" / "9" / A-to-F )
   235  // unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
   236  //
   237  // See http://docs.oasis-open.org/odata/odata/v4.01/csprd03/abnf/odata-abnf-construction-rules.txt
   238  func unescapeTokenString(in string) string {
   239  	// The call to ReplaceAll() implements
   240  	// SQUOTE-in-string = SQUOTE SQUOTE ; two consecutive single quotes represent one within a string literal
   241  	if in == "''" {
   242  		return in
   243  	}
   244  	return strings.ReplaceAll(in, "''", "'")
   245  }
   246  
   247  // TODO: should we make this configurable?
   248  func unescapeUtfEncoding(in string) string {
   249  	return strings.ReplaceAll(in, "_x0020_", " ")
   250  }
   251  
   252  func NewExpressionParser() *ExpressionParser {
   253  	parser := &ExpressionParser{
   254  		Parser:         EmptyParser().WithLiteralToken(ExpressionTokenLiteral),
   255  		ExpectBoolExpr: false,
   256  		tokenizer:      NewExpressionTokenizer(),
   257  	}
   258  	parser.DefineOperator("/", 2, OpAssociationLeft, 8) // Note: '/' is used as a property navigator and between a collExpr and lambda function.
   259  	parser.DefineOperator("has", 2, OpAssociationLeft, 8)
   260  	// 'in' operator takes a literal list.
   261  	// City in ('Seattle') needs to be interpreted as a list expression, not a paren expression.
   262  	parser.DefineOperator("in", 2, OpAssociationLeft, 8).WithListExprPreference(true)
   263  	parser.DefineOperator("-", 1, OpAssociationNone, 7)
   264  	parser.DefineOperator("not", 1, OpAssociationRight, 7)
   265  	parser.DefineOperator("cast", 2, OpAssociationNone, 7)
   266  	parser.DefineOperator("mul", 2, OpAssociationNone, 6)
   267  	parser.DefineOperator("div", 2, OpAssociationNone, 6)   // Division
   268  	parser.DefineOperator("divby", 2, OpAssociationNone, 6) // Decimal Division
   269  	parser.DefineOperator("mod", 2, OpAssociationNone, 6)
   270  	parser.DefineOperator("add", 2, OpAssociationNone, 5)
   271  	parser.DefineOperator("sub", 2, OpAssociationNone, 5)
   272  	parser.DefineOperator("gt", 2, OpAssociationLeft, 4)
   273  	parser.DefineOperator("ge", 2, OpAssociationLeft, 4)
   274  	parser.DefineOperator("lt", 2, OpAssociationLeft, 4)
   275  	parser.DefineOperator("le", 2, OpAssociationLeft, 4)
   276  	parser.DefineOperator("eq", 2, OpAssociationLeft, 3)
   277  	parser.DefineOperator("ne", 2, OpAssociationLeft, 3)
   278  	parser.DefineOperator("and", 2, OpAssociationLeft, 2)
   279  	parser.DefineOperator("or", 2, OpAssociationLeft, 1)
   280  	parser.DefineOperator("=", 2, OpAssociationRight, 0) // Function argument assignment. E.g. MyFunc(Arg1='abc')
   281  	parser.DefineFunction("contains", []int{2}, true)
   282  	parser.DefineFunction("endswith", []int{2}, true)
   283  	parser.DefineFunction("startswith", []int{2}, true)
   284  	parser.DefineFunction("exists", []int{2}, true)
   285  	parser.DefineFunction("length", []int{1}, false)
   286  	parser.DefineFunction("indexof", []int{2}, false)
   287  	parser.DefineFunction("substring", []int{2, 3}, false)
   288  	parser.DefineFunction("substringof", []int{2}, false)
   289  	parser.DefineFunction("tolower", []int{1}, false)
   290  	parser.DefineFunction("toupper", []int{1}, false)
   291  	parser.DefineFunction("trim", []int{1}, false)
   292  	parser.DefineFunction("concat", []int{2}, false)
   293  	parser.DefineFunction("year", []int{1}, false)
   294  	parser.DefineFunction("month", []int{1}, false)
   295  	parser.DefineFunction("day", []int{1}, false)
   296  	parser.DefineFunction("hour", []int{1}, false)
   297  	parser.DefineFunction("minute", []int{1}, false)
   298  	parser.DefineFunction("second", []int{1}, false)
   299  	parser.DefineFunction("fractionalseconds", []int{1}, false)
   300  	parser.DefineFunction("date", []int{1}, false)
   301  	parser.DefineFunction("time", []int{1}, false)
   302  	parser.DefineFunction("totaloffsetminutes", []int{1}, false)
   303  	parser.DefineFunction("now", []int{0}, false)
   304  	parser.DefineFunction("maxdatetime", []int{0}, false)
   305  	parser.DefineFunction("mindatetime", []int{0}, false)
   306  	parser.DefineFunction("totalseconds", []int{1}, false)
   307  	parser.DefineFunction("round", []int{1}, false)
   308  	parser.DefineFunction("floor", []int{1}, false)
   309  	parser.DefineFunction("ceiling", []int{1}, false)
   310  	parser.DefineFunction("isof", []int{1, 2}, true) // isof function can take one or two arguments.
   311  	parser.DefineFunction("cast", []int{2}, false)
   312  	parser.DefineFunction("geo.distance", []int{2}, false)
   313  	// The geo.intersects function has the following signatures:
   314  	//   Edm.Boolean geo.intersects(Edm.GeographyPoint,Edm.GeographyPolygon)
   315  	//   Edm.Boolean geo.intersects(Edm.GeometryPoint,Edm.GeometryPolygon)
   316  	// The geo.intersects function returns true if the specified point lies within the interior
   317  	// or on the boundary of the specified polygon, otherwise it returns false.
   318  	parser.DefineFunction("geo.intersects", []int{2}, false)
   319  	// The geo.length function has the following signatures:
   320  	//   Edm.Double geo.length(Edm.GeographyLineString)
   321  	//   Edm.Double geo.length(Edm.GeometryLineString)
   322  	// The geo.length function returns the total length of its line string parameter
   323  	// in the coordinate reference system signified by its SRID.
   324  	parser.DefineFunction("geo.length", []int{1}, false)
   325  	// 'any' can take either zero or two arguments with the later having the form any(d:d/Prop eq 1).
   326  	// Godata interprets the colon as an argument delimiter and considers the function to have two arguments.
   327  	parser.DefineFunction("any", []int{0, 2}, true)
   328  	// 'all' requires two arguments of a form similar to 'any'.
   329  	parser.DefineFunction("all", []int{2}, true)
   330  	// Define 'case' as a function accepting 1-10 arguments. Each argument is a pair of expressions separated by a colon.
   331  	// See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#sec_case
   332  	parser.DefineFunction("case", []int{1,2,3,4,5,6,7,8,9,10}, true)
   333  
   334  	return parser
   335  }
   336  
   337  func (p *ExpressionParser) SemanticizeExpression(
   338  	expression *GoDataExpression,
   339  	service *GoDataService,
   340  	entity *GoDataEntityType,
   341  ) error {
   342  
   343  	if expression == nil || expression.Tree == nil {
   344  		return nil
   345  	}
   346  
   347  	var semanticizeExpressionNode func(node *ParseNode) error
   348  	semanticizeExpressionNode = func(node *ParseNode) error {
   349  
   350  		if node.Token.Type == ExpressionTokenLiteral {
   351  			prop, ok := service.PropertyLookup[entity][node.Token.Value]
   352  			if !ok {
   353  				return BadRequestError("No property found " + node.Token.Value + " on entity " + entity.Name)
   354  			}
   355  			node.Token.SemanticType = SemanticTypeProperty
   356  			node.Token.SemanticReference = prop
   357  		} else {
   358  			node.Token.SemanticType = SemanticTypePropertyValue
   359  			node.Token.SemanticReference = &node.Token.Value
   360  		}
   361  
   362  		for _, child := range node.Children {
   363  			err := semanticizeExpressionNode(child)
   364  			if err != nil {
   365  				return err
   366  			}
   367  		}
   368  
   369  		return nil
   370  	}
   371  
   372  	return semanticizeExpressionNode(expression.Tree)
   373  }