github.com/CiscoM31/godata@v1.0.10/expression_parser.go (about) 1 package godata 2 3 import ( 4 "context" 5 "strings" 6 ) 7 8 // tokenDurationRe is a regex for a token of type duration. 9 // The token value is set to the ISO 8601 string inside the single quotes 10 // For example, if the input data is duration'PT2H', then the token value is set to PT2H without quotes. 11 const tokenDurationRe = `^(duration)?'(?P<subtoken>-?P((([0-9]+Y([0-9]+M)?([0-9]+D)?|([0-9]+M)([0-9]+D)?|([0-9]+D))(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))?)|(T(([0-9]+H)([0-9]+M)?([0-9]+(\.[0-9]+)?S)?|([0-9]+M)([0-9]+(\.[0-9]+)?S)?|([0-9]+(\.[0-9]+)?S)))))'` 12 13 // Addressing properties. 14 // Addressing items within a collection: 15 // ABNF: entityColNavigationProperty [ collectionNavigation ] 16 // collectionNavigation = [ "/" qualifiedEntityTypeName ] [ collectionNavPath ] 17 // Description: OData identifier, optionally followed by collection navigation. 18 // 19 // propertyPath = entityColNavigationProperty [ collectionNavigation ] 20 // / entityNavigationProperty [ singleNavigation ] 21 // / complexColProperty [ collectionPath ] 22 // / complexProperty [ complexPath ] 23 // / primitiveColProperty [ collectionPath ] 24 // / primitiveProperty [ singlePath ] 25 // / streamProperty [ boundOperation ] 26 27 type ExpressionTokenType int 28 29 func (e ExpressionTokenType) Value() int { 30 return (int)(e) 31 } 32 33 const ( 34 ExpressionTokenOpenParen ExpressionTokenType = iota // Open parenthesis - parenthesis expression, list expression, or path segment selector. 35 ExpressionTokenCloseParen // Close parenthesis 36 ExpressionTokenWhitespace // white space token 37 ExpressionTokenNav // Property navigation 38 ExpressionTokenColon // Function arg separator for 'any(v:boolExpr)' and 'all(v:boolExpr)' lambda operators 39 ExpressionTokenComma // [5] List delimiter and function argument delimiter. 40 ExpressionTokenLogical // eq|ne|gt|ge|lt|le|and|or|not|has|in 41 ExpressionTokenOp // add|sub|mul|divby|div|mod 42 ExpressionTokenFunc // Function, e.g. contains, substring... 43 ExpressionTokenLambdaNav // "/" token when used in lambda expression, e.g. tags/any() 44 ExpressionTokenLambda // [10] any(), all() lambda functions 45 ExpressionTokenCase // A case() statement. See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#sec_case 46 ExpressionTokenCasePair // A case statement expression pair [ <boolean expression> : <value expression> ] 47 ExpressionTokenNull // 48 ExpressionTokenIt // The '$it' token 49 ExpressionTokenRoot // [15] The '$root' token 50 ExpressionTokenFloat // A floating point value. 51 ExpressionTokenInteger // An integer value 52 ExpressionTokenString // SQUOTE *( SQUOTE-in-string / pchar-no-SQUOTE ) SQUOTE 53 ExpressionTokenDate // A date value 54 ExpressionTokenTime // [20] A time value 55 ExpressionTokenDateTime // A date-time value 56 ExpressionTokenBoolean // A literal boolean value 57 ExpressionTokenLiteral // A literal non-boolean value 58 ExpressionTokenDuration // duration = [ "duration" ] SQUOTE durationValue SQUOTE 59 ExpressionTokenGuid // [25] A 128-bit GUID 60 ExpressionTokenAssignement // The '=' assignement for function arguments. 61 ExpressionTokenGeographyPolygon // 62 ExpressionTokenGeometryPolygon // 63 expressionTokenLast 64 ) 65 66 func (e ExpressionTokenType) String() string { 67 return [...]string{ 68 "ExpressionTokenOpenParen", 69 "ExpressionTokenCloseParen", 70 "ExpressionTokenWhitespace", 71 "ExpressionTokenNav", 72 "ExpressionTokenColon", 73 "ExpressionTokenComma", 74 "ExpressionTokenLogical", 75 "ExpressionTokenOp", 76 "ExpressionTokenFunc", 77 "ExpressionTokenLambdaNav", 78 "ExpressionTokenLambda", 79 "ExpressionTokenCase", 80 "ExpressionTokenCasePair", 81 "ExpressionTokenNull", 82 "ExpressionTokenIt", 83 "ExpressionTokenRoot", 84 "ExpressionTokenFloat", 85 "ExpressionTokenInteger", 86 "ExpressionTokenString", 87 "ExpressionTokenDate", 88 "ExpressionTokenTime", 89 "ExpressionTokenDateTime", 90 "ExpressionTokenBoolean", 91 "ExpressionTokenLiteral", 92 "ExpressionTokenDuration", 93 "ExpressionTokenGuid", 94 "ExpressionTokenAssignement", 95 "ExpressionTokenGeographyPolygon", 96 "ExpressionTokenGeometryPolygon", 97 "expressionTokenLast", 98 }[e] 99 } 100 101 // ExpressionParser is a ODATA expression parser. 102 type ExpressionParser struct { 103 *Parser 104 ExpectBoolExpr bool // Request expression to validate it is a boolean expression. 105 tokenizer *Tokenizer // The expression tokenizer. 106 } 107 108 // ParseExpressionString converts a ODATA expression input string into a parse 109 // tree that can be used by providers to create a response. 110 // Expressions can be used within $filter and $orderby query options. 111 func (p *ExpressionParser) ParseExpressionString(ctx context.Context, expression string) (*GoDataExpression, error) { 112 tokens, err := p.tokenizer.Tokenize(ctx, expression) 113 if err != nil { 114 return nil, err 115 } 116 // TODO: can we do this in one fell swoop? 117 postfix, err := p.InfixToPostfix(ctx, tokens) 118 if err != nil { 119 return nil, err 120 } 121 tree, err := p.PostfixToTree(ctx, postfix) 122 if err != nil { 123 return nil, err 124 } 125 if tree == nil || tree.Token == nil { 126 return nil, BadRequestError("Expression cannot be nil") 127 } 128 if p.ExpectBoolExpr && !p.isBooleanExpression(tree.Token) { 129 return nil, BadRequestError("Expression does not return a boolean value") 130 } 131 return &GoDataExpression{tree, expression}, nil 132 } 133 134 var GlobalExpressionTokenizer *Tokenizer 135 var GlobalExpressionParser *ExpressionParser 136 137 // init constructs single instances of Tokenizer and ExpressionParser and initializes their 138 // respective packages variables. 139 func init() { 140 p := NewExpressionParser() 141 t := p.tokenizer // use the Tokenizer instance created by 142 143 GlobalExpressionTokenizer = t 144 GlobalExpressionParser = p 145 146 GlobalFilterTokenizer = t 147 GlobalFilterParser = p 148 } 149 150 // ExpressionTokenizer creates a tokenizer capable of tokenizing ODATA expressions. 151 // 4.01 Services MUST support case-insensitive operator names. 152 // See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#_Toc31360955 153 func NewExpressionTokenizer() *Tokenizer { 154 t := Tokenizer{} 155 // guidValue = 8HEXDIG "-" 4HEXDIG "-" 4HEXDIG "-" 4HEXDIG "-" 12HEXDIG 156 t.Add(`^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}`, ExpressionTokenGuid) 157 // duration = [ "duration" ] SQUOTE durationValue SQUOTE 158 // durationValue = [ SIGN ] "P" [ 1*DIGIT "D" ] [ "T" [ 1*DIGIT "H" ] [ 1*DIGIT "M" ] [ 1*DIGIT [ "." 1*DIGIT ] "S" ] ] 159 // Duration literals in OData 4.0 required prefixing with “duration”. 160 // In OData 4.01, services MUST support duration and enumeration literals with or without the type prefix. 161 // OData clients that want to operate across OData 4.0 and OData 4.01 services should always include the prefix for duration and enumeration types. 162 t.Add(tokenDurationRe, ExpressionTokenDuration) 163 t.Add("^[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}T[0-9]{2,2}:[0-9]{2,2}(:[0-9]{2,2}(.[0-9]+)?)?(Z|[+-][0-9]{2,2}:[0-9]{2,2})", ExpressionTokenDateTime) 164 t.Add("^-?[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}", ExpressionTokenDate) 165 t.Add("^[0-9]{2,2}:[0-9]{2,2}(:[0-9]{2,2}(.[0-9]+)?)?", ExpressionTokenTime) 166 t.Add("^\\(", ExpressionTokenOpenParen) 167 t.Add("^\\)", ExpressionTokenCloseParen) 168 t.Add("^(?P<token>/)(?i)(any|all)", ExpressionTokenLambdaNav) // '/' as a token between a collection expression and a lambda function any() or all() 169 t.Add("^/", ExpressionTokenNav) // '/' as a token for property navigation. 170 t.Add("^=", ExpressionTokenAssignement) // '=' as a token for function argument assignment. 171 t.AddWithSubstituteFunc("^:", ExpressionTokenColon, func(in string) string { return "," }) // Function arg separator for lambda functions (any, all) 172 t.Add("^,", ExpressionTokenComma) // Default arg separator for functions 173 // Per ODATA ABNF grammar, functions must be followed by a open parenthesis. 174 // This implementation is a bit more lenient and allows space character between 175 // the function name and the open parenthesis. 176 // TODO: If we remove the optional space character, the function token will be 177 // mistakenly interpreted as a literal. 178 // E.g. ABNF for 'geo.distance': 179 // distanceMethodCallExpr = "geo.distance" OPEN BWS commonExpr BWS COMMA BWS commonExpr BWS CLOSE 180 t.Add("(?i)^(?P<token>(geo.distance|geo.intersects|geo.length))[\\s(]", ExpressionTokenFunc) 181 // geographyPolygon = geographyPrefix SQUOTE fullPolygonLiteral SQUOTE 182 // fullPolygonLiteral = sridLiteral polygonLiteral 183 // sridLiteral = "SRID" EQ 1*5DIGIT SEMI 184 // polygonLiteral = "Polygon" polygonData 185 // polygonData = OPEN ringLiteral *( COMMA ringLiteral ) CLOSE 186 // Example: geography'SRID=0;Polygon((-122.031577 47.578581, -122.031577 47.678581, -122.131577 47.678581))' 187 t.Add(`^geography'SRID=[0-9]{1,5};Polygon\(\((-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)(,\s-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)*\)\)'`, ExpressionTokenGeographyPolygon) 188 // geometryPolygon = geometryPrefix SQUOTE fullPolygonLiteral SQUOTE 189 t.Add(`^geometry'SRID=[0-9]{1,5};Polygon\(\((-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)(,\s-?[0-9]+\.[0-9]+\s+-?[0-9]+\.[0-9]+)*\)\)'`, ExpressionTokenGeometryPolygon) 190 // According to ODATA ABNF notation, functions must be followed by a open parenthesis with no space 191 // between the function name and the open parenthesis. 192 // However, we are leniently allowing space characters between the function and the open parenthesis. 193 // TODO make leniency configurable. 194 // E.g. ABNF for 'indexof': 195 // indexOfMethodCallExpr = "indexof" OPEN BWS commonExpr BWS COMMA BWS commonExpr BWS CLOSE 196 t.Add("(?i)^(?P<token>(substringof|substring|length|indexof|exists|"+ 197 "contains|endswith|startswith|tolower|toupper|trim|concat|year|month|day|"+ 198 "hour|minute|second|fractionalseconds|date|time|totaloffsetminutes|now|"+ 199 "maxdatetime|mindatetime|totalseconds|round|floor|ceiling|isof|cast))[\\s(]", ExpressionTokenFunc) 200 // Logical operators must be followed by a space character. 201 // However, in practice user have written requests such as not(City eq 'Seattle') 202 // We are leniently allowing space characters between the operator name and the open parenthesis. 203 // TODO make leniency configurable. 204 // Example: 205 // notExpr = "not" RWS boolCommonExpr 206 t.Add("(?i)^(?P<token>(eq|ne|gt|ge|lt|le|and|or|not|has|in))[\\s(]", ExpressionTokenLogical) 207 // Arithmetic operators must be followed by a space character. 208 t.Add("(?i)^(?P<token>(add|sub|mul|divby|div|mod))\\s", ExpressionTokenOp) 209 // anyExpr = "any" OPEN BWS [ lambdaVariableExpr BWS COLON BWS lambdaPredicateExpr ] BWS CLOSE 210 // allExpr = "all" OPEN BWS lambdaVariableExpr BWS COLON BWS lambdaPredicateExpr BWS CLOSE 211 t.Add("(?i)^(?P<token>(any|all))[\\s(]", ExpressionTokenLambda) 212 t.Add("(?i)^(?P<token>(case))[\\s(]", ExpressionTokenCase) 213 t.Add("^null", ExpressionTokenNull) 214 t.Add("^\\$it", ExpressionTokenIt) 215 t.Add("^\\$root", ExpressionTokenRoot) 216 t.Add("^-?[0-9]+\\.[0-9]+", ExpressionTokenFloat) 217 t.Add("^-?[0-9]+", ExpressionTokenInteger) 218 t.AddWithSubstituteFunc("^'(''|[^'])*'", ExpressionTokenString, unescapeTokenString) 219 t.Add("^(true|false)", ExpressionTokenBoolean) 220 t.AddWithSubstituteFunc("^@*[a-zA-Z][a-zA-Z0-9_.]*", 221 ExpressionTokenLiteral, unescapeUtfEncoding) // The optional '@' character is used to identify parameter aliases 222 t.Ignore("^ ", ExpressionTokenWhitespace) 223 224 return &t 225 } 226 227 // unescapeTokenString unescapes the input string according to the ODATA ABNF rules 228 // and returns the unescaped string. 229 // In ODATA ABNF, strings are encoded according to the following rules: 230 // string = SQUOTE *( SQUOTE-in-string / pchar-no-SQUOTE ) SQUOTE 231 // SQUOTE-in-string = SQUOTE SQUOTE ; two consecutive single quotes represent one within a string literal 232 // pchar-no-SQUOTE = unreserved / pct-encoded-no-SQUOTE / other-delims / "$" / "&" / "=" / ":" / "@" 233 // pct-encoded-no-SQUOTE = "%" ( "0" / "1" / "3" / "4" / "5" / "6" / "8" / "9" / A-to-F ) HEXDIG 234 // / "%" "2" ( "0" / "1" / "2" / "3" / "4" / "5" / "6" / "8" / "9" / A-to-F ) 235 // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 236 // 237 // See http://docs.oasis-open.org/odata/odata/v4.01/csprd03/abnf/odata-abnf-construction-rules.txt 238 func unescapeTokenString(in string) string { 239 // The call to ReplaceAll() implements 240 // SQUOTE-in-string = SQUOTE SQUOTE ; two consecutive single quotes represent one within a string literal 241 if in == "''" { 242 return in 243 } 244 return strings.ReplaceAll(in, "''", "'") 245 } 246 247 // TODO: should we make this configurable? 248 func unescapeUtfEncoding(in string) string { 249 return strings.ReplaceAll(in, "_x0020_", " ") 250 } 251 252 func NewExpressionParser() *ExpressionParser { 253 parser := &ExpressionParser{ 254 Parser: EmptyParser().WithLiteralToken(ExpressionTokenLiteral), 255 ExpectBoolExpr: false, 256 tokenizer: NewExpressionTokenizer(), 257 } 258 parser.DefineOperator("/", 2, OpAssociationLeft, 8) // Note: '/' is used as a property navigator and between a collExpr and lambda function. 259 parser.DefineOperator("has", 2, OpAssociationLeft, 8) 260 // 'in' operator takes a literal list. 261 // City in ('Seattle') needs to be interpreted as a list expression, not a paren expression. 262 parser.DefineOperator("in", 2, OpAssociationLeft, 8).WithListExprPreference(true) 263 parser.DefineOperator("-", 1, OpAssociationNone, 7) 264 parser.DefineOperator("not", 1, OpAssociationRight, 7) 265 parser.DefineOperator("cast", 2, OpAssociationNone, 7) 266 parser.DefineOperator("mul", 2, OpAssociationNone, 6) 267 parser.DefineOperator("div", 2, OpAssociationNone, 6) // Division 268 parser.DefineOperator("divby", 2, OpAssociationNone, 6) // Decimal Division 269 parser.DefineOperator("mod", 2, OpAssociationNone, 6) 270 parser.DefineOperator("add", 2, OpAssociationNone, 5) 271 parser.DefineOperator("sub", 2, OpAssociationNone, 5) 272 parser.DefineOperator("gt", 2, OpAssociationLeft, 4) 273 parser.DefineOperator("ge", 2, OpAssociationLeft, 4) 274 parser.DefineOperator("lt", 2, OpAssociationLeft, 4) 275 parser.DefineOperator("le", 2, OpAssociationLeft, 4) 276 parser.DefineOperator("eq", 2, OpAssociationLeft, 3) 277 parser.DefineOperator("ne", 2, OpAssociationLeft, 3) 278 parser.DefineOperator("and", 2, OpAssociationLeft, 2) 279 parser.DefineOperator("or", 2, OpAssociationLeft, 1) 280 parser.DefineOperator("=", 2, OpAssociationRight, 0) // Function argument assignment. E.g. MyFunc(Arg1='abc') 281 parser.DefineFunction("contains", []int{2}, true) 282 parser.DefineFunction("endswith", []int{2}, true) 283 parser.DefineFunction("startswith", []int{2}, true) 284 parser.DefineFunction("exists", []int{2}, true) 285 parser.DefineFunction("length", []int{1}, false) 286 parser.DefineFunction("indexof", []int{2}, false) 287 parser.DefineFunction("substring", []int{2, 3}, false) 288 parser.DefineFunction("substringof", []int{2}, false) 289 parser.DefineFunction("tolower", []int{1}, false) 290 parser.DefineFunction("toupper", []int{1}, false) 291 parser.DefineFunction("trim", []int{1}, false) 292 parser.DefineFunction("concat", []int{2}, false) 293 parser.DefineFunction("year", []int{1}, false) 294 parser.DefineFunction("month", []int{1}, false) 295 parser.DefineFunction("day", []int{1}, false) 296 parser.DefineFunction("hour", []int{1}, false) 297 parser.DefineFunction("minute", []int{1}, false) 298 parser.DefineFunction("second", []int{1}, false) 299 parser.DefineFunction("fractionalseconds", []int{1}, false) 300 parser.DefineFunction("date", []int{1}, false) 301 parser.DefineFunction("time", []int{1}, false) 302 parser.DefineFunction("totaloffsetminutes", []int{1}, false) 303 parser.DefineFunction("now", []int{0}, false) 304 parser.DefineFunction("maxdatetime", []int{0}, false) 305 parser.DefineFunction("mindatetime", []int{0}, false) 306 parser.DefineFunction("totalseconds", []int{1}, false) 307 parser.DefineFunction("round", []int{1}, false) 308 parser.DefineFunction("floor", []int{1}, false) 309 parser.DefineFunction("ceiling", []int{1}, false) 310 parser.DefineFunction("isof", []int{1, 2}, true) // isof function can take one or two arguments. 311 parser.DefineFunction("cast", []int{2}, false) 312 parser.DefineFunction("geo.distance", []int{2}, false) 313 // The geo.intersects function has the following signatures: 314 // Edm.Boolean geo.intersects(Edm.GeographyPoint,Edm.GeographyPolygon) 315 // Edm.Boolean geo.intersects(Edm.GeometryPoint,Edm.GeometryPolygon) 316 // The geo.intersects function returns true if the specified point lies within the interior 317 // or on the boundary of the specified polygon, otherwise it returns false. 318 parser.DefineFunction("geo.intersects", []int{2}, false) 319 // The geo.length function has the following signatures: 320 // Edm.Double geo.length(Edm.GeographyLineString) 321 // Edm.Double geo.length(Edm.GeometryLineString) 322 // The geo.length function returns the total length of its line string parameter 323 // in the coordinate reference system signified by its SRID. 324 parser.DefineFunction("geo.length", []int{1}, false) 325 // 'any' can take either zero or two arguments with the later having the form any(d:d/Prop eq 1). 326 // Godata interprets the colon as an argument delimiter and considers the function to have two arguments. 327 parser.DefineFunction("any", []int{0, 2}, true) 328 // 'all' requires two arguments of a form similar to 'any'. 329 parser.DefineFunction("all", []int{2}, true) 330 // Define 'case' as a function accepting 1-10 arguments. Each argument is a pair of expressions separated by a colon. 331 // See https://docs.oasis-open.org/odata/odata/v4.01/odata-v4.01-part2-url-conventions.html#sec_case 332 parser.DefineFunction("case", []int{1,2,3,4,5,6,7,8,9,10}, true) 333 334 return parser 335 } 336 337 func (p *ExpressionParser) SemanticizeExpression( 338 expression *GoDataExpression, 339 service *GoDataService, 340 entity *GoDataEntityType, 341 ) error { 342 343 if expression == nil || expression.Tree == nil { 344 return nil 345 } 346 347 var semanticizeExpressionNode func(node *ParseNode) error 348 semanticizeExpressionNode = func(node *ParseNode) error { 349 350 if node.Token.Type == ExpressionTokenLiteral { 351 prop, ok := service.PropertyLookup[entity][node.Token.Value] 352 if !ok { 353 return BadRequestError("No property found " + node.Token.Value + " on entity " + entity.Name) 354 } 355 node.Token.SemanticType = SemanticTypeProperty 356 node.Token.SemanticReference = prop 357 } else { 358 node.Token.SemanticType = SemanticTypePropertyValue 359 node.Token.SemanticReference = &node.Token.Value 360 } 361 362 for _, child := range node.Children { 363 err := semanticizeExpressionNode(child) 364 if err != nil { 365 return err 366 } 367 } 368 369 return nil 370 } 371 372 return semanticizeExpressionNode(expression.Tree) 373 }