github.com/blend/go-sdk@v1.20220411.3/selector/parser.go (about) 1 /* 2 3 Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file. 5 6 */ 7 8 package selector 9 10 import ( 11 "fmt" 12 "strings" 13 "unicode/utf8" 14 ) 15 16 // Parser parses a selector incrementally. 17 type Parser struct { 18 // s stores the string to be tokenized 19 s string 20 // pos is the position currently tokenized 21 pos int 22 // m is an optional mark 23 m int 24 25 skipValidation bool 26 } 27 28 // Parse does the actual parsing. 29 func (p *Parser) Parse() (Selector, error) { 30 p.s = strings.TrimSpace(p.s) 31 if len(p.s) == 0 { 32 return Any{}, nil 33 } 34 35 var b rune 36 var selector, subSelector Selector 37 var err error 38 var word string 39 var op string 40 41 // loop over "clauses" 42 // clauses are separated by commas and grouped logically as "ands" 43 for { 44 // sniff the !haskey form 45 b = p.current() 46 47 if b == Bang { 48 p.advance() // we aren't going to use the '!' 49 50 // read off the !KEY 51 // readWord will leave us on the next non-alpha char 52 word, err = p.readWord() 53 if err != nil { 54 return nil, err 55 } 56 57 selector = p.addAnd(selector, p.notHasKey(word)) // add the !KEY term 58 if p.done() { 59 break 60 } 61 62 p.skipToNonWhitespace() 63 b = p.current() 64 if b != Comma { 65 return nil, p.parseError("consecutive not has key terms") 66 } 67 68 p.advance() 69 continue 70 } 71 72 // we're done peeking the first char 73 // read the first KEY 74 word, err = p.readWord() 75 if err != nil { 76 return nil, err 77 } 78 79 p.mark() // mark to revert if the sniff for the `KEY` form fails 80 81 // sniff if the next character after the word is a comma 82 // this indicates it's a "key" form, or existence check on a key 83 b = p.skipToNonWhitespace() // the comma is not whitespace 84 if b == Comma || p.done() { 85 selector = p.addAnd(selector, p.hasKey(word)) 86 87 if b == Comma { 88 // this is largely a no-op unless we hit a comma 89 p.advance() 90 // we _have_ to eat the next whitespace 91 _ = p.skipToNonWhitespace() 92 if p.done() { 93 return nil, p.parseError() 94 } 95 } 96 if p.done() { 97 break 98 } 99 continue 100 } else { 101 p.popMark() 102 } 103 104 op, err = p.readOp() 105 if err != nil { 106 return nil, err 107 } 108 109 switch op { 110 case OpEquals, OpDoubleEquals: 111 subSelector, err = p.equals(word) 112 case OpNotEquals: 113 subSelector, err = p.notEquals(word) 114 case OpIn: 115 subSelector, err = p.in(word) 116 case OpNotIn: 117 subSelector, err = p.notIn(word) 118 default: 119 return nil, p.parseError("invalid operator") 120 } 121 if err != nil { 122 return nil, err 123 } 124 selector = p.addAnd(selector, subSelector) 125 126 b = p.skipToNonWhitespace() 127 if b == Comma { 128 p.advance() 129 if p.done() { 130 return nil, p.parseError(errExpectedNonEmptyKey) 131 } 132 p.skipToNonWhitespace() 133 continue 134 } 135 136 if p.done() { 137 break 138 } 139 140 // we have a "foo == bar foo" situation 141 return nil, p.parseError("keys not separated by comma") 142 } 143 144 if !p.skipValidation { 145 err = selector.Validate() 146 if err != nil { 147 return nil, err 148 } 149 } 150 151 return selector, nil 152 } 153 154 // addAnd starts grouping selectors into a high level `and`, returning the aggregate selector. 155 func (p *Parser) addAnd(current, next Selector) Selector { 156 if current == nil { 157 return next 158 } 159 if typed, isTyped := current.(And); isTyped { 160 return append(typed, next) 161 } 162 return And([]Selector{current, next}) 163 } 164 165 func (p *Parser) hasKey(key string) Selector { 166 return HasKey(key) 167 } 168 169 func (p *Parser) notHasKey(key string) Selector { 170 return NotHasKey(key) 171 } 172 173 func (p *Parser) equals(key string) (Selector, error) { 174 value, err := p.readWord() 175 if err != nil { 176 return nil, err 177 } 178 return Equals{Key: key, Value: value}, nil 179 } 180 181 func (p *Parser) notEquals(key string) (Selector, error) { 182 value, err := p.readWord() 183 if err != nil { 184 return nil, err 185 } 186 return NotEquals{Key: key, Value: value}, nil 187 } 188 189 func (p *Parser) in(key string) (Selector, error) { 190 csv, err := p.readCSV() 191 if err != nil { 192 return nil, err 193 } 194 return In{Key: key, Values: csv}, nil 195 } 196 197 func (p *Parser) notIn(key string) (Selector, error) { 198 csv, err := p.readCSV() 199 if err != nil { 200 return nil, err 201 } 202 return NotIn{Key: key, Values: csv}, nil 203 } 204 205 // done indicates the cursor is past the usable length of the string. 206 func (p *Parser) done() bool { 207 return p.pos == len(p.s) 208 } 209 210 // mark sets a mark at the current position. 211 func (p *Parser) mark() { 212 p.m = p.pos 213 } 214 215 // popMark moves the cursor back to the previous mark. 216 func (p *Parser) popMark() { 217 if p.m > 0 { 218 p.pos = p.m 219 } 220 p.m = 0 221 } 222 223 // current returns the rune at the current position. 224 func (p *Parser) current() (r rune) { 225 r, _ = utf8.DecodeRuneInString(p.s[p.pos:]) 226 return 227 } 228 229 // advance moves the cursor forward one rune. 230 func (p *Parser) advance() { 231 if p.pos < len(p.s) { 232 _, width := utf8.DecodeRuneInString(p.s[p.pos:]) 233 p.pos += width 234 } 235 } 236 237 // readOp reads a valid operator. 238 // valid operators include: 239 // [ =, ==, !=, in, notin ] 240 // errors if it doesn't read one of the above, or there is another structural issue. 241 // this will leave the position on the character after the operator 242 func (p *Parser) readOp() (string, error) { 243 // skip preceding whitespace 244 p.skipWhiteSpace() 245 246 const ( 247 stateFirstOpChar = 0 248 stateEqual = 1 249 stateBang = 2 250 stateInI = 3 251 stateNotInN = 4 252 stateNotInO = 5 253 stateNotInT = 6 254 stateNotInI = 7 255 ) 256 257 var state int 258 var ch rune 259 var op []rune 260 for { 261 if p.done() { 262 return "", p.parseError("invalid operator") 263 } 264 265 ch = p.current() 266 267 switch state { 268 case stateFirstOpChar: // initial state, determine what op we're reading for 269 if ch == Equal { 270 state = stateEqual 271 break 272 } 273 if ch == Bang { 274 state = stateBang 275 break 276 } 277 if ch == 'i' { 278 state = stateInI 279 break 280 } 281 if ch == 'n' { 282 state = stateNotInN 283 break 284 } 285 286 return "", p.parseError("invalid operator") 287 288 case stateEqual: 289 if p.isWhitespace(ch) || isAlpha(ch) || ch == Comma { 290 return string(op), nil 291 } 292 if ch == Equal { 293 op = append(op, ch) 294 p.advance() 295 return string(op), nil 296 } 297 298 return "", p.parseError("invalid operator") 299 300 case stateBang: 301 if ch == Equal { 302 op = append(op, ch) 303 p.advance() 304 return string(op), nil 305 } 306 307 return "", p.parseError("invalid operator") 308 309 case stateInI: 310 if ch == 'n' { 311 op = append(op, ch) 312 p.advance() 313 return string(op), nil 314 } 315 316 return "", p.parseError("invalid operator") 317 318 case stateNotInN: 319 if ch == 'o' { 320 state = stateNotInO 321 break 322 } 323 324 return "", p.parseError("invalid operator") 325 326 case stateNotInO: 327 if ch == 't' { 328 state = stateNotInT 329 break 330 } 331 332 return "", p.parseError("invalid operator") 333 334 case stateNotInT: 335 if ch == 'i' { 336 state = stateNotInI 337 break 338 } 339 340 return "", p.parseError("invalid operator") 341 342 case stateNotInI: 343 if ch == 'n' { 344 op = append(op, ch) 345 p.advance() 346 return string(op), nil 347 } 348 349 return "", p.parseError("invalid operator") 350 } 351 352 op = append(op, ch) 353 p.advance() 354 } 355 } 356 357 // readWord skips whitespace, then reads a word until whitespace or a token. 358 // it will leave the cursor on the next char after the word, i.e. the space or token. 359 func (p *Parser) readWord() (string, error) { 360 p.skipWhiteSpace() 361 362 var word []rune 363 var ch rune 364 for { 365 if p.done() { 366 break 367 } 368 369 ch = p.current() 370 if isWhitespace(ch) || 371 ch == Comma || 372 isOperatorSymbol(ch) { 373 break 374 } 375 376 word = append(word, ch) 377 p.advance() 378 } 379 380 if len(word) == 0 { 381 return "", p.parseError(errExpectedNonEmptyKey) 382 } 383 384 return string(word), nil 385 } 386 387 // readCSV reads an array of strings in csv form. 388 // it expects to start just before the first `(` and 389 // will read until just past the closing `)` 390 func (p *Parser) readCSV() (results []string, err error) { 391 // skip preceding whitespace 392 p.skipWhiteSpace() 393 394 const ( 395 stateBeforeParens = 0 396 stateWord = 1 397 stateWhitespaceAfterOpenParens = 2 398 stateWhitespaceAfterComma = 3 399 stateWhitespaceAfterWord = 4 400 ) 401 402 var word []rune 403 var ch rune 404 var state int 405 406 for { 407 if p.done() { 408 results = nil 409 err = p.parseError("csv; expects close parenthesis") 410 // err = ErrInvalidSelector 411 return 412 } 413 414 ch = p.current() 415 416 switch state { 417 case stateBeforeParens: 418 if ch == OpenParens { 419 state = stateWhitespaceAfterOpenParens 420 p.advance() 421 continue 422 } 423 424 // not open parens, bail 425 err = p.parseError("csv; expects open parenthesis") 426 results = nil 427 return 428 429 case stateWord: 430 431 if ch == Comma { 432 if len(word) > 0 { 433 results = append(results, string(word)) 434 word = nil 435 } 436 437 // the symbol is the comma 438 state = stateWhitespaceAfterComma 439 p.advance() 440 continue 441 } 442 443 if ch == CloseParens { 444 if len(word) > 0 { 445 results = append(results, string(word)) 446 } 447 p.advance() 448 return 449 } 450 451 if p.isWhitespace(ch) { 452 if len(word) > 0 { 453 results = append(results, string(word)) 454 word = nil 455 } 456 457 state = stateWhitespaceAfterWord 458 p.advance() 459 continue 460 } 461 462 if !p.isValidValue(ch) { 463 err = p.parseError("csv; word contains invalid characters") 464 results = nil 465 return 466 } 467 468 word = append(word, ch) 469 p.advance() 470 continue 471 472 case stateWhitespaceAfterOpenParens, stateWhitespaceAfterComma: 473 if p.isWhitespace(ch) { 474 p.advance() 475 continue 476 } 477 if isAlpha(ch) { 478 state = stateWord 479 continue 480 } 481 if ch == Comma { 482 p.advance() 483 state = stateWhitespaceAfterComma 484 continue 485 } 486 if ch == CloseParens { 487 p.advance() 488 return // exit reading the csv 489 } 490 491 if state == stateWhitespaceAfterOpenParens { 492 err = p.parseError("csv; invalid characters after '('") 493 return 494 } 495 err = p.parseError("csv; invalid characters after ','") 496 return 497 498 case stateWhitespaceAfterWord: 499 500 if ch == CloseParens { 501 if len(word) > 0 { 502 results = append(results, string(word)) 503 } 504 p.advance() 505 return 506 } 507 508 if p.isWhitespace(ch) { 509 p.advance() 510 continue 511 } 512 513 if ch == Comma { 514 state = stateWhitespaceAfterComma 515 p.advance() 516 continue 517 } 518 519 err = p.parseError("csv; consecutive whitespace separated words without a comma") 520 results = nil 521 return 522 } 523 } 524 } 525 526 func (p *Parser) skipWhiteSpace() { 527 var ch rune 528 for { 529 if p.done() { 530 return 531 } 532 ch = p.current() 533 if !p.isWhitespace(ch) { 534 return 535 } 536 p.advance() 537 } 538 } 539 540 func (p *Parser) skipToNonWhitespace() (ch rune) { 541 for { 542 if p.done() { 543 return 544 } 545 ch = p.current() 546 if ch == Comma || !p.isWhitespace(ch) { 547 return 548 } 549 p.advance() 550 } 551 } 552 553 // isWhitespace returns true if the rune is a space, tab, or newline. 554 func (p *Parser) isWhitespace(ch rune) bool { 555 return ch == Space || ch == Tab || ch == CarriageReturn || ch == NewLine 556 } 557 558 func (p *Parser) isValidValue(ch rune) bool { 559 return isAlpha(ch) || isNameSymbol(ch) 560 } 561 562 func (p *Parser) parseError(message ...interface{}) error { 563 return &ParseError{ 564 Err: ErrInvalidSelector, 565 Input: p.s, 566 Position: p.pos, 567 Message: fmt.Sprint(message...), 568 } 569 }