github.com/blend/go-sdk@v1.20220411.3/selector/parser.go (about)

     1  /*
     2  
     3  Copyright (c) 2022 - Present. Blend Labs, Inc. All rights reserved
     4  Use of this source code is governed by a MIT license that can be found in the LICENSE file.
     5  
     6  */
     7  
     8  package selector
     9  
    10  import (
    11  	"fmt"
    12  	"strings"
    13  	"unicode/utf8"
    14  )
    15  
// Parser parses a selector incrementally.
//
// It holds the input string together with a cursor into it; the parse
// methods move the cursor forward as they consume input.
type Parser struct {
	// s stores the string to be tokenized
	s string
	// pos is the byte offset into s of the rune currently being tokenized
	pos int
	// m is an optional mark: a saved value of pos that popMark can restore.
	// a value of zero is treated as "no mark set".
	m int

	// skipValidation, when true, makes Parse return the parsed selector
	// without calling Validate on it.
	skipValidation bool
}
    27  
    28  // Parse does the actual parsing.
    29  func (p *Parser) Parse() (Selector, error) {
    30  	p.s = strings.TrimSpace(p.s)
    31  	if len(p.s) == 0 {
    32  		return Any{}, nil
    33  	}
    34  
    35  	var b rune
    36  	var selector, subSelector Selector
    37  	var err error
    38  	var word string
    39  	var op string
    40  
    41  	// loop over "clauses"
    42  	// clauses are separated by commas and grouped logically as "ands"
    43  	for {
    44  		// sniff the !haskey form
    45  		b = p.current()
    46  
    47  		if b == Bang {
    48  			p.advance() // we aren't going to use the '!'
    49  
    50  			// read off the !KEY
    51  			// readWord will leave us on the next non-alpha char
    52  			word, err = p.readWord()
    53  			if err != nil {
    54  				return nil, err
    55  			}
    56  
    57  			selector = p.addAnd(selector, p.notHasKey(word)) // add the !KEY term
    58  			if p.done() {
    59  				break
    60  			}
    61  
    62  			p.skipToNonWhitespace()
    63  			b = p.current()
    64  			if b != Comma {
    65  				return nil, p.parseError("consecutive not has key terms")
    66  			}
    67  
    68  			p.advance()
    69  			continue
    70  		}
    71  
    72  		// we're done peeking the first char
    73  		// read the first KEY
    74  		word, err = p.readWord()
    75  		if err != nil {
    76  			return nil, err
    77  		}
    78  
    79  		p.mark() // mark to revert if the sniff for the `KEY` form fails
    80  
    81  		// sniff if the next character after the word is a comma
    82  		// this indicates it's a "key" form, or existence check on a key
    83  		b = p.skipToNonWhitespace() // the comma is not whitespace
    84  		if b == Comma || p.done() {
    85  			selector = p.addAnd(selector, p.hasKey(word))
    86  
    87  			if b == Comma {
    88  				// this is largely a no-op unless we hit a comma
    89  				p.advance()
    90  				// we _have_ to eat the next whitespace
    91  				_ = p.skipToNonWhitespace()
    92  				if p.done() {
    93  					return nil, p.parseError()
    94  				}
    95  			}
    96  			if p.done() {
    97  				break
    98  			}
    99  			continue
   100  		} else {
   101  			p.popMark()
   102  		}
   103  
   104  		op, err = p.readOp()
   105  		if err != nil {
   106  			return nil, err
   107  		}
   108  
   109  		switch op {
   110  		case OpEquals, OpDoubleEquals:
   111  			subSelector, err = p.equals(word)
   112  		case OpNotEquals:
   113  			subSelector, err = p.notEquals(word)
   114  		case OpIn:
   115  			subSelector, err = p.in(word)
   116  		case OpNotIn:
   117  			subSelector, err = p.notIn(word)
   118  		default:
   119  			return nil, p.parseError("invalid operator")
   120  		}
   121  		if err != nil {
   122  			return nil, err
   123  		}
   124  		selector = p.addAnd(selector, subSelector)
   125  
   126  		b = p.skipToNonWhitespace()
   127  		if b == Comma {
   128  			p.advance()
   129  			if p.done() {
   130  				return nil, p.parseError(errExpectedNonEmptyKey)
   131  			}
   132  			p.skipToNonWhitespace()
   133  			continue
   134  		}
   135  
   136  		if p.done() {
   137  			break
   138  		}
   139  
   140  		// we have a "foo == bar foo" situation
   141  		return nil, p.parseError("keys not separated by comma")
   142  	}
   143  
   144  	if !p.skipValidation {
   145  		err = selector.Validate()
   146  		if err != nil {
   147  			return nil, err
   148  		}
   149  	}
   150  
   151  	return selector, nil
   152  }
   153  
   154  // addAnd starts grouping selectors into a high level `and`, returning the aggregate selector.
   155  func (p *Parser) addAnd(current, next Selector) Selector {
   156  	if current == nil {
   157  		return next
   158  	}
   159  	if typed, isTyped := current.(And); isTyped {
   160  		return append(typed, next)
   161  	}
   162  	return And([]Selector{current, next})
   163  }
   164  
   165  func (p *Parser) hasKey(key string) Selector {
   166  	return HasKey(key)
   167  }
   168  
   169  func (p *Parser) notHasKey(key string) Selector {
   170  	return NotHasKey(key)
   171  }
   172  
   173  func (p *Parser) equals(key string) (Selector, error) {
   174  	value, err := p.readWord()
   175  	if err != nil {
   176  		return nil, err
   177  	}
   178  	return Equals{Key: key, Value: value}, nil
   179  }
   180  
   181  func (p *Parser) notEquals(key string) (Selector, error) {
   182  	value, err := p.readWord()
   183  	if err != nil {
   184  		return nil, err
   185  	}
   186  	return NotEquals{Key: key, Value: value}, nil
   187  }
   188  
   189  func (p *Parser) in(key string) (Selector, error) {
   190  	csv, err := p.readCSV()
   191  	if err != nil {
   192  		return nil, err
   193  	}
   194  	return In{Key: key, Values: csv}, nil
   195  }
   196  
   197  func (p *Parser) notIn(key string) (Selector, error) {
   198  	csv, err := p.readCSV()
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	return NotIn{Key: key, Values: csv}, nil
   203  }
   204  
   205  // done indicates the cursor is past the usable length of the string.
   206  func (p *Parser) done() bool {
   207  	return p.pos == len(p.s)
   208  }
   209  
   210  // mark sets a mark at the current position.
   211  func (p *Parser) mark() {
   212  	p.m = p.pos
   213  }
   214  
   215  // popMark moves the cursor back to the previous mark.
   216  func (p *Parser) popMark() {
   217  	if p.m > 0 {
   218  		p.pos = p.m
   219  	}
   220  	p.m = 0
   221  }
   222  
   223  // current returns the rune at the current position.
   224  func (p *Parser) current() (r rune) {
   225  	r, _ = utf8.DecodeRuneInString(p.s[p.pos:])
   226  	return
   227  }
   228  
   229  // advance moves the cursor forward one rune.
   230  func (p *Parser) advance() {
   231  	if p.pos < len(p.s) {
   232  		_, width := utf8.DecodeRuneInString(p.s[p.pos:])
   233  		p.pos += width
   234  	}
   235  }
   236  
   237  // readOp reads a valid operator.
   238  // valid operators include:
   239  // [ =, ==, !=, in, notin ]
   240  // errors if it doesn't read one of the above, or there is another structural issue.
   241  // this will leave the position on the character after the operator
   242  func (p *Parser) readOp() (string, error) {
   243  	// skip preceding whitespace
   244  	p.skipWhiteSpace()
   245  
   246  	const (
   247  		stateFirstOpChar = 0
   248  		stateEqual       = 1
   249  		stateBang        = 2
   250  		stateInI         = 3
   251  		stateNotInN      = 4
   252  		stateNotInO      = 5
   253  		stateNotInT      = 6
   254  		stateNotInI      = 7
   255  	)
   256  
   257  	var state int
   258  	var ch rune
   259  	var op []rune
   260  	for {
   261  		if p.done() {
   262  			return "", p.parseError("invalid operator")
   263  		}
   264  
   265  		ch = p.current()
   266  
   267  		switch state {
   268  		case stateFirstOpChar: // initial state, determine what op we're reading for
   269  			if ch == Equal {
   270  				state = stateEqual
   271  				break
   272  			}
   273  			if ch == Bang {
   274  				state = stateBang
   275  				break
   276  			}
   277  			if ch == 'i' {
   278  				state = stateInI
   279  				break
   280  			}
   281  			if ch == 'n' {
   282  				state = stateNotInN
   283  				break
   284  			}
   285  
   286  			return "", p.parseError("invalid operator")
   287  
   288  		case stateEqual:
   289  			if p.isWhitespace(ch) || isAlpha(ch) || ch == Comma {
   290  				return string(op), nil
   291  			}
   292  			if ch == Equal {
   293  				op = append(op, ch)
   294  				p.advance()
   295  				return string(op), nil
   296  			}
   297  
   298  			return "", p.parseError("invalid operator")
   299  
   300  		case stateBang:
   301  			if ch == Equal {
   302  				op = append(op, ch)
   303  				p.advance()
   304  				return string(op), nil
   305  			}
   306  
   307  			return "", p.parseError("invalid operator")
   308  
   309  		case stateInI:
   310  			if ch == 'n' {
   311  				op = append(op, ch)
   312  				p.advance()
   313  				return string(op), nil
   314  			}
   315  
   316  			return "", p.parseError("invalid operator")
   317  
   318  		case stateNotInN:
   319  			if ch == 'o' {
   320  				state = stateNotInO
   321  				break
   322  			}
   323  
   324  			return "", p.parseError("invalid operator")
   325  
   326  		case stateNotInO:
   327  			if ch == 't' {
   328  				state = stateNotInT
   329  				break
   330  			}
   331  
   332  			return "", p.parseError("invalid operator")
   333  
   334  		case stateNotInT:
   335  			if ch == 'i' {
   336  				state = stateNotInI
   337  				break
   338  			}
   339  
   340  			return "", p.parseError("invalid operator")
   341  
   342  		case stateNotInI:
   343  			if ch == 'n' {
   344  				op = append(op, ch)
   345  				p.advance()
   346  				return string(op), nil
   347  			}
   348  
   349  			return "", p.parseError("invalid operator")
   350  		}
   351  
   352  		op = append(op, ch)
   353  		p.advance()
   354  	}
   355  }
   356  
   357  // readWord skips whitespace, then reads a word until whitespace or a token.
   358  // it will leave the cursor on the next char after the word, i.e. the space or token.
   359  func (p *Parser) readWord() (string, error) {
   360  	p.skipWhiteSpace()
   361  
   362  	var word []rune
   363  	var ch rune
   364  	for {
   365  		if p.done() {
   366  			break
   367  		}
   368  
   369  		ch = p.current()
   370  		if isWhitespace(ch) ||
   371  			ch == Comma ||
   372  			isOperatorSymbol(ch) {
   373  			break
   374  		}
   375  
   376  		word = append(word, ch)
   377  		p.advance()
   378  	}
   379  
   380  	if len(word) == 0 {
   381  		return "", p.parseError(errExpectedNonEmptyKey)
   382  	}
   383  
   384  	return string(word), nil
   385  }
   386  
   387  // readCSV reads an array of strings in csv form.
   388  // it expects to start just before the first `(` and
   389  // will read until just past the closing `)`
   390  func (p *Parser) readCSV() (results []string, err error) {
   391  	// skip preceding whitespace
   392  	p.skipWhiteSpace()
   393  
   394  	const (
   395  		stateBeforeParens              = 0
   396  		stateWord                      = 1
   397  		stateWhitespaceAfterOpenParens = 2
   398  		stateWhitespaceAfterComma      = 3
   399  		stateWhitespaceAfterWord       = 4
   400  	)
   401  
   402  	var word []rune
   403  	var ch rune
   404  	var state int
   405  
   406  	for {
   407  		if p.done() {
   408  			results = nil
   409  			err = p.parseError("csv; expects close parenthesis")
   410  			// err = ErrInvalidSelector
   411  			return
   412  		}
   413  
   414  		ch = p.current()
   415  
   416  		switch state {
   417  		case stateBeforeParens:
   418  			if ch == OpenParens {
   419  				state = stateWhitespaceAfterOpenParens
   420  				p.advance()
   421  				continue
   422  			}
   423  
   424  			// not open parens, bail
   425  			err = p.parseError("csv; expects open parenthesis")
   426  			results = nil
   427  			return
   428  
   429  		case stateWord:
   430  
   431  			if ch == Comma {
   432  				if len(word) > 0 {
   433  					results = append(results, string(word))
   434  					word = nil
   435  				}
   436  
   437  				// the symbol is the comma
   438  				state = stateWhitespaceAfterComma
   439  				p.advance()
   440  				continue
   441  			}
   442  
   443  			if ch == CloseParens {
   444  				if len(word) > 0 {
   445  					results = append(results, string(word))
   446  				}
   447  				p.advance()
   448  				return
   449  			}
   450  
   451  			if p.isWhitespace(ch) {
   452  				if len(word) > 0 {
   453  					results = append(results, string(word))
   454  					word = nil
   455  				}
   456  
   457  				state = stateWhitespaceAfterWord
   458  				p.advance()
   459  				continue
   460  			}
   461  
   462  			if !p.isValidValue(ch) {
   463  				err = p.parseError("csv; word contains invalid characters")
   464  				results = nil
   465  				return
   466  			}
   467  
   468  			word = append(word, ch)
   469  			p.advance()
   470  			continue
   471  
   472  		case stateWhitespaceAfterOpenParens, stateWhitespaceAfterComma:
   473  			if p.isWhitespace(ch) {
   474  				p.advance()
   475  				continue
   476  			}
   477  			if isAlpha(ch) {
   478  				state = stateWord
   479  				continue
   480  			}
   481  			if ch == Comma {
   482  				p.advance()
   483  				state = stateWhitespaceAfterComma
   484  				continue
   485  			}
   486  			if ch == CloseParens {
   487  				p.advance()
   488  				return // exit reading the csv
   489  			}
   490  
   491  			if state == stateWhitespaceAfterOpenParens {
   492  				err = p.parseError("csv; invalid characters after '('")
   493  				return
   494  			}
   495  			err = p.parseError("csv; invalid characters after ','")
   496  			return
   497  
   498  		case stateWhitespaceAfterWord:
   499  
   500  			if ch == CloseParens {
   501  				if len(word) > 0 {
   502  					results = append(results, string(word))
   503  				}
   504  				p.advance()
   505  				return
   506  			}
   507  
   508  			if p.isWhitespace(ch) {
   509  				p.advance()
   510  				continue
   511  			}
   512  
   513  			if ch == Comma {
   514  				state = stateWhitespaceAfterComma
   515  				p.advance()
   516  				continue
   517  			}
   518  
   519  			err = p.parseError("csv; consecutive whitespace separated words without a comma")
   520  			results = nil
   521  			return
   522  		}
   523  	}
   524  }
   525  
   526  func (p *Parser) skipWhiteSpace() {
   527  	var ch rune
   528  	for {
   529  		if p.done() {
   530  			return
   531  		}
   532  		ch = p.current()
   533  		if !p.isWhitespace(ch) {
   534  			return
   535  		}
   536  		p.advance()
   537  	}
   538  }
   539  
   540  func (p *Parser) skipToNonWhitespace() (ch rune) {
   541  	for {
   542  		if p.done() {
   543  			return
   544  		}
   545  		ch = p.current()
   546  		if ch == Comma || !p.isWhitespace(ch) {
   547  			return
   548  		}
   549  		p.advance()
   550  	}
   551  }
   552  
   553  // isWhitespace returns true if the rune is a space, tab, or newline.
   554  func (p *Parser) isWhitespace(ch rune) bool {
   555  	return ch == Space || ch == Tab || ch == CarriageReturn || ch == NewLine
   556  }
   557  
   558  func (p *Parser) isValidValue(ch rune) bool {
   559  	return isAlpha(ch) || isNameSymbol(ch)
   560  }
   561  
   562  func (p *Parser) parseError(message ...interface{}) error {
   563  	return &ParseError{
   564  		Err:      ErrInvalidSelector,
   565  		Input:    p.s,
   566  		Position: p.pos,
   567  		Message:  fmt.Sprint(message...),
   568  	}
   569  }