github.com/expr-lang/expr@v1.16.9/parser/lexer/lexer.go (about)

     1  package lexer
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  
     7  	"github.com/expr-lang/expr/file"
     8  )
     9  
    10  func Lex(source file.Source) ([]Token, error) {
    11  	l := &lexer{
    12  		source: source,
    13  		tokens: make([]Token, 0),
    14  		start:  0,
    15  		end:    0,
    16  	}
    17  	l.commit()
    18  
    19  	for state := root; state != nil; {
    20  		state = state(l)
    21  	}
    22  
    23  	if l.err != nil {
    24  		return nil, l.err.Bind(source)
    25  	}
    26  
    27  	return l.tokens, nil
    28  }
    29  
    30  type lexer struct {
    31  	source     file.Source
    32  	tokens     []Token
    33  	start, end int
    34  	err        *file.Error
    35  }
    36  
    37  const eof rune = -1
    38  
    39  func (l *lexer) commit() {
    40  	l.start = l.end
    41  }
    42  
    43  func (l *lexer) next() rune {
    44  	if l.end >= len(l.source) {
    45  		l.end++
    46  		return eof
    47  	}
    48  	r := l.source[l.end]
    49  	l.end++
    50  	return r
    51  }
    52  
    53  func (l *lexer) peek() rune {
    54  	r := l.next()
    55  	l.backup()
    56  	return r
    57  }
    58  
    59  func (l *lexer) backup() {
    60  	l.end--
    61  }
    62  
    63  func (l *lexer) emit(t Kind) {
    64  	l.emitValue(t, l.word())
    65  }
    66  
    67  func (l *lexer) emitValue(t Kind, value string) {
    68  	l.tokens = append(l.tokens, Token{
    69  		Location: file.Location{From: l.start, To: l.end},
    70  		Kind:     t,
    71  		Value:    value,
    72  	})
    73  	l.commit()
    74  }
    75  
    76  func (l *lexer) emitEOF() {
    77  	from := l.end - 2
    78  	if from < 0 {
    79  		from = 0
    80  	}
    81  	to := l.end - 1
    82  	if to < 0 {
    83  		to = 0
    84  	}
    85  	l.tokens = append(l.tokens, Token{
    86  		Location: file.Location{From: from, To: to},
    87  		Kind:     EOF,
    88  	})
    89  	l.commit()
    90  }
    91  
    92  func (l *lexer) skip() {
    93  	l.commit()
    94  }
    95  
    96  func (l *lexer) word() string {
    97  	// TODO: boundary check is NOT needed here, but for some reason CI fuzz tests are failing.
    98  	if l.start > len(l.source) || l.end > len(l.source) {
    99  		return "__invalid__"
   100  	}
   101  	return string(l.source[l.start:l.end])
   102  }
   103  
   104  func (l *lexer) accept(valid string) bool {
   105  	if strings.ContainsRune(valid, l.next()) {
   106  		return true
   107  	}
   108  	l.backup()
   109  	return false
   110  }
   111  
   112  func (l *lexer) acceptRun(valid string) {
   113  	for strings.ContainsRune(valid, l.next()) {
   114  	}
   115  	l.backup()
   116  }
   117  
   118  func (l *lexer) skipSpaces() {
   119  	r := l.peek()
   120  	for ; r == ' '; r = l.peek() {
   121  		l.next()
   122  	}
   123  	l.skip()
   124  }
   125  
   126  func (l *lexer) acceptWord(word string) bool {
   127  	pos := l.end
   128  
   129  	l.skipSpaces()
   130  
   131  	for _, ch := range word {
   132  		if l.next() != ch {
   133  			l.end = pos
   134  			return false
   135  		}
   136  	}
   137  	if r := l.peek(); r != ' ' && r != eof {
   138  		l.end = pos
   139  		return false
   140  	}
   141  
   142  	return true
   143  }
   144  
   145  func (l *lexer) error(format string, args ...any) stateFn {
   146  	if l.err == nil { // show first error
   147  		l.err = &file.Error{
   148  			Location: file.Location{
   149  				From: l.end - 1,
   150  				To:   l.end,
   151  			},
   152  			Message: fmt.Sprintf(format, args...),
   153  		}
   154  	}
   155  	return nil
   156  }
   157  
   158  func digitVal(ch rune) int {
   159  	switch {
   160  	case '0' <= ch && ch <= '9':
   161  		return int(ch - '0')
   162  	case 'a' <= lower(ch) && lower(ch) <= 'f':
   163  		return int(lower(ch) - 'a' + 10)
   164  	}
   165  	return 16 // larger than any legal digit val
   166  }
   167  
   168  func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
   169  
   170  func (l *lexer) scanDigits(ch rune, base, n int) rune {
   171  	for n > 0 && digitVal(ch) < base {
   172  		ch = l.next()
   173  		n--
   174  	}
   175  	if n > 0 {
   176  		l.error("invalid char escape")
   177  	}
   178  	return ch
   179  }
   180  
   181  func (l *lexer) scanEscape(quote rune) rune {
   182  	ch := l.next() // read character after '/'
   183  	switch ch {
   184  	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
   185  		// nothing to do
   186  		ch = l.next()
   187  	case '0', '1', '2', '3', '4', '5', '6', '7':
   188  		ch = l.scanDigits(ch, 8, 3)
   189  	case 'x':
   190  		ch = l.scanDigits(l.next(), 16, 2)
   191  	case 'u':
   192  		ch = l.scanDigits(l.next(), 16, 4)
   193  	case 'U':
   194  		ch = l.scanDigits(l.next(), 16, 8)
   195  	default:
   196  		l.error("invalid char escape")
   197  	}
   198  	return ch
   199  }
   200  
   201  func (l *lexer) scanString(quote rune) (n int) {
   202  	ch := l.next() // read character after quote
   203  	for ch != quote {
   204  		if ch == '\n' || ch == eof {
   205  			l.error("literal not terminated")
   206  			return
   207  		}
   208  		if ch == '\\' {
   209  			ch = l.scanEscape(quote)
   210  		} else {
   211  			ch = l.next()
   212  		}
   213  		n++
   214  	}
   215  	return
   216  }
   217  
   218  func (l *lexer) scanRawString(quote rune) (n int) {
   219  	ch := l.next() // read character after back tick
   220  	for ch != quote {
   221  		if ch == eof {
   222  			l.error("literal not terminated")
   223  			return
   224  		}
   225  		ch = l.next()
   226  		n++
   227  	}
   228  	l.emitValue(String, string(l.source[l.start+1:l.end-1]))
   229  	return
   230  }