github.com/expr-lang/expr@v1.16.9/parser/lexer/lexer.go (about) 1 package lexer 2 3 import ( 4 "fmt" 5 "strings" 6 7 "github.com/expr-lang/expr/file" 8 ) 9 10 func Lex(source file.Source) ([]Token, error) { 11 l := &lexer{ 12 source: source, 13 tokens: make([]Token, 0), 14 start: 0, 15 end: 0, 16 } 17 l.commit() 18 19 for state := root; state != nil; { 20 state = state(l) 21 } 22 23 if l.err != nil { 24 return nil, l.err.Bind(source) 25 } 26 27 return l.tokens, nil 28 } 29 30 type lexer struct { 31 source file.Source 32 tokens []Token 33 start, end int 34 err *file.Error 35 } 36 37 const eof rune = -1 38 39 func (l *lexer) commit() { 40 l.start = l.end 41 } 42 43 func (l *lexer) next() rune { 44 if l.end >= len(l.source) { 45 l.end++ 46 return eof 47 } 48 r := l.source[l.end] 49 l.end++ 50 return r 51 } 52 53 func (l *lexer) peek() rune { 54 r := l.next() 55 l.backup() 56 return r 57 } 58 59 func (l *lexer) backup() { 60 l.end-- 61 } 62 63 func (l *lexer) emit(t Kind) { 64 l.emitValue(t, l.word()) 65 } 66 67 func (l *lexer) emitValue(t Kind, value string) { 68 l.tokens = append(l.tokens, Token{ 69 Location: file.Location{From: l.start, To: l.end}, 70 Kind: t, 71 Value: value, 72 }) 73 l.commit() 74 } 75 76 func (l *lexer) emitEOF() { 77 from := l.end - 2 78 if from < 0 { 79 from = 0 80 } 81 to := l.end - 1 82 if to < 0 { 83 to = 0 84 } 85 l.tokens = append(l.tokens, Token{ 86 Location: file.Location{From: from, To: to}, 87 Kind: EOF, 88 }) 89 l.commit() 90 } 91 92 func (l *lexer) skip() { 93 l.commit() 94 } 95 96 func (l *lexer) word() string { 97 // TODO: boundary check is NOT needed here, but for some reason CI fuzz tests are failing. 98 if l.start > len(l.source) || l.end > len(l.source) { 99 return "__invalid__" 100 } 101 return string(l.source[l.start:l.end]) 102 } 103 104 func (l *lexer) accept(valid string) bool { 105 if strings.ContainsRune(valid, l.next()) { 106 return true 107 } 108 l.backup() 109 return false 110 } 111 112 func (l *lexer) acceptRun(valid string) { 113 for strings.ContainsRune(valid, l.next()) { 114 } 115 l.backup() 116 } 117 118 func (l *lexer) skipSpaces() { 119 r := l.peek() 120 for ; r == ' '; r = l.peek() { 121 l.next() 122 } 123 l.skip() 124 } 125 126 func (l *lexer) acceptWord(word string) bool { 127 pos := l.end 128 129 l.skipSpaces() 130 131 for _, ch := range word { 132 if l.next() != ch { 133 l.end = pos 134 return false 135 } 136 } 137 if r := l.peek(); r != ' ' && r != eof { 138 l.end = pos 139 return false 140 } 141 142 return true 143 } 144 145 func (l *lexer) error(format string, args ...any) stateFn { 146 if l.err == nil { // show first error 147 l.err = &file.Error{ 148 Location: file.Location{ 149 From: l.end - 1, 150 To: l.end, 151 }, 152 Message: fmt.Sprintf(format, args...), 153 } 154 } 155 return nil 156 } 157 158 func digitVal(ch rune) int { 159 switch { 160 case '0' <= ch && ch <= '9': 161 return int(ch - '0') 162 case 'a' <= lower(ch) && lower(ch) <= 'f': 163 return int(lower(ch) - 'a' + 10) 164 } 165 return 16 // larger than any legal digit val 166 } 167 168 func lower(ch rune) rune { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter 169 170 func (l *lexer) scanDigits(ch rune, base, n int) rune { 171 for n > 0 && digitVal(ch) < base { 172 ch = l.next() 173 n-- 174 } 175 if n > 0 { 176 l.error("invalid char escape") 177 } 178 return ch 179 } 180 181 func (l *lexer) scanEscape(quote rune) rune { 182 ch := l.next() // read character after '/' 183 switch ch { 184 case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: 185 // nothing to do 186 ch = l.next() 187 case '0', '1', '2', '3', '4', '5', '6', '7': 188 ch = l.scanDigits(ch, 8, 3) 189 case 'x': 190 ch = l.scanDigits(l.next(), 16, 2) 191 case 'u': 192 ch = l.scanDigits(l.next(), 16, 4) 193 case 'U': 194 ch = l.scanDigits(l.next(), 16, 8) 195 default: 196 l.error("invalid char escape") 197 } 198 return ch 199 } 200 201 func (l *lexer) scanString(quote rune) (n int) { 202 ch := l.next() // read character after quote 203 for ch != quote { 204 if ch == '\n' || ch == eof { 205 l.error("literal not terminated") 206 return 207 } 208 if ch == '\\' { 209 ch = l.scanEscape(quote) 210 } else { 211 ch = l.next() 212 } 213 n++ 214 } 215 return 216 } 217 218 func (l *lexer) scanRawString(quote rune) (n int) { 219 ch := l.next() // read character after back tick 220 for ch != quote { 221 if ch == eof { 222 l.error("literal not terminated") 223 return 224 } 225 ch = l.next() 226 n++ 227 } 228 l.emitValue(String, string(l.source[l.start+1:l.end-1])) 229 return 230 }