github.com/vcilabs/webrpc@v0.5.2-0.20201116131534-162e27b1b33b/schema/ridl/lexer.go (about) 1 package ridl 2 3 import ( 4 "fmt" 5 ) 6 7 var ( 8 empty = rune(0) 9 ) 10 11 var ( 12 wordBeginning = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_") 13 wordBreak = []rune("\x00 \t\r\n[]()<>{}=:¿?¡!,\"") 14 ) 15 16 type tokenType uint8 17 18 func (tt tokenType) String() string { 19 if name := tokenTypeName[tt]; name != "" { 20 return name 21 } 22 return tokenInvalid.String() 23 } 24 25 type token struct { 26 tt tokenType 27 val string 28 29 pos int 30 line int 31 col int 32 } 33 34 func (t token) String() string { 35 if t.val != "" { 36 return t.val 37 } 38 return t.tt.String() 39 } 40 41 type lexState func(*lexer) lexState 42 43 const ( 44 tokenInvalid tokenType = iota 45 tokenWhitespace // " " 46 tokenNewLine // "\n" 47 tokenEqual // "=" 48 tokenOpenParen // "(" 49 tokenCloseParen // ")" 50 tokenOpenBracket // "[" 51 tokenCloseBracket // "]" 52 tokenOpenAngleBracket // "<" 53 tokenCloseAngleBracket // ">" 54 tokenPlusSign // "+" 55 tokenMinusSign // "-" 56 tokenHash // "#" 57 tokenColon // ":" 58 tokenComma // "," 59 tokenBackslash // "\" 60 tokenSlash // "/" 61 tokenQuote // " 62 tokenDot // "." 63 tokenQuestionMark // "?" 64 tokenRocket // "=>" 65 tokenWord // ..wordCharset.. 66 67 tokenExtra // other 68 tokenOptionalWhitespace 69 tokenComposed 70 71 tokenEOL 72 tokenEOF 73 ) 74 75 const tokenDash = tokenMinusSign 76 77 var tokenTypeName = map[tokenType]string{ 78 tokenInvalid: "[invalid]", 79 tokenWhitespace: "[space]", 80 tokenNewLine: "[newline]", 81 tokenEqual: "[equal sign]", 82 tokenOpenParen: "[open parenthesis]", 83 tokenCloseParen: "[close parenthesis]", 84 tokenOpenBracket: "[open bracket]", 85 tokenCloseBracket: "[close bracket]", 86 tokenOpenAngleBracket: "[open angle bracket]", 87 tokenCloseAngleBracket: "[close angle bracket]", 88 tokenPlusSign: "[plus]", 89 tokenMinusSign: "[minus]", 90 tokenHash: "[hash]", 91 tokenColon: "[colon]", 92 tokenComma: "[comma]", 93 tokenDot: "[dot]", 94 tokenQuote: "[quote]", 95 tokenBackslash: "[backslash]", 96 tokenSlash: "[slash]", 97 tokenQuestionMark: "[question mark]", 98 tokenRocket: "[rocket]", 99 tokenWord: "[word]", 100 tokenExtra: "[extra]", 101 tokenComposed: "[composed]", 102 tokenEOF: "[EOF]", 103 } 104 105 var tokenTypeValue = map[tokenType][]rune{ 106 tokenWhitespace: []rune{' ', '\t', '\r'}, 107 tokenNewLine: []rune{'\n'}, 108 tokenEqual: []rune{'='}, 109 tokenOpenParen: []rune{'('}, 110 tokenCloseParen: []rune{')'}, 111 tokenOpenBracket: []rune{'['}, 112 tokenCloseBracket: []rune{']'}, 113 tokenOpenAngleBracket: []rune{'<'}, 114 tokenCloseAngleBracket: []rune{'>'}, 115 tokenPlusSign: []rune{'+'}, 116 tokenMinusSign: []rune{'-'}, 117 tokenHash: []rune{'#'}, 118 tokenColon: []rune{':'}, 119 tokenQuote: []rune{'"'}, 120 tokenBackslash: []rune{'\\'}, 121 tokenSlash: []rune{'/'}, 122 tokenComma: []rune{','}, 123 tokenDot: []rune{'.'}, 124 tokenQuestionMark: []rune{'?'}, 125 } 126 127 var ( 128 isSpace = isTokenType(tokenWhitespace) 129 isNewLine = isTokenType(tokenNewLine) 130 isQuestionMark = isTokenType(tokenQuestionMark) 131 isColon = isTokenType(tokenColon) 132 isHash = isTokenType(tokenHash) 133 isOpenParen = isTokenType(tokenOpenParen) 134 isCloseParen = isTokenType(tokenCloseParen) 135 isOpenBracket = isTokenType(tokenOpenBracket) 136 isCloseBracket = isTokenType(tokenCloseBracket) 137 isOpenAngleBracket = isTokenType(tokenOpenAngleBracket) 138 isCloseAngleBracket = isTokenType(tokenCloseAngleBracket) 139 isPlusSign = isTokenType(tokenPlusSign) 140 isMinusSign = isTokenType(tokenMinusSign) 141 isEqual = isTokenType(tokenEqual) 142 isComma = isTokenType(tokenComma) 143 isQuote = isTokenType(tokenQuote) 144 isBackslash = isTokenType(tokenBackslash) 145 isSlash = isTokenType(tokenSlash) 146 isDot = isTokenType(tokenDot) 147 ) 148 149 func isTokenType(tt tokenType) func(r rune) bool { 150 return func(r rune) bool { 151 for i := range tokenTypeValue[tt] { 152 if tokenTypeValue[tt][i] == r { 153 return true 154 } 155 } 156 return false 157 } 158 } 159 160 func isEmpty(r rune) bool { 161 return r == empty 162 } 163 164 func isWordBreak(r rune) bool { 165 for i := range wordBreak { 166 if r == wordBreak[i] { 167 return true 168 } 169 } 170 return false 171 } 172 173 func isWord(r rune) bool { 174 for i := range wordBeginning { 175 if r == wordBeginning[i] { 176 return true 177 } 178 } 179 return false 180 } 181 182 func lexPushTokenState(tt tokenType) lexState { 183 return func(lx *lexer) lexState { 184 lx.next() 185 lx.emit(tt) 186 return lexDefaultState 187 } 188 } 189 190 func lexStateCloseParen(lx *lexer) lexState { 191 return lexPushTokenState(tokenCloseParen) 192 } 193 194 func lexStateOpenParen(lx *lexer) lexState { 195 return lexPushTokenState(tokenOpenParen) 196 } 197 198 func lexStateCloseAngleBracket(lx *lexer) lexState { 199 return lexPushTokenState(tokenCloseAngleBracket) 200 } 201 202 func lexStateOpenAngleBracket(lx *lexer) lexState { 203 return lexPushTokenState(tokenOpenAngleBracket) 204 } 205 206 func lexStateCloseBracket(lx *lexer) lexState { 207 return lexPushTokenState(tokenCloseBracket) 208 } 209 210 func lexStateOpenBracket(lx *lexer) lexState { 211 return lexPushTokenState(tokenOpenBracket) 212 } 213 214 func lexStateRocket(lx *lexer) lexState { 215 return lexPushTokenState(tokenRocket) 216 } 217 218 func lexStateHash(lx *lexer) lexState { 219 return lexPushTokenState(tokenHash) 220 } 221 222 func lexStateComma(lx *lexer) lexState { 223 return lexPushTokenState(tokenComma) 224 } 225 226 func lexStateDot(lx *lexer) lexState { 227 return lexPushTokenState(tokenDot) 228 } 229 230 func lexStateExtra(lx *lexer) lexState { 231 return lexPushTokenState(tokenExtra) 232 } 233 234 func lexStateColon(lx *lexer) lexState { 235 return lexPushTokenState(tokenColon) 236 } 237 238 func lexStateQuestionMark(lx *lexer) lexState { 239 return lexPushTokenState(tokenQuestionMark) 240 } 241 242 func lexStatePlusSign(lx *lexer) lexState { 243 return lexPushTokenState(tokenPlusSign) 244 } 245 246 func lexStateMinusSign(lx *lexer) lexState { 247 return lexPushTokenState(tokenMinusSign) 248 } 249 250 func lexStateQuote(lx *lexer) lexState { 251 return lexPushTokenState(tokenQuote) 252 } 253 254 func lexStateSlash(lx *lexer) lexState { 255 return lexPushTokenState(tokenSlash) 256 } 257 258 func lexStateBackslash(lx *lexer) lexState { 259 return lexPushTokenState(tokenBackslash) 260 } 261 262 func lexStateWord(lx *lexer) lexState { 263 for { 264 lx.next() 265 if isWordBreak(lx.peek()) { 266 break 267 } 268 } 269 270 lx.emit(tokenWord) 271 return lexDefaultState 272 } 273 274 func lexStateSpace(lx *lexer) lexState { 275 lx.next() 276 277 for isSpace(lx.peek()) { 278 lx.next() 279 } 280 281 lx.emit(tokenWhitespace) 282 return lexDefaultState 283 } 284 285 func lexStateNewLine(lx *lexer) lexState { 286 lx.next() 287 lx.emit(tokenNewLine) 288 lx.col = 0 289 return lexDefaultState 290 } 291 292 func lexStateEqual(lx *lexer) lexState { 293 lx.next() 294 295 r := lx.peek() 296 297 switch { 298 case isCloseAngleBracket(r): 299 return lexStateRocket 300 } 301 302 lx.emit(tokenEqual) 303 return lexDefaultState 304 } 305 306 func lexDefaultState(lx *lexer) lexState { 307 r := lx.peek() 308 309 switch { 310 311 case isEmpty(r): 312 return nil 313 314 case isQuote(r): 315 return lexStateQuote 316 317 case isSlash(r): 318 return lexStateSlash 319 320 case isBackslash(r): 321 return lexStateBackslash 322 323 case isSpace(r): 324 return lexStateSpace 325 326 case isNewLine(r): 327 return lexStateNewLine 328 329 case isOpenParen(r): 330 return lexStateOpenParen 331 332 case isCloseParen(r): 333 return lexStateCloseParen 334 335 case isOpenAngleBracket(r): 336 return lexStateOpenAngleBracket 337 338 case isCloseAngleBracket(r): 339 return lexStateCloseAngleBracket 340 341 case isOpenBracket(r): 342 return lexStateOpenBracket 343 344 case isCloseBracket(r): 345 return lexStateCloseBracket 346 347 case isHash(r): 348 return lexStateHash 349 350 case isEqual(r): 351 return lexStateEqual 352 353 case isPlusSign(r): 354 return lexStatePlusSign 355 356 case isMinusSign(r): 357 return lexStateMinusSign 358 359 case isColon(r): 360 return lexStateColon 361 362 case isQuestionMark(r): 363 return lexStateQuestionMark 364 365 case isComma(r): 366 return lexStateComma 367 368 case isDot(r): 369 return lexStateDot 370 371 case isWord(r): 372 return lexStateWord 373 374 default: 375 return lexStateExtra 376 377 } 378 379 panic("unreachable") 380 } 381 382 type lexer struct { 383 input []rune 384 length int 385 386 start int 387 pos int 388 389 line int 390 col int 391 392 tokens chan token 393 } 394 395 func newLexer(in string) *lexer { 396 s := []rune(in) 397 lx := &lexer{ 398 input: s, 399 length: len(s), 400 tokens: make(chan token), 401 } 402 403 go lx.run() 404 return lx 405 } 406 407 func (lx *lexer) run() { 408 for state := lexDefaultState; state != nil; { 409 state = state(lx) 410 } 411 412 lx.emit(tokenEOF) 413 close(lx.tokens) 414 } 415 416 func (lx *lexer) peek() rune { 417 if lx.pos >= lx.length { 418 return empty 419 } 420 return lx.input[lx.pos] 421 } 422 423 func (lx *lexer) next() bool { 424 newPos := lx.pos + 1 425 if newPos > lx.length { 426 return false 427 } 428 lx.pos = newPos 429 430 if lx.col < 1 { 431 lx.line++ 432 } 433 lx.col++ 434 435 return true 436 } 437 438 func (lx *lexer) emit(tt tokenType) { 439 tok := token{ 440 tt: tt, 441 val: lx.val(), 442 pos: lx.pos, 443 line: lx.line, 444 col: lx.col, 445 } 446 lx.start = lx.pos 447 lx.tokens <- tok 448 } 449 450 func (lx *lexer) val() string { 451 return string(lx.input[lx.start:lx.pos]) 452 } 453 454 func (lx *lexer) String() string { 455 return fmt.Sprintf("line: %d, start: %d, pos: %d, col: %d, length: %d, value: %q", lx.line, lx.start, lx.pos, lx.col, lx.length, lx.val()) 456 }