github.com/vcilabs/webrpc@v0.5.2-0.20201116131534-162e27b1b33b/schema/ridl/lexer.go (about)

     1  package ridl
     2  
     3  import (
     4  	"fmt"
     5  )
     6  
var (
	// empty is the sentinel rune returned by peek once the input is
	// exhausted; it is also part of wordBreak so words end at EOF.
	empty = rune(0)
)
    10  
var (
	// wordBeginning lists every rune allowed inside a word token: ASCII
	// letters, digits and underscore. (Despite the name, lexStateWord
	// uses wordBreak — not this set — after the first rune.)
	wordBeginning = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_")
	// wordBreak lists runes that terminate a word; the leading rune(0)
	// covers end of input, and ¿/¡ pair with ?/!.
	wordBreak     = []rune("\x00 \t\r\n[]()<>{}=:¿?¡!,\"")
)
    15  
    16  type tokenType uint8
    17  
    18  func (tt tokenType) String() string {
    19  	if name := tokenTypeName[tt]; name != "" {
    20  		return name
    21  	}
    22  	return tokenInvalid.String()
    23  }
    24  
// token is a single lexeme produced by the lexer.
type token struct {
	tt  tokenType // lexical class
	val string    // literal text, sliced from the input

	pos  int // rune offset just past the token's last rune
	line int // 1-based line number (see (*lexer).next)
	col  int // column of the token's last rune; 1-based within the line
}
    33  
    34  func (t token) String() string {
    35  	if t.val != "" {
    36  		return t.val
    37  	}
    38  	return t.tt.String()
    39  }
    40  
    41  type lexState func(*lexer) lexState
    42  
// Token types recognized by the lexer. The trailing comments show the
// literal(s) each type corresponds to.
const (
	tokenInvalid           tokenType = iota
	tokenWhitespace                  // " "
	tokenNewLine                     // "\n"
	tokenEqual                       // "="
	tokenOpenParen                   // "("
	tokenCloseParen                  // ")"
	tokenOpenBracket                 // "["
	tokenCloseBracket                // "]"
	tokenOpenAngleBracket            // "<"
	tokenCloseAngleBracket           // ">"
	tokenPlusSign                    // "+"
	tokenMinusSign                   // "-"
	tokenHash                        // "#"
	tokenColon                       // ":"
	tokenComma                       // ","
	tokenBackslash                   // "\"
	tokenSlash                       // "/"
	tokenQuote                       //  "
	tokenDot                         // "."
	tokenQuestionMark                // "?"
	tokenRocket                      // "=>"
	tokenWord                        // ..wordCharset..

	tokenExtra // catch-all for runes no other state claims
	tokenOptionalWhitespace
	tokenComposed

	tokenEOL
	tokenEOF
)
    74  
    75  const tokenDash = tokenMinusSign
    76  
// tokenTypeName maps token types to the display names used in diagnostics.
// NOTE(review): types without an entry here (tokenOptionalWhitespace,
// tokenEOL) render as "[invalid]" via tokenType.String — confirm that is
// intentional.
var tokenTypeName = map[tokenType]string{
	tokenInvalid:           "[invalid]",
	tokenWhitespace:        "[space]",
	tokenNewLine:           "[newline]",
	tokenEqual:             "[equal sign]",
	tokenOpenParen:         "[open parenthesis]",
	tokenCloseParen:        "[close parenthesis]",
	tokenOpenBracket:       "[open bracket]",
	tokenCloseBracket:      "[close bracket]",
	tokenOpenAngleBracket:  "[open angle bracket]",
	tokenCloseAngleBracket: "[close angle bracket]",
	tokenPlusSign:          "[plus]",
	tokenMinusSign:         "[minus]",
	tokenHash:              "[hash]",
	tokenColon:             "[colon]",
	tokenComma:             "[comma]",
	tokenDot:               "[dot]",
	tokenQuote:             "[quote]",
	tokenBackslash:         "[backslash]",
	tokenSlash:             "[slash]",
	tokenQuestionMark:      "[question mark]",
	tokenRocket:            "[rocket]",
	tokenWord:              "[word]",
	tokenExtra:             "[extra]",
	tokenComposed:          "[composed]",
	tokenEOF:               "[EOF]",
}
   104  
// tokenTypeValue maps single-rune token types to the runes that produce
// them; it backs the predicates built by isTokenType below.
var tokenTypeValue = map[tokenType][]rune{
	tokenWhitespace:        []rune{' ', '\t', '\r'},
	tokenNewLine:           []rune{'\n'},
	tokenEqual:             []rune{'='},
	tokenOpenParen:         []rune{'('},
	tokenCloseParen:        []rune{')'},
	tokenOpenBracket:       []rune{'['},
	tokenCloseBracket:      []rune{']'},
	tokenOpenAngleBracket:  []rune{'<'},
	tokenCloseAngleBracket: []rune{'>'},
	tokenPlusSign:          []rune{'+'},
	tokenMinusSign:         []rune{'-'},
	tokenHash:              []rune{'#'},
	tokenColon:             []rune{':'},
	tokenQuote:             []rune{'"'},
	tokenBackslash:         []rune{'\\'},
	tokenSlash:             []rune{'/'},
	tokenComma:             []rune{','},
	tokenDot:               []rune{'.'},
	tokenQuestionMark:      []rune{'?'},
}
   126  
// Rune predicates, one per single-rune token type, derived from
// tokenTypeValue via isTokenType.
var (
	isSpace             = isTokenType(tokenWhitespace)
	isNewLine           = isTokenType(tokenNewLine)
	isQuestionMark      = isTokenType(tokenQuestionMark)
	isColon             = isTokenType(tokenColon)
	isHash              = isTokenType(tokenHash)
	isOpenParen         = isTokenType(tokenOpenParen)
	isCloseParen        = isTokenType(tokenCloseParen)
	isOpenBracket       = isTokenType(tokenOpenBracket)
	isCloseBracket      = isTokenType(tokenCloseBracket)
	isOpenAngleBracket  = isTokenType(tokenOpenAngleBracket)
	isCloseAngleBracket = isTokenType(tokenCloseAngleBracket)
	isPlusSign          = isTokenType(tokenPlusSign)
	isMinusSign         = isTokenType(tokenMinusSign)
	isEqual             = isTokenType(tokenEqual)
	isComma             = isTokenType(tokenComma)
	isQuote             = isTokenType(tokenQuote)
	isBackslash         = isTokenType(tokenBackslash)
	isSlash             = isTokenType(tokenSlash)
	isDot               = isTokenType(tokenDot)
)
   148  
   149  func isTokenType(tt tokenType) func(r rune) bool {
   150  	return func(r rune) bool {
   151  		for i := range tokenTypeValue[tt] {
   152  			if tokenTypeValue[tt][i] == r {
   153  				return true
   154  			}
   155  		}
   156  		return false
   157  	}
   158  }
   159  
// isEmpty reports whether r is the rune(0) sentinel that peek returns at
// end of input.
func isEmpty(r rune) bool {
	return r == empty
}
   163  
   164  func isWordBreak(r rune) bool {
   165  	for i := range wordBreak {
   166  		if r == wordBreak[i] {
   167  			return true
   168  		}
   169  	}
   170  	return false
   171  }
   172  
   173  func isWord(r rune) bool {
   174  	for i := range wordBeginning {
   175  		if r == wordBeginning[i] {
   176  			return true
   177  		}
   178  	}
   179  	return false
   180  }
   181  
   182  func lexPushTokenState(tt tokenType) lexState {
   183  	return func(lx *lexer) lexState {
   184  		lx.next()
   185  		lx.emit(tt)
   186  		return lexDefaultState
   187  	}
   188  }
   189  
   190  func lexStateCloseParen(lx *lexer) lexState {
   191  	return lexPushTokenState(tokenCloseParen)
   192  }
   193  
   194  func lexStateOpenParen(lx *lexer) lexState {
   195  	return lexPushTokenState(tokenOpenParen)
   196  }
   197  
   198  func lexStateCloseAngleBracket(lx *lexer) lexState {
   199  	return lexPushTokenState(tokenCloseAngleBracket)
   200  }
   201  
   202  func lexStateOpenAngleBracket(lx *lexer) lexState {
   203  	return lexPushTokenState(tokenOpenAngleBracket)
   204  }
   205  
   206  func lexStateCloseBracket(lx *lexer) lexState {
   207  	return lexPushTokenState(tokenCloseBracket)
   208  }
   209  
   210  func lexStateOpenBracket(lx *lexer) lexState {
   211  	return lexPushTokenState(tokenOpenBracket)
   212  }
   213  
   214  func lexStateRocket(lx *lexer) lexState {
   215  	return lexPushTokenState(tokenRocket)
   216  }
   217  
   218  func lexStateHash(lx *lexer) lexState {
   219  	return lexPushTokenState(tokenHash)
   220  }
   221  
   222  func lexStateComma(lx *lexer) lexState {
   223  	return lexPushTokenState(tokenComma)
   224  }
   225  
   226  func lexStateDot(lx *lexer) lexState {
   227  	return lexPushTokenState(tokenDot)
   228  }
   229  
   230  func lexStateExtra(lx *lexer) lexState {
   231  	return lexPushTokenState(tokenExtra)
   232  }
   233  
   234  func lexStateColon(lx *lexer) lexState {
   235  	return lexPushTokenState(tokenColon)
   236  }
   237  
   238  func lexStateQuestionMark(lx *lexer) lexState {
   239  	return lexPushTokenState(tokenQuestionMark)
   240  }
   241  
   242  func lexStatePlusSign(lx *lexer) lexState {
   243  	return lexPushTokenState(tokenPlusSign)
   244  }
   245  
   246  func lexStateMinusSign(lx *lexer) lexState {
   247  	return lexPushTokenState(tokenMinusSign)
   248  }
   249  
   250  func lexStateQuote(lx *lexer) lexState {
   251  	return lexPushTokenState(tokenQuote)
   252  }
   253  
   254  func lexStateSlash(lx *lexer) lexState {
   255  	return lexPushTokenState(tokenSlash)
   256  }
   257  
   258  func lexStateBackslash(lx *lexer) lexState {
   259  	return lexPushTokenState(tokenBackslash)
   260  }
   261  
   262  func lexStateWord(lx *lexer) lexState {
   263  	for {
   264  		lx.next()
   265  		if isWordBreak(lx.peek()) {
   266  			break
   267  		}
   268  	}
   269  
   270  	lx.emit(tokenWord)
   271  	return lexDefaultState
   272  }
   273  
   274  func lexStateSpace(lx *lexer) lexState {
   275  	lx.next()
   276  
   277  	for isSpace(lx.peek()) {
   278  		lx.next()
   279  	}
   280  
   281  	lx.emit(tokenWhitespace)
   282  	return lexDefaultState
   283  }
   284  
// lexStateNewLine consumes a single '\n', emits it as a newline token,
// and resets the column counter so that (*lexer).next bumps the line
// number when the next rune is consumed.
func lexStateNewLine(lx *lexer) lexState {
	lx.next()
	lx.emit(tokenNewLine)
	// NOTE: col must be zeroed only after emit, so the newline token
	// itself still carries the column it was found at.
	lx.col = 0
	return lexDefaultState
}
   291  
   292  func lexStateEqual(lx *lexer) lexState {
   293  	lx.next()
   294  
   295  	r := lx.peek()
   296  
   297  	switch {
   298  	case isCloseAngleBracket(r):
   299  		return lexStateRocket
   300  	}
   301  
   302  	lx.emit(tokenEqual)
   303  	return lexDefaultState
   304  }
   305  
   306  func lexDefaultState(lx *lexer) lexState {
   307  	r := lx.peek()
   308  
   309  	switch {
   310  
   311  	case isEmpty(r):
   312  		return nil
   313  
   314  	case isQuote(r):
   315  		return lexStateQuote
   316  
   317  	case isSlash(r):
   318  		return lexStateSlash
   319  
   320  	case isBackslash(r):
   321  		return lexStateBackslash
   322  
   323  	case isSpace(r):
   324  		return lexStateSpace
   325  
   326  	case isNewLine(r):
   327  		return lexStateNewLine
   328  
   329  	case isOpenParen(r):
   330  		return lexStateOpenParen
   331  
   332  	case isCloseParen(r):
   333  		return lexStateCloseParen
   334  
   335  	case isOpenAngleBracket(r):
   336  		return lexStateOpenAngleBracket
   337  
   338  	case isCloseAngleBracket(r):
   339  		return lexStateCloseAngleBracket
   340  
   341  	case isOpenBracket(r):
   342  		return lexStateOpenBracket
   343  
   344  	case isCloseBracket(r):
   345  		return lexStateCloseBracket
   346  
   347  	case isHash(r):
   348  		return lexStateHash
   349  
   350  	case isEqual(r):
   351  		return lexStateEqual
   352  
   353  	case isPlusSign(r):
   354  		return lexStatePlusSign
   355  
   356  	case isMinusSign(r):
   357  		return lexStateMinusSign
   358  
   359  	case isColon(r):
   360  		return lexStateColon
   361  
   362  	case isQuestionMark(r):
   363  		return lexStateQuestionMark
   364  
   365  	case isComma(r):
   366  		return lexStateComma
   367  
   368  	case isDot(r):
   369  		return lexStateDot
   370  
   371  	case isWord(r):
   372  		return lexStateWord
   373  
   374  	default:
   375  		return lexStateExtra
   376  
   377  	}
   378  
   379  	panic("unreachable")
   380  }
   381  
// lexer scans an input rune slice and streams tokens on an unbuffered
// channel; it is driven by the lexState functions and started from
// newLexer, which runs the state machine in its own goroutine.
type lexer struct {
	input  []rune // entire input being scanned
	length int    // len(input), cached at construction

	start int // start of the token currently being accumulated
	pos   int // current scan position (index of the next rune to consume)

	line int // 1-based line counter (incremented by next)
	col  int // column counter; reset to 0 by lexStateNewLine

	tokens chan token // unbuffered; closed by run after tokenEOF
}
   394  
   395  func newLexer(in string) *lexer {
   396  	s := []rune(in)
   397  	lx := &lexer{
   398  		input:  s,
   399  		length: len(s),
   400  		tokens: make(chan token),
   401  	}
   402  
   403  	go lx.run()
   404  	return lx
   405  }
   406  
   407  func (lx *lexer) run() {
   408  	for state := lexDefaultState; state != nil; {
   409  		state = state(lx)
   410  	}
   411  
   412  	lx.emit(tokenEOF)
   413  	close(lx.tokens)
   414  }
   415  
   416  func (lx *lexer) peek() rune {
   417  	if lx.pos >= lx.length {
   418  		return empty
   419  	}
   420  	return lx.input[lx.pos]
   421  }
   422  
   423  func (lx *lexer) next() bool {
   424  	newPos := lx.pos + 1
   425  	if newPos > lx.length {
   426  		return false
   427  	}
   428  	lx.pos = newPos
   429  
   430  	if lx.col < 1 {
   431  		lx.line++
   432  	}
   433  	lx.col++
   434  
   435  	return true
   436  }
   437  
// emit sends the text accumulated since start as a token of type tt and
// marks the current position as the start of the next token. The pos,
// line and col recorded on the token refer to the end of the token (pos
// is one past its last rune). The send blocks until a consumer receives.
func (lx *lexer) emit(tt tokenType) {
	tok := token{
		tt:   tt,
		val:  lx.val(),
		pos:  lx.pos,
		line: lx.line,
		col:  lx.col,
	}
	lx.start = lx.pos
	lx.tokens <- tok
}
   449  
// val returns the text of the token currently being accumulated
// (the runes between start and pos).
func (lx *lexer) val() string {
	return string(lx.input[lx.start:lx.pos])
}
   453  
// String renders the lexer's internal state; useful for debugging.
func (lx *lexer) String() string {
	return fmt.Sprintf("line: %d, start: %d, pos: %d, col: %d, length: %d, value: %q", lx.line, lx.start, lx.pos, lx.col, lx.length, lx.val())
}