github.com/bitxmesh/gopher-lua@v0.0.0-20190327085718-93c344ef97a4/parse/lexer.go (about)

     1  package parse
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"github.com/yuin/gopher-lua/ast"
     8  	"io"
     9  	"reflect"
    10  	"strconv"
    11  	"strings"
    12  )
    13  
    14  const EOF = -1
    15  const whitespace1 = 1<<'\t' | 1<<' '
    16  const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
    17  
    18  type Error struct {
    19  	Pos     ast.Position
    20  	Message string
    21  	Token   string
    22  }
    23  
    24  func (e *Error) Error() string {
    25  	pos := e.Pos
    26  	if pos.Line == EOF {
    27  		return fmt.Sprintf("%v at EOF:   %s\n", pos.Source, e.Message)
    28  	} else {
    29  		return fmt.Sprintf("%v line:%d(column:%d) near '%v':   %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message)
    30  	}
    31  }
    32  
    33  func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) }
    34  
    35  func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' }
    36  
    37  func isIdent(ch int, pos int) bool {
    38  	return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0
    39  }
    40  
    41  func isDigit(ch int) bool {
    42  	return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
    43  }
    44  
    45  type Scanner struct {
    46  	Pos    ast.Position
    47  	reader *bufio.Reader
    48  }
    49  
    50  func NewScanner(reader io.Reader, source string) *Scanner {
    51  	return &Scanner{
    52  		Pos: ast.Position{
    53  			Source: source,
    54  			Line:   1,
    55  			Column: 0,
    56  		},
    57  		reader: bufio.NewReaderSize(reader, 4096),
    58  	}
    59  }
    60  
    61  func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} }
    62  
    63  func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} }
    64  
    65  func (sc *Scanner) readNext() int {
    66  	ch, err := sc.reader.ReadByte()
    67  	if err == io.EOF {
    68  		return EOF
    69  	}
    70  	return int(ch)
    71  }
    72  
    73  func (sc *Scanner) Newline(ch int) {
    74  	if ch < 0 {
    75  		return
    76  	}
    77  	sc.Pos.Line += 1
    78  	sc.Pos.Column = 0
    79  	next := sc.Peek()
    80  	if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' {
    81  		sc.reader.ReadByte()
    82  	}
    83  }
    84  
    85  func (sc *Scanner) Next() int {
    86  	ch := sc.readNext()
    87  	switch ch {
    88  	case '\n', '\r':
    89  		sc.Newline(ch)
    90  		ch = int('\n')
    91  	case EOF:
    92  		sc.Pos.Line = EOF
    93  		sc.Pos.Column = 0
    94  	default:
    95  		sc.Pos.Column++
    96  	}
    97  	return ch
    98  }
    99  
   100  func (sc *Scanner) Peek() int {
   101  	ch := sc.readNext()
   102  	if ch != EOF {
   103  		sc.reader.UnreadByte()
   104  	}
   105  	return ch
   106  }
   107  
   108  func (sc *Scanner) skipWhiteSpace(whitespace int64) int {
   109  	ch := sc.Next()
   110  	for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() {
   111  	}
   112  	return ch
   113  }
   114  
   115  func (sc *Scanner) skipComments(ch int) error {
   116  	// multiline comment
   117  	if sc.Peek() == '[' {
   118  		ch = sc.Next()
   119  		if sc.Peek() == '[' || sc.Peek() == '=' {
   120  			var buf bytes.Buffer
   121  			if err := sc.scanMultilineString(sc.Next(), &buf); err != nil {
   122  				return sc.Error(buf.String(), "invalid multiline comment")
   123  			}
   124  			return nil
   125  		}
   126  	}
   127  	for {
   128  		if ch == '\n' || ch == '\r' || ch < 0 {
   129  			break
   130  		}
   131  		ch = sc.Next()
   132  	}
   133  	return nil
   134  }
   135  
   136  func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error {
   137  	writeChar(buf, ch)
   138  	for isIdent(sc.Peek(), 1) {
   139  		writeChar(buf, sc.Next())
   140  	}
   141  	return nil
   142  }
   143  
   144  func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error {
   145  	writeChar(buf, ch)
   146  	for isDecimal(sc.Peek()) {
   147  		writeChar(buf, sc.Next())
   148  	}
   149  	return nil
   150  }
   151  
   152  func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error {
   153  	if ch == '0' { // octal
   154  		if sc.Peek() == 'x' || sc.Peek() == 'X' {
   155  			writeChar(buf, ch)
   156  			writeChar(buf, sc.Next())
   157  			hasvalue := false
   158  			for isDigit(sc.Peek()) {
   159  				writeChar(buf, sc.Next())
   160  				hasvalue = true
   161  			}
   162  			if !hasvalue {
   163  				return sc.Error(buf.String(), "illegal hexadecimal number")
   164  			}
   165  			return nil
   166  		} else if sc.Peek() != '.' && isDecimal(sc.Peek()) {
   167  			ch = sc.Next()
   168  		}
   169  	}
   170  	sc.scanDecimal(ch, buf)
   171  	if sc.Peek() == '.' {
   172  		sc.scanDecimal(sc.Next(), buf)
   173  	}
   174  	if ch = sc.Peek(); ch == 'e' || ch == 'E' {
   175  		writeChar(buf, sc.Next())
   176  		if ch = sc.Peek(); ch == '-' || ch == '+' {
   177  			writeChar(buf, sc.Next())
   178  		}
   179  		sc.scanDecimal(sc.Next(), buf)
   180  	}
   181  
   182  	return nil
   183  }
   184  
   185  func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error {
   186  	ch := sc.Next()
   187  	for ch != quote {
   188  		if ch == '\n' || ch == '\r' || ch < 0 {
   189  			return sc.Error(buf.String(), "unterminated string")
   190  		}
   191  		if ch == '\\' {
   192  			if err := sc.scanEscape(ch, buf); err != nil {
   193  				return err
   194  			}
   195  		} else {
   196  			writeChar(buf, ch)
   197  		}
   198  		ch = sc.Next()
   199  	}
   200  	return nil
   201  }
   202  
   203  func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error {
   204  	ch = sc.Next()
   205  	switch ch {
   206  	case 'a':
   207  		buf.WriteByte('\a')
   208  	case 'b':
   209  		buf.WriteByte('\b')
   210  	case 'f':
   211  		buf.WriteByte('\f')
   212  	case 'n':
   213  		buf.WriteByte('\n')
   214  	case 'r':
   215  		buf.WriteByte('\r')
   216  	case 't':
   217  		buf.WriteByte('\t')
   218  	case 'v':
   219  		buf.WriteByte('\v')
   220  	case '\\':
   221  		buf.WriteByte('\\')
   222  	case '"':
   223  		buf.WriteByte('"')
   224  	case '\'':
   225  		buf.WriteByte('\'')
   226  	case '\n':
   227  		buf.WriteByte('\n')
   228  	case '\r':
   229  		buf.WriteByte('\n')
   230  		sc.Newline('\r')
   231  	default:
   232  		if '0' <= ch && ch <= '9' {
   233  			bytes := []byte{byte(ch)}
   234  			for i := 0; i < 2 && isDecimal(sc.Peek()); i++ {
   235  				bytes = append(bytes, byte(sc.Next()))
   236  			}
   237  			val, _ := strconv.ParseInt(string(bytes), 10, 32)
   238  			writeChar(buf, int(val))
   239  		} else {
   240  			buf.WriteByte('\\')
   241  			writeChar(buf, ch)
   242  			return sc.Error(buf.String(), "Invalid escape sequence")
   243  		}
   244  	}
   245  	return nil
   246  }
   247  
   248  func (sc *Scanner) countSep(ch int) (int, int) {
   249  	count := 0
   250  	for ; ch == '='; count = count + 1 {
   251  		ch = sc.Next()
   252  	}
   253  	return count, ch
   254  }
   255  
   256  func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error {
   257  	var count1, count2 int
   258  	count1, ch = sc.countSep(ch)
   259  	if ch != '[' {
   260  		return sc.Error(string(ch), "invalid multiline string")
   261  	}
   262  	ch = sc.Next()
   263  	if ch == '\n' || ch == '\r' {
   264  		ch = sc.Next()
   265  	}
   266  	for {
   267  		if ch < 0 {
   268  			return sc.Error(buf.String(), "unterminated multiline string")
   269  		} else if ch == ']' {
   270  			count2, ch = sc.countSep(sc.Next())
   271  			if count1 == count2 && ch == ']' {
   272  				goto finally
   273  			}
   274  			buf.WriteByte(']')
   275  			buf.WriteString(strings.Repeat("=", count2))
   276  			continue
   277  		}
   278  		writeChar(buf, ch)
   279  		ch = sc.Next()
   280  	}
   281  
   282  finally:
   283  	return nil
   284  }
   285  
   286  var reservedWords = map[string]int{
   287  	"and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf,
   288  	"end": TEnd, "false": TFalse, "for": TFor, "function": TFunction,
   289  	"if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr,
   290  	"return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue,
   291  	"until": TUntil, "while": TWhile}
   292  
   293  func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) {
   294  redo:
   295  	var err error
   296  	tok := ast.Token{}
   297  	newline := false
   298  
   299  	ch := sc.skipWhiteSpace(whitespace1)
   300  	if ch == '\n' || ch == '\r' {
   301  		newline = true
   302  		ch = sc.skipWhiteSpace(whitespace2)
   303  	}
   304  
   305  	if ch == '(' && lexer.PrevTokenType == ')' {
   306  		lexer.PNewLine = newline
   307  	} else {
   308  		lexer.PNewLine = false
   309  	}
   310  
   311  	var _buf bytes.Buffer
   312  	buf := &_buf
   313  	tok.Pos = sc.Pos
   314  
   315  	switch {
   316  	case isIdent(ch, 0):
   317  		tok.Type = TIdent
   318  		err = sc.scanIdent(ch, buf)
   319  		tok.Str = buf.String()
   320  		if err != nil {
   321  			goto finally
   322  		}
   323  		if typ, ok := reservedWords[tok.Str]; ok {
   324  			tok.Type = typ
   325  		}
   326  	case isDecimal(ch):
   327  		tok.Type = TNumber
   328  		err = sc.scanNumber(ch, buf)
   329  		tok.Str = buf.String()
   330  	default:
   331  		switch ch {
   332  		case EOF:
   333  			tok.Type = EOF
   334  		case '-':
   335  			if sc.Peek() == '-' {
   336  				err = sc.skipComments(sc.Next())
   337  				if err != nil {
   338  					goto finally
   339  				}
   340  				goto redo
   341  			} else {
   342  				tok.Type = ch
   343  				tok.Str = string(ch)
   344  			}
   345  		case '"', '\'':
   346  			tok.Type = TString
   347  			err = sc.scanString(ch, buf)
   348  			tok.Str = buf.String()
   349  		case '[':
   350  			if c := sc.Peek(); c == '[' || c == '=' {
   351  				tok.Type = TString
   352  				err = sc.scanMultilineString(sc.Next(), buf)
   353  				tok.Str = buf.String()
   354  			} else {
   355  				tok.Type = ch
   356  				tok.Str = string(ch)
   357  			}
   358  		case '=':
   359  			if sc.Peek() == '=' {
   360  				tok.Type = TEqeq
   361  				tok.Str = "=="
   362  				sc.Next()
   363  			} else {
   364  				tok.Type = ch
   365  				tok.Str = string(ch)
   366  			}
   367  		case '~':
   368  			if sc.Peek() == '=' {
   369  				tok.Type = TNeq
   370  				tok.Str = "~="
   371  				sc.Next()
   372  			} else {
   373  				err = sc.Error("~", "Invalid '~' token")
   374  			}
   375  		case '<':
   376  			if sc.Peek() == '=' {
   377  				tok.Type = TLte
   378  				tok.Str = "<="
   379  				sc.Next()
   380  			} else {
   381  				tok.Type = ch
   382  				tok.Str = string(ch)
   383  			}
   384  		case '>':
   385  			if sc.Peek() == '=' {
   386  				tok.Type = TGte
   387  				tok.Str = ">="
   388  				sc.Next()
   389  			} else {
   390  				tok.Type = ch
   391  				tok.Str = string(ch)
   392  			}
   393  		case '.':
   394  			ch2 := sc.Peek()
   395  			switch {
   396  			case isDecimal(ch2):
   397  				tok.Type = TNumber
   398  				err = sc.scanNumber(ch, buf)
   399  				tok.Str = buf.String()
   400  			case ch2 == '.':
   401  				writeChar(buf, ch)
   402  				writeChar(buf, sc.Next())
   403  				if sc.Peek() == '.' {
   404  					writeChar(buf, sc.Next())
   405  					tok.Type = T3Comma
   406  				} else {
   407  					tok.Type = T2Comma
   408  				}
   409  			default:
   410  				tok.Type = '.'
   411  			}
   412  			tok.Str = buf.String()
   413  		case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',':
   414  			tok.Type = ch
   415  			tok.Str = string(ch)
   416  		default:
   417  			writeChar(buf, ch)
   418  			err = sc.Error(buf.String(), "Invalid token")
   419  			goto finally
   420  		}
   421  	}
   422  
   423  finally:
   424  	tok.Name = TokenName(int(tok.Type))
   425  	return tok, err
   426  }
   427  
   428  // yacc interface {{{
   429  
   430  type Lexer struct {
   431  	scanner       *Scanner
   432  	Stmts         []ast.Stmt
   433  	PNewLine      bool
   434  	Token         ast.Token
   435  	PrevTokenType int
   436  }
   437  
   438  func (lx *Lexer) Lex(lval *yySymType) int {
   439  	lx.PrevTokenType = lx.Token.Type
   440  	tok, err := lx.scanner.Scan(lx)
   441  	if err != nil {
   442  		panic(err)
   443  	}
   444  	if tok.Type < 0 {
   445  		return 0
   446  	}
   447  	lval.token = tok
   448  	lx.Token = tok
   449  	return int(tok.Type)
   450  }
   451  
   452  func (lx *Lexer) Error(message string) {
   453  	panic(lx.scanner.Error(lx.Token.Str, message))
   454  }
   455  
   456  func (lx *Lexer) TokenError(tok ast.Token, message string) {
   457  	panic(lx.scanner.TokenError(tok, message))
   458  }
   459  
   460  func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) {
   461  	lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil}
   462  	chunk = nil
   463  	defer func() {
   464  		if e := recover(); e != nil {
   465  			err, _ = e.(error)
   466  		}
   467  	}()
   468  	yyParse(lexer)
   469  	chunk = lexer.Stmts
   470  	return
   471  }
   472  
   473  // }}}
   474  
   475  // Dump {{{
   476  
   477  func isInlineDumpNode(rv reflect.Value) bool {
   478  	switch rv.Kind() {
   479  	case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr:
   480  		return false
   481  	default:
   482  		return true
   483  	}
   484  }
   485  
   486  func dump(node interface{}, level int, s string) string {
   487  	rt := reflect.TypeOf(node)
   488  	if fmt.Sprint(rt) == "<nil>" {
   489  		return strings.Repeat(s, level) + "<nil>"
   490  	}
   491  
   492  	rv := reflect.ValueOf(node)
   493  	buf := []string{}
   494  	switch rt.Kind() {
   495  	case reflect.Slice:
   496  		if rv.Len() == 0 {
   497  			return strings.Repeat(s, level) + "<empty>"
   498  		}
   499  		for i := 0; i < rv.Len(); i++ {
   500  			buf = append(buf, dump(rv.Index(i).Interface(), level, s))
   501  		}
   502  	case reflect.Ptr:
   503  		vt := rv.Elem()
   504  		tt := rt.Elem()
   505  		indicies := []int{}
   506  		for i := 0; i < tt.NumField(); i++ {
   507  			if strings.Index(tt.Field(i).Name, "Base") > -1 {
   508  				continue
   509  			}
   510  			indicies = append(indicies, i)
   511  		}
   512  		switch {
   513  		case len(indicies) == 0:
   514  			return strings.Repeat(s, level) + "<empty>"
   515  		case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])):
   516  			for _, i := range indicies {
   517  				buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s))
   518  			}
   519  		default:
   520  			buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name())
   521  			for _, i := range indicies {
   522  				if isInlineDumpNode(vt.Field(i)) {
   523  					inf := dump(vt.Field(i).Interface(), 0, s)
   524  					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf)
   525  				} else {
   526  					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ")
   527  					buf = append(buf, dump(vt.Field(i).Interface(), level+2, s))
   528  				}
   529  			}
   530  		}
   531  	default:
   532  		buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node))
   533  	}
   534  	return strings.Join(buf, "\n")
   535  }
   536  
   537  func Dump(chunk []ast.Stmt) string {
   538  	return dump(chunk, 0, "   ")
   539  }
   540  
// }}}