github.com/hootrhino/gopher-lua@v1.0.3/parse/lexer.go (about)

     1  package parse
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io"
     8  	"reflect"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/hootrhino/gopher-lua/ast"
    13  )
    14  
    15  const EOF = -1
    16  const whitespace1 = 1<<'\t' | 1<<' '
    17  const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
    18  
// Error describes a lexing or parsing failure and where it was detected.
//
// The field order is significant: Scanner.Error and Scanner.TokenError build
// values of this type with positional composite literals.
type Error struct {
	// Pos is the position the error is reported at. Pos.Line == EOF marks an
	// error raised at end of input (see the Error method).
	Pos ast.Position
	// Message is the human-readable description of the problem.
	Message string
	// Token is the text shown as "near '...'" in the rendered message.
	Token string
}
    24  
    25  func (e *Error) Error() string {
    26  	pos := e.Pos
    27  	if pos.Line == EOF {
    28  		return fmt.Sprintf("%v at EOF:   %s\n", pos.Source, e.Message)
    29  	} else {
    30  		return fmt.Sprintf("%v line:%d(column:%d) near '%v':   %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message)
    31  	}
    32  }
    33  
    34  func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) }
    35  
    36  func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' }
    37  
    38  func isIdent(ch int, pos int) bool {
    39  	return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0
    40  }
    41  
    42  func isDigit(ch int) bool {
    43  	return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
    44  }
    45  
// Scanner reads source text byte by byte from a buffered reader and turns it
// into tokens (see Scan), tracking the current source position as it goes.
type Scanner struct {
	// Pos is the current position. Next updates it for every byte consumed
	// and sets Pos.Line to EOF once the input is exhausted.
	Pos ast.Position
	// reader is the buffered input; Peek relies on its one-byte unread.
	reader *bufio.Reader
}
    50  
    51  func NewScanner(reader io.Reader, source string) *Scanner {
    52  	return &Scanner{
    53  		Pos: ast.Position{
    54  			Source: source,
    55  			Line:   1,
    56  			Column: 0,
    57  		},
    58  		reader: bufio.NewReaderSize(reader, 4096),
    59  	}
    60  }
    61  
    62  func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} }
    63  
    64  func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} }
    65  
    66  func (sc *Scanner) readNext() int {
    67  	ch, err := sc.reader.ReadByte()
    68  	if err == io.EOF {
    69  		return EOF
    70  	}
    71  	return int(ch)
    72  }
    73  
    74  func (sc *Scanner) Newline(ch int) {
    75  	if ch < 0 {
    76  		return
    77  	}
    78  	sc.Pos.Line += 1
    79  	sc.Pos.Column = 0
    80  	next := sc.Peek()
    81  	if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' {
    82  		sc.reader.ReadByte()
    83  	}
    84  }
    85  
    86  func (sc *Scanner) Next() int {
    87  	ch := sc.readNext()
    88  	switch ch {
    89  	case '\n', '\r':
    90  		sc.Newline(ch)
    91  		ch = int('\n')
    92  	case EOF:
    93  		sc.Pos.Line = EOF
    94  		sc.Pos.Column = 0
    95  	default:
    96  		sc.Pos.Column++
    97  	}
    98  	return ch
    99  }
   100  
   101  func (sc *Scanner) Peek() int {
   102  	ch := sc.readNext()
   103  	if ch != EOF {
   104  		sc.reader.UnreadByte()
   105  	}
   106  	return ch
   107  }
   108  
   109  func (sc *Scanner) skipWhiteSpace(whitespace int64) int {
   110  	ch := sc.Next()
   111  	for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() {
   112  	}
   113  	return ch
   114  }
   115  
   116  func (sc *Scanner) skipComments(ch int) error {
   117  	// multiline comment
   118  	if sc.Peek() == '[' {
   119  		ch = sc.Next()
   120  		if sc.Peek() == '[' || sc.Peek() == '=' {
   121  			var buf bytes.Buffer
   122  			if err := sc.scanMultilineString(sc.Next(), &buf); err != nil {
   123  				return sc.Error(buf.String(), "invalid multiline comment")
   124  			}
   125  			return nil
   126  		}
   127  	}
   128  	for {
   129  		if ch == '\n' || ch == '\r' || ch < 0 {
   130  			break
   131  		}
   132  		ch = sc.Next()
   133  	}
   134  	return nil
   135  }
   136  
   137  func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error {
   138  	writeChar(buf, ch)
   139  	for isIdent(sc.Peek(), 1) {
   140  		writeChar(buf, sc.Next())
   141  	}
   142  	return nil
   143  }
   144  
   145  func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error {
   146  	writeChar(buf, ch)
   147  	for isDecimal(sc.Peek()) {
   148  		writeChar(buf, sc.Next())
   149  	}
   150  	return nil
   151  }
   152  
   153  func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error {
   154  	if ch == '0' { // octal
   155  		if sc.Peek() == 'x' || sc.Peek() == 'X' {
   156  			writeChar(buf, ch)
   157  			writeChar(buf, sc.Next())
   158  			hasvalue := false
   159  			for isDigit(sc.Peek()) {
   160  				writeChar(buf, sc.Next())
   161  				hasvalue = true
   162  			}
   163  			if !hasvalue {
   164  				return sc.Error(buf.String(), "illegal hexadecimal number")
   165  			}
   166  			return nil
   167  		} else if sc.Peek() != '.' && isDecimal(sc.Peek()) {
   168  			ch = sc.Next()
   169  		}
   170  	}
   171  	sc.scanDecimal(ch, buf)
   172  	if sc.Peek() == '.' {
   173  		sc.scanDecimal(sc.Next(), buf)
   174  	}
   175  	if ch = sc.Peek(); ch == 'e' || ch == 'E' {
   176  		writeChar(buf, sc.Next())
   177  		if ch = sc.Peek(); ch == '-' || ch == '+' {
   178  			writeChar(buf, sc.Next())
   179  		}
   180  		sc.scanDecimal(sc.Next(), buf)
   181  	}
   182  
   183  	return nil
   184  }
   185  
   186  func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error {
   187  	ch := sc.Next()
   188  	for ch != quote {
   189  		if ch == '\n' || ch == '\r' || ch < 0 {
   190  			return sc.Error(buf.String(), "unterminated string")
   191  		}
   192  		if ch == '\\' {
   193  			if err := sc.scanEscape(ch, buf); err != nil {
   194  				return err
   195  			}
   196  		} else {
   197  			writeChar(buf, ch)
   198  		}
   199  		ch = sc.Next()
   200  	}
   201  	return nil
   202  }
   203  
   204  func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error {
   205  	ch = sc.Next()
   206  	switch ch {
   207  	case 'a':
   208  		buf.WriteByte('\a')
   209  	case 'b':
   210  		buf.WriteByte('\b')
   211  	case 'f':
   212  		buf.WriteByte('\f')
   213  	case 'n':
   214  		buf.WriteByte('\n')
   215  	case 'r':
   216  		buf.WriteByte('\r')
   217  	case 't':
   218  		buf.WriteByte('\t')
   219  	case 'v':
   220  		buf.WriteByte('\v')
   221  	case '\\':
   222  		buf.WriteByte('\\')
   223  	case '"':
   224  		buf.WriteByte('"')
   225  	case '\'':
   226  		buf.WriteByte('\'')
   227  	case '\n':
   228  		buf.WriteByte('\n')
   229  	case '\r':
   230  		buf.WriteByte('\n')
   231  		sc.Newline('\r')
   232  	default:
   233  		if '0' <= ch && ch <= '9' {
   234  			bytes := []byte{byte(ch)}
   235  			for i := 0; i < 2 && isDecimal(sc.Peek()); i++ {
   236  				bytes = append(bytes, byte(sc.Next()))
   237  			}
   238  			val, _ := strconv.ParseInt(string(bytes), 10, 32)
   239  			writeChar(buf, int(val))
   240  		} else {
   241  			writeChar(buf, ch)
   242  		}
   243  	}
   244  	return nil
   245  }
   246  
   247  func (sc *Scanner) countSep(ch int) (int, int) {
   248  	count := 0
   249  	for ; ch == '='; count = count + 1 {
   250  		ch = sc.Next()
   251  	}
   252  	return count, ch
   253  }
   254  
   255  func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error {
   256  	var count1, count2 int
   257  	count1, ch = sc.countSep(ch)
   258  	if ch != '[' {
   259  		return sc.Error(string(rune(ch)), "invalid multiline string")
   260  	}
   261  	ch = sc.Next()
   262  	if ch == '\n' || ch == '\r' {
   263  		ch = sc.Next()
   264  	}
   265  	for {
   266  		if ch < 0 {
   267  			return sc.Error(buf.String(), "unterminated multiline string")
   268  		} else if ch == ']' {
   269  			count2, ch = sc.countSep(sc.Next())
   270  			if count1 == count2 && ch == ']' {
   271  				goto finally
   272  			}
   273  			buf.WriteByte(']')
   274  			buf.WriteString(strings.Repeat("=", count2))
   275  			continue
   276  		}
   277  		writeChar(buf, ch)
   278  		ch = sc.Next()
   279  	}
   280  
   281  finally:
   282  	return nil
   283  }
   284  
   285  var reservedWords = map[string]int{
   286  	"and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf,
   287  	"end": TEnd, "false": TFalse, "for": TFor, "function": TFunction,
   288  	"if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr,
   289  	"return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue,
   290  	"until": TUntil, "while": TWhile, "goto": TGoto}
   291  
   292  func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) {
   293  redo:
   294  	var err error
   295  	tok := ast.Token{}
   296  	newline := false
   297  
   298  	ch := sc.skipWhiteSpace(whitespace1)
   299  	if ch == '\n' || ch == '\r' {
   300  		newline = true
   301  		ch = sc.skipWhiteSpace(whitespace2)
   302  	}
   303  
   304  	if ch == '(' && lexer.PrevTokenType == ')' {
   305  		lexer.PNewLine = newline
   306  	} else {
   307  		lexer.PNewLine = false
   308  	}
   309  
   310  	var _buf bytes.Buffer
   311  	buf := &_buf
   312  	tok.Pos = sc.Pos
   313  
   314  	switch {
   315  	case isIdent(ch, 0):
   316  		tok.Type = TIdent
   317  		err = sc.scanIdent(ch, buf)
   318  		tok.Str = buf.String()
   319  		if err != nil {
   320  			goto finally
   321  		}
   322  		if typ, ok := reservedWords[tok.Str]; ok {
   323  			tok.Type = typ
   324  		}
   325  	case isDecimal(ch):
   326  		tok.Type = TNumber
   327  		err = sc.scanNumber(ch, buf)
   328  		tok.Str = buf.String()
   329  	default:
   330  		switch ch {
   331  		case EOF:
   332  			tok.Type = EOF
   333  		case '-':
   334  			if sc.Peek() == '-' {
   335  				err = sc.skipComments(sc.Next())
   336  				if err != nil {
   337  					goto finally
   338  				}
   339  				goto redo
   340  			} else {
   341  				tok.Type = ch
   342  				tok.Str = string(rune(ch))
   343  			}
   344  		case '"', '\'':
   345  			tok.Type = TString
   346  			err = sc.scanString(ch, buf)
   347  			tok.Str = buf.String()
   348  		case '[':
   349  			if c := sc.Peek(); c == '[' || c == '=' {
   350  				tok.Type = TString
   351  				err = sc.scanMultilineString(sc.Next(), buf)
   352  				tok.Str = buf.String()
   353  			} else {
   354  				tok.Type = ch
   355  				tok.Str = string(rune(ch))
   356  			}
   357  		case '=':
   358  			if sc.Peek() == '=' {
   359  				tok.Type = TEqeq
   360  				tok.Str = "=="
   361  				sc.Next()
   362  			} else {
   363  				tok.Type = ch
   364  				tok.Str = string(rune(ch))
   365  			}
   366  		case '~':
   367  			if sc.Peek() == '=' {
   368  				tok.Type = TNeq
   369  				tok.Str = "~="
   370  				sc.Next()
   371  			} else {
   372  				err = sc.Error("~", "Invalid '~' token")
   373  			}
   374  		case '<':
   375  			if sc.Peek() == '=' {
   376  				tok.Type = TLte
   377  				tok.Str = "<="
   378  				sc.Next()
   379  			} else {
   380  				tok.Type = ch
   381  				tok.Str = string(rune(ch))
   382  			}
   383  		case '>':
   384  			if sc.Peek() == '=' {
   385  				tok.Type = TGte
   386  				tok.Str = ">="
   387  				sc.Next()
   388  			} else {
   389  				tok.Type = ch
   390  				tok.Str = string(rune(ch))
   391  			}
   392  		case '.':
   393  			ch2 := sc.Peek()
   394  			switch {
   395  			case isDecimal(ch2):
   396  				tok.Type = TNumber
   397  				err = sc.scanNumber(ch, buf)
   398  				tok.Str = buf.String()
   399  			case ch2 == '.':
   400  				writeChar(buf, ch)
   401  				writeChar(buf, sc.Next())
   402  				if sc.Peek() == '.' {
   403  					writeChar(buf, sc.Next())
   404  					tok.Type = T3Comma
   405  				} else {
   406  					tok.Type = T2Comma
   407  				}
   408  			default:
   409  				tok.Type = '.'
   410  			}
   411  			tok.Str = buf.String()
   412  		case ':':
   413  			if sc.Peek() == ':' {
   414  				tok.Type = T2Colon
   415  				tok.Str = "::"
   416  				sc.Next()
   417  			} else {
   418  				tok.Type = ch
   419  				tok.Str = string(rune(ch))
   420  			}
   421  		case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ',':
   422  			tok.Type = ch
   423  			tok.Str = string(rune(ch))
   424  		default:
   425  			writeChar(buf, ch)
   426  			err = sc.Error(buf.String(), "Invalid token")
   427  			goto finally
   428  		}
   429  	}
   430  
   431  finally:
   432  	tok.Name = TokenName(int(tok.Type))
   433  	return tok, err
   434  }
   435  
   436  // yacc interface {{{
   437  
// Lexer is the object Parse hands to yyParse: it wraps a Scanner and carries
// the state shared between scanning and parsing.
//
// The field order is significant: Parse builds the value with a positional
// composite literal.
type Lexer struct {
	// scanner is the token source.
	scanner *Scanner
	// Stmts is what Parse returns as the parsed chunk. Nothing in this file
	// writes it; presumably the generated parser fills it in.
	Stmts []ast.Stmt
	// PNewLine is set by Scanner.Scan: true only when the current token is
	// '(' preceded by a line break and the previous token type was ')'.
	PNewLine bool
	// Token is the last token handed out by Lex.
	Token ast.Token
	// PrevTokenType is the type of the token before the one being scanned;
	// Lex saves it at the start of each call.
	PrevTokenType int
}
   445  
   446  func (lx *Lexer) Lex(lval *yySymType) int {
   447  	lx.PrevTokenType = lx.Token.Type
   448  	tok, err := lx.scanner.Scan(lx)
   449  	if err != nil {
   450  		panic(err)
   451  	}
   452  	if tok.Type < 0 {
   453  		return 0
   454  	}
   455  	lval.token = tok
   456  	lx.Token = tok
   457  	return int(tok.Type)
   458  }
   459  
   460  func (lx *Lexer) Error(message string) {
   461  	panic(lx.scanner.Error(lx.Token.Str, message))
   462  }
   463  
   464  func (lx *Lexer) TokenError(tok ast.Token, message string) {
   465  	panic(lx.scanner.TokenError(tok, message))
   466  }
   467  
   468  func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) {
   469  	lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil}
   470  	chunk = nil
   471  	defer func() {
   472  		if e := recover(); e != nil {
   473  			err, _ = e.(error)
   474  		}
   475  	}()
   476  	yyParse(lexer)
   477  	chunk = lexer.Stmts
   478  	return
   479  }
   480  
   481  // }}}
   482  
   483  // Dump {{{
   484  
   485  func isInlineDumpNode(rv reflect.Value) bool {
   486  	switch rv.Kind() {
   487  	case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr:
   488  		return false
   489  	default:
   490  		return true
   491  	}
   492  }
   493  
   494  func dump(node interface{}, level int, s string) string {
   495  	rt := reflect.TypeOf(node)
   496  	if fmt.Sprint(rt) == "<nil>" {
   497  		return strings.Repeat(s, level) + "<nil>"
   498  	}
   499  
   500  	rv := reflect.ValueOf(node)
   501  	buf := []string{}
   502  	switch rt.Kind() {
   503  	case reflect.Slice:
   504  		if rv.Len() == 0 {
   505  			return strings.Repeat(s, level) + "<empty>"
   506  		}
   507  		for i := 0; i < rv.Len(); i++ {
   508  			buf = append(buf, dump(rv.Index(i).Interface(), level, s))
   509  		}
   510  	case reflect.Ptr:
   511  		vt := rv.Elem()
   512  		tt := rt.Elem()
   513  		indicies := []int{}
   514  		for i := 0; i < tt.NumField(); i++ {
   515  			if strings.Index(tt.Field(i).Name, "Base") > -1 {
   516  				continue
   517  			}
   518  			indicies = append(indicies, i)
   519  		}
   520  		switch {
   521  		case len(indicies) == 0:
   522  			return strings.Repeat(s, level) + "<empty>"
   523  		case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])):
   524  			for _, i := range indicies {
   525  				buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s))
   526  			}
   527  		default:
   528  			buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name())
   529  			for _, i := range indicies {
   530  				if isInlineDumpNode(vt.Field(i)) {
   531  					inf := dump(vt.Field(i).Interface(), 0, s)
   532  					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf)
   533  				} else {
   534  					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ")
   535  					buf = append(buf, dump(vt.Field(i).Interface(), level+2, s))
   536  				}
   537  			}
   538  		}
   539  	default:
   540  		buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node))
   541  	}
   542  	return strings.Join(buf, "\n")
   543  }
   544  
   545  func Dump(chunk []ast.Stmt) string {
   546  	return dump(chunk, 0, "   ")
   547  }
   548  
// }}}