github.com/assemblaj/gopher-lua@v0.0.0-20221116224352-d57295a0d9e8/parse/lexer.go (about)

     1  package parse
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"github.com/assemblaj/gopher-lua/ast"
     8  	"io"
     9  	"reflect"
    10  	"strconv"
    11  	"strings"
    12  )
    13  
    14  const EOF = -1
    15  const whitespace1 = 1<<'\t' | 1<<' '
    16  const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
    17  
// Error is the error value produced by the scanner and the lexer. It carries
// the source position, a message and the text of the offending token.
//
// The field order matters: the constructors in this file build Error values
// with positional composite literals.
type Error struct {
	// Pos is the position the error is reported at. A Line equal to EOF makes
	// Error() render the "at EOF" form of the message.
	Pos     ast.Position
	// Message is the human-readable description of the problem.
	Message string
	// Token is the token text quoted after "near" in the rendered message.
	Token   string
}
    23  
    24  func (e *Error) Error() string {
    25  	pos := e.Pos
    26  	if pos.Line == EOF {
    27  		return fmt.Sprintf("%v at EOF:   %s\n", pos.Source, e.Message)
    28  	} else {
    29  		return fmt.Sprintf("%v line:%d(column:%d) near '%v':   %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message)
    30  	}
    31  }
    32  
    33  func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) }
    34  
    35  func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' }
    36  
    37  func isIdent(ch int, pos int) bool {
    38  	return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0
    39  }
    40  
    41  func isDigit(ch int) bool {
    42  	return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
    43  }
    44  
// Scanner reads source text one byte at a time and tracks the current
// position while doing so.
type Scanner struct {
	// Pos is the position of the most recently consumed character. Line is
	// set to EOF once the input is exhausted.
	Pos    ast.Position
	// reader is the buffered input; Peek relies on its single-byte unread.
	reader *bufio.Reader
}
    49  
    50  func NewScanner(reader io.Reader, source string) *Scanner {
    51  	return &Scanner{
    52  		Pos: ast.Position{
    53  			Source: source,
    54  			Line:   1,
    55  			Column: 0,
    56  		},
    57  		reader: bufio.NewReaderSize(reader, 4096),
    58  	}
    59  }
    60  
    61  func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} }
    62  
    63  func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} }
    64  
    65  func (sc *Scanner) readNext() int {
    66  	ch, err := sc.reader.ReadByte()
    67  	if err == io.EOF {
    68  		return EOF
    69  	}
    70  	return int(ch)
    71  }
    72  
    73  func (sc *Scanner) Newline(ch int) {
    74  	if ch < 0 {
    75  		return
    76  	}
    77  	sc.Pos.Line += 1
    78  	sc.Pos.Column = 0
    79  	next := sc.Peek()
    80  	if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' {
    81  		sc.reader.ReadByte()
    82  	}
    83  }
    84  
    85  func (sc *Scanner) Next() int {
    86  	ch := sc.readNext()
    87  	switch ch {
    88  	case '\n', '\r':
    89  		sc.Newline(ch)
    90  		ch = int('\n')
    91  	case EOF:
    92  		sc.Pos.Line = EOF
    93  		sc.Pos.Column = 0
    94  	default:
    95  		sc.Pos.Column++
    96  	}
    97  	return ch
    98  }
    99  
   100  func (sc *Scanner) Peek() int {
   101  	ch := sc.readNext()
   102  	if ch != EOF {
   103  		sc.reader.UnreadByte()
   104  	}
   105  	return ch
   106  }
   107  
   108  func (sc *Scanner) skipWhiteSpace(whitespace int64) int {
   109  	ch := sc.Next()
   110  	for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() {
   111  	}
   112  	return ch
   113  }
   114  
   115  func (sc *Scanner) skipComments(ch int) error {
   116  	// multiline comment
   117  	if sc.Peek() == '[' {
   118  		ch = sc.Next()
   119  		if sc.Peek() == '[' || sc.Peek() == '=' {
   120  			var buf bytes.Buffer
   121  			if err := sc.scanMultilineString(sc.Next(), &buf); err != nil {
   122  				return sc.Error(buf.String(), "invalid multiline comment")
   123  			}
   124  			return nil
   125  		}
   126  	}
   127  	for {
   128  		if ch == '\n' || ch == '\r' || ch < 0 {
   129  			break
   130  		}
   131  		ch = sc.Next()
   132  	}
   133  	return nil
   134  }
   135  
   136  func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error {
   137  	writeChar(buf, ch)
   138  	for isIdent(sc.Peek(), 1) {
   139  		writeChar(buf, sc.Next())
   140  	}
   141  	return nil
   142  }
   143  
   144  func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error {
   145  	writeChar(buf, ch)
   146  	for isDecimal(sc.Peek()) {
   147  		writeChar(buf, sc.Next())
   148  	}
   149  	return nil
   150  }
   151  
// scanNumber appends the text of a numeric literal to buf. ch is the literal's
// first character, already consumed by the caller; Scan calls this with either
// a decimal digit or a '.' that is followed by a digit.
//
// Recognized forms:
//   - "0x"/"0X" followed by one or more hexadecimal digits; with no digits it
//     returns an "illegal hexadecimal number" error.
//   - decimal digits, an optional '.' with further digits, and an optional
//     exponent ('e'/'E', optional sign, digits).
//
// No numeric validation happens here; only the raw text is collected.
func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error {
	if ch == '0' { // leading zero: hexadecimal prefix, or a zero in front of more digits
		if sc.Peek() == 'x' || sc.Peek() == 'X' {
			writeChar(buf, ch)
			writeChar(buf, sc.Next())
			hasvalue := false
			for isDigit(sc.Peek()) {
				writeChar(buf, sc.Next())
				hasvalue = true
			}
			if !hasvalue {
				return sc.Error(buf.String(), "illegal hexadecimal number")
			}
			return nil
		} else if sc.Peek() != '.' && isDecimal(sc.Peek()) {
			// Another digit follows: drop this leading zero from the text
			// and continue with the next digit.
			ch = sc.Next()
		}
	}
	// Integer part (or the leading '.' when called for ".5"-style literals).
	sc.scanDecimal(ch, buf)
	if sc.Peek() == '.' {
		// Fraction: the '.' itself is written by scanDecimal.
		sc.scanDecimal(sc.Next(), buf)
	}
	if ch = sc.Peek(); ch == 'e' || ch == 'E' {
		writeChar(buf, sc.Next())
		if ch = sc.Peek(); ch == '-' || ch == '+' {
			writeChar(buf, sc.Next())
		}
		// NOTE(review): the character after the exponent marker/sign is
		// consumed and written without checking that it is a digit.
		sc.scanDecimal(sc.Next(), buf)
	}

	return nil
}
   184  
   185  func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error {
   186  	ch := sc.Next()
   187  	for ch != quote {
   188  		if ch == '\n' || ch == '\r' || ch < 0 {
   189  			return sc.Error(buf.String(), "unterminated string")
   190  		}
   191  		if ch == '\\' {
   192  			if err := sc.scanEscape(ch, buf); err != nil {
   193  				return err
   194  			}
   195  		} else {
   196  			writeChar(buf, ch)
   197  		}
   198  		ch = sc.Next()
   199  	}
   200  	return nil
   201  }
   202  
   203  func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error {
   204  	ch = sc.Next()
   205  	switch ch {
   206  	case 'a':
   207  		buf.WriteByte('\a')
   208  	case 'b':
   209  		buf.WriteByte('\b')
   210  	case 'f':
   211  		buf.WriteByte('\f')
   212  	case 'n':
   213  		buf.WriteByte('\n')
   214  	case 'r':
   215  		buf.WriteByte('\r')
   216  	case 't':
   217  		buf.WriteByte('\t')
   218  	case 'v':
   219  		buf.WriteByte('\v')
   220  	case '\\':
   221  		buf.WriteByte('\\')
   222  	case '"':
   223  		buf.WriteByte('"')
   224  	case '\'':
   225  		buf.WriteByte('\'')
   226  	case '\n':
   227  		buf.WriteByte('\n')
   228  	case '\r':
   229  		buf.WriteByte('\n')
   230  		sc.Newline('\r')
   231  	default:
   232  		if '0' <= ch && ch <= '9' {
   233  			bytes := []byte{byte(ch)}
   234  			for i := 0; i < 2 && isDecimal(sc.Peek()); i++ {
   235  				bytes = append(bytes, byte(sc.Next()))
   236  			}
   237  			val, _ := strconv.ParseInt(string(bytes), 10, 32)
   238  			writeChar(buf, int(val))
   239  		} else {
   240  			writeChar(buf, ch)
   241  		}
   242  	}
   243  	return nil
   244  }
   245  
   246  func (sc *Scanner) countSep(ch int) (int, int) {
   247  	count := 0
   248  	for ; ch == '='; count = count + 1 {
   249  		ch = sc.Next()
   250  	}
   251  	return count, ch
   252  }
   253  
   254  func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error {
   255  	var count1, count2 int
   256  	count1, ch = sc.countSep(ch)
   257  	if ch != '[' {
   258  		return sc.Error(string(rune(ch)), "invalid multiline string")
   259  	}
   260  	ch = sc.Next()
   261  	if ch == '\n' || ch == '\r' {
   262  		ch = sc.Next()
   263  	}
   264  	for {
   265  		if ch < 0 {
   266  			return sc.Error(buf.String(), "unterminated multiline string")
   267  		} else if ch == ']' {
   268  			count2, ch = sc.countSep(sc.Next())
   269  			if count1 == count2 && ch == ']' {
   270  				goto finally
   271  			}
   272  			buf.WriteByte(']')
   273  			buf.WriteString(strings.Repeat("=", count2))
   274  			continue
   275  		}
   276  		writeChar(buf, ch)
   277  		ch = sc.Next()
   278  	}
   279  
   280  finally:
   281  	return nil
   282  }
   283  
   284  var reservedWords = map[string]int{
   285  	"and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf,
   286  	"end": TEnd, "false": TFalse, "for": TFor, "function": TFunction,
   287  	"if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr,
   288  	"return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue,
   289  	"until": TUntil, "while": TWhile}
   290  
   291  func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) {
   292  redo:
   293  	var err error
   294  	tok := ast.Token{}
   295  	newline := false
   296  
   297  	ch := sc.skipWhiteSpace(whitespace1)
   298  	if ch == '\n' || ch == '\r' {
   299  		newline = true
   300  		ch = sc.skipWhiteSpace(whitespace2)
   301  	}
   302  
   303  	if ch == '(' && lexer.PrevTokenType == ')' {
   304  		lexer.PNewLine = newline
   305  	} else {
   306  		lexer.PNewLine = false
   307  	}
   308  
   309  	var _buf bytes.Buffer
   310  	buf := &_buf
   311  	tok.Pos = sc.Pos
   312  
   313  	switch {
   314  	case isIdent(ch, 0):
   315  		tok.Type = TIdent
   316  		err = sc.scanIdent(ch, buf)
   317  		tok.Str = buf.String()
   318  		if err != nil {
   319  			goto finally
   320  		}
   321  		if typ, ok := reservedWords[tok.Str]; ok {
   322  			tok.Type = typ
   323  		}
   324  	case isDecimal(ch):
   325  		tok.Type = TNumber
   326  		err = sc.scanNumber(ch, buf)
   327  		tok.Str = buf.String()
   328  	default:
   329  		switch ch {
   330  		case EOF:
   331  			tok.Type = EOF
   332  		case '-':
   333  			if sc.Peek() == '-' {
   334  				err = sc.skipComments(sc.Next())
   335  				if err != nil {
   336  					goto finally
   337  				}
   338  				goto redo
   339  			} else {
   340  				tok.Type = ch
   341  				tok.Str = string(rune(ch))
   342  			}
   343  		case '"', '\'':
   344  			tok.Type = TString
   345  			err = sc.scanString(ch, buf)
   346  			tok.Str = buf.String()
   347  		case '[':
   348  			if c := sc.Peek(); c == '[' || c == '=' {
   349  				tok.Type = TString
   350  				err = sc.scanMultilineString(sc.Next(), buf)
   351  				tok.Str = buf.String()
   352  			} else {
   353  				tok.Type = ch
   354  				tok.Str = string(rune(ch))
   355  			}
   356  		case '=':
   357  			if sc.Peek() == '=' {
   358  				tok.Type = TEqeq
   359  				tok.Str = "=="
   360  				sc.Next()
   361  			} else {
   362  				tok.Type = ch
   363  				tok.Str = string(rune(ch))
   364  			}
   365  		case '~':
   366  			if sc.Peek() == '=' {
   367  				tok.Type = TNeq
   368  				tok.Str = "~="
   369  				sc.Next()
   370  			} else {
   371  				err = sc.Error("~", "Invalid '~' token")
   372  			}
   373  		case '<':
   374  			if sc.Peek() == '=' {
   375  				tok.Type = TLte
   376  				tok.Str = "<="
   377  				sc.Next()
   378  			} else {
   379  				tok.Type = ch
   380  				tok.Str = string(rune(ch))
   381  			}
   382  		case '>':
   383  			if sc.Peek() == '=' {
   384  				tok.Type = TGte
   385  				tok.Str = ">="
   386  				sc.Next()
   387  			} else {
   388  				tok.Type = ch
   389  				tok.Str = string(rune(ch))
   390  			}
   391  		case '.':
   392  			ch2 := sc.Peek()
   393  			switch {
   394  			case isDecimal(ch2):
   395  				tok.Type = TNumber
   396  				err = sc.scanNumber(ch, buf)
   397  				tok.Str = buf.String()
   398  			case ch2 == '.':
   399  				writeChar(buf, ch)
   400  				writeChar(buf, sc.Next())
   401  				if sc.Peek() == '.' {
   402  					writeChar(buf, sc.Next())
   403  					tok.Type = T3Comma
   404  				} else {
   405  					tok.Type = T2Comma
   406  				}
   407  			default:
   408  				tok.Type = '.'
   409  			}
   410  			tok.Str = buf.String()
   411  		case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',':
   412  			tok.Type = ch
   413  			tok.Str = string(rune(ch))
   414  		default:
   415  			writeChar(buf, ch)
   416  			err = sc.Error(buf.String(), "Invalid token")
   417  			goto finally
   418  		}
   419  	}
   420  
   421  finally:
   422  	tok.Name = TokenName(int(tok.Type))
   423  	return tok, err
   424  }
   425  
   426  // yacc interface {{{
   427  
// Lexer adapts a Scanner to the interface the generated parser expects (Lex
// and Error methods) and holds the state shared between scanner and parser.
//
// The field order matters: Parse builds a Lexer with a positional composite
// literal.
type Lexer struct {
	// scanner is the token source.
	scanner       *Scanner
	// Stmts is the parse result that Parse returns after yyParse finishes;
	// presumably filled in by the parser's actions — not visible in this file.
	Stmts         []ast.Stmt
	// PNewLine is updated by Scanner.Scan on every token: true only when the
	// token is '(' , the previous token was ')' and a line break lay between.
	PNewLine      bool
	// Token is the most recent token handed to the parser by Lex.
	Token         ast.Token
	// PrevTokenType is the type of Token as it was when Lex was last entered,
	// i.e. the type of the token preceding the one being scanned.
	PrevTokenType int
}
   435  
   436  func (lx *Lexer) Lex(lval *yySymType) int {
   437  	lx.PrevTokenType = lx.Token.Type
   438  	tok, err := lx.scanner.Scan(lx)
   439  	if err != nil {
   440  		panic(err)
   441  	}
   442  	if tok.Type < 0 {
   443  		return 0
   444  	}
   445  	lval.token = tok
   446  	lx.Token = tok
   447  	return int(tok.Type)
   448  }
   449  
   450  func (lx *Lexer) Error(message string) {
   451  	panic(lx.scanner.Error(lx.Token.Str, message))
   452  }
   453  
   454  func (lx *Lexer) TokenError(tok ast.Token, message string) {
   455  	panic(lx.scanner.TokenError(tok, message))
   456  }
   457  
   458  func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) {
   459  	lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}, TNil}
   460  	chunk = nil
   461  	defer func() {
   462  		if e := recover(); e != nil {
   463  			err, _ = e.(error)
   464  		}
   465  	}()
   466  	yyParse(lexer)
   467  	chunk = lexer.Stmts
   468  	return
   469  }
   470  
   471  // }}}
   472  
   473  // Dump {{{
   474  
   475  func isInlineDumpNode(rv reflect.Value) bool {
   476  	switch rv.Kind() {
   477  	case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr:
   478  		return false
   479  	default:
   480  		return true
   481  	}
   482  }
   483  
   484  func dump(node interface{}, level int, s string) string {
   485  	rt := reflect.TypeOf(node)
   486  	if fmt.Sprint(rt) == "<nil>" {
   487  		return strings.Repeat(s, level) + "<nil>"
   488  	}
   489  
   490  	rv := reflect.ValueOf(node)
   491  	buf := []string{}
   492  	switch rt.Kind() {
   493  	case reflect.Slice:
   494  		if rv.Len() == 0 {
   495  			return strings.Repeat(s, level) + "<empty>"
   496  		}
   497  		for i := 0; i < rv.Len(); i++ {
   498  			buf = append(buf, dump(rv.Index(i).Interface(), level, s))
   499  		}
   500  	case reflect.Ptr:
   501  		vt := rv.Elem()
   502  		tt := rt.Elem()
   503  		indicies := []int{}
   504  		for i := 0; i < tt.NumField(); i++ {
   505  			if strings.Index(tt.Field(i).Name, "Base") > -1 {
   506  				continue
   507  			}
   508  			indicies = append(indicies, i)
   509  		}
   510  		switch {
   511  		case len(indicies) == 0:
   512  			return strings.Repeat(s, level) + "<empty>"
   513  		case len(indicies) == 1 && isInlineDumpNode(vt.Field(indicies[0])):
   514  			for _, i := range indicies {
   515  				buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s))
   516  			}
   517  		default:
   518  			buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name())
   519  			for _, i := range indicies {
   520  				if isInlineDumpNode(vt.Field(i)) {
   521  					inf := dump(vt.Field(i).Interface(), 0, s)
   522  					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf)
   523  				} else {
   524  					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ")
   525  					buf = append(buf, dump(vt.Field(i).Interface(), level+2, s))
   526  				}
   527  			}
   528  		}
   529  	default:
   530  		buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node))
   531  	}
   532  	return strings.Join(buf, "\n")
   533  }
   534  
   535  func Dump(chunk []ast.Stmt) string {
   536  	return dump(chunk, 0, "   ")
   537  }
   538  
// }}}