github.com/xrash/gopher-lua@v0.0.0-20160304065408-e5faab4db06a/parse/lexer.go

package parse

import (
	"bufio"
	"bytes"
	"fmt"
	"github.com/yuin/gopher-lua/ast"
	"io"
	"reflect"
	"strconv"
	"strings"
)

const EOF = -1
const whitespace1 = 1<<'\t' | 1<<'\r' | 1<<' '
const whitespace2 = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '

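// Error describes a scanner or parser failure. Pos carries the source name
// and the line/column where the problem was detected, and Token holds the
// offending text when one is available.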
type Error struct {
	Pos     ast.Position
	Message string
	Token   string
}

func (e *Error) Error() string {
	pos := e.Pos
	if pos.Line == EOF {
		return fmt.Sprintf("%v at EOF:   %s\n", pos.Source, e.Message)
	} else {
		return fmt.Sprintf("%v line:%d(column:%d) near '%v':   %s\n", pos.Source, pos.Line, pos.Column, e.Token, e.Message)
	}
}

func writeChar(buf *bytes.Buffer, c int) { buf.WriteByte(byte(c)) }

func isDecimal(ch int) bool { return '0' <= ch && ch <= '9' }

func isIdent(ch int, pos int) bool {
	return ch == '_' || 'A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || isDecimal(ch) && pos > 0
}

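// isDigit reports whether ch is a hexadecimal digit (0-9, a-f, A-F). Despite
// the name it is only used when scanning 0x literals; plain decimal digits
// are matched by isDecimal.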
func isDigit(ch int) bool {
	return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
}

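// Scanner reads Lua source a byte at a time and tracks the current position
// for error reporting.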
type Scanner struct {
	Pos    ast.Position
	reader *bufio.Reader
}

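// NewScanner returns a Scanner reading from reader. source is the name used
// in positions and error messages, typically a file name.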
func NewScanner(reader io.Reader, source string) *Scanner {
	return &Scanner{
		Pos:    ast.Position{Source: source, Line: 1, Column: 0},
		reader: bufio.NewReaderSize(reader, 4096),
	}
}

func (sc *Scanner) Error(tok string, msg string) *Error { return &Error{sc.Pos, msg, tok} }

func (sc *Scanner) TokenError(tok ast.Token, msg string) *Error { return &Error{tok.Pos, msg, tok.Str} }

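// readNext returns the next raw byte without updating the position, or EOF
// when the input is exhausted or a read error occurs.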
func (sc *Scanner) readNext() int {
	ch, err := sc.reader.ReadByte()
	if err != nil {
		return EOF
	}
	return int(ch)
}

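// Newline advances the line counter, resets the column, and consumes the
// second half of a \r\n or \n\r pair so it is not counted twice.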
func (sc *Scanner) Newline(ch int) {
	if ch < 0 {
		return
	}
	sc.Pos.Line += 1
	sc.Pos.Column = 0
	next := sc.Peek()
	if ch == '\n' && next == '\r' || ch == '\r' && next == '\n' {
		sc.reader.ReadByte()
	}
}

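// Next returns the next character, normalizing any line ending to '\n' and
// updating the scanner position as a side effect.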
func (sc *Scanner) Next() int {
	ch := sc.readNext()
	switch ch {
	case '\n', '\r':
		sc.Newline(ch)
		ch = int('\n')
	case EOF:
		sc.Pos.Line = EOF
		sc.Pos.Column = 0
	default:
		sc.Pos.Column++
	}
	return ch
}

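// Peek returns the next character without consuming it.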
func (sc *Scanner) Peek() int {
	ch := sc.readNext()
	if ch != EOF {
		sc.reader.UnreadByte()
	}
	return ch
}

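// skipWhiteSpace consumes characters whose bits are set in the given
// whitespace mask (whitespace1 leaves newlines alone so Scan can detect
// them) and returns the first character that was not skipped.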
func (sc *Scanner) skipWhiteSpace(whitespace int64) int {
	ch := sc.Next()
	for ; whitespace&(1<<uint(ch)) != 0; ch = sc.Next() {
	}
	return ch
}

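// skipComments consumes the rest of a '--' comment: a long-bracket block if
// one follows, otherwise everything up to the end of the line.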
func (sc *Scanner) skipComments(ch int) error {
	// multiline comment
	if sc.Peek() == '[' {
		ch = sc.Next()
		if sc.Peek() == '[' || sc.Peek() == '=' {
			var buf bytes.Buffer
			if err := sc.scanMultilineString(sc.Next(), &buf); err != nil {
				return sc.Error(buf.String(), "invalid multiline comment")
			}
			return nil
		}
	}
	for {
		if ch == '\n' || ch == '\r' || ch < 0 {
			break
		}
		ch = sc.Next()
	}
	return nil
}

func (sc *Scanner) scanIdent(ch int, buf *bytes.Buffer) error {
	writeChar(buf, ch)
	for isIdent(sc.Peek(), 1) {
		writeChar(buf, sc.Next())
	}
	return nil
}

func (sc *Scanner) scanDecimal(ch int, buf *bytes.Buffer) error {
	writeChar(buf, ch)
	for isDecimal(sc.Peek()) {
		writeChar(buf, sc.Next())
	}
	return nil
}

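// scanNumber scans a numeric literal: either a 0x/0X hexadecimal constant or
// a decimal constant with an optional fraction and exponent.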
func (sc *Scanner) scanNumber(ch int, buf *bytes.Buffer) error {
	if ch == '0' { // hexadecimal or a redundant leading zero
		if sc.Peek() == 'x' || sc.Peek() == 'X' {
			writeChar(buf, ch)
			writeChar(buf, sc.Next())
			hasvalue := false
			for isDigit(sc.Peek()) {
				writeChar(buf, sc.Next())
				hasvalue = true
			}
			if !hasvalue {
				return sc.Error(buf.String(), "illegal hexadecimal number")
			}
			return nil
		} else if sc.Peek() != '.' && isDecimal(sc.Peek()) {
			ch = sc.Next()
		}
	}
	sc.scanDecimal(ch, buf)
	if sc.Peek() == '.' {
		sc.scanDecimal(sc.Next(), buf)
	}
	if ch = sc.Peek(); ch == 'e' || ch == 'E' {
		writeChar(buf, sc.Next())
		if ch = sc.Peek(); ch == '-' || ch == '+' {
			writeChar(buf, sc.Next())
		}
		sc.scanDecimal(sc.Next(), buf)
	}

	return nil
}

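// scanString scans a single- or double-quoted string, expanding escape
// sequences. quote is the opening quote character, which also terminates the
// literal; an unescaped newline or EOF is an error.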
func (sc *Scanner) scanString(quote int, buf *bytes.Buffer) error {
	ch := sc.Next()
	for ch != quote {
		if ch == '\n' || ch == '\r' || ch < 0 {
			return sc.Error(buf.String(), "unterminated string")
		}
		if ch == '\\' {
			if err := sc.scanEscape(ch, buf); err != nil {
				return err
			}
		} else {
			writeChar(buf, ch)
		}
		ch = sc.Next()
	}
	return nil
}

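// scanEscape decodes one backslash escape inside a quoted string: the usual
// single-character escapes, an escaped newline, or a decimal byte value of
// up to three digits (\ddd). Anything else is reported as an error.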
func (sc *Scanner) scanEscape(ch int, buf *bytes.Buffer) error {
	ch = sc.Next()
	switch ch {
	case 'a':
		buf.WriteByte('\a')
	case 'b':
		buf.WriteByte('\b')
	case 'f':
		buf.WriteByte('\f')
	case 'n':
		buf.WriteByte('\n')
	case 'r':
		buf.WriteByte('\r')
	case 't':
		buf.WriteByte('\t')
	case 'v':
		buf.WriteByte('\v')
	case '\\':
		buf.WriteByte('\\')
	case '"':
		buf.WriteByte('"')
	case '\'':
		buf.WriteByte('\'')
	case '\n':
		buf.WriteByte('\n')
	case '\r':
		buf.WriteByte('\n')
		sc.Newline('\r')
	default:
		if '0' <= ch && ch <= '9' {
			digits := []byte{byte(ch)}
			for i := 0; i < 2 && isDecimal(sc.Peek()); i++ {
				digits = append(digits, byte(sc.Next()))
			}
			val, _ := strconv.ParseInt(string(digits), 10, 32)
			writeChar(buf, int(val))
		} else {
			buf.WriteByte('\\')
			writeChar(buf, ch)
			return sc.Error(buf.String(), "Invalid escape sequence")
		}
	}
	return nil
}

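// countSep counts the '=' characters that form the level of a long-bracket
// delimiter such as [=[ ... ]=], returning the count and the first
// non-'=' character.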
func (sc *Scanner) countSep(ch int) (int, int) {
	count := 0
	for ; ch == '='; count = count + 1 {
		ch = sc.Next()
	}
	return count, ch
}

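// scanMultilineString scans a long-bracket string such as [[...]] or
// [==[...]==]. The closing bracket must use the same number of '='
// characters as the opening one, and a newline immediately after the opening
// bracket is dropped, matching Lua's long-string rules.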
func (sc *Scanner) scanMultilineString(ch int, buf *bytes.Buffer) error {
	var count1, count2 int
	count1, ch = sc.countSep(ch)
	if ch != '[' {
		return sc.Error(string(ch), "invalid multiline string")
	}
	ch = sc.Next()
	if ch == '\n' || ch == '\r' {
		ch = sc.Next()
	}
	for {
		if ch < 0 {
			return sc.Error(buf.String(), "unterminated multiline string")
		} else if ch == ']' {
			count2, ch = sc.countSep(sc.Next())
			if count1 == count2 && ch == ']' {
				goto finally
			}
			buf.WriteByte(']')
			buf.WriteString(strings.Repeat("=", count2))
			continue
		}
		writeChar(buf, ch)
		ch = sc.Next()
	}

finally:
	return nil
}

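// reservedWords maps Lua keywords to their token types so that identifiers
// scanned by Scan can be reclassified as keywords.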
var reservedWords = map[string]int{
	"and": TAnd, "break": TBreak, "do": TDo, "else": TElse, "elseif": TElseIf,
	"end": TEnd, "false": TFalse, "for": TFor, "function": TFunction,
	"if": TIf, "in": TIn, "local": TLocal, "nil": TNil, "not": TNot, "or": TOr,
	"return": TReturn, "repeat": TRepeat, "then": TThen, "true": TTrue,
	"until": TUntil, "while": TWhile}

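// Scan returns the next token. It skips whitespace and comments, records in
// lexer.PNewLine whether a '(' was preceded by a newline (the parser can use
// this to detect Lua's ambiguous-call case), and classifies identifiers,
// numbers, strings, and operators.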
func (sc *Scanner) Scan(lexer *Lexer) (ast.Token, error) {
redo:
	var err error
	tok := ast.Token{}
	newline := false

	ch := sc.skipWhiteSpace(whitespace1)
	if ch == '\n' || ch == '\r' {
		newline = true
		ch = sc.skipWhiteSpace(whitespace2)
	}

	if ch == '(' {
		lexer.PNewLine = newline
	}

	var _buf bytes.Buffer
	buf := &_buf
	tok.Pos = sc.Pos

	switch {
	case isIdent(ch, 0):
		tok.Type = TIdent
		err = sc.scanIdent(ch, buf)
		tok.Str = buf.String()
		if err != nil {
			goto finally
		}
		if typ, ok := reservedWords[tok.Str]; ok {
			tok.Type = typ
		}
	case isDecimal(ch):
		tok.Type = TNumber
		err = sc.scanNumber(ch, buf)
		tok.Str = buf.String()
	default:
		switch ch {
		case EOF:
			tok.Type = EOF
		case '-':
			if sc.Peek() == '-' {
				err = sc.skipComments(sc.Next())
				if err != nil {
					goto finally
				}
				goto redo
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '"', '\'':
			tok.Type = TString
			err = sc.scanString(ch, buf)
			tok.Str = buf.String()
		case '[':
			if c := sc.Peek(); c == '[' || c == '=' {
				tok.Type = TString
				err = sc.scanMultilineString(sc.Next(), buf)
				tok.Str = buf.String()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '=':
			if sc.Peek() == '=' {
				tok.Type = TEqeq
				tok.Str = "=="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '~':
			if sc.Peek() == '=' {
				tok.Type = TNeq
				tok.Str = "~="
				sc.Next()
			} else {
				err = sc.Error("~", "Invalid '~' token")
			}
		case '<':
			if sc.Peek() == '=' {
				tok.Type = TLte
				tok.Str = "<="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '>':
			if sc.Peek() == '=' {
				tok.Type = TGte
				tok.Str = ">="
				sc.Next()
			} else {
				tok.Type = ch
				tok.Str = string(ch)
			}
		case '.':
			ch2 := sc.Peek()
			switch {
			case isDecimal(ch2):
				tok.Type = TNumber
				err = sc.scanNumber(ch, buf)
				tok.Str = buf.String()
			case ch2 == '.':
				writeChar(buf, ch)
				writeChar(buf, sc.Next())
				if sc.Peek() == '.' {
					writeChar(buf, sc.Next())
					tok.Type = T3Comma
				} else {
					tok.Type = T2Comma
				}
			default:
				tok.Type = '.'
			}
			tok.Str = buf.String()
		case '+', '*', '/', '%', '^', '#', '(', ')', '{', '}', ']', ';', ':', ',':
			tok.Type = ch
			tok.Str = string(ch)
		default:
			writeChar(buf, ch)
			err = sc.Error(buf.String(), "Invalid token")
			goto finally
		}
	}

finally:
	tok.Name = TokenName(int(tok.Type))
	return tok, err
}

// yacc interface {{{

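// Lexer adapts Scanner to the interface expected by the goyacc-generated
// parser. Stmts receives the parsed chunk and Token holds the most recently
// returned token for error reporting.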
type Lexer struct {
	scanner  *Scanner
	Stmts    []ast.Stmt
	PNewLine bool
	Token    ast.Token
}

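// Lex implements the yyLexer interface: it stores the next token in lval and
// returns its type, or 0 at end of input. Scanner errors are reported by
// panicking, which Parse recovers into an error return.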
func (lx *Lexer) Lex(lval *yySymType) int {
	tok, err := lx.scanner.Scan(lx)
	if err != nil {
		panic(err)
	}
	if tok.Type < 0 {
		return 0
	}
	lval.token = tok
	lx.Token = tok
	return int(tok.Type)
}

func (lx *Lexer) Error(message string) {
	panic(lx.scanner.Error(lx.Token.Str, message))
}

func (lx *Lexer) TokenError(tok ast.Token, message string) {
	panic(lx.scanner.TokenError(tok, message))
}

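// Parse reads a Lua chunk from reader and returns its statement list. name
// is used as the source name in positions and error messages. Scanner and
// parser panics are recovered and returned as the error value. A minimal
// usage sketch, assuming this package is imported as parse:
//
//	chunk, err := parse.Parse(strings.NewReader(`print("hello")`), "hello.lua")
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Println(parse.Dump(chunk))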
func Parse(reader io.Reader, name string) (chunk []ast.Stmt, err error) {
	lexer := &Lexer{NewScanner(reader, name), nil, false, ast.Token{Str: ""}}
	chunk = nil
	defer func() {
		if e := recover(); e != nil {
			err, _ = e.(error)
		}
	}()
	yyParse(lexer)
	chunk = lexer.Stmts
	return
}

// }}}

// Dump {{{

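// isInlineDumpNode reports whether a value can be rendered on a single line
// in the Dump output; composite kinds get their own indented block.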
func isInlineDumpNode(rv reflect.Value) bool {
	switch rv.Kind() {
	case reflect.Struct, reflect.Slice, reflect.Interface, reflect.Ptr:
		return false
	default:
		return true
	}
}

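// dump renders an AST node (or slice of nodes) as an indented tree, using
// reflection to walk struct fields and skipping fields whose name contains
// "Base". level is the current depth and s is the indentation unit.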
func dump(node interface{}, level int, s string) string {
	rt := reflect.TypeOf(node)
	if fmt.Sprint(rt) == "<nil>" {
		return strings.Repeat(s, level) + "<nil>"
	}

	rv := reflect.ValueOf(node)
	buf := []string{}
	switch rt.Kind() {
	case reflect.Slice:
		if rv.Len() == 0 {
			return strings.Repeat(s, level) + "<empty>"
		}
		for i := 0; i < rv.Len(); i++ {
			buf = append(buf, dump(rv.Index(i).Interface(), level, s))
		}
	case reflect.Ptr:
		vt := rv.Elem()
		tt := rt.Elem()
		indices := []int{}
		for i := 0; i < tt.NumField(); i++ {
			if strings.Contains(tt.Field(i).Name, "Base") {
				continue
			}
			indices = append(indices, i)
		}
		switch {
		case len(indices) == 0:
			return strings.Repeat(s, level) + "<empty>"
		case len(indices) == 1 && isInlineDumpNode(vt.Field(indices[0])):
			for _, i := range indices {
				buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name()+": "+dump(vt.Field(i).Interface(), 0, s))
			}
		default:
			buf = append(buf, strings.Repeat(s, level)+"- Node$"+tt.Name())
			for _, i := range indices {
				if isInlineDumpNode(vt.Field(i)) {
					inf := dump(vt.Field(i).Interface(), 0, s)
					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": "+inf)
				} else {
					buf = append(buf, strings.Repeat(s, level+1)+tt.Field(i).Name+": ")
					buf = append(buf, dump(vt.Field(i).Interface(), level+2, s))
				}
			}
		}
	default:
		buf = append(buf, strings.Repeat(s, level)+fmt.Sprint(node))
	}
	return strings.Join(buf, "\n")
}

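// Dump returns a human-readable tree representation of a parsed chunk,
// indented by three spaces per level. It is intended for debugging.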
func Dump(chunk []ast.Stmt) string {
	return dump(chunk, 0, "   ")
}

// }}}