github.com/mattn/anko@v0.1.10/parser/lexer.go (about)

     1  // Package parser implements parser for anko.
     2  package parser
     3  
     4  import (
     5  	"errors"
     6  	"fmt"
     7  	"reflect"
     8  	"strconv"
     9  	"strings"
    10  	"unicode"
    11  
    12  	"github.com/mattn/anko/ast"
    13  )
    14  
const (
	// EOF is short for End of file. It is the value peek reports once the
	// whole source has been consumed, and the token Scan returns at that point.
	EOF = -1
	// EOL is short for End of line: the newline rune.
	EOL = '\n'
)
    21  
// Error is a parse error. It carries the message together with the position
// at which the problem was detected.
type Error struct {
	// Message is the human-readable description returned by Error().
	Message  string
	// Pos is the source position associated with the error.
	Pos      ast.Position
	// Filename is not populated anywhere in this file; presumably callers
	// fill it in — TODO confirm.
	Filename string
	// Fatal is true for errors raised by the scanner (see Lexer.Lex) and
	// false for errors reported through Lexer.Error.
	Fatal    bool
}
    29  
    30  // Error returns the parse error message.
    31  func (e *Error) Error() string {
    32  	return e.Message
    33  }
    34  
// Scanner stores the state of the lexer: the source being scanned and the
// current read position within it.
type Scanner struct {
	// src is the source code, decoded into runes.
	src      []rune
	// offset is the index into src of the rune that peek returns.
	offset   int
	// lineHead is the index into src of the first rune of the current line;
	// it is used to compute column numbers.
	lineHead int
	// line is the number of newlines consumed so far (zero-based line index).
	line     int
}
    42  
// opName is the collection of reserved words, mapping each one to its parser
// token value. Scan consults it to tell keywords apart from plain identifiers.
var opName = map[string]int{
	"func":     FUNC,
	"return":   RETURN,
	"var":      VAR,
	"throw":    THROW,
	"if":       IF,
	"for":      FOR,
	"break":    BREAK,
	"continue": CONTINUE,
	"in":       IN,
	"else":     ELSE,
	"new":      NEW,
	"true":     TRUE,
	"false":    FALSE,
	"nil":      NIL,
	"module":   MODULE,
	"try":      TRY,
	"catch":    CATCH,
	"finally":  FINALLY,
	"switch":   SWITCH,
	"case":     CASE,
	"default":  DEFAULT,
	"go":       GO,
	"chan":     CHAN,
	"struct":   STRUCT,
	"make":     MAKE,
	"type":     TYPE,
	"len":      LEN,
	"delete":   DELETE,
	"close":    CLOSE,
	"map":      MAP,
	"import":   IMPORT,
}
    77  
// Shared, pre-built values.
var (
	// nilValue is a reflect.Value of interface kind holding nil; toNumber
	// returns it alongside an error.
	nilValue   = reflect.New(reflect.TypeOf((*interface{})(nil)).Elem()).Elem()
	// trueValue and falseValue wrap the boolean constants.
	trueValue  = reflect.ValueOf(true)
	falseValue = reflect.ValueOf(false)
	// oneLiteral is a literal expression holding int64(1). It is not
	// referenced in this file; presumably the grammar actions use it —
	// TODO confirm.
	oneLiteral = &ast.LiteralExpr{Literal: reflect.ValueOf(int64(1))}
)
    84  
    85  // Init resets code to scan.
    86  func (s *Scanner) Init(src string) {
    87  	s.src = []rune(src)
    88  }
    89  
    90  // Scan analyses token, and decide identify or literals.
    91  func (s *Scanner) Scan() (tok int, lit string, pos ast.Position, err error) {
    92  retry:
    93  	s.skipBlank()
    94  	pos = s.pos()
    95  	switch ch := s.peek(); {
    96  	case isLetter(ch):
    97  		lit, err = s.scanIdentifier()
    98  		if err != nil {
    99  			return
   100  		}
   101  		if name, ok := opName[lit]; ok {
   102  			tok = name
   103  		} else {
   104  			tok = IDENT
   105  		}
   106  	case isDigit(ch):
   107  		tok = NUMBER
   108  		lit, err = s.scanNumber()
   109  		if err != nil {
   110  			return
   111  		}
   112  	case ch == '"':
   113  		tok = STRING
   114  		lit, err = s.scanString('"')
   115  		if err != nil {
   116  			return
   117  		}
   118  	case ch == '\'':
   119  		tok = STRING
   120  		lit, err = s.scanString('\'')
   121  		if err != nil {
   122  			return
   123  		}
   124  	case ch == '`':
   125  		tok = STRING
   126  		lit, err = s.scanRawString('`')
   127  		if err != nil {
   128  			return
   129  		}
   130  	default:
   131  		switch ch {
   132  		case EOF:
   133  			tok = EOF
   134  		case '#':
   135  			for !isEOL(s.peek()) {
   136  				s.next()
   137  			}
   138  			goto retry
   139  		case '!':
   140  			s.next()
   141  			switch s.peek() {
   142  			case '=':
   143  				tok = NEQ
   144  				lit = "!="
   145  			default:
   146  				s.back()
   147  				tok = int(ch)
   148  				lit = string(ch)
   149  			}
   150  		case '=':
   151  			s.next()
   152  			switch s.peek() {
   153  			case '=':
   154  				tok = EQEQ
   155  				lit = "=="
   156  			case ' ':
   157  				if s.peekPlus(1) == '<' && s.peekPlus(2) == '-' {
   158  					s.next()
   159  					s.next()
   160  					tok = EQOPCHAN
   161  					lit = "= <-"
   162  				} else {
   163  					s.back()
   164  					tok = int(ch)
   165  					lit = string(ch)
   166  				}
   167  			default:
   168  				s.back()
   169  				tok = int(ch)
   170  				lit = string(ch)
   171  			}
   172  		case '?':
   173  			s.next()
   174  			switch s.peek() {
   175  			case '?':
   176  				tok = NILCOALESCE
   177  				lit = "??"
   178  			default:
   179  				s.back()
   180  				tok = int(ch)
   181  				lit = string(ch)
   182  			}
   183  		case '+':
   184  			s.next()
   185  			switch s.peek() {
   186  			case '+':
   187  				tok = PLUSPLUS
   188  				lit = "++"
   189  			case '=':
   190  				tok = PLUSEQ
   191  				lit = "+="
   192  			default:
   193  				s.back()
   194  				tok = int(ch)
   195  				lit = string(ch)
   196  			}
   197  		case '-':
   198  			s.next()
   199  			switch s.peek() {
   200  			case '-':
   201  				tok = MINUSMINUS
   202  				lit = "--"
   203  			case '=':
   204  				tok = MINUSEQ
   205  				lit = "-="
   206  			default:
   207  				s.back()
   208  				tok = int(ch)
   209  				lit = "-"
   210  			}
   211  		case '*':
   212  			s.next()
   213  			switch s.peek() {
   214  			case '=':
   215  				tok = MULEQ
   216  				lit = "*="
   217  			default:
   218  				s.back()
   219  				tok = int(ch)
   220  				lit = string(ch)
   221  			}
   222  		case '/':
   223  			s.next()
   224  			switch s.peek() {
   225  			case '=':
   226  				tok = DIVEQ
   227  				lit = "/="
   228  			case '/':
   229  				for !isEOL(s.peek()) {
   230  					s.next()
   231  				}
   232  				goto retry
   233  			case '*':
   234  				for {
   235  					_, err = s.scanRawString('*')
   236  					if err != nil {
   237  						return
   238  					}
   239  
   240  					if s.peek() == '/' {
   241  						s.next()
   242  						goto retry
   243  					}
   244  
   245  					s.back()
   246  				}
   247  			default:
   248  				s.back()
   249  				tok = int(ch)
   250  				lit = string(ch)
   251  			}
   252  		case '>':
   253  			s.next()
   254  			switch s.peek() {
   255  			case '=':
   256  				tok = GE
   257  				lit = ">="
   258  			case '>':
   259  				tok = SHIFTRIGHT
   260  				lit = ">>"
   261  			default:
   262  				s.back()
   263  				tok = int(ch)
   264  				lit = string(ch)
   265  			}
   266  		case '<':
   267  			s.next()
   268  			switch s.peek() {
   269  			case '-':
   270  				tok = OPCHAN
   271  				lit = "<-"
   272  			case '=':
   273  				tok = LE
   274  				lit = "<="
   275  			case '<':
   276  				tok = SHIFTLEFT
   277  				lit = "<<"
   278  			default:
   279  				s.back()
   280  				tok = int(ch)
   281  				lit = string(ch)
   282  			}
   283  		case '|':
   284  			s.next()
   285  			switch s.peek() {
   286  			case '|':
   287  				tok = OROR
   288  				lit = "||"
   289  			case '=':
   290  				tok = OREQ
   291  				lit = "|="
   292  			default:
   293  				s.back()
   294  				tok = int(ch)
   295  				lit = string(ch)
   296  			}
   297  		case '&':
   298  			s.next()
   299  			switch s.peek() {
   300  			case '&':
   301  				tok = ANDAND
   302  				lit = "&&"
   303  			case '=':
   304  				tok = ANDEQ
   305  				lit = "&="
   306  			default:
   307  				s.back()
   308  				tok = int(ch)
   309  				lit = string(ch)
   310  			}
   311  		case '.':
   312  			s.next()
   313  			if s.peek() == '.' {
   314  				s.next()
   315  				if s.peek() == '.' {
   316  					tok = VARARG
   317  				} else {
   318  					err = fmt.Errorf("syntax error on '%v' at %v:%v", string(ch), pos.Line, pos.Column)
   319  					return
   320  				}
   321  			} else {
   322  				s.back()
   323  				tok = int(ch)
   324  				lit = string(ch)
   325  			}
   326  		case '\n', '(', ')', ':', ';', '%', '{', '}', '[', ']', ',', '^':
   327  			tok = int(ch)
   328  			lit = string(ch)
   329  		default:
   330  			err = fmt.Errorf("syntax error on '%v' at %v:%v", string(ch), pos.Line, pos.Column)
   331  			tok = int(ch)
   332  			lit = string(ch)
   333  			return
   334  		}
   335  		s.next()
   336  	}
   337  	return
   338  }
   339  
   340  // isLetter returns true if the rune is a letter for identity.
   341  func isLetter(ch rune) bool {
   342  	return unicode.IsLetter(ch) || ch == '_'
   343  }
   344  
   345  // isDigit returns true if the rune is a number.
   346  func isDigit(ch rune) bool {
   347  	return '0' <= ch && ch <= '9'
   348  }
   349  
   350  // isHex returns true if the rune is a hex digits.
   351  func isHex(ch rune) bool {
   352  	return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F')
   353  }
   354  
   355  // isEOL returns true if the rune is at end-of-line or end-of-file.
   356  func isEOL(ch rune) bool {
   357  	return ch == '\n' || ch == -1
   358  }
   359  
   360  // isBlank returns true if the rune is empty character..
   361  func isBlank(ch rune) bool {
   362  	return ch == ' ' || ch == '\t' || ch == '\r'
   363  }
   364  
   365  // peek returns current rune in the code.
   366  func (s *Scanner) peek() rune {
   367  	if s.reachEOF() {
   368  		return EOF
   369  	}
   370  	return s.src[s.offset]
   371  }
   372  
   373  // peek returns current rune plus i in the code.
   374  func (s *Scanner) peekPlus(i int) rune {
   375  	if len(s.src) <= s.offset+i {
   376  		return EOF
   377  	}
   378  	return s.src[s.offset+i]
   379  }
   380  
   381  // next moves offset to next.
   382  func (s *Scanner) next() {
   383  	if !s.reachEOF() {
   384  		if s.peek() == '\n' {
   385  			s.lineHead = s.offset + 1
   386  			s.line++
   387  		}
   388  		s.offset++
   389  	}
   390  }
   391  
   392  // current returns the current offset.
   393  func (s *Scanner) current() int {
   394  	return s.offset
   395  }
   396  
   397  // offset sets the offset value.
   398  func (s *Scanner) set(o int) {
   399  	s.offset = o
   400  }
   401  
   402  // back moves back offset once to top.
   403  func (s *Scanner) back() {
   404  	s.offset--
   405  }
   406  
   407  // reachEOF returns true if offset is at end-of-file.
   408  func (s *Scanner) reachEOF() bool {
   409  	return len(s.src) <= s.offset
   410  }
   411  
   412  // pos returns the position of current.
   413  func (s *Scanner) pos() ast.Position {
   414  	return ast.Position{Line: s.line + 1, Column: s.offset - s.lineHead + 1}
   415  }
   416  
   417  // skipBlank moves position into non-black character.
   418  func (s *Scanner) skipBlank() {
   419  	for isBlank(s.peek()) {
   420  		s.next()
   421  	}
   422  }
   423  
   424  // scanIdentifier returns identifier beginning at current position.
   425  func (s *Scanner) scanIdentifier() (string, error) {
   426  	var ret []rune
   427  	for {
   428  		if !isLetter(s.peek()) && !isDigit(s.peek()) {
   429  			break
   430  		}
   431  		ret = append(ret, s.peek())
   432  		s.next()
   433  	}
   434  	return string(ret), nil
   435  }
   436  
   437  // scanNumber returns number beginning at current position.
   438  func (s *Scanner) scanNumber() (string, error) {
   439  	result := []rune{s.peek()}
   440  	s.next()
   441  
   442  	if result[0] == '0' && (s.peek() == 'x' || s.peek() == 'X') {
   443  		// hex
   444  		result = append(result, 'x')
   445  		s.next()
   446  		for isHex(s.peek()) {
   447  			result = append(result, s.peek())
   448  			s.next()
   449  		}
   450  	} else {
   451  		// non-hex
   452  		found := false
   453  		for {
   454  			if isDigit(s.peek()) {
   455  				// is digit
   456  				result = append(result, s.peek())
   457  				s.next()
   458  				continue
   459  			}
   460  
   461  			if s.peek() == '.' {
   462  				// is .
   463  				result = append(result, '.')
   464  				s.next()
   465  				continue
   466  			}
   467  
   468  			if s.peek() == 'e' || s.peek() == 'E' {
   469  				// is e
   470  				if found {
   471  					return "", errors.New("unexpected " + string(s.peek()))
   472  				}
   473  				found = true
   474  				s.next()
   475  
   476  				// check if + or -
   477  				if s.peek() == '+' || s.peek() == '-' {
   478  					// add e with + or -
   479  					result = append(result, 'e')
   480  					result = append(result, s.peek())
   481  					s.next()
   482  				} else {
   483  					// add e, but next char not + or -
   484  					result = append(result, 'e')
   485  				}
   486  				continue
   487  			}
   488  
   489  			// not digit, e, nor .
   490  			break
   491  		}
   492  	}
   493  
   494  	if isLetter(s.peek()) {
   495  		return "", errors.New("identifier starts immediately after numeric literal")
   496  	}
   497  
   498  	return string(result), nil
   499  }
   500  
   501  // scanRawString returns raw-string starting at current position.
   502  func (s *Scanner) scanRawString(l rune) (string, error) {
   503  	var ret []rune
   504  	for {
   505  		s.next()
   506  		if s.peek() == EOF {
   507  			return "", errors.New("unexpected EOF")
   508  		}
   509  		if s.peek() == l {
   510  			s.next()
   511  			break
   512  		}
   513  		ret = append(ret, s.peek())
   514  	}
   515  	return string(ret), nil
   516  }
   517  
   518  // scanString returns string starting at current position.
   519  // This handles backslash escaping.
   520  func (s *Scanner) scanString(l rune) (string, error) {
   521  	var ret []rune
   522  eos:
   523  	for {
   524  		s.next()
   525  		switch s.peek() {
   526  		case EOL:
   527  			return "", errors.New("unexpected EOL")
   528  		case EOF:
   529  			return "", errors.New("unexpected EOF")
   530  		case l:
   531  			s.next()
   532  			break eos
   533  		case '\\':
   534  			s.next()
   535  			switch s.peek() {
   536  			case 'b':
   537  				ret = append(ret, '\b')
   538  				continue
   539  			case 'f':
   540  				ret = append(ret, '\f')
   541  				continue
   542  			case 'r':
   543  				ret = append(ret, '\r')
   544  				continue
   545  			case 'n':
   546  				ret = append(ret, '\n')
   547  				continue
   548  			case 't':
   549  				ret = append(ret, '\t')
   550  				continue
   551  			}
   552  			ret = append(ret, s.peek())
   553  			continue
   554  		default:
   555  			ret = append(ret, s.peek())
   556  		}
   557  	}
   558  	return string(ret), nil
   559  }
   560  
// Lexer provides the interface between a Scanner and the parser: Parse hands
// it to yyParse, which obtains tokens through Lex and reports problems
// through Error.
type Lexer struct {
	// s is the scanner tokens are read from.
	s    *Scanner
	// lit is the literal of the most recently scanned token.
	lit  string
	// pos is the position of the most recently scanned token.
	pos  ast.Position
	// e is the last error recorded by Lex or Error.
	e    error
	// stmt is the result returned by Parse. It is not assigned in this file;
	// presumably the grammar actions set it — TODO confirm.
	stmt ast.Stmt
}
   569  
   570  // Lex scans the token and literals.
   571  func (l *Lexer) Lex(lval *yySymType) int {
   572  	tok, lit, pos, err := l.s.Scan()
   573  	if err != nil {
   574  		l.e = &Error{Message: err.Error(), Pos: pos, Fatal: true}
   575  	}
   576  	lval.tok = ast.Token{Tok: tok, Lit: lit}
   577  	lval.tok.SetPosition(pos)
   578  	l.lit = lit
   579  	l.pos = pos
   580  	return tok
   581  }
   582  
   583  // Error sets parse error.
   584  func (l *Lexer) Error(msg string) {
   585  	l.e = &Error{Message: msg, Pos: l.pos, Fatal: false}
   586  }
   587  
   588  // Parse provides way to parse the code using Scanner.
   589  func Parse(s *Scanner) (ast.Stmt, error) {
   590  	l := Lexer{s: s}
   591  	if yyParse(&l) != 0 {
   592  		return nil, l.e
   593  	}
   594  	return l.stmt, l.e
   595  }
   596  
   597  // EnableErrorVerbose enabled verbose errors from the parser
   598  func EnableErrorVerbose() {
   599  	yyErrorVerbose = true
   600  }
   601  
   602  // EnableDebug enabled debug from the parser
   603  func EnableDebug(level int) {
   604  	yyDebug = level
   605  }
   606  
   607  // ParseSrc provides way to parse the code from source.
   608  func ParseSrc(src string) (ast.Stmt, error) {
   609  	scanner := &Scanner{
   610  		src: []rune(src),
   611  	}
   612  	return Parse(scanner)
   613  }
   614  
   615  func toNumber(numString string) (reflect.Value, error) {
   616  	// hex
   617  	if len(numString) > 2 && numString[0:2] == "0x" {
   618  		i, err := strconv.ParseInt(numString[2:], 16, 64)
   619  		if err != nil {
   620  			return nilValue, err
   621  		}
   622  		return reflect.ValueOf(i), nil
   623  	}
   624  
   625  	// hex
   626  	if len(numString) > 3 && numString[0:3] == "-0x" {
   627  		i, err := strconv.ParseInt("-"+numString[3:], 16, 64)
   628  		if err != nil {
   629  			return nilValue, err
   630  		}
   631  		return reflect.ValueOf(i), nil
   632  	}
   633  
   634  	// float
   635  	if strings.Contains(numString, ".") || strings.Contains(numString, "e") {
   636  		f, err := strconv.ParseFloat(numString, 64)
   637  		if err != nil {
   638  			return nilValue, err
   639  		}
   640  		return reflect.ValueOf(f), nil
   641  	}
   642  
   643  	// int
   644  	i, err := strconv.ParseInt(numString, 10, 64)
   645  	if err != nil {
   646  		return nilValue, err
   647  	}
   648  	return reflect.ValueOf(i), nil
   649  }
   650  
   651  func stringToValue(aString string) reflect.Value {
   652  	return reflect.ValueOf(aString)
   653  }