github.com/hattya/go.sh@v0.0.0-20240328132134-f53276d95cc6/interp/lexer.go (about)

     1  //
     2  // go.sh/interp :: lexer.go
     3  //
     4  //   Copyright (c) 2021 Akinori Hattori <hattya@gmail.com>
     5  //
     6  //   SPDX-License-Identifier: MIT
     7  //
     8  
     9  //go:generate goyacc -l -o arith.go arith.go.y
    10  
    11  package interp
    12  
import (
	"fmt"
	"io"
	"runtime"
	"strings"
	"sync"
	"unicode"
)
    20  
    21  var ops = map[int]string{
    22  	'(':        "(",
    23  	')':        ")",
    24  	INC:        "++",
    25  	DEC:        "--",
    26  	'+':        "+",
    27  	'-':        "-",
    28  	'~':        "~",
    29  	'!':        "!",
    30  	'*':        "*",
    31  	'/':        "/",
    32  	'%':        "%",
    33  	LSH:        "<<",
    34  	RSH:        ">>",
    35  	'<':        "<",
    36  	'>':        ">",
    37  	LE:         "<=",
    38  	GE:         ">=",
    39  	EQ:         "==",
    40  	NE:         "!=",
    41  	'&':        "&",
    42  	'^':        "^",
    43  	'|':        "|",
    44  	LAND:       "&&",
    45  	LOR:        "||",
    46  	'?':        "?",
    47  	':':        ":",
    48  	'=':        "=",
    49  	MUL_ASSIGN: "*=",
    50  	DIV_ASSIGN: "/=",
    51  	MOD_ASSIGN: "%=",
    52  	ADD_ASSIGN: "+=",
    53  	SUB_ASSIGN: "-=",
    54  	LSH_ASSIGN: "<<=",
    55  	RSH_ASSIGN: ">>=",
    56  	AND_ASSIGN: "&=",
    57  	XOR_ASSIGN: "^=",
    58  	OR_ASSIGN:  "|=",
    59  }
    60  
    61  type lexer struct {
    62  	env   *ExecEnv
    63  	r     io.RuneScanner
    64  	n     int
    65  	token chan interface{}
    66  
    67  	mu     sync.Mutex
    68  	err    error
    69  	cancel chan struct{}
    70  
    71  	b strings.Builder
    72  }
    73  
    74  func newLexer(env *ExecEnv, r io.RuneScanner) *lexer {
    75  	l := &lexer{
    76  		env:    env,
    77  		r:      r,
    78  		token:  make(chan interface{}),
    79  		cancel: make(chan struct{}),
    80  	}
    81  	go l.run()
    82  	return l
    83  }
    84  
    85  func (l *lexer) Lex(lval *yySymType) int {
    86  	switch tok := (<-l.token).(type) {
    87  	case token:
    88  		lval.expr.s = tok.val
    89  		return tok.typ
    90  	case int:
    91  		lval.op = ops[tok]
    92  		return tok
    93  	}
    94  	return 0
    95  }
    96  
    97  func (l *lexer) run() {
    98  	defer func() {
    99  		close(l.token)
   100  
   101  		if e := recover(); e != nil {
   102  			// re-panic
   103  			panic(e)
   104  		}
   105  	}()
   106  
   107  	for action := l.lexToken; action != nil; {
   108  		action = action()
   109  	}
   110  }
   111  
   112  func (l *lexer) lexToken() action {
   113  Read:
   114  	r, err := l.read()
   115  	if err != nil {
   116  		return nil
   117  	}
   118  	switch r {
   119  	case ' ', '\t', '\n':
   120  		goto Read
   121  	}
   122  	l.unread()
   123  
   124  	switch {
   125  	case '0' <= r && r <= '9':
   126  		return l.lexNumber
   127  	case r == '_' || unicode.IsLetter(r):
   128  		return l.lexIdent
   129  	}
   130  	return l.lexOp
   131  }
   132  
   133  func (l *lexer) lexNumber() action {
   134  	r, _ := l.read()
   135  	l.b.WriteRune(r)
   136  	var hex bool
   137  	if r == '0' {
   138  		r, err := l.read()
   139  		switch {
   140  		case err != nil:
   141  			goto Number
   142  		case r == 'X' || r == 'x':
   143  			hex = true
   144  		case r < '0' || '9' < r:
   145  			l.unread()
   146  			goto Number
   147  		}
   148  		l.b.WriteRune(r)
   149  	}
   150  
   151  	for {
   152  		r, err := l.read()
   153  		switch {
   154  		case err != nil:
   155  			goto Number
   156  		case '0' <= r && r <= '9' || hex && ('A' <= r && r <= 'Z' || 'a' <= r && r <= 'z'):
   157  			l.b.WriteRune(r)
   158  		default:
   159  			l.unread()
   160  			goto Number
   161  		}
   162  	}
   163  Number:
   164  	l.emit(NUMBER)
   165  	return l.lexToken
   166  }
   167  
   168  func (l *lexer) lexIdent() action {
   169  	for {
   170  		r, err := l.read()
   171  		switch {
   172  		case err != nil:
   173  			goto Ident
   174  		case r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r):
   175  			l.b.WriteRune(r)
   176  		default:
   177  			l.unread()
   178  			goto Ident
   179  		}
   180  	}
   181  Ident:
   182  	l.emit(IDENT)
   183  	return l.lexToken
   184  }
   185  
   186  func (l *lexer) lexOp() action {
   187  	var op int
   188  	switch r, _ := l.read(); r {
   189  	case '(', ')', '~', '?', ':':
   190  		op = int(r)
   191  	case '+':
   192  		op = '+'
   193  		if r, err := l.read(); err == nil {
   194  			switch r {
   195  			case '+':
   196  				op = INC
   197  			case '=':
   198  				op = ADD_ASSIGN
   199  			default:
   200  				l.unread()
   201  			}
   202  		}
   203  	case '-':
   204  		op = '-'
   205  		if r, err := l.read(); err == nil {
   206  			switch r {
   207  			case '-':
   208  				op = DEC
   209  			case '=':
   210  				op = SUB_ASSIGN
   211  			default:
   212  				l.unread()
   213  			}
   214  		}
   215  	case '!':
   216  		op = '!'
   217  		if r, err := l.read(); err == nil {
   218  			if r == '=' {
   219  				op = NE
   220  			} else {
   221  				l.unread()
   222  			}
   223  		}
   224  	case '*':
   225  		op = '*'
   226  		if r, err := l.read(); err == nil {
   227  			if r == '=' {
   228  				op = MUL_ASSIGN
   229  			} else {
   230  				l.unread()
   231  			}
   232  		}
   233  	case '/':
   234  		op = '/'
   235  		if r, err := l.read(); err == nil {
   236  			if r == '=' {
   237  				op = DIV_ASSIGN
   238  			} else {
   239  				l.unread()
   240  			}
   241  		}
   242  	case '%':
   243  		op = '%'
   244  		if r, err := l.read(); err == nil {
   245  			if r == '=' {
   246  				op = MOD_ASSIGN
   247  			} else {
   248  				l.unread()
   249  			}
   250  		}
   251  	case '<':
   252  		op = '<'
   253  		if r, err := l.read(); err == nil {
   254  			switch r {
   255  			case '<':
   256  				op = LSH
   257  				if r, err := l.read(); err == nil {
   258  					if r == '=' {
   259  						op = LSH_ASSIGN
   260  					} else {
   261  						l.unread()
   262  					}
   263  				}
   264  			case '=':
   265  				op = LE
   266  			default:
   267  				l.unread()
   268  			}
   269  		}
   270  	case '>':
   271  		op = '>'
   272  		if r, err := l.read(); err == nil {
   273  			switch r {
   274  			case '>':
   275  				op = RSH
   276  				if r, err := l.read(); err == nil {
   277  					if r == '=' {
   278  						op = RSH_ASSIGN
   279  					} else {
   280  						l.unread()
   281  					}
   282  				}
   283  			case '=':
   284  				op = GE
   285  			default:
   286  				l.unread()
   287  			}
   288  		}
   289  	case '=':
   290  		op = '='
   291  		if r, err := l.read(); err == nil {
   292  			if r == '=' {
   293  				op = EQ
   294  			} else {
   295  				l.unread()
   296  			}
   297  		}
   298  	case '&':
   299  		op = '&'
   300  		if r, err := l.read(); err == nil {
   301  			switch r {
   302  			case '&':
   303  				op = LAND
   304  			case '=':
   305  				op = AND_ASSIGN
   306  			default:
   307  				l.unread()
   308  			}
   309  		}
   310  	case '^':
   311  		op = '^'
   312  		if r, err := l.read(); err == nil {
   313  			if r == '=' {
   314  				op = XOR_ASSIGN
   315  			} else {
   316  				l.unread()
   317  			}
   318  		}
   319  	case '|':
   320  		op = '|'
   321  		if r, err := l.read(); err == nil {
   322  			switch r {
   323  			case '|':
   324  				op = LOR
   325  			case '=':
   326  				op = OR_ASSIGN
   327  			default:
   328  				l.unread()
   329  			}
   330  		}
   331  	default:
   332  		l.Error(fmt.Sprintf("unexpected %q", r))
   333  		return nil
   334  	}
   335  	l.emit(op)
   336  	return l.lexToken
   337  }
   338  
   339  func (l *lexer) emit(typ int) {
   340  	var tok interface{}
   341  	switch typ {
   342  	case NUMBER, IDENT:
   343  		tok = token{
   344  			typ: typ,
   345  			val: l.b.String(),
   346  		}
   347  		l.b.Reset()
   348  	default:
   349  		tok = typ
   350  	}
   351  	select {
   352  	case l.token <- tok:
   353  	case <-l.cancel:
   354  		// bailout
   355  		panic(nil)
   356  	}
   357  }
   358  
   359  func (l *lexer) read() (rune, error) {
   360  	r, _, err := l.r.ReadRune()
   361  	return r, err
   362  }
   363  
   364  func (l *lexer) unread() {
   365  	l.r.UnreadRune()
   366  }
   367  
   368  func (l *lexer) Error(s string) {
   369  	l.mu.Lock()
   370  	defer l.mu.Unlock()
   371  
   372  	switch {
   373  	case strings.HasPrefix(s, "syntax error: "):
   374  		s = s[14:]
   375  		if l.err != nil && s == "unexpected EOF" {
   376  			return // lexing was interrupted
   377  		}
   378  	case strings.HasPrefix(s, "runtime error: "):
   379  		s = s[15:]
   380  	}
   381  	l.err = ArithExprError{Msg: s}
   382  
   383  	select {
   384  	case <-l.cancel:
   385  	default:
   386  		close(l.cancel)
   387  	}
   388  }
   389  
   390  type action func() action
   391  
   392  type token struct {
   393  	typ int
   394  	val string
   395  }
   396  
   397  // ArithExprError represents an arithmetic expression error.
   398  type ArithExprError struct {
   399  	Expr string
   400  	Msg  string
   401  }
   402  
   403  func (e ArithExprError) Error() string {
   404  	if e.Expr != "" {
   405  		return fmt.Sprintf("%v: %v", e.Expr, e.Msg)
   406  	}
   407  	return e.Msg
   408  }