github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/cgo/const.go

package cgo

// This file implements a parser for a subset of the C language, just enough
// to translate common #define values into Go constant expressions.
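//
// The parser is a small Pratt-style (precedence climbing) parser: every
// token kind that may start an expression has a prefix parse function, and
// binary operators are folded in according to the precedence table below.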

import (
	"fmt"
	"go/ast"
	"go/scanner"
	"go/token"
	"strings"
)

var (
	prefixParseFns map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error)
	precedences    = map[token.Token]int{
		token.OR:  precedenceOr,
		token.XOR: precedenceXor,
		token.AND: precedenceAnd,
		token.ADD: precedenceAdd,
		token.SUB: precedenceAdd,
		token.MUL: precedenceMul,
		token.QUO: precedenceMul,
		token.REM: precedenceMul,
	}
)

const (
	precedenceLowest = iota + 1
	precedenceOr
	precedenceXor
	precedenceAnd
	precedenceAdd
	precedenceMul
	precedencePrefix
)
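
// These precedence levels follow C for the operators supported here: |
// binds loosest, then ^, then &, then + and -, then *, / and %. Note that
// this differs from Go (where & binds as tightly as *), but since the
// result is built directly as an AST, Go's own operator precedence never
// comes into play.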

func init() {
	// This must be done in an init function to avoid an initialization cycle
	// (the parse functions indirectly refer back to prefixParseFns).
	prefixParseFns = map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error){
		token.IDENT:  parseIdent,
		token.INT:    parseBasicLit,
		token.FLOAT:  parseBasicLit,
		token.STRING: parseBasicLit,
		token.CHAR:   parseBasicLit,
		token.LPAREN: parseParenExpr,
		token.SUB:    parseUnaryExpr,
	}
}

// parseConst parses the given string as a C constant.
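// The result is either a Go AST expression or a *scanner.Error pointing at
// the offending token. For example, the macro value "(10 + 2) * 5" becomes
// the Go expression (10 + 2) * 5, and "FOO - 1" becomes C.FOO - 1.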
func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) {
	t := newTokenizer(pos, fset, value)
	expr, err := parseConstExpr(t, precedenceLowest)
	if err != nil {
		// Return the parse error instead of complaining about whatever
		// unparsed tokens may follow it.
		return nil, err
	}
	t.Next()
	if t.curToken != token.EOF {
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.curPos),
			Msg: "unexpected token " + t.curToken.String() + ", expected end of expression",
		}
	}
	return expr, nil
}

// parseConstExpr parses a stream of C tokens to a Go expression.
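// The precedence parameter is the binding power of the operator to the
// left: the loop below keeps consuming binary operators only while they
// bind more tightly, which is what makes "1 + 2 * 3" parse as 1 + (2 * 3).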
func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) {
	if t.curToken == token.EOF {
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.curPos),
			Msg: "empty constant",
		}
	}
	prefix := prefixParseFns[t.curToken]
	if prefix == nil {
		return nil, &scanner.Error{
			Pos: t.fset.Position(t.curPos),
			Msg: fmt.Sprintf("unexpected token %s", t.curToken),
		}
	}
	leftExpr, err := prefix(t)
	if err != nil {
		// Don't keep parsing: a later success would overwrite err and return
		// a half-built expression with a nil error.
		return nil, err
	}

	for t.peekToken != token.EOF && precedence < precedences[t.peekToken] {
		switch t.peekToken {
		case token.OR, token.XOR, token.AND, token.ADD, token.SUB, token.MUL, token.QUO, token.REM:
			t.Next()
			leftExpr, err = parseBinaryExpr(t, leftExpr)
			if err != nil {
				return nil, err
			}
		}
	}

	return leftExpr, nil
}

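// parseIdent parses a C identifier. The name is not resolved here: it is
// returned as a reference in the C pseudo-package (FOO becomes C.FOO),
// matching how cgo identifiers are written in Go source.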
func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) {
	return &ast.Ident{
		NamePos: t.curPos,
		Name:    "C." + t.curValue,
	}, nil
}

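// parseBasicLit parses a literal constant: an integer, float, string, or
// character. The token value is used as-is; the tokenizer has already
// stripped C-only numeric suffixes (such as the UL in 10UL) that Go does
// not accept.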
func parseBasicLit(t *tokenizer) (ast.Expr, *scanner.Error) {
	return &ast.BasicLit{
		ValuePos: t.curPos,
		Kind:     t.curToken,
		Value:    t.curValue,
	}, nil
}

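// parseParenExpr parses a parenthesized expression: the opening parenthesis,
// the inner expression, and the closing parenthesis. The parentheses are
// kept in the resulting AST.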
func parseParenExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
	lparen := t.curPos
	t.Next()
	x, err := parseConstExpr(t, precedenceLowest)
	if err != nil {
		return nil, err
	}
	t.Next()
	if t.curToken != token.RPAREN {
		return nil, unexpectedToken(t, token.RPAREN)
	}
	expr := &ast.ParenExpr{
		Lparen: lparen,
		X:      x,
		Rparen: t.curPos,
	}
	return expr, nil
}

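// parseBinaryExpr parses the right-hand side of a binary expression. The
// left-hand side has already been parsed and the current token must be the
// operator. The right-hand side is parsed at the operator's own precedence,
// which makes operators of equal precedence left-associative.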
func parseBinaryExpr(t *tokenizer, left ast.Expr) (ast.Expr, *scanner.Error) {
	expression := &ast.BinaryExpr{
		X:     left,
		Op:    t.curToken,
		OpPos: t.curPos,
	}
	precedence := precedences[t.curToken]
	t.Next()
	right, err := parseConstExpr(t, precedence)
	expression.Y = right
	return expression, err
}

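// parseUnaryExpr parses a prefix expression such as -FOO. Only unary minus
// is registered in prefixParseFns. The operand is parsed at
// precedencePrefix, the highest precedence level, so "-1 + 2" parses as
// (-1) + 2.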
func parseUnaryExpr(t *tokenizer) (ast.Expr, *scanner.Error) {
	expression := &ast.UnaryExpr{
		OpPos: t.curPos,
		Op:    t.curToken,
	}
	t.Next()
	x, err := parseConstExpr(t, precedencePrefix)
	expression.X = x
	return expression, err
}

// unexpectedToken returns an error of the form "unexpected token FOO, expected
// BAR".
func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error {
	return &scanner.Error{
		Pos: t.fset.Position(t.curPos),
		Msg: fmt.Sprintf("unexpected token %s, expected %s", t.curToken, expected),
	}
}

// tokenizer reads C source code and converts it to Go tokens.
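// It tracks the current token and one token of lookahead (the cur* and
// peek* fields), which is all the parser needs to decide whether to keep
// consuming binary operators.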
type tokenizer struct {
	curPos, peekPos     token.Pos
	fset                *token.FileSet
	curToken, peekToken token.Token
	curValue, peekValue string
	buf                 string
}

// newTokenizer initializes a new tokenizer, positioned at the first token in
// the string.
func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer {
	t := &tokenizer{
		peekPos:   start,
		fset:      fset,
		buf:       buf,
		peekToken: token.ILLEGAL,
	}
	// Parse the first two tokens (cur and peek).
	t.Next()
	t.Next()
	return t
}

// Next consumes the next token in the stream. There is no return value; read
// the new token from the curPos, curToken, and curValue fields.
func (t *tokenizer) Next() {
	// The previous peek is now the current token.
	t.curPos = t.peekPos
	t.curToken = t.peekToken
	t.curValue = t.peekValue

	// Parse the next peek token.
	t.peekPos += token.Pos(len(t.curValue))
	for {
		if len(t.buf) == 0 {
			t.peekToken = token.EOF
			t.peekValue = "" // clear the stale value so positions stay correct
			return
		}
		c := t.buf[0]
		switch {
		case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v':
			// Skip whitespace. This is the character set matched by isspace()
			// in the default C locale:
			// https://en.cppreference.com/w/cpp/string/byte/isspace
			t.peekPos++
			t.buf = t.buf[1:]
		case len(t.buf) >= 2 && (t.buf[:2] == "||" || t.buf[:2] == "&&"):
			// Two-character tokens.
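			// Note: these produce Go's LAND/LOR tokens, but no parse function
			// or precedence is registered for them, so the parser reports
			// "unexpected token &&" rather than silently mis-parsing the
			// expression.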
			switch c {
			case '&':
				t.peekToken = token.LAND
			case '|':
				t.peekToken = token.LOR
			}
			t.peekValue = t.buf[:2]
			t.buf = t.buf[2:]
			return
		case c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^':
			// Single-character tokens.
			// TODO: ++ (increment) and -- (decrement) operators.
			switch c {
			case '(':
				t.peekToken = token.LPAREN
			case ')':
				t.peekToken = token.RPAREN
			case '+':
				t.peekToken = token.ADD
			case '-':
				t.peekToken = token.SUB
			case '*':
				t.peekToken = token.MUL
			case '/':
				t.peekToken = token.QUO
			case '%':
				t.peekToken = token.REM
			case '&':
				t.peekToken = token.AND
			case '|':
				t.peekToken = token.OR
			case '^':
				t.peekToken = token.XOR
			}
			t.peekValue = t.buf[:1]
			t.buf = t.buf[1:]
			return
		case c >= '0' && c <= '9':
			// Numeric constant (int, float, etc.).
			// Scan to the end of the literal: digits, letters, '.' and '_'
			// can all be part of it (hex digits, exponents, suffixes). A
			// signed exponent such as 1e-9 is not handled: the '-' ends the
			// token.
			tokenLen := len(t.buf)
			hasDot := false
			for i, c := range t.buf {
				if c == '.' {
					hasDot = true
				}
				if c >= '0' && c <= '9' || c == '.' || c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' {
					tokenLen = i + 1
				} else {
					break
				}
			}
			t.peekValue = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			if hasDot {
				// Floating-point constant. Strip the 'f' suffix, which Go
				// does not accept.
				t.peekToken = token.FLOAT
				t.peekValue = strings.TrimRight(t.peekValue, "f")
			} else {
				// Integer constant. Strip the u/U/l/L suffixes. Real integer
				// constants are more complicated than this, but this is a
				// close approximation.
				// https://en.cppreference.com/w/cpp/language/integer_literal
				t.peekToken = token.INT
				t.peekValue = strings.TrimRight(t.peekValue, "uUlL")
			}
			return
		case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_':
			// Identifier. Find all remaining characters that are part of
			// this identifier.
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_' {
					tokenLen = i + 1
				} else {
					break
				}
			}
			t.peekValue = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			t.peekToken = token.IDENT
			return
		case c == '"':
			// String constant. Find the first '"' character that is not
			// escaped by a backslash (taking doubled backslashes into
			// account).
			escape := false
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if i != 0 && c == '"' && !escape {
					tokenLen = i + 1
					break
				}
				if escape {
					// The previous character was a backslash, so this one is
					// escaped and cannot escape the character after it.
					escape = false
				} else {
					escape = c == '\\'
				}
			}
			t.peekToken = token.STRING
			t.peekValue = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			return
		case c == '\'':
			// Char (rune) constant. Find the first '\'' character that is
			// not escaped by a backslash (taking doubled backslashes into
			// account).
			escape := false
			tokenLen := len(t.buf)
			for i, c := range t.buf {
				if i != 0 && c == '\'' && !escape {
					tokenLen = i + 1
					break
				}
				if escape {
					// The previous character was escaped by a backslash.
					escape = false
				} else {
					escape = c == '\\'
				}
			}
			t.peekToken = token.CHAR
			t.peekValue = t.buf[:tokenLen]
			t.buf = t.buf[tokenLen:]
			return
		default:
			t.peekToken = token.ILLEGAL
			return
		}
	}
}