github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/cgo/const.go (about) 1 package cgo 2 3 // This file implements a parser of a subset of the C language, just enough to 4 // parse common #define statements to Go constant expressions. 5 6 import ( 7 "fmt" 8 "go/ast" 9 "go/scanner" 10 "go/token" 11 "strings" 12 ) 13 14 var ( 15 prefixParseFns map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error) 16 precedences = map[token.Token]int{ 17 token.OR: precedenceOr, 18 token.XOR: precedenceXor, 19 token.AND: precedenceAnd, 20 token.ADD: precedenceAdd, 21 token.SUB: precedenceAdd, 22 token.MUL: precedenceMul, 23 token.QUO: precedenceMul, 24 token.REM: precedenceMul, 25 } 26 ) 27 28 const ( 29 precedenceLowest = iota + 1 30 precedenceOr 31 precedenceXor 32 precedenceAnd 33 precedenceAdd 34 precedenceMul 35 precedencePrefix 36 ) 37 38 func init() { 39 // This must be done in an init function to avoid an initialization order 40 // failure. 41 prefixParseFns = map[token.Token]func(*tokenizer) (ast.Expr, *scanner.Error){ 42 token.IDENT: parseIdent, 43 token.INT: parseBasicLit, 44 token.FLOAT: parseBasicLit, 45 token.STRING: parseBasicLit, 46 token.CHAR: parseBasicLit, 47 token.LPAREN: parseParenExpr, 48 token.SUB: parseUnaryExpr, 49 } 50 } 51 52 // parseConst parses the given string as a C constant. 53 func parseConst(pos token.Pos, fset *token.FileSet, value string) (ast.Expr, *scanner.Error) { 54 t := newTokenizer(pos, fset, value) 55 expr, err := parseConstExpr(t, precedenceLowest) 56 t.Next() 57 if t.curToken != token.EOF { 58 return nil, &scanner.Error{ 59 Pos: t.fset.Position(t.curPos), 60 Msg: "unexpected token " + t.curToken.String() + ", expected end of expression", 61 } 62 } 63 return expr, err 64 } 65 66 // parseConstExpr parses a stream of C tokens to a Go expression. 67 func parseConstExpr(t *tokenizer, precedence int) (ast.Expr, *scanner.Error) { 68 if t.curToken == token.EOF { 69 return nil, &scanner.Error{ 70 Pos: t.fset.Position(t.curPos), 71 Msg: "empty constant", 72 } 73 } 74 prefix := prefixParseFns[t.curToken] 75 if prefix == nil { 76 return nil, &scanner.Error{ 77 Pos: t.fset.Position(t.curPos), 78 Msg: fmt.Sprintf("unexpected token %s", t.curToken), 79 } 80 } 81 leftExpr, err := prefix(t) 82 83 for t.peekToken != token.EOF && precedence < precedences[t.peekToken] { 84 switch t.peekToken { 85 case token.OR, token.XOR, token.AND, token.ADD, token.SUB, token.MUL, token.QUO, token.REM: 86 t.Next() 87 leftExpr, err = parseBinaryExpr(t, leftExpr) 88 } 89 } 90 91 return leftExpr, err 92 } 93 94 func parseIdent(t *tokenizer) (ast.Expr, *scanner.Error) { 95 return &ast.Ident{ 96 NamePos: t.curPos, 97 Name: "C." + t.curValue, 98 }, nil 99 } 100 101 func parseBasicLit(t *tokenizer) (ast.Expr, *scanner.Error) { 102 return &ast.BasicLit{ 103 ValuePos: t.curPos, 104 Kind: t.curToken, 105 Value: t.curValue, 106 }, nil 107 } 108 109 func parseParenExpr(t *tokenizer) (ast.Expr, *scanner.Error) { 110 lparen := t.curPos 111 t.Next() 112 x, err := parseConstExpr(t, precedenceLowest) 113 if err != nil { 114 return nil, err 115 } 116 t.Next() 117 if t.curToken != token.RPAREN { 118 return nil, unexpectedToken(t, token.RPAREN) 119 } 120 expr := &ast.ParenExpr{ 121 Lparen: lparen, 122 X: x, 123 Rparen: t.curPos, 124 } 125 return expr, nil 126 } 127 128 func parseBinaryExpr(t *tokenizer, left ast.Expr) (ast.Expr, *scanner.Error) { 129 expression := &ast.BinaryExpr{ 130 X: left, 131 Op: t.curToken, 132 OpPos: t.curPos, 133 } 134 precedence := precedences[t.curToken] 135 t.Next() 136 right, err := parseConstExpr(t, precedence) 137 expression.Y = right 138 return expression, err 139 } 140 141 func parseUnaryExpr(t *tokenizer) (ast.Expr, *scanner.Error) { 142 expression := &ast.UnaryExpr{ 143 OpPos: t.curPos, 144 Op: t.curToken, 145 } 146 t.Next() 147 x, err := parseConstExpr(t, precedencePrefix) 148 expression.X = x 149 return expression, err 150 } 151 152 // unexpectedToken returns an error of the form "unexpected token FOO, expected 153 // BAR". 154 func unexpectedToken(t *tokenizer, expected token.Token) *scanner.Error { 155 return &scanner.Error{ 156 Pos: t.fset.Position(t.curPos), 157 Msg: fmt.Sprintf("unexpected token %s, expected %s", t.curToken, expected), 158 } 159 } 160 161 // tokenizer reads C source code and converts it to Go tokens. 162 type tokenizer struct { 163 curPos, peekPos token.Pos 164 fset *token.FileSet 165 curToken, peekToken token.Token 166 curValue, peekValue string 167 buf string 168 } 169 170 // newTokenizer initializes a new tokenizer, positioned at the first token in 171 // the string. 172 func newTokenizer(start token.Pos, fset *token.FileSet, buf string) *tokenizer { 173 t := &tokenizer{ 174 peekPos: start, 175 fset: fset, 176 buf: buf, 177 peekToken: token.ILLEGAL, 178 } 179 // Parse the first two tokens (cur and peek). 180 t.Next() 181 t.Next() 182 return t 183 } 184 185 // Next consumes the next token in the stream. There is no return value, read 186 // the next token from the pos, token and value properties. 187 func (t *tokenizer) Next() { 188 // The previous peek is now the current token. 189 t.curPos = t.peekPos 190 t.curToken = t.peekToken 191 t.curValue = t.peekValue 192 193 // Parse the next peek token. 194 t.peekPos += token.Pos(len(t.curValue)) 195 for { 196 if len(t.buf) == 0 { 197 t.peekToken = token.EOF 198 return 199 } 200 c := t.buf[0] 201 switch { 202 case c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v': 203 // Skip whitespace. 204 // Based on this source, not sure whether it represents C whitespace: 205 // https://en.cppreference.com/w/cpp/string/byte/isspace 206 t.peekPos++ 207 t.buf = t.buf[1:] 208 case len(t.buf) >= 2 && (string(t.buf[:2]) == "||" || string(t.buf[:2]) == "&&"): 209 // Two-character tokens. 210 switch c { 211 case '&': 212 t.peekToken = token.LAND 213 case '|': 214 t.peekToken = token.LOR 215 } 216 t.peekValue = t.buf[:2] 217 t.buf = t.buf[2:] 218 return 219 case c == '(' || c == ')' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '&' || c == '|' || c == '^': 220 // Single-character tokens. 221 // TODO: ++ (increment) and -- (decrement) operators. 222 switch c { 223 case '(': 224 t.peekToken = token.LPAREN 225 case ')': 226 t.peekToken = token.RPAREN 227 case '+': 228 t.peekToken = token.ADD 229 case '-': 230 t.peekToken = token.SUB 231 case '*': 232 t.peekToken = token.MUL 233 case '/': 234 t.peekToken = token.QUO 235 case '%': 236 t.peekToken = token.REM 237 case '&': 238 t.peekToken = token.AND 239 case '|': 240 t.peekToken = token.OR 241 case '^': 242 t.peekToken = token.XOR 243 } 244 t.peekValue = t.buf[:1] 245 t.buf = t.buf[1:] 246 return 247 case c >= '0' && c <= '9': 248 // Numeric constant (int, float, etc.). 249 // Find the last non-numeric character. 250 tokenLen := len(t.buf) 251 hasDot := false 252 for i, c := range t.buf { 253 if c == '.' { 254 hasDot = true 255 } 256 if c >= '0' && c <= '9' || c == '.' || c == '_' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' { 257 tokenLen = i + 1 258 } else { 259 break 260 } 261 } 262 t.peekValue = t.buf[:tokenLen] 263 t.buf = t.buf[tokenLen:] 264 if hasDot { 265 // Integer constants are more complicated than this but this is 266 // a close approximation. 267 // https://en.cppreference.com/w/cpp/language/integer_literal 268 t.peekToken = token.FLOAT 269 t.peekValue = strings.TrimRight(t.peekValue, "f") 270 } else { 271 t.peekToken = token.INT 272 t.peekValue = strings.TrimRight(t.peekValue, "uUlL") 273 } 274 return 275 case c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_': 276 // Identifier. Find all remaining tokens that are part of this 277 // identifier. 278 tokenLen := len(t.buf) 279 for i, c := range t.buf { 280 if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' || c == '_' { 281 tokenLen = i + 1 282 } else { 283 break 284 } 285 } 286 t.peekValue = t.buf[:tokenLen] 287 t.buf = t.buf[tokenLen:] 288 t.peekToken = token.IDENT 289 return 290 case c == '"': 291 // String constant. Find the first '"' character that is not 292 // preceded by a backslash. 293 escape := false 294 tokenLen := len(t.buf) 295 for i, c := range t.buf { 296 if i != 0 && c == '"' && !escape { 297 tokenLen = i + 1 298 break 299 } 300 if !escape { 301 escape = c == '\\' 302 } 303 } 304 t.peekToken = token.STRING 305 t.peekValue = t.buf[:tokenLen] 306 t.buf = t.buf[tokenLen:] 307 return 308 case c == '\'': 309 // Char (rune) constant. Find the first '\'' character that is not 310 // preceded by a backslash. 311 escape := false 312 tokenLen := len(t.buf) 313 for i, c := range t.buf { 314 if i != 0 && c == '\'' && !escape { 315 tokenLen = i + 1 316 break 317 } 318 if !escape { 319 escape = c == '\\' 320 } 321 } 322 t.peekToken = token.CHAR 323 t.peekValue = t.buf[:tokenLen] 324 t.buf = t.buf[tokenLen:] 325 return 326 default: 327 t.peekToken = token.ILLEGAL 328 return 329 } 330 } 331 }