modernc.org/cc@v1.0.1/lexer.go (about) 1 // Copyright 2016 The CC Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package cc // import "modernc.org/cc" 6 7 import ( 8 "bytes" 9 "fmt" 10 "go/token" 11 "io" 12 "strings" 13 14 "modernc.org/golex/lex" 15 "modernc.org/xc" 16 ) 17 18 // Lexer state 19 const ( 20 lsZero = iota 21 lsBOL // Preprocessor: Beginning of line. 22 lsDefine // Preprocessor: Seen ^#define. 23 lsSeekRParen // Preprocessor: Seen ^#define identifier( 24 lsTokens // Preprocessor: Convert anything to PPOTHER until EOL. 25 lsUndef // Preprocessor: Seen ^#undef. 26 lsConstExpr0 // Preprocessor: Parsing constant expression. 27 lsConstExpr // Preprocessor: Parsing constant expression. 28 lsTranslationUnit0 // 29 lsTranslationUnit // 30 ) 31 32 type trigraphsReader struct { 33 *lex.Lexer // 34 pos0 token.Pos // 35 sc int // Start condition. 36 } 37 38 func (t *trigraphsReader) ReadRune() (rune, int, error) { return lex.RuneEOF, 0, io.EOF } 39 40 func (t *trigraphsReader) ReadChar() (c lex.Char, size int, err error) { 41 r := rune(t.scan()) 42 pos0 := t.pos0 43 pos := t.Lookahead().Pos() 44 t.pos0 = pos 45 c = lex.NewChar(t.First.Pos(), r) 46 return c, int(pos - pos0), nil 47 } 48 49 type byteReader struct { 50 io.Reader 51 b [1]byte 52 } 53 54 func (b *byteReader) ReadRune() (r rune, size int, err error) { 55 if _, err = b.Read(b.b[:]); err != nil { 56 return -1, 0, err 57 } 58 59 return rune(b.b[0]), 1, nil 60 } 61 62 type lexer struct { 63 *lex.Lexer // 64 ch chan []xc.Token // 65 commentPos0 token.Pos // 66 constExprToks []xc.Token // 67 constantExpression *ConstantExpression // 68 cpp func([]xc.Token) // 69 encBuf []byte // PPTokens 70 encBuf1 [30]byte // Rune, position, optional value ID. 71 encPos token.Pos // For delta pos encoding 72 eof lex.Char // 73 example interface{} // 74 exampleRule int // 75 externs map[int]*Declarator // 76 file *token.File // 77 finalNLInjected bool // 78 fnDeclarator *Declarator // 79 includePaths []string // 80 injectFunc []xc.Token // [0], 6.4.2.2. 81 iota int64 // 82 isPreprocessing bool // 83 last xc.Token // 84 model *Model // 85 preprocessingFile *PreprocessingFile // 86 report *xc.Report // 87 sc int // Start condition. 88 scope *Bindings // 89 scs int // Start condition stack. 90 state int // Lexer state. 91 sysIncludePaths []string // 92 t *trigraphsReader // 93 textLine []xc.Token // 94 toC bool // Whether to translate preprocessor identifiers to reserved C words. 95 tokLast xc.Token // 96 tokPrev xc.Token // 97 toks []xc.Token // Parsing preprocessor constant expression. 98 translationUnit *TranslationUnit // 99 tweaks *tweaks // 100 101 fsm struct { 102 comment int 103 pos token.Pos 104 state int 105 } 106 } 107 108 func newLexer(nm string, sz int, r io.RuneReader, report *xc.Report, tweaks *tweaks, opts ...lex.Option) (*lexer, error) { 109 file := fset.AddFile(nm, -1, sz) 110 t := &trigraphsReader{} 111 lx, err := lex.New( 112 file, 113 &byteReader{Reader: r.(io.Reader)}, 114 lex.ErrorFunc(func(pos token.Pos, msg string) { 115 report.Err(pos, msg) 116 }), 117 lex.RuneClass(func(r rune) int { return int(r) }), 118 ) 119 if err != nil { 120 return nil, err 121 } 122 123 t.Lexer = lx 124 t.pos0 = lx.Lookahead().Pos() 125 if tweaks.enableTrigraphs { 126 t.sc = scTRIGRAPHS 127 } 128 r = t 129 130 scope := newBindings(nil, ScopeFile) 131 lexer := &lexer{ 132 externs: map[int]*Declarator{}, 133 file: file, 134 report: report, 135 scope: scope, 136 scs: -1, // Stack empty 137 t: t, 138 tweaks: tweaks, 139 } 140 if lexer.Lexer, err = lex.New( 141 file, 142 r, 143 append(opts, lex.RuneClass(rune2class))..., 144 ); err != nil { 145 return nil, err 146 } 147 148 return lexer, nil 149 } 150 151 func newSimpleLexer(cpp func([]xc.Token), report *xc.Report, tweaks *tweaks) *lexer { 152 return &lexer{ 153 cpp: cpp, 154 externs: map[int]*Declarator{}, 155 report: report, 156 scope: newBindings(nil, ScopeFile), 157 tweaks: tweaks, 158 } 159 } 160 161 func (l *lexer) push(sc int) { 162 if l.scs >= 0 { // Stack overflow. 163 if l.sc != scDIRECTIVE || sc != scCOMMENT { 164 panic("internal error") 165 } 166 167 // /*-style comment in a line starting with # 168 l.pop() 169 } 170 171 l.scs = l.sc 172 l.sc = sc 173 } 174 175 func (l *lexer) pop() { 176 if l.scs < 0 { // Stack underflow 177 panic("internal error") 178 } 179 l.sc = l.scs 180 l.scs = -1 // Stack empty. 181 } 182 183 func (l *lexer) pushScope(kind Scope) (old *Bindings) { 184 old = l.scope 185 l.scope = newBindings(old, kind) 186 l.scope.maxAlign = 1 187 return old 188 } 189 190 func (l *lexer) popScope(tok xc.Token) (old, new *Bindings) { 191 return l.popScopePos(tok.Pos()) 192 } 193 194 func (l *lexer) popScopePos(pos token.Pos) (old, new *Bindings) { 195 old = l.scope 196 new = l.scope.Parent 197 if new == nil { 198 l.report.Err(pos, "cannot pop scope") 199 return nil, old 200 } 201 202 l.scope = new 203 return old, new 204 } 205 206 const ( 207 fsmZero = iota 208 fsmHasComment 209 ) 210 211 var genCommentLeader = []byte("/*") 212 213 func (l *lexer) comment(general bool) { 214 if l.tweaks.comments != nil { 215 b := l.TokenBytes(nil) 216 pos := l.First.Pos() 217 if general { 218 pos = l.commentPos0 219 b = append(genCommentLeader, b...) 220 } 221 if l.Lookahead().Rune == '\n' { 222 b = append(b, '\n') 223 } 224 225 switch fsm := &l.fsm; fsm.state { 226 case fsmHasComment: 227 if pos == fsm.pos+token.Pos(len(dict.S(l.fsm.comment))) { 228 fsm.comment = dict.ID(append(dict.S(fsm.comment), b...)) 229 break 230 } 231 232 fallthrough 233 case fsmZero: 234 fsm.state = fsmHasComment 235 fsm.comment = dict.ID(b) 236 fsm.pos = pos 237 } 238 } 239 } 240 241 func (l *lexer) scanChar() (c lex.Char) { 242 again: 243 r := rune(l.scan()) 244 switch r { 245 case ' ': 246 if l.state != lsTokens || l.tokLast.Rune == ' ' { 247 goto again 248 } 249 case '\n': 250 if l.state == lsTokens { 251 l.encodeToken(xc.Token{Char: lex.NewChar(l.First.Pos(), ' '), Val: idSpace}) 252 } 253 l.state = lsBOL 254 l.sc = scINITIAL 255 l.scs = -1 // Stack empty 256 case PREPROCESSING_FILE: 257 l.state = lsBOL 258 l.isPreprocessing = true 259 case CONSTANT_EXPRESSION, TRANSLATION_UNIT: //TODO- CONSTANT_EXPRESSION, then must add some manual yy:examples. 260 l.toC = true 261 } 262 263 fp := l.First.Pos() 264 if l.fsm.state == fsmHasComment { 265 switch { 266 case r == '\n' && fp == l.fsm.pos+token.Pos(len(dict.S(l.fsm.comment)))-1: 267 // keep going 268 case r != '\n' && fp == l.fsm.pos+token.Pos(len(dict.S(l.fsm.comment))): 269 l.tweaks.comments[fp] = dict.ID(bytes.TrimSpace(dict.S(l.fsm.comment))) 270 l.fsm.state = fsmZero 271 default: 272 l.fsm.state = fsmZero 273 } 274 } 275 276 return lex.NewChar(l.First.Pos(), r) 277 } 278 279 func (l *lexer) scanToken() (tok xc.Token) { 280 switch l.state { 281 case lsConstExpr0: 282 tok = xc.Token{Char: lex.NewChar(0, CONSTANT_EXPRESSION)} 283 l.state = lsConstExpr 284 case lsConstExpr: 285 if len(l.toks) == 0 { 286 tok = xc.Token{Char: lex.NewChar(l.tokLast.Pos(), lex.RuneEOF)} 287 break 288 } 289 290 tok = l.toks[0] 291 l.toks = l.toks[1:] 292 case lsTranslationUnit0: 293 tok = xc.Token{Char: lex.NewChar(0, TRANSLATION_UNIT)} 294 l.state = lsTranslationUnit 295 l.toC = true 296 case lsTranslationUnit: 297 again: 298 for len(l.textLine) == 0 { 299 var ok bool 300 if l.textLine, ok = <-l.ch; !ok { 301 return xc.Token{Char: lex.NewChar(l.tokLast.Pos(), lex.RuneEOF)} 302 } 303 304 if l.cpp != nil { 305 l.cpp(l.textLine) 306 } 307 } 308 tok = l.textLine[0] 309 l.textLine = l.textLine[1:] 310 if tok.Rune == ' ' { 311 goto again 312 } 313 314 tok = l.scope.lexerHack(tok, l.tokLast) 315 default: 316 c := l.scanChar() 317 if c.Rune == ccEOF { 318 c = lex.NewChar(c.Pos(), lex.RuneEOF) 319 if l.isPreprocessing && l.last.Rune != '\n' && !l.finalNLInjected { 320 l.finalNLInjected = true 321 l.eof = c 322 c.Rune = '\n' 323 l.state = lsBOL 324 return xc.Token{Char: c} 325 } 326 327 return xc.Token{Char: c} 328 } 329 330 val := 0 331 if tokHasVal[c.Rune] { 332 b := l.TokenBytes(nil) 333 val = dict.ID(b) 334 //TODO handle ID UCNs 335 //TODO- chars := l.Token() 336 //TODO- switch c.Rune { 337 //TODO- case IDENTIFIER, IDENTIFIER_LPAREN: 338 //TODO- b := l.TokenBytes(func(buf *bytes.Buffer) { 339 //TODO- for i := 0; i < len(chars); { 340 //TODO- switch c := chars[i]; { 341 //TODO- case c.Rune == '$' && !l.tweaks.enableDlrInIdentifiers: 342 //TODO- l.report.Err(c.Pos(), "identifier character set extension '$' not enabled") 343 //TODO- i++ 344 //TODO- case c.Rune == '\\': 345 //TODO- r, n := decodeUCN(chars[i:]) 346 //TODO- buf.WriteRune(r) 347 //TODO- i += n 348 //TODO- case c.Rune < 0x80: // ASCII 349 //TODO- buf.WriteByte(byte(c.Rune)) 350 //TODO- i++ 351 //TODO- default: 352 //TODO- panic("internal error") 353 //TODO- } 354 //TODO- } 355 //TODO- }) 356 //TODO- val = dict.ID(b) 357 //TODO- default: 358 //TODO- panic("internal error: " + yySymName(int(c.Rune))) 359 //TODO- } 360 } 361 tok = xc.Token{Char: c, Val: val} 362 if !l.isPreprocessing { 363 tok = l.scope.lexerHack(tok, l.tokLast) 364 } 365 } 366 if l.toC { 367 tok = toC(tok, l.tweaks) 368 } 369 l.tokPrev = l.tokLast 370 l.tokLast = tok 371 return tok 372 } 373 374 // Lex implements yyLexer 375 func (l *lexer) Lex(lval *yySymType) int { 376 var tok xc.Token 377 if x := l.injectFunc; l.exampleRule == 0 && len(x) != 0 { 378 tok = x[0] 379 l.injectFunc = x[1:] 380 } else { 381 tok = l.scanToken() 382 } 383 //dbg("Lex %s", PrettyString(tok)) 384 if l.constExprToks != nil { 385 l.constExprToks = append(l.constExprToks, tok) 386 } 387 l.last = tok 388 if tok.Rune == lex.RuneEOF { 389 lval.Token = tok 390 return 0 391 } 392 393 switch l.state { 394 case lsBOL: 395 switch tok.Rune { 396 case PREPROCESSING_FILE, '\n': 397 // nop 398 case '#': 399 l.push(scDIRECTIVE) 400 tok = l.scanToken() 401 switch tok.Rune { 402 case '\n': 403 tok.Char = lex.NewChar(tok.Pos(), PPHASH_NL) 404 case PPDEFINE: 405 l.push(scDEFINE) 406 l.state = lsDefine 407 case PPELIF, PPENDIF, PPERROR, PPIF, PPLINE, PPPRAGMA: 408 l.sc = scINITIAL 409 l.state = lsTokens 410 case PPELSE, PPIFDEF, PPIFNDEF: 411 l.state = lsZero 412 case PPUNDEF: 413 l.state = lsUndef 414 case PPINCLUDE: 415 l.sc = scHEADER 416 l.state = lsTokens 417 case PPINCLUDE_NEXT: 418 if l.tweaks.enableIncludeNext { 419 l.sc = scHEADER 420 l.state = lsTokens 421 break 422 } 423 424 l.state = lsTokens 425 tok.Char = lex.NewChar(tok.Pos(), PPNONDIRECTIVE) 426 tok.Val = xc.Dict.SID("include_next") 427 default: 428 l.state = lsTokens 429 tok.Char = lex.NewChar(tok.Pos(), PPNONDIRECTIVE) 430 l.pop() 431 } 432 default: 433 l.encodeToken(tok) 434 tok.Char = lex.NewChar(tok.Pos(), PPOTHER) 435 l.state = lsTokens 436 } 437 case lsDefine: 438 l.pop() 439 switch tok.Rune { 440 case IDENTIFIER: 441 l.state = lsTokens 442 case IDENTIFIER_LPAREN: 443 l.state = lsSeekRParen 444 default: 445 l.state = lsZero 446 } 447 case lsSeekRParen: 448 if tok.Rune == ')' { 449 l.state = lsTokens 450 } 451 case lsTokens: 452 l.encodeToken(tok) 453 tok.Char = lex.NewChar(tok.Pos(), PPOTHER) 454 case lsUndef: 455 l.state = lsTokens 456 } 457 458 lval.Token = tok 459 return int(tok.Char.Rune) 460 } 461 462 // Error Implements yyLexer. 463 func (l *lexer) Error(msg string) { 464 msg = strings.Replace(msg, "$end", "EOF", -1) 465 t := l.last 466 parts := strings.Split(msg, ", expected ") 467 if len(parts) == 2 && strings.HasPrefix(parts[0], "unexpected ") && tokHasVal[t.Rune] { 468 msg = fmt.Sprintf("%s %s, expected %s", parts[0], t.S(), parts[1]) 469 } 470 l.report.ErrTok(t, "%s", msg) 471 } 472 473 // Reduced implements yyLexerEx 474 func (l *lexer) Reduced(rule, state int, lval *yySymType) (stop bool) { 475 if n := l.exampleRule; n >= 0 && rule != n { 476 return false 477 } 478 479 switch x := lval.node.(type) { 480 case interface { 481 fragment() interface{} 482 }: 483 l.example = x.fragment() 484 default: 485 l.example = x 486 } 487 return true 488 } 489 490 func (l *lexer) parsePPConstExpr0(list PPTokenList, p *pp) (interface{}, Type) { 491 l.toks = l.toks[:0] 492 p.expand(&tokenBuf{decodeTokens(list, nil, true)}, true, func(toks []xc.Token) { 493 l.toks = append(l.toks, toks...) 494 }) 495 w := 0 496 for _, tok := range l.toks { 497 switch tok.Rune { 498 case ' ': 499 // nop 500 case IDENTIFIER: 501 if p.macros.m[tok.Val] != nil { 502 l.report.ErrTok(tok, "expected constant expression") 503 return nil, nil 504 } 505 506 tok.Rune = INTCONST 507 tok.Val = id0 508 fallthrough 509 default: 510 l.toks[w] = tok 511 w++ 512 } 513 } 514 l.toks = l.toks[:w] 515 l.state = lsConstExpr0 516 if yyParse(l) == 0 { 517 e := l.constantExpression 518 return e.Value, e.Type 519 } 520 521 return nil, nil 522 } 523 524 func (l *lexer) parsePPConstExpr(list PPTokenList, p *pp) bool { 525 if v, _ := l.parsePPConstExpr0(list, p); v != nil { 526 return isNonZero(v) 527 } 528 529 return false 530 }