modernc.org/cc@v1.0.1/v2/cpp.go (about) 1 // Copyright 2017 The CC Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // [0]: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf 6 // [1]: https://www.spinellis.gr/blog/20060626/cpp.algo.pdf 7 8 package cc // import "modernc.org/cc/v2" 9 10 import ( 11 "bytes" 12 "encoding/binary" 13 "fmt" 14 "go/token" 15 "io" 16 "math" 17 "os" 18 "path/filepath" 19 "strconv" 20 "strings" 21 22 "modernc.org/golex/lex" 23 "modernc.org/ir" 24 "modernc.org/mathutil" 25 "modernc.org/xc" 26 ) 27 28 const ( 29 maxIncludeLevel = 200 // gcc, std is at least 15. 30 ) 31 32 var ( 33 _ tokenReader = (*cppReader)(nil) 34 _ tokenReader = (*tokenBuffer)(nil) 35 _ tokenReader = (*tokenPipe)(nil) 36 _ tokenWriter = (*tokenBuffer)(nil) 37 _ tokenWriter = (*tokenPipe)(nil) 38 ) 39 40 type cppToken struct { 41 xc.Token 42 hs map[int]struct{} 43 } 44 45 func (t *cppToken) has(nm int) bool { _, ok := t.hs[nm]; return ok } 46 47 func (t *cppToken) cloneAdd(nm int) map[int]struct{} { 48 nhs := map[int]struct{}{nm: {}} 49 for k, v := range t.hs { 50 nhs[k] = v 51 } 52 return nhs 53 } 54 55 func (t *cppToken) hsAdd(hs map[int]struct{}) { 56 if len(hs) == 0 { 57 return 58 } 59 60 if len(t.hs) == 0 { 61 t.hs = map[int]struct{}{} 62 } 63 for k := range hs { 64 t.hs[k] = struct{}{} 65 } 66 } 67 68 type tokenWriter interface { 69 write(...cppToken) 70 } 71 72 type tokenReader interface { 73 read() cppToken 74 unget(cppToken) 75 ungets(...cppToken) 76 } 77 78 type tokenPipe struct { 79 b []byte 80 ch chan cppToken 81 s []cppToken 82 83 emitWhiteSpace bool 84 } 85 86 func newTokenPipe(n int) *tokenPipe { return &tokenPipe{ch: make(chan cppToken, n)} } 87 88 func (*tokenPipe) unget(cppToken) { panic("internal error") } 89 func (*tokenPipe) ungets(...cppToken) { panic("internal error") } 90 91 func (p *tokenPipe) close() { 92 if len(p.s) != 0 { 93 p.flush() 94 } 95 
close(p.ch) 96 } 97 98 func (p *tokenPipe) flush() { 99 p.b = p.b[:0] 100 p.b = append(p.b, '"') 101 for _, t := range p.s { 102 s := dict.S(t.Val) 103 p.b = append(p.b, s[1:len(s)-1]...) 104 } 105 p.b = append(p.b, '"') 106 p.s[0].Val = dict.ID(p.b) 107 p.ch <- p.s[0] 108 p.s = p.s[:0] 109 } 110 111 func (p *tokenPipe) read() cppToken { 112 t, ok := <-p.ch 113 if !ok { 114 t.Rune = ccEOF 115 } 116 return t 117 } 118 119 func (p *tokenPipe) write(toks ...cppToken) { 120 for _, t := range toks { 121 switch t.Rune { 122 case '\n', ' ': 123 if p.emitWhiteSpace { 124 p.ch <- t 125 } 126 case STRINGLITERAL, LONGSTRINGLITERAL: 127 p.s = append(p.s, t) 128 default: 129 if len(p.s) != 0 { 130 p.flush() 131 } 132 p.ch <- t 133 } 134 } 135 } 136 137 type tokenBuffer struct { 138 toks0 []cppToken 139 toks []cppToken 140 ungetBuffer 141 142 last rune 143 } 144 145 func (b *tokenBuffer) write(t ...cppToken) { 146 b.toks = append(b.toks, t...) 147 if b.toks0 == nil || &b.toks0[0] != &b.toks[0] { 148 b.toks0 = b.toks 149 } 150 } 151 152 func (b *tokenBuffer) read() (t cppToken) { 153 if len(b.ungetBuffer) != 0 { 154 return b.ungetBuffer.read() 155 } 156 157 if len(b.toks) == 0 { 158 t.Rune = ccEOF 159 return 160 } 161 162 t = b.toks[0] 163 b.toks = b.toks[1:] 164 if len(b.toks) == 0 { 165 b.toks = b.toks0[:0] 166 } 167 if t.Rune == '#' && (b.last == '\n' || b.last == 0) { 168 t.Rune = DIRECTIVE 169 } 170 b.last = t.Rune 171 return t 172 } 173 174 type cppReader struct { 175 decBuf []byte 176 decPos token.Pos 177 tu [][]uint32 178 ungetBuffer 179 180 last rune 181 } 182 183 func (c *cppReader) unget(t cppToken) { c.ungetBuffer = append(c.ungetBuffer, t) } 184 185 func (c *cppReader) read() (t cppToken) { 186 if len(c.ungetBuffer) != 0 { 187 return c.ungetBuffer.read() 188 } 189 190 more: 191 if len(c.decBuf) == 0 { 192 if len(c.tu) == 0 { 193 t.Rune = ccEOF 194 return t 195 } 196 197 if len(c.tu[0]) == 0 { 198 c.tu = c.tu[1:] 199 goto more 200 } 201 202 c.decBuf = 
dict.S(int(c.tu[0][0])) 203 c.tu[0] = c.tu[0][1:] 204 c.decPos = 0 205 } 206 207 c.decBuf, c.decPos, t.Token = decodeToken(c.decBuf, c.decPos) 208 if t.Rune == '#' && (c.last == '\n' || c.last == 0) { 209 t.Rune = DIRECTIVE 210 } 211 c.last = t.Rune 212 return t 213 } 214 215 type conds []cond 216 217 func (c conds) on() bool { return condOn[c.tos()] } 218 func (c conds) pop() conds { return c[:len(c)-1] } 219 func (c conds) push(n cond) conds { return append(c, n) } 220 func (c conds) tos() cond { return c[len(c)-1] } 221 222 // Macro represents a preprocessor Macro. 223 type Macro struct { 224 Args []int // Numeric IDs of argument identifiers. 225 DefTok xc.Token // Macro name definition token. 226 ReplacementToks []xc.Token // The tokens that replace the macro. R/O 227 228 IsFnLike bool // Whether the macro is function like. 229 IsVariadic bool // Whether the macro is variadic. 230 ident bool 231 } 232 233 func newMacro(def xc.Token, repl []xc.Token) *Macro { 234 return &Macro{DefTok: def, ReplacementToks: append([]xc.Token(nil), repl...)} 235 } 236 237 // Eval attempts to evaluate m, which must be a simple macro, like `#define foo numeric-literal`. 
238 func (m *Macro) Eval(model Model, macros map[int]*Macro) (op Operand, err error) { 239 returned := false 240 241 defer func() { 242 e := recover() 243 if !returned && err == nil { 244 err = fmt.Errorf("PANIC: %v\n%s", e, debugStack()) 245 } 246 }() 247 248 if m.IsFnLike { 249 return op, fmt.Errorf("cannot evaluate function-like macro") 250 } 251 252 ctx, err := newContext(&Tweaks{}) 253 if err != nil { 254 return op, err 255 } 256 257 ctx.model = model 258 c := newCPP(ctx) 259 c.macros = macros 260 if op, _ = c.constExpr(cppToks(m.ReplacementToks), false); op.Type == nil { 261 return op, fmt.Errorf("cannot evaluate macro") 262 } 263 264 returned = true 265 return op, nil 266 } 267 268 func (m *Macro) param(ap [][]cppToken, nm int, out *[]cppToken) bool { 269 *out = nil 270 if nm == idVaArgs { 271 if !m.IsVariadic { 272 return false 273 } 274 275 if i := len(m.Args); i < len(ap) { 276 o := *out 277 for i, v := range ap[i:] { 278 if i != 0 { 279 switch lo := len(o); lo { 280 case 0: 281 var t cppToken 282 t.Rune = ',' 283 t.Val = 0 284 o = append(o, t) 285 default: 286 t := o[len(o)-1] 287 t.Rune = ',' 288 t.Val = 0 289 o = append(o, t) 290 t.Rune = ' ' 291 o = append(o, t) 292 } 293 } 294 o = append(o, v...) 295 } 296 *out = o 297 } 298 return true 299 } 300 301 if len(m.Args) != 0 && nm == m.Args[len(m.Args)-1] && m.IsVariadic && !m.ident { 302 if i := len(m.Args) - 1; i < len(ap) { 303 o := *out 304 for i, v := range ap[i:] { 305 if i != 0 { 306 switch lo := len(o); lo { 307 case 0: 308 var t cppToken 309 t.Rune = ',' 310 t.Val = 0 311 o = append(o, t) 312 default: 313 t := o[len(o)-1] 314 t.Rune = ',' 315 t.Val = 0 316 o = append(o, t) 317 t.Rune = ' ' 318 o = append(o, t) 319 } 320 } 321 o = append(o, v...) 
322 } 323 *out = o 324 } 325 return true 326 } 327 328 for i, v := range m.Args { 329 if v == nm { 330 *out = ap[i] 331 return true 332 } 333 } 334 return false 335 } 336 337 type nullReader struct{} 338 339 func (nullReader) Read([]byte) (int, error) { return 0, io.EOF } 340 341 type cpp struct { 342 *context 343 includeLevel int 344 lx *lexer 345 macroStack map[int][]*Macro 346 macros map[int]*Macro // name ID: macro 347 toks []cppToken 348 } 349 350 func newCPP(ctx *context) *cpp { 351 lx, err := newLexer(ctx, "", 0, nullReader{}) 352 if err != nil { 353 panic(err) 354 } 355 356 lx.context = ctx 357 r := &cpp{ 358 context: ctx, 359 lx: lx, 360 macroStack: map[int][]*Macro{}, 361 macros: map[int]*Macro{}, 362 } 363 return r 364 } 365 366 func (c *cpp) parse(src ...Source) (tokenReader, error) { 367 var ( 368 encBuf []byte 369 encBuf1 [30]byte // Rune, position, optional value ID. 370 tokBuf []cppToken 371 tu [][]uint32 372 ) 373 for _, v := range src { 374 if pf := v.Cached(); pf != nil { 375 tu = append(tu, pf) 376 continue 377 } 378 379 sz, err := v.Size() 380 if err != nil { 381 return nil, err 382 } 383 384 if sz > mathutil.MaxInt { 385 return nil, fmt.Errorf("%v: file too big: %v", v.Name(), sz) 386 } 387 388 r, err := v.ReadCloser() 389 if err != nil { 390 return nil, err 391 } 392 393 lx, err := newLexer(c.context, v.Name(), int(sz), r) 394 if err != nil { 395 return nil, err 396 } 397 398 if err := func() (err error) { 399 returned := false 400 401 defer func() { 402 e := recover() 403 if !returned && err == nil { 404 err = fmt.Errorf("PANIC: %v\n%s", e, debugStack()) 405 c.err(nopos, "%v", err) 406 } 407 if e := r.Close(); e != nil && err == nil { 408 err = e 409 } 410 }() 411 412 var pf []uint32 413 var t cppToken 414 var toks []cppToken 415 for { 416 ch := lx.cppScan() 417 if ch.Rune == ccEOF { 418 break 419 } 420 421 tokBuf = tokBuf[:0] 422 for { 423 t.Char = ch 424 t.Val = 0 425 if ch.Rune == '\n' { 426 toks = append(cppTrimSpace(tokBuf), t) 427 
break 428 } 429 430 if _, ok := tokHasVal[ch.Rune]; ok { 431 t.Val = dict.ID(lx.TokenBytes(nil)) 432 } 433 tokBuf = append(tokBuf, t) 434 435 if ch = lx.cppScan(); ch.Rune == ccEOF { 436 if !c.tweaks.InjectFinalNL { 437 c.errPos(lx.last.Pos(), "file is missing final newline") 438 } 439 ch.Rune = '\n' 440 } 441 } 442 443 var encPos token.Pos 444 encBuf = encBuf[:0] 445 for _, t := range toks { 446 n := binary.PutUvarint(encBuf1[:], uint64(t.Rune)) 447 pos := t.Pos() 448 n += binary.PutUvarint(encBuf1[n:], uint64(pos-encPos)) 449 encPos = pos 450 if t.Val != 0 { 451 n += binary.PutUvarint(encBuf1[n:], uint64(t.Val)) 452 } 453 encBuf = append(encBuf, encBuf1[:n]...) 454 } 455 id := dict.ID(encBuf) 456 if int64(id) > math.MaxUint32 { 457 panic("internal error 4") 458 } 459 460 pf = append(pf, uint32(id)) 461 } 462 v.Cache(pf) 463 tu = append(tu, pf) 464 returned = true 465 return nil 466 }(); err != nil { 467 return nil, err 468 } 469 } 470 return &cppReader{tu: tu}, nil 471 } 472 func (c *cpp) eval(r tokenReader, w tokenWriter) (err error) { 473 c.macros[idFile] = &Macro{ReplacementToks: []xc.Token{{Char: lex.NewChar(0, STRINGLITERAL)}}} 474 c.macros[idLineMacro] = &Macro{ReplacementToks: []xc.Token{{Char: lex.NewChar(0, INTCONST)}}} 475 if cs := c.expand(r, w, conds(nil).push(condZero), 0, false); len(cs) != 1 || cs.tos() != condZero { 476 return fmt.Errorf("unexpected top of condition stack value: %v", cs) 477 } 478 479 return nil 480 } 481 482 // [1]pg 1. 
483 // 484 // expand(TS ) /* recur, substitute, pushback, rescan */ 485 // { 486 // if TS is {} then 487 // // ---------------------------------------------------------- A 488 // return {}; 489 // 490 // else if TS is T^HS • TS’ and T is in HS then 491 // //----------------------------------------------------------- B 492 // return T^HS • expand(TS’); 493 // 494 // else if TS is T^HS • TS’ and T is a "()-less macro" then 495 // // ---------------------------------------------------------- C 496 // return expand(subst(ts(T), {}, {}, HS \cup {T}, {}) • TS’ ); 497 // 498 // else if TS is T^HS •(•TS’ and T is a "()’d macro" then 499 // // ---------------------------------------------------------- D 500 // check TS’ is actuals • )^HS’ • TS’’ and actuals are "correct for T" 501 // return expand(subst(ts(T), fp(T), actuals,(HS \cap HS’) \cup {T }, {}) • TS’’); 502 // 503 // // ------------------------------------------------------------------ E 504 // note TS must be T^HS • TS’ 505 // return T^HS • expand(TS’); 506 // } 507 func (c *cpp) expand(r tokenReader, w tokenWriter, cs conds, lvl int, expandDefined bool) conds { 508 for { 509 t := r.read() 510 switch t.Rune { 511 // First, if TS is the empty set, the result is the 512 // empty set. 
513 case ccEOF: 514 // -------------------------------------------------- A 515 // return {}; 516 return cs 517 case DIRECTIVE: 518 cs = c.directive(r, w, cs) 519 t.Rune = '\n' 520 t.Val = 0 521 w.write(t) 522 case IDENTIFIER: 523 if !cs.on() { 524 break 525 } 526 527 nm := t.Val 528 if nm == idDefined && expandDefined { 529 more: 530 switch t = r.read(); t.Rune { 531 case ccEOF: 532 panic("TODO") 533 case IDENTIFIER: 534 nm := t.Val 535 t.Rune = INTCONST 536 t.Val = idZero 537 if _, ok := c.macros[nm]; ok { 538 t.Val = idOne 539 } 540 w.write(t) 541 continue 542 case ' ': 543 goto more 544 case '(': // defined(name) 545 var u cppToken 546 switch t = r.read(); t.Rune { 547 case ccEOF: 548 panic("TODO") 549 case IDENTIFIER: 550 nm := t.Val 551 u = t 552 u.Rune = INTCONST 553 u.Val = idZero 554 if _, ok := c.macros[nm]; ok { 555 u.Val = idOne 556 } 557 more2: 558 switch t = r.read(); t.Rune { 559 case ccEOF: 560 panic("TODO") 561 case ' ': 562 goto more2 563 case ')': 564 // ok done 565 w.write(u) 566 continue 567 default: 568 panic(t.String()) 569 } 570 default: 571 panic(t.String()) 572 } 573 default: 574 panic(t.String()) 575 } 576 } 577 578 // Otherwise, if the token sequence begins with a token 579 // whose hide set contains that token, then the result 580 // is the token sequence beginning with that token 581 // (including its hide set) followed by the result of 582 // expand on the rest of the token sequence. 
583 if t.has(nm) { 584 // ------------------------------------------ B 585 // return T^HS • expand(TS’); 586 w.write(t) 587 continue 588 } 589 590 m := c.macros[nm] 591 if m != nil && !m.IsFnLike { 592 // Otherwise, if the token sequence begins with 593 // an object-like macro, the result is the 594 // expansion of the rest of the token sequence 595 // beginning with the sequence returned by 596 // subst invoked with the replacement token 597 // sequence for the macro, two empty sets, the 598 // union of the macro’s hide set and the macro 599 // itself, and an empty set. 600 switch nm { 601 case idFile: 602 m.ReplacementToks[0].Val = dict.SID(fmt.Sprintf("%q", c.position(t).Filename)) 603 case idLineMacro: 604 m.ReplacementToks[0].Val = dict.SID(fmt.Sprint(c.position(t).Line)) 605 } 606 // ------------------------------------------ C 607 // return expand(subst(ts(T), {}, {}, HS \cup {T}, {}) • TS’ ); 608 toks := c.subst(m, nil, t.cloneAdd(nm), expandDefined) 609 for i, v := range toks { 610 toks[i].Char = lex.NewChar(t.Pos(), v.Rune) 611 } 612 r.ungets(toks...) 
613 continue 614 } 615 616 if m != nil && m.IsFnLike { 617 // ------------------------------------------ D 618 // check TS’ is actuals • )^HS’ • TS’’ and actuals are "correct for T" 619 // return expand(subst(ts(T), fp(T), actuals,(HS \cap HS’) \cup {T }, {}) • TS’’); 620 hs := t.hs 621 again: 622 switch t2 := r.read(); t2.Rune { 623 case '\n', ' ': 624 goto again 625 case '(': 626 // ok 627 case ccEOF: 628 w.write(t) 629 continue 630 default: 631 w.write(t) 632 w.write(t2) 633 continue 634 } 635 636 ap, hs2 := c.actuals(m, r) 637 switch { 638 case len(hs2) == 0: 639 hs2 = map[int]struct{}{nm: {}} 640 default: 641 nhs := map[int]struct{}{} 642 for k := range hs { 643 if _, ok := hs2[k]; ok { 644 nhs[k] = struct{}{} 645 } 646 } 647 nhs[nm] = struct{}{} 648 hs2 = nhs 649 } 650 toks := c.subst(m, ap, hs2, expandDefined) 651 for i, v := range toks { 652 toks[i].Char = lex.NewChar(t.Pos(), v.Rune) 653 } 654 r.ungets(toks...) 655 continue 656 } 657 658 w.write(t) 659 default: 660 // -------------------------------------------------- E 661 if !cs.on() { 662 break 663 } 664 665 w.write(t) 666 } 667 } 668 } 669 670 func (c *cpp) pragmaActuals(nd Node, line []cppToken) (out []cppToken) { 671 first := true 672 for { 673 if len(line) == 0 { 674 c.err(nd, "unexpected EOF") 675 return nil 676 } 677 678 t := line[0] 679 line = line[1:] 680 switch t.Rune { 681 case '(': 682 if !first { 683 panic(fmt.Errorf("%v", t)) 684 } 685 686 first = false 687 case STRINGLITERAL: 688 out = append(out, t) 689 case ')': 690 return out 691 default: 692 panic(fmt.Errorf("%v: %v (%v)", c.position(t), t, yySymName(int(t.Rune)))) 693 } 694 } 695 } 696 697 func (c *cpp) actuals(m *Macro, r tokenReader) (out [][]cppToken, hs map[int]struct{}) { 698 var lvl, n int 699 for { 700 t := r.read() 701 if t.Rune < 0 { 702 c.err(t, "unexpected EOF") 703 return nil, nil 704 } 705 706 switch t.Rune { 707 case ',': 708 if lvl == 0 { 709 n++ 710 continue 711 } 712 case ')': 713 if lvl == 0 { 714 for i, v := range 
out { 715 out[i] = cppTrimSpace(v) 716 } 717 for len(out) < len(m.Args) { 718 out = append(out, nil) 719 } 720 return out, t.hs 721 } 722 723 lvl-- 724 case '(': 725 lvl++ 726 } 727 728 for len(out) <= n { 729 out = append(out, []cppToken{}) 730 } 731 if t.Rune == '\n' { 732 t.Rune = ' ' 733 } 734 out[n] = append(out[n], t) 735 } 736 } 737 738 func (c *cpp) expands(toks []cppToken, expandDefined bool) (out []cppToken) { 739 var r, w tokenBuffer 740 r.toks = toks 741 c.expand(&r, &w, conds(nil).push(condZero), 1, expandDefined) 742 return w.toks 743 } 744 745 // [1]pg 2. 746 // 747 // subst(IS, FP, AP, HS, OS) /* substitute args, handle stringize and paste */ 748 // { 749 // if IS is {} then 750 // // ---------------------------------------------------------- A 751 // return hsadd(HS, OS); 752 // 753 // else if IS is # • T • IS’ and T is FP[i] then 754 // // ---------------------------------------------------------- B 755 // return subst(IS’, FP, AP, HS, OS • stringize(select(i, AP))); 756 // 757 // else if IS is ## • T • IS’ and T is FP[i] then 758 // { 759 // // ---------------------------------------------------------- C 760 // if select(i, AP) is {} then /* only if actuals can be empty */ 761 // // -------------------------------------------------- D 762 // return subst(IS’, FP, AP, HS, OS); 763 // else 764 // // -------------------------------------------------- E 765 // return subst(IS’, FP, AP, HS, glue(OS, select(i, AP))); 766 // } 767 // 768 // else if IS is ## • T^HS’ • IS’ then 769 // // ---------------------------------------------------------- F 770 // return subst(IS’, FP, AP, HS, glue(OS, T^HS’)); 771 // 772 // else if IS is T • ##^HS’ • IS’ and T is FP[i] then 773 // { 774 // // ---------------------------------------------------------- G 775 // if select(i, AP) is {} then /* only if actuals can be empty */ 776 // { 777 // // -------------------------------------------------- H 778 // if IS’ is T’ • IS’’ and T’ is FP[j] then 779 // // 
//					// ------------------------------------------ I
//					return subst(IS’’, FP, AP, HS, OS • select(j, AP));
//				else
//					// ------------------------------------------ J
//					return subst(IS’, FP, AP, HS, OS);
//			}
//			else
//				// -------------------------------------------------- K
//				return subst(##^HS’ • IS’, FP, AP, HS, OS • select(i, AP));
//
//		}
//
//		else if IS is T • IS’ and T is FP[i] then
//			// ---------------------------------------------------------- L
//			return subst(IS’, FP, AP, HS, OS • expand(select(i, AP)));
//
//		// ------------------------------------------------------------------ M
//		note IS must be T^HS’ • IS’
//		return subst(IS’, FP, AP, HS, OS • T^HS’);
//	}
//
// A quick overview of subst is that it walks through the input sequence, IS,
// building up an output sequence, OS, by handling each token from left to
// right. (The order that this operation takes is left to the implementation;
// also, walking from left to right is more natural since the rest of the
// algorithm is constrained to this ordering.) Stringizing is easy; pasting
// requires trickier handling because the operation has a bunch of
// combinations. After the entire input sequence is finished, the updated hide
// set is applied to the output sequence, and that is the result of subst.
//
// In this implementation IS is the replacement list of m, FP/AP are resolved
// through m.param(ap, ...), HS is hs and OS is the result out. The cases are
// tested in the order of the pseudocode above; most of them appear twice, once
// for operands that are adjacent and once for operands separated by a single
// ' ' token. expandDefined is passed on to the expansion of arguments.
func (c *cpp) subst(m *Macro, ap [][]cppToken, hs map[int]struct{}, expandDefined bool) (out []cppToken) {
	// dbg("%s %v %v", m.def.S(), m.variadic, ap)
	repl := cppToks(m.ReplacementToks)
	var arg []cppToken // Actual argument selected by the most recent m.param call.
	for {
		if len(repl) == 0 {
			// -------------------------------------------------- A
			// return hsadd(HS, OS);
			out := cppTrimSpace(out)
			for i := range out {
				out[i].hsAdd(hs)
			}
			return out
		}

		// # param
		if repl[0].Rune == '#' && len(repl) > 1 && repl[1].Rune == IDENTIFIER && m.param(ap, repl[1].Val, &arg) {
			// -------------------------------------------------- B
			// return subst(IS’, FP, AP, HS, OS • stringize(select(i, AP)));
			out = append(out, c.stringize(arg))
			repl = repl[2:]
			continue
		}

		// # ' ' param
		if repl[0].Rune == '#' && len(repl) > 2 && repl[1].Rune == ' ' && repl[2].Rune == IDENTIFIER && m.param(ap, repl[2].Val, &arg) {
			// -------------------------------------------------- B
			// return subst(IS’, FP, AP, HS, OS • stringize(select(i, AP)));
			out = append(out, c.stringize(arg))
			repl = repl[3:]
			continue
		}

		// ## param
		if repl[0].Rune == PPPASTE && len(repl) > 1 && repl[1].Rune == IDENTIFIER && m.param(ap, repl[1].Val, &arg) {
			// -------------------------------------------------- C
			if len(arg) == 0 {
				// ------------------------------------------ D
				// return subst(IS’, FP, AP, HS, OS);
				repl = repl[2:]
				continue
			}

			// -------------------------------------------------- E
			// return subst(IS’, FP, AP, HS, glue(OS, select(i, AP)));
			_, out = c.glue(out, arg)
			repl = repl[2:]
			continue
		}

		// ## ' ' param
		if repl[0].Rune == PPPASTE && len(repl) > 2 && repl[1].Rune == ' ' && repl[2].Rune == IDENTIFIER && m.param(ap, repl[2].Val, &arg) {
			// -------------------------------------------------- C
			if len(arg) == 0 {
				// ------------------------------------------ D
				// return subst(IS’, FP, AP, HS, OS);
				repl = repl[3:]
				continue
			}

			// -------------------------------------------------- E
			// return subst(IS’, FP, AP, HS, glue(OS, select(i, AP)));
			_, out = c.glue(out, arg)
			repl = repl[3:]
			continue
		}

		// ## token, where token is not a parameter (those were handled
		// above) and not a space.
		if repl[0].Rune == PPPASTE && len(repl) > 1 && repl[1].Rune != ' ' {
			// -------------------------------------------------- F
			// return subst(IS’, FP, AP, HS, glue(OS, T^HS’));
			_, out = c.glue(out, repl[1:2])
			repl = repl[2:]
			continue
		}

		// ## ' ' token
		if repl[0].Rune == PPPASTE && len(repl) > 2 && repl[1].Rune == ' ' {
			// -------------------------------------------------- F
			// return subst(IS’, FP, AP, HS, glue(OS, T^HS’));
			_, out = c.glue(out, repl[2:3])
			repl = repl[3:]
			continue
		}

		// param ##
		if len(repl) > 1 && repl[0].Rune == IDENTIFIER && m.param(ap, repl[0].Val, &arg) && repl[1].Rune == PPPASTE {
			// -------------------------------------------------- G
			if len(arg) == 0 {
				// ------------------------------------------ H
				// Not implemented for this form: an empty actual
				// directly followed by ## panics with the position
				// of the parameter.
				panic(c.position(repl[0]))
			}

			// -------------------------------------------------- K
			// return subst(##^HS’ • IS’, FP, AP, HS, OS • select(i, AP));
			out = append(out, arg...)
			repl = repl[1:]
			continue
		}

		// param ' ' ##
		if len(repl) > 2 && repl[0].Rune == IDENTIFIER && m.param(ap, repl[0].Val, &arg) && repl[1].Rune == ' ' && repl[2].Rune == PPPASTE {
			// -------------------------------------------------- G
			if len(arg) == 0 {
				// ------------------------------------------ H
				if len(repl) > 3 && repl[3].Rune == IDENTIFIER && m.param(ap, repl[3].Val, &arg) {
					// ---------------------------------- I
					// Not implemented: empty actual pasted
					// with another parameter.
					panic(c.position(repl[0]))
				}

				// ------------------------------------------ J
				// return subst(IS’, FP, AP, HS, OS);
				repl = repl[3:]
				continue
			}

			// -------------------------------------------------- K
			// return subst(##^HS’ • IS’, FP, AP, HS, OS • select(i, AP));
			out = append(out, arg...)
			repl = repl[2:]
			continue
		}

		// param, not an operand of # or ##: the actual is fully expanded
		// before it is substituted.
		if repl[0].Rune == IDENTIFIER && m.param(ap, repl[0].Val, &arg) {
			// -------------------------------------------------- L
			// return subst(IS’, FP, AP, HS, OS • expand(select(i, AP)));
			out = append(out, c.expands(arg, expandDefined)...)
			repl = repl[1:]
			continue
		}

		// ---------------------------------------------------------- M
		// note IS must be T^HS’ • IS’
		// return subst(IS’, FP, AP, HS, OS • T^HS’);
		out = append(out, repl[0])
		repl = repl[1:]
	}
}

// paste last of left side with first of right side
//
// Trailing space tokens of ls and leading space tokens of rs are dropped
// first. The last token of ls is then replaced by a token whose value is the
// concatenated source text of both operands; the remaining tokens of rs
// follow unchanged. The pasted token keeps the Rune of the left operand,
// except that '#' pasted with '#' becomes PPPASTE. A left operand that is a
// double-quoted string literal longer than two bytes has its quotes removed
// before concatenation.
//
// n is the number of tokens of rs that were consumed: the skipped spaces plus,
// when ls was not empty, the pasted token. When ls is empty after trimming, rs
// is returned as is. glue panics when rs is empty after trimming and when '#'
// is pasted with anything but '#'.
//
// NOTE(review): out may share its backing array with ls or with rs — confirm
// callers do not rely on either staying unmodified.
//
// [1] pg. 3
func (c *cpp) glue(ls, rs []cppToken) (n int, out []cppToken) {
	for len(ls) != 0 && ls[len(ls)-1].Rune == ' ' {
		ls = ls[:len(ls)-1]
	}

	for len(rs) != 0 && rs[0].Rune == ' ' {
		rs = rs[1:]
		n++
	}
	if len(rs) == 0 {
		panic("TODO")
	}

	if len(ls) == 0 {
		return n, rs
	}

	// l and r are the two operands of the paste.
	l := ls[len(ls)-1]
	ls = ls[:len(ls)-1]
	r := rs[0]
	rs = rs[1:]
	n++

	switch l.Rune {
	case '#':
		switch r.Rune {
		case '#':
			l.Rune = PPPASTE
		default:
			panic(PrettyString([]cppToken{l, r}))
		}
	default:
		switch l.Rune {
		case STRINGLITERAL:
			s := TokSrc(l.Token)
			if len(s) > 2 && s[0] == '"' && s[len(s)-1] == '"' {
				s = s[1 : len(s)-1]
			}
			l.Val = dict.SID(s + TokSrc(r.Token))
		default:
			l.Val = dict.SID(TokSrc(l.Token) + TokSrc(r.Token))
		}
	}
	return n, append(append(ls, l), rs...)
}

// Given a token sequence, stringize returns a single string literal token
// containing the concatenated spellings of the tokens.
//
// [1] pg.
3 992 func (c *cpp) stringize(s []cppToken) cppToken { 993 var a []string 994 for _, v := range s { 995 switch v.Rune { 996 case CHARCONST, LONGCHARCONST, LONGSTRINGLITERAL, STRINGLITERAL: 997 s := fmt.Sprintf("%q", TokSrc(v.Token)) 998 a = append(a, s[1:len(s)-1]) 999 default: 1000 a = append(a, TokSrc(v.Token)) 1001 } 1002 } 1003 if v := dict.SID(fmt.Sprintf(`"%s"`, strings.Join(a, ""))); v != 0 { 1004 var t cppToken 1005 if len(s) != 0 { 1006 t = s[0] 1007 } 1008 t.Rune = STRINGLITERAL 1009 t.Val = v 1010 return t 1011 } 1012 1013 return cppToken{} 1014 } 1015 1016 func (c *cpp) directive(r tokenReader, w tokenWriter, cs conds) (y conds) { 1017 line := c.line(r) 1018 if len(line) == 0 { 1019 return cs 1020 } 1021 1022 if cs.on() { 1023 if f := c.tweaks.TrackExpand; f != nil && c.tweaks.DefinesOnly { 1024 if s := cppToksDump(line, ""); strings.HasPrefix(s, "define") { 1025 f(fmt.Sprintf("#%s", cppToksDump(line, ""))) 1026 } 1027 } 1028 } 1029 1030 outer: 1031 switch t := line[0]; t.Rune { 1032 case ccEOF: 1033 // nop 1034 case IDENTIFIER: 1035 switch t.Val { 1036 case idDefine: 1037 if !cs.on() { 1038 break 1039 } 1040 1041 if len(line) == 1 { 1042 c.err(t, "empty define not allowed") 1043 break 1044 } 1045 1046 c.define(line[1:]) 1047 case idElif: 1048 switch cs.tos() { 1049 case condIfOff: 1050 if _, ok := c.constExpr(line[1:], true); ok { 1051 return cs.pop().push(condIfOn) 1052 } 1053 case condIfOn: 1054 return cs.pop().push(condIfSkip) 1055 case condIfSkip: 1056 // nop 1057 default: 1058 panic(fmt.Errorf("%v: %v", c.position(t), cs.tos())) 1059 } 1060 case idElse: 1061 switch cs.tos() { 1062 case condIfOff: 1063 return cs.pop().push(condIfOn) 1064 case condIfOn: 1065 return cs.pop().push(condIfOff) 1066 case condIfSkip: 1067 // nop 1068 default: 1069 panic(fmt.Errorf("%v: %v", c.position(t), cs.tos())) 1070 } 1071 case idError: 1072 if !cs.on() { 1073 break 1074 } 1075 1076 c.err(t, "%s", cppToksDump(line, "")) 1077 case idIf: 1078 if !cs.on() { 1079 return 
cs.push(condIfSkip) 1080 } 1081 1082 switch _, ok := c.constExpr(line[1:], true); { 1083 case ok: 1084 return cs.push(condIfOn) 1085 default: 1086 return cs.push(condIfOff) 1087 } 1088 case idIfdef: 1089 if !cs.on() { 1090 return cs.push(condIfSkip) 1091 } 1092 1093 line = cppTrimAllSpace(line[1:]) 1094 if len(line) == 0 { 1095 c.err(t, "empty #ifdef not allowed") 1096 break 1097 } 1098 1099 if len(line) > 1 { 1100 c.err(t, "extra tokens after #ifdef not allowed") 1101 break 1102 } 1103 1104 if line[0].Rune != IDENTIFIER { 1105 c.err(line[0], "expected identifier") 1106 break 1107 } 1108 1109 if _, ok := c.macros[line[0].Val]; ok { 1110 return cs.push(condIfOn) 1111 } 1112 1113 return cs.push(condIfOff) 1114 case idIfndef: 1115 if !cs.on() { 1116 return cs.push(condIfSkip) 1117 } 1118 1119 line = cppTrimAllSpace(line[1:]) 1120 if len(line) == 0 { 1121 c.err(t, "empty #ifndef not allowed") 1122 break 1123 } 1124 1125 if len(line) > 1 { 1126 c.err(t, "extra tokens after #ifndef not allowed") 1127 break 1128 } 1129 1130 if line[0].Rune != IDENTIFIER { 1131 c.err(line[0], "expected identifier") 1132 break 1133 } 1134 1135 if _, ok := c.macros[line[0].Val]; ok { 1136 return cs.push(condIfOff) 1137 } 1138 1139 return cs.push(condIfOn) 1140 case 1141 idIncludeNext, 1142 idInclude: 1143 1144 if !cs.on() { 1145 break 1146 } 1147 1148 line = cppTrimAllSpace(line[1:]) 1149 if len(line) == 0 { 1150 c.err(t, "empty include not allowed") 1151 break 1152 } 1153 1154 expanded := false 1155 again: 1156 switch line[0].Rune { 1157 case '<': 1158 if c.tweaks.cppExpandTest { 1159 w.write(line...) 1160 return cs 1161 } 1162 1163 var nm string 1164 for _, v := range line[1:] { 1165 if v.Rune == '>' { 1166 c.include(t, nm, c.sysIncludePaths, w) 1167 return cs 1168 } 1169 1170 nm += TokSrc(v.Token) 1171 } 1172 c.err(t, "invalid include file name specification") 1173 case STRINGLITERAL: 1174 if c.tweaks.cppExpandTest { 1175 w.write(line...) 
1176 return cs 1177 } 1178 1179 b := dict.S(line[0].Val) // `"foo.h"` 1180 nm := string(b[1 : len(b)-1]) // `foo.h` 1181 c.include(t, nm, c.includePaths, w) 1182 return cs 1183 default: 1184 if expanded { 1185 panic(PrettyString(line)) 1186 } 1187 1188 line = c.expands(cppTrimAllSpace(line), false) 1189 expanded = true 1190 if c.tweaks.cppExpandTest { 1191 w.write(line...) 1192 return cs 1193 } 1194 1195 goto again 1196 } 1197 case idEndif: 1198 switch cs.tos() { 1199 case condIfOn, condIfOff, condIfSkip: 1200 return cs.pop() 1201 default: 1202 panic(fmt.Errorf("%v: %v", c.position(t), cs.tos())) 1203 } 1204 case idLine: 1205 if !cs.on() { 1206 break 1207 } 1208 1209 f := fset.File(line[0].Pos()) 1210 off := f.Offset(line[0].Pos()) 1211 pos := c.position(line[0]) 1212 line = c.expands(cppTrimAllSpace(line[1:]), false) 1213 switch len(line) { 1214 case 1: // #line linenum 1215 n, err := strconv.ParseUint(string(line[0].S()), 10, 31) 1216 if err != nil { 1217 break 1218 } 1219 1220 f.AddLineInfo(off, pos.Filename, int(n-1)) 1221 //TODO 1222 case 2: // #line linenum filename 1223 //TODO 1224 default: 1225 // ignore 1226 } 1227 1228 // ignored 1229 case idPragma: 1230 if !cs.on() { 1231 break 1232 } 1233 1234 for { 1235 line = line[1:] 1236 if len(line) == 0 { 1237 panic(fmt.Errorf("%v", c.position(t))) 1238 } 1239 1240 switch t = line[0]; { 1241 case t.Rune == ' ': 1242 // nop 1243 case t.Val == idPushMacro: 1244 actuals := c.pragmaActuals(t, line[1:]) 1245 if len(actuals) != 1 { 1246 panic(fmt.Errorf("%v", c.position(t))) 1247 } 1248 1249 t := actuals[0] 1250 switch t.Rune { 1251 case STRINGLITERAL: 1252 nm := int(c.strConst(t.Token).Value.(*ir.StringValue).StringID) 1253 m := c.macros[nm] 1254 if m != nil { 1255 c.macroStack[nm] = append(c.macroStack[nm], m) 1256 } 1257 break outer 1258 default: 1259 panic(fmt.Errorf("%v: %v", c.position(t), yySymName(int(actuals[0].Rune)))) 1260 } 1261 case t.Val == idPopMacro: 1262 actuals := c.pragmaActuals(t, line[1:]) 1263 if 
len(actuals) != 1 { 1264 panic(fmt.Errorf("%v", c.position(t))) 1265 } 1266 1267 t := actuals[0] 1268 switch t.Rune { 1269 case STRINGLITERAL: 1270 nm := int(c.strConst(t.Token).Value.(*ir.StringValue).StringID) 1271 s := c.macroStack[nm] 1272 if n := len(s); n != 0 { 1273 m := s[n-1] 1274 s = s[:n-1] 1275 c.macroStack[nm] = s 1276 c.macros[nm] = m 1277 } 1278 break outer 1279 default: 1280 panic(fmt.Errorf("%v: %v", c.position(t), yySymName(int(actuals[0].Rune)))) 1281 } 1282 default: 1283 if c.tweaks.IgnoreUnknownPragmas { 1284 break outer 1285 } 1286 1287 panic(fmt.Errorf("%v: %#x, %v", c.position(t), t.Rune, t)) 1288 } 1289 } 1290 case idUndef: 1291 if !cs.on() { 1292 break 1293 } 1294 1295 line = cppTrimSpace(line[1:]) 1296 if len(line) == 0 { 1297 panic("TODO") 1298 } 1299 1300 if len(line) > 1 { 1301 panic("TODO") 1302 } 1303 1304 if line[0].Rune != IDENTIFIER { 1305 panic("TODO") 1306 } 1307 1308 delete(c.macros, line[0].Val) 1309 case idWarning: 1310 if !cs.on() { 1311 break 1312 } 1313 1314 panic(fmt.Errorf("%v", c.position(t))) 1315 default: 1316 panic(fmt.Errorf("%v %v", c.position(t), PrettyString(t))) 1317 } 1318 default: 1319 panic(PrettyString(t)) 1320 } 1321 return cs 1322 } 1323 1324 func (c *cpp) include(n Node, nm string, paths []string, w tokenWriter) { 1325 if c.includeLevel == maxIncludeLevel { 1326 c.err(n, "too many include levels") 1327 } 1328 1329 c.includeLevel++ 1330 1331 defer func() { c.includeLevel-- }() 1332 1333 dir := filepath.Dir(c.position(n).Filename) 1334 if d, err := filepath.Abs(dir); err == nil { 1335 dir = d 1336 } 1337 var path string 1338 if n.(cppToken).Val == idIncludeNext { 1339 nmDir, _ := filepath.Split(nm) 1340 for i, v := range paths { 1341 if w, err := filepath.Abs(v); err == nil { 1342 v = w 1343 } 1344 v = filepath.Join(v, nmDir) 1345 if v == dir { 1346 paths = paths[i+1:] 1347 break 1348 } 1349 } 1350 } 1351 for _, v := range paths { 1352 if v == "@" { 1353 v = dir 1354 } 1355 1356 var p string 1357 switch { 
1358 case strings.HasPrefix(nm, "./"): 1359 p = nm 1360 default: 1361 p = filepath.Join(v, nm) 1362 } 1363 fi, err := os.Stat(p) 1364 if err != nil || fi.IsDir() { 1365 continue 1366 } 1367 1368 path = p 1369 break 1370 } 1371 1372 if path == "" { 1373 wd, _ := os.Getwd() 1374 c.err(n, "include file not found: %s\nworking dir: %s\nsearch paths:\n\t%s", nm, wd, strings.Join(paths, "\n\t")) 1375 return 1376 } 1377 1378 s, err := NewFileSource2(path, true) 1379 if err != nil { 1380 c.err(n, "%s", err.Error()) 1381 return 1382 } 1383 1384 if n, _ := s.Size(); n == 0 { 1385 return 1386 } 1387 1388 if f := c.tweaks.TrackIncludes; f != nil { 1389 f(path) 1390 } 1391 r, err := c.parse(s) 1392 if err != nil { 1393 c.err(n, "%s", err.Error()) 1394 } 1395 1396 c.expand(r, w, conds(nil).push(condZero), 0, false) 1397 } 1398 1399 func (c *cpp) constExpr(toks []cppToken, expandDefined bool) (op Operand, y bool) { 1400 toks = cppTrimAllSpace(c.expands(cppTrimAllSpace(toks), expandDefined)) 1401 for i, v := range toks { 1402 if v.Rune == IDENTIFIER { 1403 toks[i].Rune = INTCONST 1404 toks[i].Val = idZero 1405 } 1406 } 1407 c.lx.ungetBuffer = c.lx.ungetBuffer[:0] 1408 c.lx.ungets(toks...) 
1409 if !c.lx.parseExpr() { 1410 return Operand{}, false 1411 } 1412 1413 e := c.lx.ast.(*ConstExpr) 1414 v := e.eval(c.context) 1415 if v.Type != Int { 1416 return v, false 1417 } 1418 1419 switch x := v.Value.(type) { 1420 case *ir.Int64Value: 1421 return v, x.Value != 0 1422 default: 1423 return v, false 1424 } 1425 } 1426 1427 func (c *cpp) define(line []cppToken) { 1428 switch line[0].Rune { 1429 case ' ': 1430 c.defineMacro(xcToks(line[1:])) 1431 default: 1432 panic(PrettyString(line)) 1433 } 1434 } 1435 1436 func (c *cpp) defineMacro(line []xc.Token) { 1437 if len(line) == 0 { 1438 panic("internal error") 1439 } 1440 1441 if line[0].Rune == ' ' { 1442 line = line[1:] 1443 } 1444 1445 switch t := line[0]; t.Rune { 1446 case IDENTIFIER: 1447 nm := t.Val 1448 if protectedMacro[nm] { 1449 panic("TODO") 1450 } 1451 line := line[1:] 1452 var repl []xc.Token 1453 if len(line) != 0 { 1454 switch line[0].Rune { 1455 case '\n', ccEOF: 1456 // nop 1457 case ' ': 1458 repl = line[1:] 1459 case '(': 1460 c.defineFnMacro(t, line[1:]) 1461 return 1462 default: 1463 panic(fmt.Errorf(PrettyString(line[0]))) 1464 } 1465 } 1466 1467 if ex := c.macros[nm]; ex != nil { 1468 if c.identicalReplacementLists(repl, ex.ReplacementToks) { 1469 return 1470 } 1471 1472 c.err(t, "%q replacement lists differ: %q, %q", dict.S(nm), toksDump(ex.ReplacementToks, ""), toksDump(repl, "")) 1473 return 1474 } 1475 1476 if traceMacroDefs { 1477 fmt.Fprintf(os.Stderr, "#define %s %s\n", dict.S(nm), toksDump(repl, "")) 1478 } 1479 c.macros[nm] = newMacro(t, repl) 1480 default: 1481 panic(PrettyString(t)) 1482 } 1483 } 1484 1485 func (c *cpp) identicalReplacementLists(a, b []xc.Token) bool { 1486 if len(a) != len(b) { 1487 return false 1488 } 1489 1490 for i, v := range a { 1491 w := b[i] 1492 if v.Rune != w.Rune || v.Val != w.Val { 1493 return false 1494 } 1495 } 1496 1497 return true 1498 } 1499 1500 func (c *cpp) defineFnMacro(nmTok xc.Token, line []xc.Token) { 1501 ident := true 1502 var params 
[]int 1503 variadic := false 1504 for i, v := range line { 1505 switch v.Rune { 1506 case IDENTIFIER: 1507 if !ident { 1508 panic("TODO") 1509 } 1510 1511 params = append(params, v.Val) 1512 ident = false 1513 case ')': 1514 m := newMacro(nmTok, trimSpace(line[i+1:])) 1515 m.IsFnLike = true 1516 m.ident = ident 1517 m.IsVariadic = variadic 1518 m.Args = params 1519 if ex := c.macros[nmTok.Val]; ex != nil { 1520 if c.identicalParamLists(params, ex.Args) && c.identicalReplacementLists(m.ReplacementToks, ex.ReplacementToks) && m.IsVariadic == ex.IsVariadic { 1521 return 1522 } 1523 1524 c.err(nmTok, "parameter and/or replacement lists differ") 1525 return 1526 } 1527 1528 if traceMacroDefs { 1529 var a [][]byte 1530 for _, v := range m.Args { 1531 a = append(a, dict.S(v)) 1532 } 1533 fmt.Fprintf(os.Stderr, "#define %s(%s) %s\n", dict.S(nmTok.Val), bytes.Join(a, []byte(", ")), toksDump(m.ReplacementToks, "")) 1534 } 1535 c.macros[nmTok.Val] = m 1536 return 1537 case ',': 1538 if ident { 1539 panic("TODO") 1540 } 1541 1542 ident = true 1543 case ' ': 1544 // nop 1545 case DDD: 1546 variadic = true 1547 default: 1548 panic(PrettyString(v)) 1549 } 1550 } 1551 } 1552 1553 func (c *cpp) identicalParamLists(a, b []int) bool { 1554 if len(a) != len(b) { 1555 return false 1556 } 1557 1558 for i, v := range a { 1559 if v != b[i] { 1560 return false 1561 } 1562 } 1563 1564 return true 1565 } 1566 1567 func (c *cpp) line(r tokenReader) []cppToken { 1568 c.toks = c.toks[:0] 1569 for { 1570 switch t := r.read(); t.Rune { 1571 case '\n', ccEOF: 1572 if len(c.toks) == 0 || c.toks[0].Rune != ' ' { 1573 return c.toks 1574 } 1575 1576 for i, v := range c.toks { 1577 if v.Rune != ' ' { 1578 n := copy(c.toks, c.toks[i:]) 1579 c.toks = c.toks[:n] 1580 return c.toks 1581 } 1582 } 1583 1584 c.toks = c.toks[:0] 1585 return c.toks 1586 default: 1587 c.toks = append(c.toks, t) 1588 } 1589 } 1590 }