github.com/miolini/go@v0.0.0-20160405192216-fca68c8cb408/src/cmd/compile/internal/gc/lex.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gc 6 7 import ( 8 "bufio" 9 "cmd/internal/obj" 10 "fmt" 11 "io" 12 "strconv" 13 "strings" 14 "unicode" 15 "unicode/utf8" 16 ) 17 18 const ( 19 EOF = -1 20 BOM = 0xFEFF 21 ) 22 23 func isSpace(c rune) bool { 24 return c == ' ' || c == '\t' || c == '\n' || c == '\r' 25 } 26 27 func isLetter(c rune) bool { 28 return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || c == '_' 29 } 30 31 func isDigit(c rune) bool { 32 return '0' <= c && c <= '9' 33 } 34 35 func plan9quote(s string) string { 36 if s == "" { 37 return "''" 38 } 39 for _, c := range s { 40 if c <= ' ' || c == '\'' { 41 return "'" + strings.Replace(s, "'", "''", -1) + "'" 42 } 43 } 44 return s 45 } 46 47 type Pragma uint16 48 49 const ( 50 Nointerface Pragma = 1 << iota 51 Noescape // func parameters don't escape 52 Norace // func must not have race detector annotations 53 Nosplit // func should not execute on separate stack 54 Noinline // func should not be inlined 55 Systemstack // func must run on system stack 56 Nowritebarrier // emit compiler error instead of write barrier 57 Nowritebarrierrec // error on write barrier in this or recursive callees 58 CgoUnsafeArgs // treat a pointer to one arg as a pointer to them all 59 ) 60 61 type lexer struct { 62 // source 63 bin *bufio.Reader 64 prevlineno int32 // line no. of most recently read character 65 66 nlsemi bool // if set, '\n' and EOF translate to ';' 67 68 // pragma flags 69 // accumulated by lexer; reset by parser 70 pragma Pragma 71 72 // current token 73 tok int32 74 sym_ *Sym // valid if tok == LNAME 75 val Val // valid if tok == LLITERAL 76 op Op // valid if tok == LOPER, LASOP, or LINCOP, or prec > 0 77 prec OpPrec // operator precedence; 0 if not a binary operator 78 } 79 80 type OpPrec int 81 82 const ( 83 // Precedences of binary operators (must be > 0). 84 PCOMM OpPrec = 1 + iota 85 POROR 86 PANDAND 87 PCMP 88 PADD 89 PMUL 90 ) 91 92 const ( 93 // The value of single-char tokens is just their character's Unicode value. 94 // They are all below utf8.RuneSelf. Shift other tokens up to avoid conflicts. 95 96 // names and literals 97 LNAME = utf8.RuneSelf + iota 98 LLITERAL 99 100 // operator-based operations 101 LOPER 102 LASOP 103 LINCOP 104 105 // miscellaneous 106 LCOLAS 107 LCOMM 108 LDDD 109 110 // keywords 111 LBREAK 112 LCASE 113 LCHAN 114 LCONST 115 LCONTINUE 116 LDEFAULT 117 LDEFER 118 LELSE 119 LFALL 120 LFOR 121 LFUNC 122 LGO 123 LGOTO 124 LIF 125 LIMPORT 126 LINTERFACE 127 LMAP 128 LPACKAGE 129 LRANGE 130 LRETURN 131 LSELECT 132 LSTRUCT 133 LSWITCH 134 LTYPE 135 LVAR 136 137 LIGNORE 138 ) 139 140 var lexn = map[rune]string{ 141 LNAME: "NAME", 142 LLITERAL: "LITERAL", 143 144 LOPER: "OPER", 145 LASOP: "ASOP", 146 LINCOP: "INCOP", 147 148 LCOLAS: "COLAS", 149 LCOMM: "COMM", 150 LDDD: "DDD", 151 152 LBREAK: "BREAK", 153 LCASE: "CASE", 154 LCHAN: "CHAN", 155 LCONST: "CONST", 156 LCONTINUE: "CONTINUE", 157 LDEFAULT: "DEFAULT", 158 LDEFER: "DEFER", 159 LELSE: "ELSE", 160 LFALL: "FALL", 161 LFOR: "FOR", 162 LFUNC: "FUNC", 163 LGO: "GO", 164 LGOTO: "GOTO", 165 LIF: "IF", 166 LIMPORT: "IMPORT", 167 LINTERFACE: "INTERFACE", 168 LMAP: "MAP", 169 LPACKAGE: "PACKAGE", 170 LRANGE: "RANGE", 171 LRETURN: "RETURN", 172 LSELECT: "SELECT", 173 LSTRUCT: "STRUCT", 174 LSWITCH: "SWITCH", 175 LTYPE: "TYPE", 176 LVAR: "VAR", 177 178 // LIGNORE is never escaping lexer.next 179 } 180 181 func lexname(lex rune) string { 182 if s, ok := lexn[lex]; ok { 183 return s 184 } 185 return fmt.Sprintf("LEX-%d", lex) 186 } 187 188 func (l *lexer) next() { 189 nlsemi := l.nlsemi 190 l.nlsemi = false 191 l.prec = 0 192 193 l0: 194 // skip white space 195 c := l.getr() 196 for isSpace(c) { 197 if c == '\n' && nlsemi { 198 if Debug['x'] != 0 { 199 fmt.Printf("lex: implicit semi\n") 200 } 201 // Insert implicit semicolon on previous line, 202 // before the newline character. 203 lineno = lexlineno - 1 204 l.tok = ';' 205 return 206 } 207 c = l.getr() 208 } 209 210 // start of token 211 lineno = lexlineno 212 213 // identifiers and keywords 214 // (for better error messages consume all chars >= utf8.RuneSelf for identifiers) 215 if isLetter(c) || c >= utf8.RuneSelf { 216 l.ident(c) 217 if l.tok == LIGNORE { 218 goto l0 219 } 220 return 221 } 222 // c < utf8.RuneSelf 223 224 var c1 rune 225 var op Op 226 var prec OpPrec 227 228 switch c { 229 case EOF: 230 l.ungetr() 231 // Treat EOF as "end of line" for the purposes 232 // of inserting a semicolon. 233 if nlsemi { 234 if Debug['x'] != 0 { 235 fmt.Printf("lex: implicit semi\n") 236 } 237 l.tok = ';' 238 return 239 } 240 l.tok = -1 241 return 242 243 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 244 l.number(c) 245 return 246 247 case '.': 248 c1 = l.getr() 249 if isDigit(c1) { 250 l.ungetr() 251 l.number('.') 252 return 253 } 254 255 if c1 == '.' { 256 p, err := l.bin.Peek(1) 257 if err == nil && p[0] == '.' { 258 l.getr() 259 c = LDDD 260 goto lx 261 } 262 263 l.ungetr() 264 c1 = '.' 265 } 266 267 case '"': 268 l.stdString() 269 return 270 271 case '`': 272 l.rawString() 273 return 274 275 case '\'': 276 l.rune() 277 return 278 279 case '/': 280 c1 = l.getr() 281 if c1 == '*' { 282 c = l.getr() 283 for { 284 if c == '*' { 285 c = l.getr() 286 if c == '/' { 287 break 288 } 289 continue 290 } 291 if c == EOF { 292 Yyerror("eof in comment") 293 errorexit() 294 } 295 c = l.getr() 296 } 297 298 // A comment containing newlines acts like a newline. 299 if lexlineno > lineno && nlsemi { 300 if Debug['x'] != 0 { 301 fmt.Printf("lex: implicit semi\n") 302 } 303 l.tok = ';' 304 return 305 } 306 goto l0 307 } 308 309 if c1 == '/' { 310 c = l.getlinepragma() 311 for { 312 if c == '\n' || c == EOF { 313 l.ungetr() 314 goto l0 315 } 316 317 c = l.getr() 318 } 319 } 320 321 op = ODIV 322 prec = PMUL 323 goto binop1 324 325 case ':': 326 c1 = l.getr() 327 if c1 == '=' { 328 c = LCOLAS 329 goto lx 330 } 331 332 case '*': 333 op = OMUL 334 prec = PMUL 335 goto binop 336 337 case '%': 338 op = OMOD 339 prec = PMUL 340 goto binop 341 342 case '+': 343 op = OADD 344 goto incop 345 346 case '-': 347 op = OSUB 348 goto incop 349 350 case '>': 351 c = LOPER 352 c1 = l.getr() 353 if c1 == '>' { 354 op = ORSH 355 prec = PMUL 356 goto binop 357 } 358 359 l.prec = PCMP 360 if c1 == '=' { 361 l.op = OGE 362 goto lx 363 } 364 l.op = OGT 365 366 case '<': 367 c = LOPER 368 c1 = l.getr() 369 if c1 == '<' { 370 op = OLSH 371 prec = PMUL 372 goto binop 373 } 374 375 if c1 == '-' { 376 c = LCOMM 377 // Not a binary operator, but parsed as one 378 // so we can give a good error message when used 379 // in an expression context. 380 l.prec = PCOMM 381 l.op = OSEND 382 goto lx 383 } 384 385 l.prec = PCMP 386 if c1 == '=' { 387 l.op = OLE 388 goto lx 389 } 390 l.op = OLT 391 392 case '=': 393 c1 = l.getr() 394 if c1 == '=' { 395 c = LOPER 396 l.prec = PCMP 397 l.op = OEQ 398 goto lx 399 } 400 401 case '!': 402 c1 = l.getr() 403 if c1 == '=' { 404 c = LOPER 405 l.prec = PCMP 406 l.op = ONE 407 goto lx 408 } 409 410 case '&': 411 c1 = l.getr() 412 if c1 == '&' { 413 c = LOPER 414 l.prec = PANDAND 415 l.op = OANDAND 416 goto lx 417 } 418 419 if c1 == '^' { 420 c = LOPER 421 op = OANDNOT 422 prec = PMUL 423 goto binop 424 } 425 426 op = OAND 427 prec = PMUL 428 goto binop1 429 430 case '|': 431 c1 = l.getr() 432 if c1 == '|' { 433 c = LOPER 434 l.prec = POROR 435 l.op = OOROR 436 goto lx 437 } 438 439 op = OOR 440 prec = PADD 441 goto binop1 442 443 case '^': 444 op = OXOR 445 prec = PADD 446 goto binop 447 448 case '(', '[', '{', ',', ';': 449 goto lx 450 451 case ')', ']', '}': 452 l.nlsemi = true 453 goto lx 454 455 case '#', '$', '?', '@', '\\': 456 if importpkg != nil { 457 goto lx 458 } 459 fallthrough 460 461 default: 462 // anything else is illegal 463 Yyerror("syntax error: illegal character %#U", c) 464 goto l0 465 } 466 467 l.ungetr() 468 469 lx: 470 if Debug['x'] != 0 { 471 if c >= utf8.RuneSelf { 472 fmt.Printf("%v lex: TOKEN %s\n", linestr(lineno), lexname(c)) 473 } else { 474 fmt.Printf("%v lex: TOKEN '%c'\n", linestr(lineno), c) 475 } 476 } 477 478 l.tok = c 479 return 480 481 incop: 482 c1 = l.getr() 483 if c1 == c { 484 l.nlsemi = true 485 l.op = op 486 c = LINCOP 487 goto lx 488 } 489 prec = PADD 490 goto binop1 491 492 binop: 493 c1 = l.getr() 494 binop1: 495 if c1 != '=' { 496 l.ungetr() 497 l.op = op 498 l.prec = prec 499 goto lx 500 } 501 502 l.op = op 503 if Debug['x'] != 0 { 504 fmt.Printf("lex: TOKEN ASOP %s=\n", goopnames[op]) 505 } 506 l.tok = LASOP 507 } 508 509 func (l *lexer) ident(c rune) { 510 cp := &lexbuf 511 cp.Reset() 512 513 // accelerate common case (7bit ASCII) 514 for isLetter(c) || isDigit(c) { 515 cp.WriteByte(byte(c)) 516 c = l.getr() 517 } 518 519 // general case 520 for { 521 if c >= utf8.RuneSelf { 522 if unicode.IsLetter(c) || c == '_' || unicode.IsDigit(c) || importpkg != nil && c == 0xb7 { 523 if cp.Len() == 0 && unicode.IsDigit(c) { 524 Yyerror("identifier cannot begin with digit %#U", c) 525 } 526 } else { 527 Yyerror("invalid identifier character %#U", c) 528 } 529 cp.WriteRune(c) 530 } else if isLetter(c) || isDigit(c) { 531 cp.WriteByte(byte(c)) 532 } else { 533 break 534 } 535 c = l.getr() 536 } 537 538 cp = nil 539 l.ungetr() 540 541 name := lexbuf.Bytes() 542 543 if len(name) >= 2 { 544 if tok, ok := keywords[string(name)]; ok { 545 if Debug['x'] != 0 { 546 fmt.Printf("lex: %s\n", lexname(tok)) 547 } 548 switch tok { 549 case LBREAK, LCONTINUE, LFALL, LRETURN: 550 l.nlsemi = true 551 } 552 l.tok = tok 553 return 554 } 555 } 556 557 s := LookupBytes(name) 558 if Debug['x'] != 0 { 559 fmt.Printf("lex: ident %s\n", s) 560 } 561 l.sym_ = s 562 l.nlsemi = true 563 l.tok = LNAME 564 } 565 566 var keywords = map[string]int32{ 567 "break": LBREAK, 568 "case": LCASE, 569 "chan": LCHAN, 570 "const": LCONST, 571 "continue": LCONTINUE, 572 "default": LDEFAULT, 573 "defer": LDEFER, 574 "else": LELSE, 575 "fallthrough": LFALL, 576 "for": LFOR, 577 "func": LFUNC, 578 "go": LGO, 579 "goto": LGOTO, 580 "if": LIF, 581 "import": LIMPORT, 582 "interface": LINTERFACE, 583 "map": LMAP, 584 "package": LPACKAGE, 585 "range": LRANGE, 586 "return": LRETURN, 587 "select": LSELECT, 588 "struct": LSTRUCT, 589 "switch": LSWITCH, 590 "type": LTYPE, 591 "var": LVAR, 592 593 // 💩 594 "notwithstanding": LIGNORE, 595 "thetruthofthematter": LIGNORE, 596 "despiteallobjections": LIGNORE, 597 "whereas": LIGNORE, 598 "insofaras": LIGNORE, 599 } 600 601 func (l *lexer) number(c rune) { 602 cp := &lexbuf 603 cp.Reset() 604 605 // parse mantissa before decimal point or exponent 606 isInt := false 607 malformedOctal := false 608 if c != '.' { 609 if c != '0' { 610 // decimal or float 611 for isDigit(c) { 612 cp.WriteByte(byte(c)) 613 c = l.getr() 614 } 615 616 } else { 617 // c == 0 618 cp.WriteByte('0') 619 c = l.getr() 620 if c == 'x' || c == 'X' { 621 isInt = true // must be int 622 cp.WriteByte(byte(c)) 623 c = l.getr() 624 for isDigit(c) || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 625 cp.WriteByte(byte(c)) 626 c = l.getr() 627 } 628 if lexbuf.Len() == 2 { 629 Yyerror("malformed hex constant") 630 } 631 } else { 632 // decimal 0, octal, or float 633 for isDigit(c) { 634 if c > '7' { 635 malformedOctal = true 636 } 637 cp.WriteByte(byte(c)) 638 c = l.getr() 639 } 640 } 641 } 642 } 643 644 // unless we have a hex number, parse fractional part or exponent, if any 645 var str string 646 if !isInt { 647 isInt = true // assume int unless proven otherwise 648 649 // fraction 650 if c == '.' { 651 isInt = false 652 cp.WriteByte('.') 653 c = l.getr() 654 for isDigit(c) { 655 cp.WriteByte(byte(c)) 656 c = l.getr() 657 } 658 // Falling through to exponent parsing here permits invalid 659 // floating-point numbers with fractional mantissa and base-2 660 // (p or P) exponent. We don't care because base-2 exponents 661 // can only show up in machine-generated textual export data 662 // which will use correct formatting. 663 } 664 665 // exponent 666 // base-2 exponent (p or P) is only allowed in export data (see #9036) 667 // TODO(gri) Once we switch to binary import data, importpkg will 668 // always be nil in this function. Simplify the code accordingly. 669 if c == 'e' || c == 'E' || importpkg != nil && (c == 'p' || c == 'P') { 670 isInt = false 671 cp.WriteByte(byte(c)) 672 c = l.getr() 673 if c == '+' || c == '-' { 674 cp.WriteByte(byte(c)) 675 c = l.getr() 676 } 677 if !isDigit(c) { 678 Yyerror("malformed floating point constant exponent") 679 } 680 for isDigit(c) { 681 cp.WriteByte(byte(c)) 682 c = l.getr() 683 } 684 } 685 686 // imaginary constant 687 if c == 'i' { 688 str = lexbuf.String() 689 x := new(Mpcplx) 690 x.Real.SetFloat64(0.0) 691 x.Imag.SetString(str) 692 if x.Imag.Val.IsInf() { 693 Yyerror("overflow in imaginary constant") 694 x.Imag.SetFloat64(0.0) 695 } 696 l.val.U = x 697 698 if Debug['x'] != 0 { 699 fmt.Printf("lex: imaginary literal\n") 700 } 701 goto done 702 } 703 } 704 705 l.ungetr() 706 707 if isInt { 708 if malformedOctal { 709 Yyerror("malformed octal constant") 710 } 711 712 str = lexbuf.String() 713 x := new(Mpint) 714 x.SetString(str) 715 if x.Ovf { 716 Yyerror("overflow in constant") 717 x.SetInt64(0) 718 } 719 l.val.U = x 720 721 if Debug['x'] != 0 { 722 fmt.Printf("lex: integer literal\n") 723 } 724 725 } else { // float 726 727 str = lexbuf.String() 728 x := newMpflt() 729 x.SetString(str) 730 if x.Val.IsInf() { 731 Yyerror("overflow in float constant") 732 x.SetFloat64(0.0) 733 } 734 l.val.U = x 735 736 if Debug['x'] != 0 { 737 fmt.Printf("lex: floating literal\n") 738 } 739 } 740 741 done: 742 litbuf = "literal " + str 743 l.nlsemi = true 744 l.tok = LLITERAL 745 } 746 747 func (l *lexer) stdString() { 748 lexbuf.Reset() 749 lexbuf.WriteString(`"<string>"`) 750 751 cp := &strbuf 752 cp.Reset() 753 754 for { 755 r, b, ok := l.onechar('"') 756 if !ok { 757 break 758 } 759 if r == 0 { 760 cp.WriteByte(b) 761 } else { 762 cp.WriteRune(r) 763 } 764 } 765 766 l.val.U = internString(cp.Bytes()) 767 if Debug['x'] != 0 { 768 fmt.Printf("lex: string literal\n") 769 } 770 litbuf = "string literal" 771 l.nlsemi = true 772 l.tok = LLITERAL 773 } 774 775 func (l *lexer) rawString() { 776 lexbuf.Reset() 777 lexbuf.WriteString("`<string>`") 778 779 cp := &strbuf 780 cp.Reset() 781 782 for { 783 c := l.getr() 784 if c == '\r' { 785 continue 786 } 787 if c == EOF { 788 Yyerror("eof in string") 789 break 790 } 791 if c == '`' { 792 break 793 } 794 cp.WriteRune(c) 795 } 796 797 l.val.U = internString(cp.Bytes()) 798 if Debug['x'] != 0 { 799 fmt.Printf("lex: string literal\n") 800 } 801 litbuf = "string literal" 802 l.nlsemi = true 803 l.tok = LLITERAL 804 } 805 806 func (l *lexer) rune() { 807 r, b, ok := l.onechar('\'') 808 if !ok { 809 Yyerror("empty character literal or unescaped ' in character literal") 810 r = '\'' 811 } 812 if r == 0 { 813 r = rune(b) 814 } 815 816 if c := l.getr(); c != '\'' { 817 Yyerror("missing '") 818 l.ungetr() 819 } 820 821 x := new(Mpint) 822 l.val.U = x 823 x.SetInt64(int64(r)) 824 x.Rune = true 825 if Debug['x'] != 0 { 826 fmt.Printf("lex: codepoint literal\n") 827 } 828 litbuf = "rune literal" 829 l.nlsemi = true 830 l.tok = LLITERAL 831 } 832 833 var internedStrings = map[string]string{} 834 835 func internString(b []byte) string { 836 s, ok := internedStrings[string(b)] // string(b) here doesn't allocate 837 if !ok { 838 s = string(b) 839 internedStrings[s] = s 840 } 841 return s 842 } 843 844 func more(pp *string) bool { 845 p := *pp 846 for p != "" && isSpace(rune(p[0])) { 847 p = p[1:] 848 } 849 *pp = p 850 return p != "" 851 } 852 853 // read and interpret syntax that looks like 854 // //line parse.y:15 855 // as a discontinuity in sequential line numbers. 856 // the next line of input comes from parse.y:15 857 func (l *lexer) getlinepragma() rune { 858 c := l.getr() 859 if c == 'g' { // check for //go: directive 860 cp := &lexbuf 861 cp.Reset() 862 cp.WriteByte('g') // already read 863 for { 864 c = l.getr() 865 if c == EOF || c >= utf8.RuneSelf { 866 return c 867 } 868 if c == '\n' { 869 break 870 } 871 cp.WriteByte(byte(c)) 872 } 873 cp = nil 874 875 text := strings.TrimSuffix(lexbuf.String(), "\r") 876 877 if strings.HasPrefix(text, "go:cgo_") { 878 pragcgo(text) 879 } 880 881 verb := text 882 if i := strings.Index(text, " "); i >= 0 { 883 verb = verb[:i] 884 } 885 886 switch verb { 887 case "go:linkname": 888 if !imported_unsafe { 889 Yyerror("//go:linkname only allowed in Go files that import \"unsafe\"") 890 } 891 f := strings.Fields(text) 892 if len(f) != 3 { 893 Yyerror("usage: //go:linkname localname linkname") 894 break 895 } 896 Lookup(f[1]).Linkname = f[2] 897 case "go:nointerface": 898 if obj.Fieldtrack_enabled != 0 { 899 l.pragma |= Nointerface 900 } 901 case "go:noescape": 902 l.pragma |= Noescape 903 case "go:norace": 904 l.pragma |= Norace 905 case "go:nosplit": 906 l.pragma |= Nosplit 907 case "go:noinline": 908 l.pragma |= Noinline 909 case "go:systemstack": 910 if compiling_runtime == 0 { 911 Yyerror("//go:systemstack only allowed in runtime") 912 } 913 l.pragma |= Systemstack 914 case "go:nowritebarrier": 915 if compiling_runtime == 0 { 916 Yyerror("//go:nowritebarrier only allowed in runtime") 917 } 918 l.pragma |= Nowritebarrier 919 case "go:nowritebarrierrec": 920 if compiling_runtime == 0 { 921 Yyerror("//go:nowritebarrierrec only allowed in runtime") 922 } 923 l.pragma |= Nowritebarrierrec | Nowritebarrier // implies Nowritebarrier 924 case "go:cgo_unsafe_args": 925 l.pragma |= CgoUnsafeArgs 926 } 927 return c 928 } 929 930 // check for //line directive 931 if c != 'l' { 932 return c 933 } 934 for i := 1; i < 5; i++ { 935 c = l.getr() 936 if c != rune("line "[i]) { 937 return c 938 } 939 } 940 941 cp := &lexbuf 942 cp.Reset() 943 linep := 0 944 for { 945 c = l.getr() 946 if c == EOF { 947 return c 948 } 949 if c == '\n' { 950 break 951 } 952 if c == ' ' { 953 continue 954 } 955 if c == ':' { 956 linep = cp.Len() + 1 957 } 958 cp.WriteByte(byte(c)) 959 } 960 cp = nil 961 962 if linep == 0 { 963 return c 964 } 965 text := strings.TrimSuffix(lexbuf.String(), "\r") 966 n, err := strconv.Atoi(text[linep:]) 967 if err != nil { 968 return c // todo: make this an error instead? it is almost certainly a bug. 969 } 970 if n > 1e8 { 971 Yyerror("line number out of range") 972 errorexit() 973 } 974 if n <= 0 { 975 return c 976 } 977 978 linehistupdate(text[:linep-1], n) 979 return c 980 } 981 982 func getimpsym(pp *string) string { 983 more(pp) // skip spaces 984 p := *pp 985 if p == "" || p[0] == '"' { 986 return "" 987 } 988 i := 0 989 for i < len(p) && !isSpace(rune(p[i])) && p[i] != '"' { 990 i++ 991 } 992 sym := p[:i] 993 *pp = p[i:] 994 return sym 995 } 996 997 func getquoted(pp *string) (string, bool) { 998 more(pp) // skip spaces 999 p := *pp 1000 if p == "" || p[0] != '"' { 1001 return "", false 1002 } 1003 p = p[1:] 1004 i := strings.Index(p, `"`) 1005 if i < 0 { 1006 return "", false 1007 } 1008 *pp = p[i+1:] 1009 return p[:i], true 1010 } 1011 1012 // Copied nearly verbatim from the C compiler's #pragma parser. 1013 // TODO: Rewrite more cleanly once the compiler is written in Go. 1014 func pragcgo(text string) { 1015 var q string 1016 1017 if i := strings.Index(text, " "); i >= 0 { 1018 text, q = text[:i], text[i:] 1019 } 1020 1021 verb := text[3:] // skip "go:" 1022 1023 if verb == "cgo_dynamic_linker" || verb == "dynlinker" { 1024 p, ok := getquoted(&q) 1025 if !ok { 1026 Yyerror("usage: //go:cgo_dynamic_linker \"path\"") 1027 return 1028 } 1029 pragcgobuf += fmt.Sprintf("cgo_dynamic_linker %v\n", plan9quote(p)) 1030 return 1031 1032 } 1033 1034 if verb == "dynexport" { 1035 verb = "cgo_export_dynamic" 1036 } 1037 if verb == "cgo_export_static" || verb == "cgo_export_dynamic" { 1038 local := getimpsym(&q) 1039 var remote string 1040 if local == "" { 1041 goto err2 1042 } 1043 if !more(&q) { 1044 pragcgobuf += fmt.Sprintf("%s %v\n", verb, plan9quote(local)) 1045 return 1046 } 1047 1048 remote = getimpsym(&q) 1049 if remote == "" { 1050 goto err2 1051 } 1052 pragcgobuf += fmt.Sprintf("%s %v %v\n", verb, plan9quote(local), plan9quote(remote)) 1053 return 1054 1055 err2: 1056 Yyerror("usage: //go:%s local [remote]", verb) 1057 return 1058 } 1059 1060 if verb == "cgo_import_dynamic" || verb == "dynimport" { 1061 var ok bool 1062 local := getimpsym(&q) 1063 var p string 1064 var remote string 1065 if local == "" { 1066 goto err3 1067 } 1068 if !more(&q) { 1069 pragcgobuf += fmt.Sprintf("cgo_import_dynamic %v\n", plan9quote(local)) 1070 return 1071 } 1072 1073 remote = getimpsym(&q) 1074 if remote == "" { 1075 goto err3 1076 } 1077 if !more(&q) { 1078 pragcgobuf += fmt.Sprintf("cgo_import_dynamic %v %v\n", plan9quote(local), plan9quote(remote)) 1079 return 1080 } 1081 1082 p, ok = getquoted(&q) 1083 if !ok { 1084 goto err3 1085 } 1086 pragcgobuf += fmt.Sprintf("cgo_import_dynamic %v %v %v\n", plan9quote(local), plan9quote(remote), plan9quote(p)) 1087 return 1088 1089 err3: 1090 Yyerror("usage: //go:cgo_import_dynamic local [remote [\"library\"]]") 1091 return 1092 } 1093 1094 if verb == "cgo_import_static" { 1095 local := getimpsym(&q) 1096 if local == "" || more(&q) { 1097 Yyerror("usage: //go:cgo_import_static local") 1098 return 1099 } 1100 pragcgobuf += fmt.Sprintf("cgo_import_static %v\n", plan9quote(local)) 1101 return 1102 1103 } 1104 1105 if verb == "cgo_ldflag" { 1106 p, ok := getquoted(&q) 1107 if !ok { 1108 Yyerror("usage: //go:cgo_ldflag \"arg\"") 1109 return 1110 } 1111 pragcgobuf += fmt.Sprintf("cgo_ldflag %v\n", plan9quote(p)) 1112 return 1113 1114 } 1115 } 1116 1117 func (l *lexer) getr() rune { 1118 redo: 1119 l.prevlineno = lexlineno 1120 r, w, err := l.bin.ReadRune() 1121 if err != nil { 1122 if err != io.EOF { 1123 Fatalf("io error: %v", err) 1124 } 1125 return -1 1126 } 1127 switch r { 1128 case 0: 1129 yyerrorl(lexlineno, "illegal NUL byte") 1130 case '\n': 1131 if importpkg == nil { 1132 lexlineno++ 1133 } 1134 case utf8.RuneError: 1135 if w == 1 { 1136 yyerrorl(lexlineno, "illegal UTF-8 sequence") 1137 } 1138 case BOM: 1139 yyerrorl(lexlineno, "Unicode (UTF-8) BOM in middle of file") 1140 goto redo 1141 } 1142 1143 return r 1144 } 1145 1146 func (l *lexer) ungetr() { 1147 l.bin.UnreadRune() 1148 lexlineno = l.prevlineno 1149 } 1150 1151 // onechar lexes a single character within a rune or interpreted string literal, 1152 // handling escape sequences as necessary. 1153 func (l *lexer) onechar(quote rune) (r rune, b byte, ok bool) { 1154 c := l.getr() 1155 switch c { 1156 case EOF: 1157 Yyerror("eof in string") 1158 l.ungetr() 1159 return 1160 1161 case '\n': 1162 Yyerror("newline in string") 1163 l.ungetr() 1164 return 1165 1166 case '\\': 1167 break 1168 1169 case quote: 1170 return 1171 1172 default: 1173 return c, 0, true 1174 } 1175 1176 c = l.getr() 1177 switch c { 1178 case 'x': 1179 return 0, byte(l.hexchar(2)), true 1180 1181 case 'u': 1182 return l.unichar(4), 0, true 1183 1184 case 'U': 1185 return l.unichar(8), 0, true 1186 1187 case '0', '1', '2', '3', '4', '5', '6', '7': 1188 x := c - '0' 1189 for i := 2; i > 0; i-- { 1190 c = l.getr() 1191 if c >= '0' && c <= '7' { 1192 x = x*8 + c - '0' 1193 continue 1194 } 1195 1196 Yyerror("non-octal character in escape sequence: %c", c) 1197 l.ungetr() 1198 } 1199 1200 if x > 255 { 1201 Yyerror("octal escape value > 255: %d", x) 1202 } 1203 1204 return 0, byte(x), true 1205 1206 case 'a': 1207 c = '\a' 1208 case 'b': 1209 c = '\b' 1210 case 'f': 1211 c = '\f' 1212 case 'n': 1213 c = '\n' 1214 case 'r': 1215 c = '\r' 1216 case 't': 1217 c = '\t' 1218 case 'v': 1219 c = '\v' 1220 case '\\': 1221 c = '\\' 1222 1223 default: 1224 if c != quote { 1225 Yyerror("unknown escape sequence: %c", c) 1226 } 1227 } 1228 1229 return c, 0, true 1230 } 1231 1232 func (l *lexer) unichar(n int) rune { 1233 x := l.hexchar(n) 1234 if x > utf8.MaxRune || 0xd800 <= x && x < 0xe000 { 1235 Yyerror("invalid Unicode code point in escape sequence: %#x", x) 1236 x = utf8.RuneError 1237 } 1238 return rune(x) 1239 } 1240 1241 func (l *lexer) hexchar(n int) uint32 { 1242 var x uint32 1243 1244 for ; n > 0; n-- { 1245 var d uint32 1246 switch c := l.getr(); { 1247 case isDigit(c): 1248 d = uint32(c - '0') 1249 case 'a' <= c && c <= 'f': 1250 d = uint32(c - 'a' + 10) 1251 case 'A' <= c && c <= 'F': 1252 d = uint32(c - 'A' + 10) 1253 default: 1254 Yyerror("non-hex character in escape sequence: %c", c) 1255 l.ungetr() 1256 return x 1257 } 1258 x = x*16 + d 1259 } 1260 1261 return x 1262 }