github.com/xushiwei/go@v0.0.0-20130601165731-2b9d83f45bc9/src/cmd/gc/lex.c (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 #include <u.h> 6 #include <libc.h> 7 #include "go.h" 8 #include "y.tab.h" 9 #include <ar.h> 10 11 #undef getc 12 #undef ungetc 13 #define getc ccgetc 14 #define ungetc ccungetc 15 16 extern int yychar; 17 int windows; 18 int yyprev; 19 int yylast; 20 21 static void lexinit(void); 22 static void lexinit1(void); 23 static void lexfini(void); 24 static void yytinit(void); 25 static int getc(void); 26 static void ungetc(int); 27 static int32 getr(void); 28 static int escchar(int, int*, vlong*); 29 static void addidir(char*); 30 static int getlinepragma(void); 31 static char *goos, *goarch, *goroot; 32 33 #define BOM 0xFEFF 34 35 // Compiler experiments. 36 // These are controlled by the GOEXPERIMENT environment 37 // variable recorded when the compiler is built. 38 static struct { 39 char *name; 40 int *val; 41 } exper[] = { 42 // {"rune32", &rune32}, 43 {"fieldtrack", &fieldtrack_enabled}, 44 {nil, nil}, 45 }; 46 47 static void 48 addexp(char *s) 49 { 50 int i; 51 52 for(i=0; exper[i].name != nil; i++) { 53 if(strcmp(exper[i].name, s) == 0) { 54 *exper[i].val = 1; 55 return; 56 } 57 } 58 59 print("unknown experiment %s\n", s); 60 exits("unknown experiment"); 61 } 62 63 static void 64 setexp(void) 65 { 66 char *f[20]; 67 int i, nf; 68 69 // The makefile #defines GOEXPERIMENT for us. 70 nf = getfields(GOEXPERIMENT, f, nelem(f), 1, ","); 71 for(i=0; i<nf; i++) 72 addexp(f[i]); 73 } 74 75 char* 76 expstring(void) 77 { 78 int i; 79 static char buf[512]; 80 81 strcpy(buf, "X"); 82 for(i=0; exper[i].name != nil; i++) 83 if(*exper[i].val) 84 seprint(buf+strlen(buf), buf+sizeof buf, ",%s", exper[i].name); 85 if(strlen(buf) == 1) 86 strcpy(buf, "X,none"); 87 buf[1] = ':'; 88 return buf; 89 } 90 91 // Our own isdigit, isspace, isalpha, isalnum that take care 92 // of EOF and other out of range arguments. 93 static int 94 yy_isdigit(int c) 95 { 96 return c >= 0 && c <= 0xFF && isdigit(c); 97 } 98 99 static int 100 yy_isspace(int c) 101 { 102 return c == ' ' || c == '\t' || c == '\n' || c == '\r'; 103 } 104 105 static int 106 yy_isalpha(int c) 107 { 108 return c >= 0 && c <= 0xFF && isalpha(c); 109 } 110 111 static int 112 yy_isalnum(int c) 113 { 114 return c >= 0 && c <= 0xFF && isalnum(c); 115 } 116 117 // Disallow use of isdigit etc. 118 #undef isdigit 119 #undef isspace 120 #undef isalpha 121 #undef isalnum 122 #define isdigit use_yy_isdigit_instead_of_isdigit 123 #define isspace use_yy_isspace_instead_of_isspace 124 #define isalpha use_yy_isalpha_instead_of_isalpha 125 #define isalnum use_yy_isalnum_instead_of_isalnum 126 127 #define DBG if(!debug['x']){}else print 128 enum 129 { 130 EOF = -1, 131 }; 132 133 void 134 usage(void) 135 { 136 print("usage: %cg [options] file.go...\n", thechar); 137 flagprint(1); 138 exits("usage"); 139 } 140 141 void 142 fault(int s) 143 { 144 USED(s); 145 146 // If we've already complained about things 147 // in the program, don't bother complaining 148 // about the seg fault too; let the user clean up 149 // the code and try again. 150 if(nsavederrors + nerrors > 0) 151 errorexit(); 152 fatal("fault"); 153 } 154 155 void 156 doversion(void) 157 { 158 char *p; 159 160 p = expstring(); 161 if(strcmp(p, "X:none") == 0) 162 p = ""; 163 print("%cg version %s%s%s\n", thechar, getgoversion(), *p ? " " : "", p); 164 exits(0); 165 } 166 167 int 168 main(int argc, char *argv[]) 169 { 170 int i; 171 NodeList *l; 172 char *p; 173 174 #ifdef SIGBUS 175 signal(SIGBUS, fault); 176 signal(SIGSEGV, fault); 177 #endif 178 179 localpkg = mkpkg(strlit("")); 180 localpkg->prefix = "\"\""; 181 182 // pseudo-package, for scoping 183 builtinpkg = mkpkg(strlit("go.builtin")); 184 185 // pseudo-package, accessed by import "unsafe" 186 unsafepkg = mkpkg(strlit("unsafe")); 187 unsafepkg->name = "unsafe"; 188 189 // real package, referred to by generated runtime calls 190 runtimepkg = mkpkg(strlit("runtime")); 191 runtimepkg->name = "runtime"; 192 193 // pseudo-packages used in symbol tables 194 gostringpkg = mkpkg(strlit("go.string")); 195 gostringpkg->name = "go.string"; 196 gostringpkg->prefix = "go.string"; // not go%2estring 197 198 itabpkg = mkpkg(strlit("go.itab")); 199 itabpkg->name = "go.itab"; 200 itabpkg->prefix = "go.itab"; // not go%2eitab 201 202 weaktypepkg = mkpkg(strlit("go.weak.type")); 203 weaktypepkg->name = "go.weak.type"; 204 weaktypepkg->prefix = "go.weak.type"; // not go%2eweak%2etype 205 206 typelinkpkg = mkpkg(strlit("go.typelink")); 207 typelinkpkg->name = "go.typelink"; 208 typelinkpkg->prefix = "go.typelink"; // not go%2etypelink 209 210 trackpkg = mkpkg(strlit("go.track")); 211 trackpkg->name = "go.track"; 212 trackpkg->prefix = "go.track"; // not go%2etrack 213 214 typepkg = mkpkg(strlit("type")); 215 typepkg->name = "type"; 216 217 goroot = getgoroot(); 218 goos = getgoos(); 219 goarch = thestring; 220 221 setexp(); 222 223 outfile = nil; 224 flagcount("+", "compiling runtime", &compiling_runtime); 225 flagcount("%", "debug non-static initializers", &debug['%']); 226 flagcount("A", "for bootstrapping, allow 'any' type", &debug['A']); 227 flagcount("B", "disable bounds checking", &debug['B']); 228 flagstr("D", "path: set relative path for local imports", &localimport); 229 flagcount("E", "debug symbol export", &debug['E']); 230 flagfn1("I", "dir: add dir to import search path", addidir); 231 flagcount("K", "debug missing line numbers", &debug['K']); 232 flagcount("L", "use full (long) path in error messages", &debug['L']); 233 flagcount("M", "debug move generation", &debug['M']); 234 flagcount("N", "disable optimizations", &debug['N']); 235 flagcount("P", "debug peephole optimizer", &debug['P']); 236 flagcount("R", "debug register optimizer", &debug['R']); 237 flagcount("S", "print assembly listing", &debug['S']); 238 flagfn0("V", "print compiler version", doversion); 239 flagcount("W", "debug parse tree after type checking", &debug['W']); 240 flagcount("complete", "compiling complete package (no C or assembly)", &pure_go); 241 flagcount("d", "debug declarations", &debug['d']); 242 flagcount("e", "no limit on number of errors reported", &debug['e']); 243 flagcount("f", "debug stack frames", &debug['f']); 244 flagcount("g", "debug code generation", &debug['g']); 245 flagcount("h", "halt on error", &debug['h']); 246 flagcount("i", "debug line number stack", &debug['i']); 247 flagcount("j", "debug runtime-initialized variables", &debug['j']); 248 flagcount("l", "disable inlining", &debug['l']); 249 flagcount("m", "print optimization decisions", &debug['m']); 250 flagstr("o", "obj: set output file", &outfile); 251 flagstr("p", "path: set expected package import path", &myimportpath); 252 flagcount("r", "debug generated wrappers", &debug['r']); 253 flagcount("race", "enable race detector", &flag_race); 254 flagcount("s", "warn about composite literals that can be simplified", &debug['s']); 255 flagcount("u", "reject unsafe code", &safemode); 256 flagcount("v", "increase debug verbosity", &debug['v']); 257 flagcount("w", "debug type checking", &debug['w']); 258 flagcount("x", "debug lexer", &debug['x']); 259 flagcount("y", "debug declarations in canned imports (with -d)", &debug['y']); 260 if(thechar == '6') 261 flagcount("largemodel", "generate code that assumes a large memory model", &flag_largemodel); 262 263 flagparse(&argc, &argv, usage); 264 265 if(argc < 1) 266 usage(); 267 268 if(flag_race) { 269 racepkg = mkpkg(strlit("runtime/race")); 270 racepkg->name = "race"; 271 } 272 273 // enable inlining. for now: 274 // default: inlining on. (debug['l'] == 1) 275 // -l: inlining off (debug['l'] == 0) 276 // -ll, -lll: inlining on again, with extra debugging (debug['l'] > 1) 277 if(debug['l'] <= 1) 278 debug['l'] = 1 - debug['l']; 279 280 if(thechar == '8') { 281 p = getgo386(); 282 if(strcmp(p, "387") == 0) 283 use_sse = 0; 284 else if(strcmp(p, "sse2") == 0) 285 use_sse = 1; 286 else 287 sysfatal("unsupported setting GO386=%s", p); 288 } 289 290 pathname = mal(1000); 291 if(getwd(pathname, 999) == 0) 292 strcpy(pathname, "/???"); 293 294 if(yy_isalpha(pathname[0]) && pathname[1] == ':') { 295 // On Windows. 296 windows = 1; 297 298 // Canonicalize path by converting \ to / (Windows accepts both). 299 for(p=pathname; *p; p++) 300 if(*p == '\\') 301 *p = '/'; 302 } 303 304 fmtinstallgo(); 305 betypeinit(); 306 if(widthptr == 0) 307 fatal("betypeinit failed"); 308 309 lexinit(); 310 typeinit(); 311 lexinit1(); 312 yytinit(); 313 314 blockgen = 1; 315 dclcontext = PEXTERN; 316 nerrors = 0; 317 lexlineno = 1; 318 319 for(i=0; i<argc; i++) { 320 infile = argv[i]; 321 linehist(infile, 0, 0); 322 323 curio.infile = infile; 324 curio.bin = Bopen(infile, OREAD); 325 if(curio.bin == nil) { 326 print("open %s: %r\n", infile); 327 errorexit(); 328 } 329 curio.peekc = 0; 330 curio.peekc1 = 0; 331 curio.nlsemi = 0; 332 333 // Skip initial BOM if present. 334 if(Bgetrune(curio.bin) != BOM) 335 Bungetrune(curio.bin); 336 337 block = 1; 338 iota = -1000000; 339 340 yyparse(); 341 if(nsyntaxerrors != 0) 342 errorexit(); 343 344 linehist(nil, 0, 0); 345 if(curio.bin != nil) 346 Bterm(curio.bin); 347 } 348 testdclstack(); 349 mkpackage(localpkg->name); // final import not used checks 350 lexfini(); 351 352 typecheckok = 1; 353 if(debug['f']) 354 frame(1); 355 356 // Process top-level declarations in phases. 357 358 // Phase 1: const, type, and names and types of funcs. 359 // This will gather all the information about types 360 // and methods but doesn't depend on any of it. 361 defercheckwidth(); 362 for(l=xtop; l; l=l->next) 363 if(l->n->op != ODCL && l->n->op != OAS) 364 typecheck(&l->n, Etop); 365 366 // Phase 2: Variable assignments. 367 // To check interface assignments, depends on phase 1. 368 for(l=xtop; l; l=l->next) 369 if(l->n->op == ODCL || l->n->op == OAS) 370 typecheck(&l->n, Etop); 371 resumecheckwidth(); 372 373 // Phase 3: Type check function bodies. 374 for(l=xtop; l; l=l->next) { 375 if(l->n->op == ODCLFUNC || l->n->op == OCLOSURE) { 376 curfn = l->n; 377 saveerrors(); 378 typechecklist(l->n->nbody, Etop); 379 checkreturn(l->n); 380 if(nerrors != 0) 381 l->n->nbody = nil; // type errors; do not compile 382 } 383 } 384 385 curfn = nil; 386 387 if(nsavederrors+nerrors) 388 errorexit(); 389 390 // Phase 4: Inlining 391 if(debug['l'] > 1) { 392 // Typecheck imported function bodies if debug['l'] > 1, 393 // otherwise lazily when used or re-exported. 394 for(l=importlist; l; l=l->next) 395 if (l->n->inl) { 396 saveerrors(); 397 typecheckinl(l->n); 398 } 399 400 if(nsavederrors+nerrors) 401 errorexit(); 402 } 403 404 if(debug['l']) { 405 // Find functions that can be inlined and clone them before walk expands them. 406 for(l=xtop; l; l=l->next) 407 if(l->n->op == ODCLFUNC) 408 caninl(l->n); 409 410 // Expand inlineable calls in all functions 411 for(l=xtop; l; l=l->next) 412 if(l->n->op == ODCLFUNC) 413 inlcalls(l->n); 414 } 415 416 // Phase 5: Escape analysis. 417 if(!debug['N']) 418 escapes(xtop); 419 420 // Phase 6: Compile top level functions. 421 for(l=xtop; l; l=l->next) 422 if(l->n->op == ODCLFUNC) 423 funccompile(l->n, 0); 424 425 if(nsavederrors+nerrors == 0) 426 fninit(xtop); 427 428 // Phase 7: Check external declarations. 429 for(l=externdcl; l; l=l->next) 430 if(l->n->op == ONAME) 431 typecheck(&l->n, Erv); 432 433 if(nerrors+nsavederrors) 434 errorexit(); 435 436 dumpobj(); 437 438 if(nerrors+nsavederrors) 439 errorexit(); 440 441 flusherrors(); 442 exits(0); 443 return 0; 444 } 445 446 void 447 saveerrors(void) 448 { 449 nsavederrors += nerrors; 450 nerrors = 0; 451 } 452 453 /* 454 * macro to portably read/write archive header. 455 * 'cmd' is read/write/Bread/Bwrite, etc. 456 */ 457 #define HEADER_IO(cmd, f, h) cmd(f, h.name, sizeof(h.name)) != sizeof(h.name)\ 458 || cmd(f, h.date, sizeof(h.date)) != sizeof(h.date)\ 459 || cmd(f, h.uid, sizeof(h.uid)) != sizeof(h.uid)\ 460 || cmd(f, h.gid, sizeof(h.gid)) != sizeof(h.gid)\ 461 || cmd(f, h.mode, sizeof(h.mode)) != sizeof(h.mode)\ 462 || cmd(f, h.size, sizeof(h.size)) != sizeof(h.size)\ 463 || cmd(f, h.fmag, sizeof(h.fmag)) != sizeof(h.fmag) 464 465 static int 466 arsize(Biobuf *b, char *name) 467 { 468 struct ar_hdr a; 469 470 if (HEADER_IO(Bread, b, a)) 471 return -1; 472 473 if(strncmp(a.name, name, strlen(name)) != 0) 474 return -1; 475 476 return atoi(a.size); 477 } 478 479 static int 480 skiptopkgdef(Biobuf *b) 481 { 482 char *p; 483 int sz; 484 485 /* archive header */ 486 if((p = Brdline(b, '\n')) == nil) 487 return 0; 488 if(Blinelen(b) != 8) 489 return 0; 490 if(memcmp(p, "!<arch>\n", 8) != 0) 491 return 0; 492 /* symbol table is first; skip it */ 493 sz = arsize(b, "__.GOSYMDEF"); 494 if(sz < 0) 495 return 0; 496 Bseek(b, sz, 1); 497 /* package export block is second */ 498 sz = arsize(b, "__.PKGDEF"); 499 if(sz <= 0) 500 return 0; 501 return 1; 502 } 503 504 static void 505 addidir(char* dir) 506 { 507 Idir** pp; 508 509 if(dir == nil) 510 return; 511 512 for(pp = &idirs; *pp != nil; pp = &(*pp)->link) 513 ; 514 *pp = mal(sizeof(Idir)); 515 (*pp)->link = nil; 516 (*pp)->dir = dir; 517 } 518 519 // is this path a local name? begins with ./ or ../ or / 520 static int 521 islocalname(Strlit *name) 522 { 523 if(name->len >= 1 && name->s[0] == '/') 524 return 1; 525 if(windows && name->len >= 3 && 526 yy_isalpha(name->s[0]) && name->s[1] == ':' && name->s[2] == '/') 527 return 1; 528 if(name->len >= 2 && strncmp(name->s, "./", 2) == 0) 529 return 1; 530 if(name->len == 1 && strncmp(name->s, ".", 1) == 0) 531 return 1; 532 if(name->len >= 3 && strncmp(name->s, "../", 3) == 0) 533 return 1; 534 if(name->len == 2 && strncmp(name->s, "..", 2) == 0) 535 return 1; 536 return 0; 537 } 538 539 static int 540 findpkg(Strlit *name) 541 { 542 Idir *p; 543 char *q, *race; 544 545 if(islocalname(name)) { 546 if(safemode) 547 return 0; 548 // try .a before .6. important for building libraries: 549 // if there is an array.6 in the array.a library, 550 // want to find all of array.a, not just array.6. 551 snprint(namebuf, sizeof(namebuf), "%Z.a", name); 552 if(access(namebuf, 0) >= 0) 553 return 1; 554 snprint(namebuf, sizeof(namebuf), "%Z.%c", name, thechar); 555 if(access(namebuf, 0) >= 0) 556 return 1; 557 return 0; 558 } 559 560 // local imports should be canonicalized already. 561 // don't want to see "encoding/../encoding/base64" 562 // as different from "encoding/base64". 563 q = mal(name->len+1); 564 memmove(q, name->s, name->len); 565 q[name->len] = '\0'; 566 cleanname(q); 567 if(strlen(q) != name->len || memcmp(q, name->s, name->len) != 0) { 568 yyerror("non-canonical import path %Z (should be %s)", name, q); 569 return 0; 570 } 571 572 for(p = idirs; p != nil; p = p->link) { 573 snprint(namebuf, sizeof(namebuf), "%s/%Z.a", p->dir, name); 574 if(access(namebuf, 0) >= 0) 575 return 1; 576 snprint(namebuf, sizeof(namebuf), "%s/%Z.%c", p->dir, name, thechar); 577 if(access(namebuf, 0) >= 0) 578 return 1; 579 } 580 if(goroot != nil) { 581 race = ""; 582 if(flag_race) 583 race = "_race"; 584 snprint(namebuf, sizeof(namebuf), "%s/pkg/%s_%s%s/%Z.a", goroot, goos, goarch, race, name); 585 if(access(namebuf, 0) >= 0) 586 return 1; 587 snprint(namebuf, sizeof(namebuf), "%s/pkg/%s_%s%s/%Z.%c", goroot, goos, goarch, race, name, thechar); 588 if(access(namebuf, 0) >= 0) 589 return 1; 590 } 591 return 0; 592 } 593 594 static void 595 fakeimport(void) 596 { 597 importpkg = mkpkg(strlit("fake")); 598 cannedimports("fake.6", "$$\n"); 599 } 600 601 void 602 importfile(Val *f, int line) 603 { 604 Biobuf *imp; 605 char *file, *p, *q, *tag; 606 int32 c; 607 int len; 608 Strlit *path; 609 char *cleanbuf, *prefix; 610 611 USED(line); 612 613 if(f->ctype != CTSTR) { 614 yyerror("import statement not a string"); 615 fakeimport(); 616 return; 617 } 618 619 if(f->u.sval->len == 0) { 620 yyerror("import path is empty"); 621 fakeimport(); 622 return; 623 } 624 625 if(isbadimport(f->u.sval)) { 626 fakeimport(); 627 return; 628 } 629 630 // The package name main is no longer reserved, 631 // but we reserve the import path "main" to identify 632 // the main package, just as we reserve the import 633 // path "math" to identify the standard math package. 634 if(strcmp(f->u.sval->s, "main") == 0) { 635 yyerror("cannot import \"main\""); 636 errorexit(); 637 } 638 639 if(myimportpath != nil && strcmp(f->u.sval->s, myimportpath) == 0) { 640 yyerror("import \"%Z\" while compiling that package (import cycle)", f->u.sval); 641 errorexit(); 642 } 643 644 if(strcmp(f->u.sval->s, "unsafe") == 0) { 645 if(safemode) { 646 yyerror("cannot import package unsafe"); 647 errorexit(); 648 } 649 importpkg = mkpkg(f->u.sval); 650 cannedimports("unsafe.6", unsafeimport); 651 return; 652 } 653 654 path = f->u.sval; 655 if(islocalname(path)) { 656 if(path->s[0] == '/') { 657 yyerror("import path cannot be absolute path"); 658 fakeimport(); 659 return; 660 } 661 prefix = pathname; 662 if(localimport != nil) 663 prefix = localimport; 664 cleanbuf = mal(strlen(prefix) + strlen(path->s) + 2); 665 strcpy(cleanbuf, prefix); 666 strcat(cleanbuf, "/"); 667 strcat(cleanbuf, path->s); 668 cleanname(cleanbuf); 669 path = strlit(cleanbuf); 670 671 if(isbadimport(path)) { 672 fakeimport(); 673 return; 674 } 675 } 676 677 if(!findpkg(path)) { 678 yyerror("can't find import: \"%Z\"", f->u.sval); 679 errorexit(); 680 } 681 importpkg = mkpkg(path); 682 683 // If we already saw that package, feed a dummy statement 684 // to the lexer to avoid parsing export data twice. 685 if(importpkg->imported) { 686 file = strdup(namebuf); 687 tag = ""; 688 if(importpkg->safe) { 689 tag = "safe"; 690 } 691 p = smprint("package %s %s\n$$\n", importpkg->name, tag); 692 cannedimports(file, p); 693 return; 694 } 695 importpkg->imported = 1; 696 697 imp = Bopen(namebuf, OREAD); 698 if(imp == nil) { 699 yyerror("can't open import: \"%Z\": %r", f->u.sval); 700 errorexit(); 701 } 702 file = strdup(namebuf); 703 704 len = strlen(namebuf); 705 if(len > 2 && namebuf[len-2] == '.' && namebuf[len-1] == 'a') { 706 if(!skiptopkgdef(imp)) { 707 yyerror("import %s: not a package file", file); 708 errorexit(); 709 } 710 } 711 712 // check object header 713 p = Brdstr(imp, '\n', 1); 714 if(strcmp(p, "empty archive") != 0) { 715 if(strncmp(p, "go object ", 10) != 0) { 716 yyerror("import %s: not a go object file", file); 717 errorexit(); 718 } 719 q = smprint("%s %s %s %s", getgoos(), thestring, getgoversion(), expstring()); 720 if(strcmp(p+10, q) != 0) { 721 yyerror("import %s: object is [%s] expected [%s]", file, p+10, q); 722 errorexit(); 723 } 724 free(q); 725 } 726 727 // assume files move (get installed) 728 // so don't record the full path. 729 linehist(file + len - path->len - 2, -1, 1); // acts as #pragma lib 730 731 /* 732 * position the input right 733 * after $$ and return 734 */ 735 pushedio = curio; 736 curio.bin = imp; 737 curio.peekc = 0; 738 curio.peekc1 = 0; 739 curio.infile = file; 740 curio.nlsemi = 0; 741 typecheckok = 1; 742 743 for(;;) { 744 c = getc(); 745 if(c == EOF) 746 break; 747 if(c != '$') 748 continue; 749 c = getc(); 750 if(c == EOF) 751 break; 752 if(c != '$') 753 continue; 754 return; 755 } 756 yyerror("no import in \"%Z\"", f->u.sval); 757 unimportfile(); 758 } 759 760 void 761 unimportfile(void) 762 { 763 if(curio.bin != nil) { 764 Bterm(curio.bin); 765 curio.bin = nil; 766 } else 767 lexlineno--; // re correct sys.6 line number 768 769 curio = pushedio; 770 pushedio.bin = nil; 771 incannedimport = 0; 772 typecheckok = 0; 773 } 774 775 void 776 cannedimports(char *file, char *cp) 777 { 778 lexlineno++; // if sys.6 is included on line 1, 779 780 pushedio = curio; 781 curio.bin = nil; 782 curio.peekc = 0; 783 curio.peekc1 = 0; 784 curio.infile = file; 785 curio.cp = cp; 786 curio.nlsemi = 0; 787 curio.importsafe = 0; 788 789 typecheckok = 1; 790 incannedimport = 1; 791 } 792 793 static int 794 isfrog(int c) 795 { 796 // complain about possibly invisible control characters 797 if(c < ' ') { 798 return !yy_isspace(c); // exclude good white space 799 } 800 if(0x7f <= c && c <= 0xa0) // DEL, unicode block including unbreakable space. 801 return 1; 802 return 0; 803 } 804 805 typedef struct Loophack Loophack; 806 struct Loophack { 807 int v; 808 Loophack *next; 809 }; 810 811 static int32 812 _yylex(void) 813 { 814 int c, c1, clen, escflag, ncp; 815 vlong v; 816 char *cp, *ep; 817 Rune rune; 818 Sym *s; 819 static Loophack *lstk; 820 Loophack *h; 821 822 prevlineno = lineno; 823 824 l0: 825 c = getc(); 826 if(yy_isspace(c)) { 827 if(c == '\n' && curio.nlsemi) { 828 ungetc(c); 829 DBG("lex: implicit semi\n"); 830 return ';'; 831 } 832 goto l0; 833 } 834 835 lineno = lexlineno; /* start of token */ 836 837 if(c >= Runeself) { 838 /* all multibyte runes are alpha */ 839 cp = lexbuf; 840 ep = lexbuf+sizeof lexbuf; 841 goto talph; 842 } 843 844 if(yy_isalpha(c)) { 845 cp = lexbuf; 846 ep = lexbuf+sizeof lexbuf; 847 goto talph; 848 } 849 850 if(yy_isdigit(c)) 851 goto tnum; 852 853 switch(c) { 854 case EOF: 855 lineno = prevlineno; 856 ungetc(EOF); 857 return -1; 858 859 case '_': 860 cp = lexbuf; 861 ep = lexbuf+sizeof lexbuf; 862 goto talph; 863 864 case '.': 865 c1 = getc(); 866 if(yy_isdigit(c1)) { 867 cp = lexbuf; 868 ep = lexbuf+sizeof lexbuf; 869 *cp++ = c; 870 c = c1; 871 goto casedot; 872 } 873 if(c1 == '.') { 874 c1 = getc(); 875 if(c1 == '.') { 876 c = LDDD; 877 goto lx; 878 } 879 ungetc(c1); 880 c1 = '.'; 881 } 882 break; 883 884 case '"': 885 /* "..." */ 886 strcpy(lexbuf, "\"<string>\""); 887 cp = mal(8); 888 clen = sizeof(int32); 889 ncp = 8; 890 891 for(;;) { 892 if(clen+UTFmax > ncp) { 893 cp = remal(cp, ncp, ncp); 894 ncp += ncp; 895 } 896 if(escchar('"', &escflag, &v)) 897 break; 898 if(v < Runeself || escflag) { 899 cp[clen++] = v; 900 } else { 901 rune = v; 902 c = runelen(rune); 903 runetochar(cp+clen, &rune); 904 clen += c; 905 } 906 } 907 goto strlit; 908 909 case '`': 910 /* `...` */ 911 strcpy(lexbuf, "`<string>`"); 912 cp = mal(8); 913 clen = sizeof(int32); 914 ncp = 8; 915 916 for(;;) { 917 if(clen+UTFmax > ncp) { 918 cp = remal(cp, ncp, ncp); 919 ncp += ncp; 920 } 921 c = getr(); 922 if(c == '\r') 923 continue; 924 if(c == EOF) { 925 yyerror("eof in string"); 926 break; 927 } 928 if(c == '`') 929 break; 930 rune = c; 931 clen += runetochar(cp+clen, &rune); 932 } 933 934 strlit: 935 *(int32*)cp = clen-sizeof(int32); // length 936 do { 937 cp[clen++] = 0; 938 } while(clen & MAXALIGN); 939 yylval.val.u.sval = (Strlit*)cp; 940 yylval.val.ctype = CTSTR; 941 DBG("lex: string literal\n"); 942 strcpy(litbuf, "string literal"); 943 return LLITERAL; 944 945 case '\'': 946 /* '.' */ 947 if(escchar('\'', &escflag, &v)) { 948 yyerror("empty character literal or unescaped ' in character literal"); 949 v = '\''; 950 } 951 if(!escchar('\'', &escflag, &v)) { 952 yyerror("missing '"); 953 ungetc(v); 954 } 955 yylval.val.u.xval = mal(sizeof(*yylval.val.u.xval)); 956 mpmovecfix(yylval.val.u.xval, v); 957 yylval.val.ctype = CTRUNE; 958 DBG("lex: codepoint literal\n"); 959 strcpy(litbuf, "string literal"); 960 return LLITERAL; 961 962 case '/': 963 c1 = getc(); 964 if(c1 == '*') { 965 int nl; 966 967 nl = 0; 968 for(;;) { 969 c = getr(); 970 if(c == '\n') 971 nl = 1; 972 while(c == '*') { 973 c = getr(); 974 if(c == '/') { 975 if(nl) 976 ungetc('\n'); 977 goto l0; 978 } 979 if(c == '\n') 980 nl = 1; 981 } 982 if(c == EOF) { 983 yyerror("eof in comment"); 984 errorexit(); 985 } 986 } 987 } 988 if(c1 == '/') { 989 c = getlinepragma(); 990 for(;;) { 991 if(c == '\n' || c == EOF) { 992 ungetc(c); 993 goto l0; 994 } 995 c = getr(); 996 } 997 } 998 if(c1 == '=') { 999 c = ODIV; 1000 goto asop; 1001 } 1002 break; 1003 1004 case ':': 1005 c1 = getc(); 1006 if(c1 == '=') { 1007 c = LCOLAS; 1008 yylval.i = lexlineno; 1009 goto lx; 1010 } 1011 break; 1012 1013 case '*': 1014 c1 = getc(); 1015 if(c1 == '=') { 1016 c = OMUL; 1017 goto asop; 1018 } 1019 break; 1020 1021 case '%': 1022 c1 = getc(); 1023 if(c1 == '=') { 1024 c = OMOD; 1025 goto asop; 1026 } 1027 break; 1028 1029 case '+': 1030 c1 = getc(); 1031 if(c1 == '+') { 1032 c = LINC; 1033 goto lx; 1034 } 1035 if(c1 == '=') { 1036 c = OADD; 1037 goto asop; 1038 } 1039 break; 1040 1041 case '-': 1042 c1 = getc(); 1043 if(c1 == '-') { 1044 c = LDEC; 1045 goto lx; 1046 } 1047 if(c1 == '=') { 1048 c = OSUB; 1049 goto asop; 1050 } 1051 break; 1052 1053 case '>': 1054 c1 = getc(); 1055 if(c1 == '>') { 1056 c = LRSH; 1057 c1 = getc(); 1058 if(c1 == '=') { 1059 c = ORSH; 1060 goto asop; 1061 } 1062 break; 1063 } 1064 if(c1 == '=') { 1065 c = LGE; 1066 goto lx; 1067 } 1068 c = LGT; 1069 break; 1070 1071 case '<': 1072 c1 = getc(); 1073 if(c1 == '<') { 1074 c = LLSH; 1075 c1 = getc(); 1076 if(c1 == '=') { 1077 c = OLSH; 1078 goto asop; 1079 } 1080 break; 1081 } 1082 if(c1 == '=') { 1083 c = LLE; 1084 goto lx; 1085 } 1086 if(c1 == '-') { 1087 c = LCOMM; 1088 goto lx; 1089 } 1090 c = LLT; 1091 break; 1092 1093 case '=': 1094 c1 = getc(); 1095 if(c1 == '=') { 1096 c = LEQ; 1097 goto lx; 1098 } 1099 break; 1100 1101 case '!': 1102 c1 = getc(); 1103 if(c1 == '=') { 1104 c = LNE; 1105 goto lx; 1106 } 1107 break; 1108 1109 case '&': 1110 c1 = getc(); 1111 if(c1 == '&') { 1112 c = LANDAND; 1113 goto lx; 1114 } 1115 if(c1 == '^') { 1116 c = LANDNOT; 1117 c1 = getc(); 1118 if(c1 == '=') { 1119 c = OANDNOT; 1120 goto asop; 1121 } 1122 break; 1123 } 1124 if(c1 == '=') { 1125 c = OAND; 1126 goto asop; 1127 } 1128 break; 1129 1130 case '|': 1131 c1 = getc(); 1132 if(c1 == '|') { 1133 c = LOROR; 1134 goto lx; 1135 } 1136 if(c1 == '=') { 1137 c = OOR; 1138 goto asop; 1139 } 1140 break; 1141 1142 case '^': 1143 c1 = getc(); 1144 if(c1 == '=') { 1145 c = OXOR; 1146 goto asop; 1147 } 1148 break; 1149 1150 /* 1151 * clumsy dance: 1152 * to implement rule that disallows 1153 * if T{1}[0] { ... } 1154 * but allows 1155 * if (T{1}[0]) { ... } 1156 * the block bodies for if/for/switch/select 1157 * begin with an LBODY token, not '{'. 1158 * 1159 * when we see the keyword, the next 1160 * non-parenthesized '{' becomes an LBODY. 1161 * loophack is normally 0. 1162 * a keyword makes it go up to 1. 1163 * parens push loophack onto a stack and go back to 0. 1164 * a '{' with loophack == 1 becomes LBODY and disables loophack. 1165 * 1166 * i said it was clumsy. 1167 */ 1168 case '(': 1169 case '[': 1170 if(loophack || lstk != nil) { 1171 h = malloc(sizeof *h); 1172 if(h == nil) { 1173 flusherrors(); 1174 yyerror("out of memory"); 1175 errorexit(); 1176 } 1177 h->v = loophack; 1178 h->next = lstk; 1179 lstk = h; 1180 loophack = 0; 1181 } 1182 goto lx; 1183 case ')': 1184 case ']': 1185 if(lstk != nil) { 1186 h = lstk; 1187 loophack = h->v; 1188 lstk = h->next; 1189 free(h); 1190 } 1191 goto lx; 1192 case '{': 1193 if(loophack == 1) { 1194 DBG("%L lex: LBODY\n", lexlineno); 1195 loophack = 0; 1196 return LBODY; 1197 } 1198 goto lx; 1199 1200 default: 1201 goto lx; 1202 } 1203 ungetc(c1); 1204 1205 lx: 1206 if(c > 0xff) 1207 DBG("%L lex: TOKEN %s\n", lexlineno, lexname(c)); 1208 else 1209 DBG("%L lex: TOKEN '%c'\n", lexlineno, c); 1210 if(isfrog(c)) { 1211 yyerror("illegal character 0x%ux", c); 1212 goto l0; 1213 } 1214 if(importpkg == nil && (c == '#' || c == '$' || c == '?' || c == '@' || c == '\\')) { 1215 yyerror("%s: unexpected %c", "syntax error", c); 1216 goto l0; 1217 } 1218 return c; 1219 1220 asop: 1221 yylval.i = c; // rathole to hold which asop 1222 DBG("lex: TOKEN ASOP %c\n", c); 1223 return LASOP; 1224 1225 talph: 1226 /* 1227 * cp is set to lexbuf and some 1228 * prefix has been stored 1229 */ 1230 for(;;) { 1231 if(cp+10 >= ep) { 1232 yyerror("identifier too long"); 1233 errorexit(); 1234 } 1235 if(c >= Runeself) { 1236 ungetc(c); 1237 rune = getr(); 1238 // 0xb7 ยท is used for internal names 1239 if(!isalpharune(rune) && !isdigitrune(rune) && (importpkg == nil || rune != 0xb7)) 1240 yyerror("invalid identifier character U+%04x", rune); 1241 cp += runetochar(cp, &rune); 1242 } else if(!yy_isalnum(c) && c != '_') 1243 break; 1244 else 1245 *cp++ = c; 1246 c = getc(); 1247 } 1248 *cp = 0; 1249 ungetc(c); 1250 1251 s = lookup(lexbuf); 1252 switch(s->lexical) { 1253 case LIGNORE: 1254 goto l0; 1255 1256 case LFOR: 1257 case LIF: 1258 case LSWITCH: 1259 case LSELECT: 1260 loophack = 1; // see comment about loophack above 1261 break; 1262 } 1263 1264 DBG("lex: %S %s\n", s, lexname(s->lexical)); 1265 yylval.sym = s; 1266 return s->lexical; 1267 1268 tnum: 1269 cp = lexbuf; 1270 ep = lexbuf+sizeof lexbuf; 1271 if(c != '0') { 1272 for(;;) { 1273 if(cp+10 >= ep) { 1274 yyerror("identifier too long"); 1275 errorexit(); 1276 } 1277 *cp++ = c; 1278 c = getc(); 1279 if(yy_isdigit(c)) 1280 continue; 1281 goto dc; 1282 } 1283 } 1284 *cp++ = c; 1285 c = getc(); 1286 if(c == 'x' || c == 'X') { 1287 for(;;) { 1288 if(cp+10 >= ep) { 1289 yyerror("identifier too long"); 1290 errorexit(); 1291 } 1292 *cp++ = c; 1293 c = getc(); 1294 if(yy_isdigit(c)) 1295 continue; 1296 if(c >= 'a' && c <= 'f') 1297 continue; 1298 if(c >= 'A' && c <= 'F') 1299 continue; 1300 if(cp == lexbuf+2) 1301 yyerror("malformed hex constant"); 1302 if(c == 'p') 1303 goto caseep; 1304 goto ncu; 1305 } 1306 } 1307 1308 if(c == 'p') // 0p begins floating point zero 1309 goto caseep; 1310 1311 c1 = 0; 1312 for(;;) { 1313 if(cp+10 >= ep) { 1314 yyerror("identifier too long"); 1315 errorexit(); 1316 } 1317 if(!yy_isdigit(c)) 1318 break; 1319 if(c < '0' || c > '7') 1320 c1 = 1; // not octal 1321 *cp++ = c; 1322 c = getc(); 1323 } 1324 if(c == '.') 1325 goto casedot; 1326 if(c == 'e' || c == 'E') 1327 goto caseep; 1328 if(c == 'i') 1329 goto casei; 1330 if(c1) 1331 yyerror("malformed octal constant"); 1332 goto ncu; 1333 1334 dc: 1335 if(c == '.') 1336 goto casedot; 1337 if(c == 'e' || c == 'E' || c == 'p' || c == 'P') 1338 goto caseep; 1339 if(c == 'i') 1340 goto casei; 1341 1342 ncu: 1343 *cp = 0; 1344 ungetc(c); 1345 1346 yylval.val.u.xval = mal(sizeof(*yylval.val.u.xval)); 1347 mpatofix(yylval.val.u.xval, lexbuf); 1348 if(yylval.val.u.xval->ovf) { 1349 yyerror("overflow in constant"); 1350 mpmovecfix(yylval.val.u.xval, 0); 1351 } 1352 yylval.val.ctype = CTINT; 1353 DBG("lex: integer literal\n"); 1354 strcpy(litbuf, "literal "); 1355 strcat(litbuf, lexbuf); 1356 return LLITERAL; 1357 1358 casedot: 1359 for(;;) { 1360 if(cp+10 >= ep) { 1361 yyerror("identifier too long"); 1362 errorexit(); 1363 } 1364 *cp++ = c; 1365 c = getc(); 1366 if(!yy_isdigit(c)) 1367 break; 1368 } 1369 if(c == 'i') 1370 goto casei; 1371 if(c != 'e' && c != 'E') 1372 goto caseout; 1373 1374 caseep: 1375 *cp++ = c; 1376 c = getc(); 1377 if(c == '+' || c == '-') { 1378 *cp++ = c; 1379 c = getc(); 1380 } 1381 if(!yy_isdigit(c)) 1382 yyerror("malformed fp constant exponent"); 1383 while(yy_isdigit(c)) { 1384 if(cp+10 >= ep) { 1385 yyerror("identifier too long"); 1386 errorexit(); 1387 } 1388 *cp++ = c; 1389 c = getc(); 1390 } 1391 if(c == 'i') 1392 goto casei; 1393 goto caseout; 1394 1395 casei: 1396 // imaginary constant 1397 *cp = 0; 1398 yylval.val.u.cval = mal(sizeof(*yylval.val.u.cval)); 1399 mpmovecflt(&yylval.val.u.cval->real, 0.0); 1400 mpatoflt(&yylval.val.u.cval->imag, lexbuf); 1401 if(yylval.val.u.cval->imag.val.ovf) { 1402 yyerror("overflow in imaginary constant"); 1403 mpmovecflt(&yylval.val.u.cval->real, 0.0); 1404 } 1405 yylval.val.ctype = CTCPLX; 1406 DBG("lex: imaginary literal\n"); 1407 strcpy(litbuf, "literal "); 1408 strcat(litbuf, lexbuf); 1409 return LLITERAL; 1410 1411 caseout: 1412 *cp = 0; 1413 ungetc(c); 1414 1415 yylval.val.u.fval = mal(sizeof(*yylval.val.u.fval)); 1416 mpatoflt(yylval.val.u.fval, lexbuf); 1417 if(yylval.val.u.fval->val.ovf) { 1418 yyerror("overflow in float constant"); 1419 mpmovecflt(yylval.val.u.fval, 0.0); 1420 } 1421 yylval.val.ctype = CTFLT; 1422 DBG("lex: floating literal\n"); 1423 strcpy(litbuf, "literal "); 1424 strcat(litbuf, lexbuf); 1425 return LLITERAL; 1426 } 1427 1428 /* 1429 * read and interpret syntax that looks like 1430 * //line parse.y:15 1431 * as a discontinuity in sequential line numbers. 1432 * the next line of input comes from parse.y:15 1433 */ 1434 static int 1435 getlinepragma(void) 1436 { 1437 int i, c, n; 1438 char *cp, *ep, *linep; 1439 Hist *h; 1440 1441 c = getr(); 1442 if(c == 'g') 1443 goto go; 1444 if(c != 'l') 1445 goto out; 1446 for(i=1; i<5; i++) { 1447 c = getr(); 1448 if(c != "line "[i]) 1449 goto out; 1450 } 1451 1452 cp = lexbuf; 1453 ep = lexbuf+sizeof(lexbuf)-5; 1454 linep = nil; 1455 for(;;) { 1456 c = getr(); 1457 if(c == EOF) 1458 goto out; 1459 if(c == '\n') 1460 break; 1461 if(c == ' ') 1462 continue; 1463 if(c == ':') 1464 linep = cp; 1465 if(cp < ep) 1466 *cp++ = c; 1467 } 1468 *cp = 0; 1469 1470 if(linep == nil || linep >= ep) 1471 goto out; 1472 *linep++ = '\0'; 1473 n = 0; 1474 for(cp=linep; *cp; cp++) { 1475 if(*cp < '0' || *cp > '9') 1476 goto out; 1477 n = n*10 + *cp - '0'; 1478 if(n > 1e8) { 1479 yyerror("line number out of range"); 1480 errorexit(); 1481 } 1482 } 1483 if(n <= 0) 1484 goto out; 1485 1486 // try to avoid allocating file name over and over 1487 for(h=hist; h!=H; h=h->link) { 1488 if(h->name != nil && strcmp(h->name, lexbuf) == 0) { 1489 linehist(h->name, n, 0); 1490 goto out; 1491 } 1492 } 1493 linehist(strdup(lexbuf), n, 0); 1494 goto out; 1495 1496 go: 1497 cp = lexbuf; 1498 ep = lexbuf+sizeof(lexbuf)-5; 1499 *cp++ = 'g'; // already read 1500 for(;;) { 1501 c = getr(); 1502 if(c == EOF || c >= Runeself) 1503 goto out; 1504 if(c == '\n') 1505 break; 1506 if(cp < ep) 1507 *cp++ = c; 1508 } 1509 *cp = 0; 1510 ep = strchr(lexbuf, ' '); 1511 if(ep != nil) 1512 *ep = 0; 1513 1514 if(strcmp(lexbuf, "go:nointerface") == 0 && fieldtrack_enabled) { 1515 nointerface = 1; 1516 goto out; 1517 } 1518 if(strcmp(lexbuf, "go:noescape") == 0) { 1519 noescape = 1; 1520 goto out; 1521 } 1522 1523 out: 1524 return c; 1525 } 1526 1527 int32 1528 yylex(void) 1529 { 1530 int lx; 1531 1532 lx = _yylex(); 1533 1534 if(curio.nlsemi && lx == EOF) { 1535 // Treat EOF as "end of line" for the purposes 1536 // of inserting a semicolon. 1537 lx = ';'; 1538 } 1539 1540 switch(lx) { 1541 case LNAME: 1542 case LLITERAL: 1543 case LBREAK: 1544 case LCONTINUE: 1545 case LFALL: 1546 case LRETURN: 1547 case LINC: 1548 case LDEC: 1549 case ')': 1550 case '}': 1551 case ']': 1552 curio.nlsemi = 1; 1553 break; 1554 default: 1555 curio.nlsemi = 0; 1556 break; 1557 } 1558 1559 // Track last two tokens returned by yylex. 1560 yyprev = yylast; 1561 yylast = lx; 1562 return lx; 1563 } 1564 1565 static int 1566 getc(void) 1567 { 1568 int c, c1, c2; 1569 1570 c = curio.peekc; 1571 if(c != 0) { 1572 curio.peekc = curio.peekc1; 1573 curio.peekc1 = 0; 1574 goto check; 1575 } 1576 1577 if(curio.bin == nil) { 1578 c = *curio.cp & 0xff; 1579 if(c != 0) 1580 curio.cp++; 1581 } else { 1582 loop: 1583 c = Bgetc(curio.bin); 1584 if(c == 0xef) { 1585 c1 = Bgetc(curio.bin); 1586 c2 = Bgetc(curio.bin); 1587 if(c1 == 0xbb && c2 == 0xbf) { 1588 yyerrorl(lexlineno, "Unicode (UTF-8) BOM in middle of file"); 1589 goto loop; 1590 } 1591 Bungetc(curio.bin); 1592 Bungetc(curio.bin); 1593 } 1594 } 1595 1596 check: 1597 switch(c) { 1598 case 0: 1599 if(curio.bin != nil) { 1600 yyerror("illegal NUL byte"); 1601 break; 1602 } 1603 case EOF: 1604 // insert \n at EOF 1605 if(curio.eofnl) 1606 return EOF; 1607 curio.eofnl = 1; 1608 c = '\n'; 1609 case '\n': 1610 if(pushedio.bin == nil) 1611 lexlineno++; 1612 break; 1613 } 1614 return c; 1615 } 1616 1617 static void 1618 ungetc(int c) 1619 { 1620 curio.peekc1 = curio.peekc; 1621 curio.peekc = c; 1622 if(c == '\n' && pushedio.bin == nil) 1623 lexlineno--; 1624 } 1625 1626 static int32 1627 getr(void) 1628 { 1629 int c, i; 1630 char str[UTFmax+1]; 1631 Rune rune; 1632 1633 c = getc(); 1634 if(c < Runeself) 1635 return c; 1636 i = 0; 1637 str[i++] = c; 1638 1639 loop: 1640 c = getc(); 1641 str[i++] = c; 1642 if(!fullrune(str, i)) 1643 goto loop; 1644 c = chartorune(&rune, str); 1645 if(rune == Runeerror && c == 1) { 1646 lineno = lexlineno; 1647 yyerror("illegal UTF-8 sequence"); 1648 flusherrors(); 1649 print("\t"); 1650 for(c=0; c<i; c++) 1651 print("%s%.2x", c > 0 ? " " : "", *(uchar*)(str+c)); 1652 print("\n"); 1653 } 1654 return rune; 1655 } 1656 1657 static int 1658 escchar(int e, int *escflg, vlong *val) 1659 { 1660 int i, u, c; 1661 vlong l; 1662 1663 *escflg = 0; 1664 1665 c = getr(); 1666 switch(c) { 1667 case EOF: 1668 yyerror("eof in string"); 1669 return 1; 1670 case '\n': 1671 yyerror("newline in string"); 1672 return 1; 1673 case '\\': 1674 break; 1675 default: 1676 if(c == e) 1677 return 1; 1678 *val = c; 1679 return 0; 1680 } 1681 1682 u = 0; 1683 c = getr(); 1684 switch(c) { 1685 case 'x': 1686 *escflg = 1; // it's a byte 1687 i = 2; 1688 goto hex; 1689 1690 case 'u': 1691 i = 4; 1692 u = 1; 1693 goto hex; 1694 1695 case 'U': 1696 i = 8; 1697 u = 1; 1698 goto hex; 1699 1700 case '0': 1701 case '1': 1702 case '2': 1703 case '3': 1704 case '4': 1705 case '5': 1706 case '6': 1707 case '7': 1708 *escflg = 1; // it's a byte 1709 goto oct; 1710 1711 case 'a': c = '\a'; break; 1712 case 'b': c = '\b'; break; 1713 case 'f': c = '\f'; break; 1714 case 'n': c = '\n'; break; 1715 case 'r': c = '\r'; break; 1716 case 't': c = '\t'; break; 1717 case 'v': c = '\v'; break; 1718 case '\\': c = '\\'; break; 1719 1720 default: 1721 if(c != e) 1722 yyerror("unknown escape sequence: %c", c); 1723 } 1724 *val = c; 1725 return 0; 1726 1727 hex: 1728 l = 0; 1729 for(; i>0; i--) { 1730 c = getc(); 1731 if(c >= '0' && c <= '9') { 1732 l = l*16 + c-'0'; 1733 continue; 1734 } 1735 if(c >= 'a' && c <= 'f') { 1736 l = l*16 + c-'a' + 10; 1737 continue; 1738 } 1739 if(c >= 'A' && c <= 'F') { 1740 l = l*16 + c-'A' + 10; 1741 continue; 1742 } 1743 yyerror("non-hex character in escape sequence: %c", c); 1744 ungetc(c); 1745 break; 1746 } 1747 if(u && (l > Runemax || (0xd800 <= l && l < 0xe000))) { 1748 yyerror("invalid Unicode code point in escape sequence: %#llx", l); 1749 l = Runeerror; 1750 } 1751 *val = l; 1752 return 0; 1753 1754 oct: 1755 l = c - '0'; 1756 for(i=2; i>0; i--) { 1757 c = getc(); 1758 if(c >= '0' && c <= '7') { 1759 l = l*8 + c-'0'; 1760 continue; 1761 } 1762 yyerror("non-octal character in escape sequence: %c", c); 1763 ungetc(c); 1764 } 1765 if(l > 255) 1766 yyerror("octal escape value > 255: %d", l); 1767 1768 *val = l; 1769 return 0; 1770 } 1771 1772 static struct 1773 { 1774 char* name; 1775 int lexical; 1776 int etype; 1777 int op; 1778 } syms[] = 1779 { 1780 /* name lexical etype op 1781 */ 1782 /* basic types */ 1783 "int8", LNAME, TINT8, OXXX, 1784 "int16", LNAME, TINT16, OXXX, 1785 "int32", LNAME, TINT32, OXXX, 1786 "int64", LNAME, TINT64, OXXX, 1787 1788 "uint8", LNAME, TUINT8, OXXX, 1789 "uint16", LNAME, TUINT16, OXXX, 1790 "uint32", LNAME, TUINT32, OXXX, 1791 "uint64", LNAME, TUINT64, OXXX, 1792 1793 "float32", LNAME, TFLOAT32, OXXX, 1794 "float64", LNAME, TFLOAT64, OXXX, 1795 1796 "complex64", LNAME, TCOMPLEX64, OXXX, 1797 "complex128", LNAME, TCOMPLEX128, OXXX, 1798 1799 "bool", LNAME, TBOOL, OXXX, 1800 "string", LNAME, TSTRING, OXXX, 1801 1802 "any", LNAME, TANY, OXXX, 1803 1804 "break", LBREAK, Txxx, OXXX, 1805 "case", LCASE, Txxx, OXXX, 1806 "chan", LCHAN, Txxx, OXXX, 1807 "const", LCONST, Txxx, OXXX, 1808 "continue", LCONTINUE, Txxx, OXXX, 1809 "default", LDEFAULT, Txxx, OXXX, 1810 "else", LELSE, Txxx, OXXX, 1811 "defer", LDEFER, Txxx, OXXX, 1812 "fallthrough", LFALL, Txxx, OXXX, 1813 "for", LFOR, Txxx, OXXX, 1814 "func", LFUNC, Txxx, OXXX, 1815 "go", LGO, Txxx, OXXX, 1816 "goto", LGOTO, Txxx, OXXX, 1817 "if", LIF, Txxx, OXXX, 1818 "import", LIMPORT, Txxx, OXXX, 1819 "interface", LINTERFACE, Txxx, OXXX, 1820 "map", LMAP, Txxx, OXXX, 1821 "package", LPACKAGE, Txxx, OXXX, 1822 "range", LRANGE, Txxx, OXXX, 1823 "return", LRETURN, Txxx, OXXX, 1824 "select", LSELECT, Txxx, OXXX, 1825 "struct", LSTRUCT, Txxx, OXXX, 1826 "switch", LSWITCH, Txxx, OXXX, 1827 "type", LTYPE, Txxx, OXXX, 1828 "var", LVAR, Txxx, OXXX, 1829 1830 "append", LNAME, Txxx, OAPPEND, 1831 "cap", LNAME, Txxx, OCAP, 1832 "close", LNAME, Txxx, OCLOSE, 1833 "complex", LNAME, Txxx, OCOMPLEX, 1834 "copy", LNAME, Txxx, OCOPY, 1835 "delete", LNAME, Txxx, ODELETE, 1836 "imag", LNAME, Txxx, OIMAG, 1837 "len", LNAME, Txxx, OLEN, 1838 "make", LNAME, Txxx, OMAKE, 1839 "new", LNAME, Txxx, ONEW, 1840 "panic", LNAME, Txxx, OPANIC, 1841 "print", LNAME, Txxx, OPRINT, 1842 "println", LNAME, Txxx, OPRINTN, 1843 "real", LNAME, Txxx, OREAL, 1844 "recover", LNAME, Txxx, ORECOVER, 1845 1846 "notwithstanding", LIGNORE, Txxx, OXXX, 1847 "thetruthofthematter", LIGNORE, Txxx, OXXX, 1848 "despiteallobjections", LIGNORE, Txxx, OXXX, 1849 "whereas", LIGNORE, Txxx, OXXX, 1850 "insofaras", LIGNORE, Txxx, OXXX, 1851 }; 1852 1853 static void 1854 lexinit(void) 1855 { 1856 int i, lex; 1857 Sym *s, *s1; 1858 Type *t; 1859 int etype; 1860 Val v; 1861 1862 /* 1863 * initialize basic types array 1864 * initialize known symbols 1865 */ 1866 for(i=0; i<nelem(syms); i++) { 1867 lex = syms[i].lexical; 1868 s = lookup(syms[i].name); 1869 s->lexical = lex; 1870 1871 etype = syms[i].etype; 1872 if(etype != Txxx) { 1873 if(etype < 0 || etype >= nelem(types)) 1874 fatal("lexinit: %s bad etype", s->name); 1875 s1 = pkglookup(syms[i].name, builtinpkg); 1876 t = types[etype]; 1877 if(t == T) { 1878 t = typ(etype); 1879 t->sym = s1; 1880 1881 if(etype != TANY && etype != TSTRING) 1882 dowidth(t); 1883 types[etype] = t; 1884 } 1885 s1->lexical = LNAME; 1886 s1->def = typenod(t); 1887 continue; 1888 } 1889 1890 etype = syms[i].op; 1891 if(etype != OXXX) { 1892 s1 = pkglookup(syms[i].name, builtinpkg); 1893 s1->lexical = LNAME; 1894 s1->def = nod(ONAME, N, N); 1895 s1->def->sym = s1; 1896 s1->def->etype = etype; 1897 s1->def->builtin = 1; 1898 } 1899 } 1900 1901 // logically, the type of a string literal. 1902 // types[TSTRING] is the named type string 1903 // (the type of x in var x string or var x = "hello"). 1904 // this is the ideal form 1905 // (the type of x in const x = "hello"). 1906 idealstring = typ(TSTRING); 1907 idealbool = typ(TBOOL); 1908 1909 s = pkglookup("true", builtinpkg); 1910 s->def = nodbool(1); 1911 s->def->sym = lookup("true"); 1912 s->def->type = idealbool; 1913 1914 s = pkglookup("false", builtinpkg); 1915 s->def = nodbool(0); 1916 s->def->sym = lookup("false"); 1917 s->def->type = idealbool; 1918 1919 s = lookup("_"); 1920 s->block = -100; 1921 s->def = nod(ONAME, N, N); 1922 s->def->sym = s; 1923 types[TBLANK] = typ(TBLANK); 1924 s->def->type = types[TBLANK]; 1925 nblank = s->def; 1926 1927 s = pkglookup("_", builtinpkg); 1928 s->block = -100; 1929 s->def = nod(ONAME, N, N); 1930 s->def->sym = s; 1931 types[TBLANK] = typ(TBLANK); 1932 s->def->type = types[TBLANK]; 1933 1934 types[TNIL] = typ(TNIL); 1935 s = pkglookup("nil", builtinpkg); 1936 v.ctype = CTNIL; 1937 s->def = nodlit(v); 1938 s->def->sym = s; 1939 } 1940 1941 static void 1942 lexinit1(void) 1943 { 1944 Sym *s, *s1; 1945 Type *t, *f, *rcvr, *in, *out; 1946 1947 // t = interface { Error() string } 1948 rcvr = typ(TSTRUCT); 1949 rcvr->type = typ(TFIELD); 1950 rcvr->type->type = ptrto(typ(TSTRUCT)); 1951 rcvr->funarg = 1; 1952 in = typ(TSTRUCT); 1953 in->funarg = 1; 1954 out = typ(TSTRUCT); 1955 out->type = typ(TFIELD); 1956 out->type->type = types[TSTRING]; 1957 out->funarg = 1; 1958 f = typ(TFUNC); 1959 *getthis(f) = rcvr; 1960 *getoutarg(f) = out; 1961 *getinarg(f) = in; 1962 f->thistuple = 1; 1963 f->intuple = 0; 1964 f->outnamed = 0; 1965 f->outtuple = 1; 1966 t = typ(TINTER); 1967 t->type = typ(TFIELD); 1968 t->type->sym = lookup("Error"); 1969 t->type->type = f; 1970 1971 // error type 1972 s = lookup("error"); 1973 s->lexical = LNAME; 1974 errortype = t; 1975 errortype->sym = s; 1976 s1 = pkglookup("error", builtinpkg); 1977 s1->lexical = LNAME; 1978 s1->def = typenod(errortype); 1979 1980 // byte alias 1981 s = lookup("byte"); 1982 s->lexical = LNAME; 1983 bytetype = typ(TUINT8); 1984 bytetype->sym = s; 1985 s1 = pkglookup("byte", builtinpkg); 1986 s1->lexical = LNAME; 1987 s1->def = typenod(bytetype); 1988 1989 // rune alias 1990 s = lookup("rune"); 1991 s->lexical = LNAME; 1992 runetype = typ(TINT32); 1993 runetype->sym = s; 1994 s1 = pkglookup("rune", builtinpkg); 1995 s1->lexical = LNAME; 1996 s1->def = typenod(runetype); 1997 } 1998 1999 static void 2000 lexfini(void) 2001 { 2002 Sym *s; 2003 int lex, etype, i; 2004 Val v; 2005 2006 for(i=0; i<nelem(syms); i++) { 2007 lex = syms[i].lexical; 2008 if(lex != LNAME) 2009 continue; 2010 s = lookup(syms[i].name); 2011 s->lexical = lex; 2012 2013 etype = syms[i].etype; 2014 if(etype != Txxx && (etype != TANY || debug['A']) && s->def == N) { 2015 s->def = typenod(types[etype]); 2016 s->origpkg = builtinpkg; 2017 } 2018 2019 etype = syms[i].op; 2020 if(etype != OXXX && s->def == N) { 2021 s->def = nod(ONAME, N, N); 2022 s->def->sym = s; 2023 s->def->etype = etype; 2024 s->def->builtin = 1; 2025 s->origpkg = builtinpkg; 2026 } 2027 } 2028 2029 // backend-specific builtin types (e.g. int). 2030 for(i=0; typedefs[i].name; i++) { 2031 s = lookup(typedefs[i].name); 2032 if(s->def == N) { 2033 s->def = typenod(types[typedefs[i].etype]); 2034 s->origpkg = builtinpkg; 2035 } 2036 } 2037 2038 // there's only so much table-driven we can handle. 2039 // these are special cases. 2040 s = lookup("byte"); 2041 if(s->def == N) { 2042 s->def = typenod(bytetype); 2043 s->origpkg = builtinpkg; 2044 } 2045 2046 s = lookup("error"); 2047 if(s->def == N) { 2048 s->def = typenod(errortype); 2049 s->origpkg = builtinpkg; 2050 } 2051 2052 s = lookup("rune"); 2053 if(s->def == N) { 2054 s->def = typenod(runetype); 2055 s->origpkg = builtinpkg; 2056 } 2057 2058 s = lookup("nil"); 2059 if(s->def == N) { 2060 v.ctype = CTNIL; 2061 s->def = nodlit(v); 2062 s->def->sym = s; 2063 s->origpkg = builtinpkg; 2064 } 2065 2066 s = lookup("iota"); 2067 if(s->def == N) { 2068 s->def = nod(OIOTA, N, N); 2069 s->def->sym = s; 2070 s->origpkg = builtinpkg; 2071 } 2072 2073 s = lookup("true"); 2074 if(s->def == N) { 2075 s->def = nodbool(1); 2076 s->def->sym = s; 2077 s->origpkg = builtinpkg; 2078 } 2079 2080 s = lookup("false"); 2081 if(s->def == N) { 2082 s->def = nodbool(0); 2083 s->def->sym = s; 2084 s->origpkg = builtinpkg; 2085 } 2086 2087 nodfp = nod(ONAME, N, N); 2088 nodfp->type = types[TINT32]; 2089 nodfp->xoffset = 0; 2090 nodfp->class = PPARAM; 2091 nodfp->sym = lookup(".fp"); 2092 } 2093 2094 struct 2095 { 2096 int lex; 2097 char* name; 2098 } lexn[] = 2099 { 2100 LANDAND, "ANDAND", 2101 LASOP, "ASOP", 2102 LBREAK, "BREAK", 2103 LCASE, "CASE", 2104 LCHAN, "CHAN", 2105 LCOLAS, "COLAS", 2106 LCONST, "CONST", 2107 LCONTINUE, "CONTINUE", 2108 LDEC, "DEC", 2109 LDEFER, "DEFER", 2110 LELSE, "ELSE", 2111 LEQ, "EQ", 2112 LFALL, "FALL", 2113 LFOR, "FOR", 2114 LFUNC, "FUNC", 2115 LGE, "GE", 2116 LGO, "GO", 2117 LGOTO, "GOTO", 2118 LGT, "GT", 2119 LIF, "IF", 2120 LIMPORT, "IMPORT", 2121 LINC, "INC", 2122 LINTERFACE, "INTERFACE", 2123 LLE, "LE", 2124 LLITERAL, "LITERAL", 2125 LLSH, "LSH", 2126 LLT, "LT", 2127 LMAP, "MAP", 2128 LNAME, "NAME", 2129 LNE, "NE", 2130 LOROR, "OROR", 2131 LPACKAGE, "PACKAGE", 2132 LRANGE, "RANGE", 2133 LRETURN, "RETURN", 2134 LRSH, "RSH", 2135 LSTRUCT, "STRUCT", 2136 LSWITCH, "SWITCH", 2137 LTYPE, "TYPE", 2138 LVAR, "VAR", 2139 }; 2140 2141 char* 2142 lexname(int lex) 2143 { 2144 int i; 2145 static char buf[100]; 2146 2147 for(i=0; i<nelem(lexn); i++) 2148 if(lexn[i].lex == lex) 2149 return lexn[i].name; 2150 snprint(buf, sizeof(buf), "LEX-%d", lex); 2151 return buf; 2152 } 2153 2154 struct 2155 { 2156 char *have; 2157 char *want; 2158 } yytfix[] = 2159 { 2160 "$end", "EOF", 2161 "LLITERAL", "literal", 2162 "LASOP", "op=", 2163 "LBREAK", "break", 2164 "LCASE", "case", 2165 "LCOLAS", ":=", 2166 "LCONST", "const", 2167 "LCONTINUE", "continue", 2168 "LDDD", "...", 2169 "LDEFAULT", "default", 2170 "LDEFER", "defer", 2171 "LELSE", "else", 2172 "LFALL", "fallthrough", 2173 "LFOR", "for", 2174 "LFUNC", "func", 2175 "LGO", "go", 2176 "LGOTO", "goto", 2177 "LIF", "if", 2178 "LIMPORT", "import", 2179 "LINTERFACE", "interface", 2180 "LMAP", "map", 2181 "LNAME", "name", 2182 "LPACKAGE", "package", 2183 "LRANGE", "range", 2184 "LRETURN", "return", 2185 "LSELECT", "select", 2186 "LSTRUCT", "struct", 2187 "LSWITCH", "switch", 2188 "LTYPE", "type", 2189 "LVAR", "var", 2190 "LANDAND", "&&", 2191 "LANDNOT", "&^", 2192 "LBODY", "{", 2193 "LCOMM", "<-", 2194 "LDEC", "--", 2195 "LINC", "++", 2196 "LEQ", "==", 2197 "LGE", ">=", 2198 "LGT", ">", 2199 "LLE", "<=", 2200 "LLT", "<", 2201 "LLSH", "<<", 2202 "LRSH", ">>", 2203 "LOROR", "||", 2204 "LNE", "!=", 2205 2206 // spell out to avoid confusion with punctuation in error messages 2207 "';'", "semicolon or newline", 2208 "','", "comma", 2209 }; 2210 2211 static void 2212 yytinit(void) 2213 { 2214 int i, j; 2215 extern char *yytname[]; 2216 char *s, *t; 2217 2218 for(i=0; yytname[i] != nil; i++) { 2219 s = yytname[i]; 2220 2221 if(strcmp(s, "LLITERAL") == 0) { 2222 strcpy(litbuf, "literal"); 2223 yytname[i] = litbuf; 2224 goto loop; 2225 } 2226 2227 // apply yytfix if possible 2228 for(j=0; j<nelem(yytfix); j++) { 2229 if(strcmp(s, yytfix[j].have) == 0) { 2230 yytname[i] = yytfix[j].want; 2231 goto loop; 2232 } 2233 } 2234 2235 // turn 'x' into x. 2236 if(s[0] == '\'') { 2237 t = strdup(s+1); 2238 t[strlen(t)-1] = '\0'; 2239 yytname[i] = t; 2240 } 2241 loop:; 2242 } 2243 } 2244 2245 void 2246 mkpackage(char* pkgname) 2247 { 2248 Sym *s; 2249 int32 h; 2250 char *p, *q; 2251 2252 if(localpkg->name == nil) { 2253 if(strcmp(pkgname, "_") == 0) 2254 yyerror("invalid package name _"); 2255 localpkg->name = pkgname; 2256 } else { 2257 if(strcmp(pkgname, localpkg->name) != 0) 2258 yyerror("package %s; expected %s", pkgname, localpkg->name); 2259 for(h=0; h<NHASH; h++) { 2260 for(s = hash[h]; s != S; s = s->link) { 2261 if(s->def == N || s->pkg != localpkg) 2262 continue; 2263 if(s->def->op == OPACK) { 2264 // throw away top-level package name leftover 2265 // from previous file. 2266 // leave s->block set to cause redeclaration 2267 // errors if a conflicting top-level name is 2268 // introduced by a different file. 2269 if(!s->def->used && !nsyntaxerrors) 2270 yyerrorl(s->def->lineno, "imported and not used: \"%Z\"", s->def->pkg->path); 2271 s->def = N; 2272 continue; 2273 } 2274 if(s->def->sym != s) { 2275 // throw away top-level name left over 2276 // from previous import . "x" 2277 if(s->def->pack != N && !s->def->pack->used && !nsyntaxerrors) { 2278 yyerrorl(s->def->pack->lineno, "imported and not used: \"%Z\"", s->def->pack->pkg->path); 2279 s->def->pack->used = 1; 2280 } 2281 s->def = N; 2282 continue; 2283 } 2284 } 2285 } 2286 } 2287 2288 if(outfile == nil) { 2289 p = strrchr(infile, '/'); 2290 if(windows) { 2291 q = strrchr(infile, '\\'); 2292 if(q > p) 2293 p = q; 2294 } 2295 if(p == nil) 2296 p = infile; 2297 else 2298 p = p+1; 2299 snprint(namebuf, sizeof(namebuf), "%s", p); 2300 p = strrchr(namebuf, '.'); 2301 if(p != nil) 2302 *p = 0; 2303 outfile = smprint("%s.%c", namebuf, thechar); 2304 } 2305 }