// Source: github.com/lab47/exprcore@v0.0.0-20210525052339-fb7d6bd9331e/internal/compile/compile.go

// Package compile defines the exprcore bytecode compiler.
// It is an internal package of the exprcore interpreter and is not directly accessible to clients.
//
// The compiler generates byte code with optional uint32 operands for a
// virtual machine with the following components:
//   - a program counter, which is an index into the byte code array.
//   - an operand stack, whose maximum size is computed for each function by the compiler.
//   - a stack of active iterators.
//   - an array of local variables.
//     The number of local variables and their indices are computed by the resolver.
//     Locals (possibly including parameters) that are shared with nested functions
//     are 'cells': their locals array slot will contain a value of type 'cell',
//     an indirect value in a box that is explicitly read/updated by instructions.
//   - an array of free variables, for nested functions.
//     Free variables are a subset of the ancestors' cell variables.
//     As with locals and cells, these are computed by the resolver.
//   - an array of global variables, shared among all functions in the same module.
//     All elements are initially nil.
//   - two maps of predeclared and universal identifiers.
//
// Each function has a line number table that maps each program counter
// offset to a source position, including the column number.
//
// Operands, logically uint32s, are encoded using little-endian 7-bit
// varints, the top bit indicating that more bytes follow.
//
package compile // import "github.com/lab47/exprcore/internal/compile"

import (
	"bytes"
	"fmt"
	"hash"
	"log"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"sync"

	"github.com/lab47/exprcore/resolve"
	"github.com/lab47/exprcore/syntax"
	"golang.org/x/crypto/blake2b"
)

// Disassemble causes the assembly code for each function
// to be printed to stderr as it is generated.
var Disassemble = false

const debug = false // make code generation verbose, for debugging the compiler

// Version is the bytecode format version.
// Increment this to force recompilation of saved bytecode files.
const Version = 10

// An Opcode identifies a bytecode instruction.
// Each instruction is emitted as one opcode byte, followed by a
// varint-encoded operand when the opcode is >= OpcodeArgMin.
type Opcode uint8

// "x DUP x x" is a "stack picture" that describes the state of the
// stack before and after execution of the instruction.
//
// OP<index> indicates an immediate operand that is an index into the
// specified table: locals, names, freevars, constants.
const (
	NOP Opcode = iota // - NOP -

	// stack operations
	DUP  //   x DUP x x
	DUP2 // x y DUP2 x y x y
	POP  //   x POP -
	EXCH // x y EXCH y x

	// binary comparisons
	// (order must match Token)
	LT
	GT
	GE
	LE
	EQL
	NEQ

	// binary arithmetic
	// (order must match Token)
	PLUS
	MINUS
	STAR
	SLASH
	SLASHSLASH
	PERCENT
	AMP
	PIPE
	CIRCUMFLEX
	LTLT
	GTGT

	IN

	// unary operators
	UPLUS  // x UPLUS x
	UMINUS // x UMINUS -x
	TILDE  // x TILDE ~x

	NONE      // - NONE None
	TRUE      // - TRUE True
	FALSE     // - FALSE False
	MANDATORY // - MANDATORY Mandatory [sentinel value for required kwonly args]

	ITERPUSH    //       iterable ITERPUSH    -      [pushes the iterator stack]
	ITERPOP     //              - ITERPOP     -      [pops the iterator stack]
	NOT         //          value NOT         bool
	RETURN      //          value RETURN      -
	SETINDEX    //        a i new SETINDEX    -
	INDEX       //            a i INDEX       elem
	SETDICT     // dict key value SETDICT     -
	SETDICTUNIQ // dict key value SETDICTUNIQ -
	APPEND      //      list elem APPEND      -
	SLICE       //   x lo hi step SLICE       slice
	INPLACE_ADD //            x y INPLACE_ADD z      where z is x+y or x.extend(y)
	MAKEDICT    //              - MAKEDICT    dict
	MAKEPROTO   //              - MAKEPROTO   proto
	SETPROTOKEY //      proto k v SETPROTOKEY -
	SETCELL     //     value cell SETCELL     -
	CELL        //           cell CELL        value
	IMPORT      //   ns name args IMPORT      value

	// --- opcodes with an argument must go below this line ---

	// control flow
	JMP     //            - JMP<addr>     -
	CJMP    //         cond CJMP<addr>    -
	ITERJMP //            - ITERJMP<addr> elem   (and fall through) [acts on topmost iterator]
	//       or:          - ITERJMP<addr> -      (and jump)

	CONSTANT    //                 - CONSTANT<constant>  value
	MAKETUPLE   //         x1 ... xn MAKETUPLE<n>        tuple
	MAKELIST    //         x1 ... xn MAKELIST<n>         list
	MAKEFUNC    // defaults+freevars MAKEFUNC<func>      fn
	LOAD        //  from1 ... fromN module LOAD<n>       v1 ... vN
	SETLOCAL    //             value SETLOCAL<local>     -
	SETGLOBAL   //             value SETGLOBAL<global>   -
	LOCAL       //                 - LOCAL<local>        value
	FREE        //                 - FREE<freevar>       cell
	GLOBAL      //                 - GLOBAL<global>      value
	AT          //                 - AT<name>            value
	PREDECLARED //                 - PREDECLARED<name>   value
	UNIVERSAL   //                 - UNIVERSAL<name>     value
	ATTR        //                 x ATTR<name>          y           y = x.name
	SETFIELD    //               x y SETFIELD<name>      -           x.name = y
	UNPACK      //          iterable UNPACK<n>           vn ... v1

	SHELL // x1 ... xn SHELL<n> value

	// n>>8 is #positional args and n&0xff is #named args (pairs).
	CALL        // fn positional named                CALL<n>        result
	CALL_VAR    // fn positional named *args          CALL_VAR<n>    result
	CALL_KW     // fn positional named       **kwargs CALL_KW<n>     result
	CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result

	// OpcodeArgMin is the first opcode that carries an immediate operand;
	// OpcodeMax is the last valid opcode.
	OpcodeArgMin = JMP
	OpcodeMax    = CALL_VAR_KW
)

// TODO(adonovan): add dynamic checks for missing opcodes in the tables below.
161 162 var opcodeNames = [...]string{ 163 AMP: "amp", 164 APPEND: "append", 165 AT: "at", 166 ATTR: "attr", 167 CALL: "call", 168 CALL_KW: "call_kw ", 169 CALL_VAR: "call_var", 170 CALL_VAR_KW: "call_var_kw", 171 CELL: "cell", 172 CIRCUMFLEX: "circumflex", 173 CJMP: "cjmp", 174 CONSTANT: "constant", 175 DUP2: "dup2", 176 DUP: "dup", 177 EQL: "eql", 178 EXCH: "exch", 179 FALSE: "false", 180 FREE: "free", 181 GE: "ge", 182 GLOBAL: "global", 183 GT: "gt", 184 GTGT: "gtgt", 185 IN: "in", 186 INDEX: "index", 187 INPLACE_ADD: "inplace_add", 188 ITERJMP: "iterjmp", 189 ITERPOP: "iterpop", 190 ITERPUSH: "iterpush", 191 JMP: "jmp", 192 LE: "le", 193 LOAD: "load", 194 LOCAL: "local", 195 LT: "lt", 196 LTLT: "ltlt", 197 MAKEDICT: "makedict", 198 MAKEFUNC: "makefunc", 199 MAKELIST: "makelist", 200 MAKETUPLE: "maketuple", 201 MANDATORY: "mandatory", 202 MINUS: "minus", 203 NEQ: "neq", 204 NONE: "none", 205 NOP: "nop", 206 NOT: "not", 207 PERCENT: "percent", 208 PIPE: "pipe", 209 PLUS: "plus", 210 POP: "pop", 211 PREDECLARED: "predeclared", 212 RETURN: "return", 213 SETCELL: "setcell", 214 SETDICT: "setdict", 215 SETDICTUNIQ: "setdictuniq", 216 SETFIELD: "setfield", 217 SETGLOBAL: "setglobal", 218 SETINDEX: "setindex", 219 SETLOCAL: "setlocal", 220 SLASH: "slash", 221 SLASHSLASH: "slashslash", 222 SLICE: "slice", 223 STAR: "star", 224 TILDE: "tilde", 225 TRUE: "true", 226 UMINUS: "uminus", 227 UNIVERSAL: "universal", 228 UNPACK: "unpack", 229 UPLUS: "uplus", 230 SHELL: "shell", 231 IMPORT: "import", 232 } 233 234 const variableStackEffect = 0x7f 235 236 // stackEffect records the effect on the size of the operand stack of 237 // each kind of instruction. For some instructions this requires computation. 
238 var stackEffect = [...]int8{ 239 AMP: -1, 240 APPEND: -2, 241 AT: +1, 242 ATTR: 0, 243 CALL: variableStackEffect, 244 CALL_KW: variableStackEffect, 245 CALL_VAR: variableStackEffect, 246 CALL_VAR_KW: variableStackEffect, 247 CELL: 0, 248 CIRCUMFLEX: -1, 249 CJMP: -1, 250 CONSTANT: +1, 251 DUP2: +2, 252 DUP: +1, 253 EQL: -1, 254 FALSE: +1, 255 FREE: +1, 256 GE: -1, 257 GLOBAL: +1, 258 GT: -1, 259 GTGT: -1, 260 IN: -1, 261 INDEX: -1, 262 INPLACE_ADD: -1, 263 ITERJMP: variableStackEffect, 264 ITERPOP: 0, 265 ITERPUSH: -1, 266 JMP: 0, 267 LE: -1, 268 LOAD: -1, 269 IMPORT: -2, 270 LOCAL: +1, 271 LT: -1, 272 LTLT: -1, 273 MAKEDICT: +1, 274 MAKEPROTO: +1, 275 MAKEFUNC: 0, 276 MAKELIST: variableStackEffect, 277 MAKETUPLE: variableStackEffect, 278 SHELL: variableStackEffect, 279 MANDATORY: +1, 280 MINUS: -1, 281 NEQ: -1, 282 NONE: +1, 283 NOP: 0, 284 NOT: 0, 285 PERCENT: -1, 286 PIPE: -1, 287 PLUS: -1, 288 POP: -1, 289 PREDECLARED: +1, 290 RETURN: -1, 291 SETCELL: -2, 292 SETDICT: -3, 293 SETDICTUNIQ: -3, 294 SETPROTOKEY: -3, 295 SETFIELD: -2, 296 SETGLOBAL: -1, 297 SETINDEX: -3, 298 SETLOCAL: -1, 299 SLASH: -1, 300 SLASHSLASH: -1, 301 SLICE: -3, 302 STAR: -1, 303 TRUE: +1, 304 UMINUS: 0, 305 UNIVERSAL: +1, 306 UNPACK: variableStackEffect, 307 UPLUS: 0, 308 } 309 310 func (op Opcode) String() string { 311 if op < OpcodeMax { 312 if name := opcodeNames[op]; name != "" { 313 return name 314 } 315 } 316 return fmt.Sprintf("illegal op (%d)", op) 317 } 318 319 // A Program is a exprcore file in executable form. 320 // 321 // Programs are serialized by the Program.Encode method, 322 // which must be updated whenever this declaration is changed. 
type Program struct {
	Loads     []Binding     // name (really, string) and position of each load stmt
	Names     []string      // names of attributes and predeclared variables
	Constants []interface{} // = string | int64 | float64 | *big.Int
	Functions []*Funcode    // function pool, indexed by the MAKEFUNC operand
	Globals   []Binding     // for error messages and tracing
	Toplevel  *Funcode      // module initialization function
}

// A Funcode is the code of a compiled exprcore function.
//
// Funcodes are serialized by the encoder.function method,
// which must be updated whenever this declaration is changed.
type Funcode struct {
	Prog      *Program        // the program this function belongs to
	Pos       syntax.Position // position of def or lambda token
	Name      string          // name of this function
	Doc       string          // docstring of this function
	Code      []byte          // the byte code
	pclinetab []uint16        // mapping from pc to linenum
	Locals    []Binding       // locals, parameters first
	Cells     []int           // indices of Locals that require cells
	Freevars  []Binding       // for tracing
	MaxStack  int             // maximum operand stack depth, computed by the compiler
	// NOTE(review): the next four fields are not assigned in this part of the
	// file; presumably they describe the parameter list — confirm at the caller.
	NumParams             int
	NumKwonlyParams       int
	HasVarargs, HasKwargs bool
	SubFunctions          []uint32 // NOTE(review): not populated in this part of the file — confirm meaning
	Constants             []int    // sorted indices into Prog.Constants recorded while compiling this function
	Signature             []byte   // BLAKE2b-256 digest of this function's instruction stream

	// -- transient state --

	lntOnce sync.Once   // guards the one-time decoding of pclinetab
	lnt     []pclinecol // decoded line number table
}

// A pclinecol is one decoded row of the line number table:
// the source line and column in effect from program counter pc.
type pclinecol struct {
	pc        uint32
	line, col int32
}

// A Binding is the name and position of a binding identifier.
type Binding struct {
	Name string
	Pos  syntax.Position
}

// A pcomp holds the compiler state for a Program.
type pcomp struct {
	prog *Program // what we're building

	// Interning tables: each maps a value to its index in the
	// corresponding prog.Names / prog.Constants / prog.Functions slice.
	names     map[string]uint32
	constants map[interface{}]uint32
	functions map[*Funcode]uint32
}

// An fcomp holds the compiler state for a Funcode.
type fcomp struct {
	fn *Funcode // what we're building

	pcomp *pcomp
	pos   syntax.Position // current position of generated code
	loops []loop          // stack of enclosing loops, innermost last
	block *block          // block receiving emitted instructions; nil after a jump

	constants map[uint32]struct{} // set of constant-pool indices recorded for this function

	h hash.Hash // accumulates the instruction stream; its sum becomes fn.Signature
}

// A loop records the jump targets of break and continue
// statements for one enclosing loop.
type loop struct {
	break_, continue_ *block
}

// A block is a basic block of the control-flow graph:
// a straight-line run of instructions plus its successors.
type block struct {
	insns []insn

	// If the last insn is a RETURN, jmp and cjmp are nil.
	// If the last insn is a CJMP or ITERJMP,
	//  cjmp and jmp are the "true" and "false" successors.
	// Otherwise, jmp is the sole successor.
	jmp, cjmp *block

	initialstack int // for stack depth computation; -1 => not yet known

	// Used during encoding
	index int // -1 => not encoded yet
	addr  uint32
}

// An insn is a single instruction: an opcode, its operand (meaningful
// only when op >= OpcodeArgMin), and an optional source position.
// A zero line means the instruction has no recorded position.
type insn struct {
	op        Opcode
	arg       uint32
	line, col int32
}

// Position returns the source position for program counter pc.
//
// The line number table is decoded lazily on first use. The result is
// fn.Pos with its Line and Col replaced by those of the last table entry
// whose pc is not greater than the argument; if the table is empty,
// Line and Col are zero.
func (fn *Funcode) Position(pc uint32) syntax.Position {
	fn.lntOnce.Do(fn.decodeLNT)

	// Binary search to find last LNT entry not greater than pc.
	// To avoid dynamic dispatch, this is a specialization of
	// sort.Search using this predicate:
	//   !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc)
	n := len(fn.lnt)
	i, j := 0, n
	for i < j {
		h := int(uint(i+j) >> 1) // midpoint; the uint conversion avoids overflow
		if !(h >= n-1 || fn.lnt[h+1].pc > pc) {
			i = h + 1
		} else {
			j = h
		}
	}

	var line, col int32
	if i < n {
		line = fn.lnt[i].line
		col = fn.lnt[i].col
	}

	pos := fn.Pos // copy the (annoyingly inaccessible) filename
	pos.Col = col
	pos.Line = line
	return pos
}

// decodeLNT decodes the line number table and populates fn.lnt.
// It is called at most once.
func (fn *Funcode) decodeLNT() {
	// Conceptually the table contains rows of the form
	// (pc uint32, line int32, col int32), sorted by pc.
	// We use a delta encoding, since the differences
	// between successive pc, line, and column values
	// are typically small and positive (though line and
	// especially column differences may be negative).
	// The delta encoding starts from
	// {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}.
	//
	// Each entry is packed into one or more 16-bit values:
	//    Δpc        uint4
	//    Δline      int5
	//    Δcol       int6
	//    incomplete uint1
	// The top 4 bits are the unsigned delta pc.
	// The next 5 bits are the signed line number delta.
	// The next 6 bits are the signed column number delta.
	// The bottom bit indicates that more rows follow because
	// one of the deltas was maxed out.
	// These field widths were chosen from a sample of real programs,
	// and allow >97% of rows to be encoded in a single uint16.

	fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation
	entry := pclinecol{
		pc:   0,
		line: fn.Pos.Line,
		col:  fn.Pos.Col,
	}
	for _, x := range fn.pclinetab {
		// Shift each signed field to the top of an int16, then
		// arithmetic-shift it back down so the sign bit is extended.
		entry.pc += uint32(x) >> 12
		entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline
		entry.col += int32((int16(x) << 9) >> (16 - 6))  // sign extend Δcol
		// A set low bit means this row is continued by the next value,
		// so only complete rows are appended.
		if (x & 1) == 0 {
			fn.lnt = append(fn.lnt, entry)
		}
	}
}

// bindings converts resolve.Bindings to compiled form.
// Each result holds the name and position of the binding's first identifier.
func bindings(bindings []*resolve.Binding) []Binding {
	res := make([]Binding, len(bindings))
	for i, bind := range bindings {
		res[i].Name = bind.First.Name
		res[i].Pos = bind.First.NamePos
	}
	return res
}

// Expr compiles an expression to a program whose toplevel function evaluates it.
// The expression is wrapped in a return statement and compiled by File
// with no globals.
func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program {
	pos := syntax.Start(expr)
	stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}}
	return File(stmts, pos, name, locals, nil)
}

// File compiles the statements of a file into a program.
510 func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program { 511 pcomp := &pcomp{ 512 prog: &Program{ 513 Globals: bindings(globals), 514 }, 515 names: make(map[string]uint32), 516 constants: make(map[interface{}]uint32), 517 functions: make(map[*Funcode]uint32), 518 } 519 pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil) 520 521 return pcomp.prog 522 } 523 524 func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode { 525 h, _ := blake2b.New256(nil) 526 527 fcomp := &fcomp{ 528 pcomp: pcomp, 529 pos: pos, 530 fn: &Funcode{ 531 Prog: pcomp.prog, 532 Pos: pos, 533 Name: name, 534 Doc: docStringFromBody(stmts), 535 Locals: bindings(locals), 536 Freevars: bindings(freevars), 537 }, 538 constants: make(map[uint32]struct{}), 539 h: h, 540 } 541 542 // Record indices of locals that require cells. 543 for i, local := range locals { 544 if local.Scope == resolve.Cell { 545 fcomp.fn.Cells = append(fcomp.fn.Cells, i) 546 } 547 } 548 549 if debug { 550 fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) 551 } 552 553 // Convert AST to a CFG of instructions. 554 entry := fcomp.newBlock() 555 fcomp.block = entry 556 fcomp.stmts(stmts) 557 if fcomp.block != nil { 558 if len(fcomp.block.insns) == 0 { 559 fcomp.emit(NONE) 560 } 561 562 fcomp.emit(RETURN) 563 } 564 565 var oops bool // something bad happened 566 567 setinitialstack := func(b *block, depth int) { 568 if b.initialstack == -1 { 569 b.initialstack = depth 570 } else if b.initialstack != depth { 571 fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", 572 b.index, b.initialstack, depth) 573 oops = true 574 } 575 } 576 577 // Linearize the CFG: 578 // compute order, address, and initial 579 // stack depth of each reachable block. 
580 var pc uint32 581 var blocks []*block 582 var maxstack int 583 var visit func(b *block) 584 visit = func(b *block) { 585 if b.index >= 0 { 586 return // already visited 587 } 588 b.index = len(blocks) 589 b.addr = pc 590 blocks = append(blocks, b) 591 592 stack := b.initialstack 593 if debug { 594 fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) 595 } 596 var cjmpAddr *uint32 597 var isiterjmp int 598 for i, insn := range b.insns { 599 pc++ 600 601 // Compute size of argument. 602 if insn.op >= OpcodeArgMin { 603 switch insn.op { 604 case ITERJMP: 605 isiterjmp = 1 606 fallthrough 607 case CJMP: 608 cjmpAddr = &b.insns[i].arg 609 pc += 4 610 default: 611 pc += uint32(argLen(insn.arg)) 612 } 613 } 614 615 // Compute effect on stack. 616 se := insn.stackeffect() 617 if debug { 618 fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) 619 } 620 621 stack += se 622 if stack < 0 { 623 fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) 624 oops = true 625 } 626 if stack+isiterjmp > maxstack { 627 maxstack = stack + isiterjmp 628 } 629 } 630 631 if debug { 632 fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", 633 b.addr, b.index) 634 if b.jmp != nil { 635 fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) 636 } 637 if b.cjmp != nil { 638 fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) 639 } 640 } 641 642 // Place the jmp block next. 643 if b.jmp != nil { 644 // jump threading (empty cycles are impossible) 645 for b.jmp.insns == nil { 646 b.jmp = b.jmp.jmp 647 } 648 649 setinitialstack(b.jmp, stack+isiterjmp) 650 if b.jmp.index < 0 { 651 // Successor is not yet visited: 652 // place it next and fall through. 653 visit(b.jmp) 654 } else { 655 // Successor already visited; 656 // explicit backward jump required. 657 pc += 5 658 } 659 } 660 661 // Then the cjmp block. 
662 if b.cjmp != nil { 663 // jump threading (empty cycles are impossible) 664 for b.cjmp.insns == nil { 665 b.cjmp = b.cjmp.jmp 666 } 667 668 setinitialstack(b.cjmp, stack) 669 visit(b.cjmp) 670 671 // Patch the CJMP/ITERJMP, if present. 672 if cjmpAddr != nil { 673 *cjmpAddr = b.cjmp.addr 674 } 675 } 676 } 677 setinitialstack(entry, 0) 678 visit(entry) 679 680 fn := fcomp.fn 681 fn.MaxStack = maxstack 682 683 var constants []int 684 685 for k := range fcomp.constants { 686 constants = append(constants, int(k)) 687 } 688 689 sort.Ints(constants) 690 691 fn.Constants = constants 692 693 // Emit bytecode (and position table). 694 if Disassemble { 695 fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) 696 } 697 fcomp.generate(blocks, pc) 698 699 fn.Signature = fcomp.h.Sum(nil) 700 701 if debug { 702 fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) 703 } 704 705 // Don't panic until we've completed printing of the function. 706 if oops { 707 panic("internal error") 708 } 709 710 if debug { 711 fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) 712 } 713 714 return fn 715 } 716 717 func docStringFromBody(body []syntax.Stmt) string { 718 if len(body) == 0 { 719 return "" 720 } 721 expr, ok := body[0].(*syntax.ExprStmt) 722 if !ok { 723 return "" 724 } 725 lit, ok := expr.X.(*syntax.Literal) 726 if !ok { 727 return "" 728 } 729 if lit.Token != syntax.STRING { 730 return "" 731 } 732 return lit.Value.(string) 733 } 734 735 func (insn *insn) stackeffect() int { 736 se := int(stackEffect[insn.op]) 737 if se == variableStackEffect { 738 arg := int(insn.arg) 739 switch insn.op { 740 case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: 741 se = -int(2*(insn.arg&0xff) + insn.arg>>8) 742 if insn.op != CALL { 743 se-- 744 } 745 if insn.op == CALL_VAR_KW { 746 se-- 747 } 748 case ITERJMP: 749 // Stack effect differs by successor: 750 // +1 for jmp/false/ok 751 // 0 for cjmp/true/exhausted 752 // Handled specially in caller. 
753 se = 0 754 case MAKELIST, MAKETUPLE, SHELL: 755 se = 1 - arg 756 case UNPACK: 757 se = arg - 1 758 default: 759 panic(insn.op) 760 } 761 } 762 return se 763 } 764 765 // generate emits the linear instruction stream from the CFG, 766 // and builds the PC-to-line number table. 767 func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { 768 code := make([]byte, 0, codelen) 769 var pclinetab []uint16 770 prev := pclinecol{ 771 pc: 0, 772 line: fcomp.fn.Pos.Line, 773 col: fcomp.fn.Pos.Col, 774 } 775 776 for _, b := range blocks { 777 if Disassemble { 778 fmt.Fprintf(os.Stderr, "%d:\n", b.index) 779 } 780 pc := b.addr 781 for _, insn := range b.insns { 782 if insn.line != 0 { 783 // Instruction has a source position. Delta-encode it. 784 // See Funcode.Position for the encoding. 785 for { 786 var incomplete uint16 787 788 // Δpc, uint4 789 deltapc := pc - prev.pc 790 if deltapc > 0x0f { 791 deltapc = 0x0f 792 incomplete = 1 793 } 794 prev.pc += deltapc 795 796 // Δline, int5 797 deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f) 798 if !ok { 799 incomplete = 1 800 } 801 prev.line += deltaline 802 803 // Δcol, int6 804 deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f) 805 if !ok { 806 incomplete = 1 807 } 808 prev.col += deltacol 809 810 entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete 811 pclinetab = append(pclinetab, entry) 812 if incomplete == 0 { 813 break 814 } 815 } 816 817 if Disassemble { 818 fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n", 819 filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col) 820 } 821 } 822 823 switch insn.op { 824 case CONSTANT: 825 fmt.Fprintf(fcomp.h, "%d:%s\n", insn.op, fcomp.fn.Prog.Constants[insn.arg]) 826 case SETGLOBAL, GLOBAL: 827 fmt.Fprintf(fcomp.h, "%d:%s\n", insn.op, fcomp.fn.Prog.Globals[insn.arg].Name) 828 case ATTR, SETFIELD, PREDECLARED, UNIVERSAL, AT: 829 fmt.Fprintf(fcomp.h, "%d:%s\n", insn.op, fcomp.fn.Prog.Names[insn.arg]) 830 case FREE: 831 
fmt.Fprintf(fcomp.h, "%d:%s\n", insn.op, fcomp.fn.Freevars[insn.arg].Name) 832 case MAKEFUNC: 833 // We write the signature of the sub function here, to avoid changing 834 // if the same function gets moved in the program function list. 835 fmt.Fprintf(fcomp.h, "%d:-\n", insn.op) 836 837 subfn := fcomp.fn.Prog.Functions[insn.arg] 838 if len(subfn.Signature) == 0 { 839 panic("signature not set on subfn") 840 } 841 842 fcomp.h.Write(subfn.Signature) 843 default: 844 fmt.Fprintf(fcomp.h, "%d:%d\n", insn.op, insn.arg) 845 } 846 847 if Disassemble { 848 PrintOp(fcomp.fn, pc, insn.op, insn.arg) 849 } 850 code = append(code, byte(insn.op)) 851 pc++ 852 if insn.op >= OpcodeArgMin { 853 if insn.op == CJMP || insn.op == ITERJMP { 854 code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes 855 } else { 856 code = addUint32(code, insn.arg, 0) 857 } 858 pc = uint32(len(code)) 859 } 860 } 861 862 if b.jmp != nil && b.jmp.index != b.index+1 { 863 addr := b.jmp.addr 864 if Disassemble { 865 fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", 866 pc, addr, b.jmp.index) 867 } 868 code = append(code, byte(JMP)) 869 code = addUint32(code, addr, 4) 870 } 871 } 872 if len(code) != int(codelen) { 873 panic("internal error: wrong code length") 874 } 875 876 fcomp.fn.pclinetab = pclinetab 877 fcomp.fn.Code = code 878 } 879 880 // clip returns the value nearest x in the range [min...max], 881 // and whether it equals x. 882 func clip(x, min, max int32) (int32, bool) { 883 if x > max { 884 return max, false 885 } else if x < min { 886 return min, false 887 } else { 888 return x, true 889 } 890 } 891 892 // addUint32 encodes x as 7-bit little-endian varint. 893 // TODO(adonovan): opt: steal top two bits of opcode 894 // to encode the number of complete bytes that follow. 
895 func addUint32(code []byte, x uint32, min int) []byte { 896 end := len(code) + min 897 for x >= 0x80 { 898 code = append(code, byte(x)|0x80) 899 x >>= 7 900 } 901 code = append(code, byte(x)) 902 // Pad the operand with NOPs to exactly min bytes. 903 for len(code) < end { 904 code = append(code, byte(NOP)) 905 } 906 return code 907 } 908 909 func argLen(x uint32) int { 910 n := 0 911 for x >= 0x80 { 912 n++ 913 x >>= 7 914 } 915 return n + 1 916 } 917 918 // PrintOp prints an instruction. 919 // It is provided for debugging. 920 func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { 921 if op < OpcodeArgMin { 922 fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) 923 return 924 } 925 926 var comment string 927 switch op { 928 case CONSTANT: 929 switch x := fn.Prog.Constants[arg].(type) { 930 case string: 931 comment = strconv.Quote(x) 932 default: 933 comment = fmt.Sprint(x) 934 } 935 case MAKEFUNC: 936 comment = fn.Prog.Functions[arg].Name 937 case SETLOCAL, LOCAL: 938 comment = fn.Locals[arg].Name 939 case SETGLOBAL, GLOBAL: 940 comment = fn.Prog.Globals[arg].Name 941 case ATTR, SETFIELD, PREDECLARED, UNIVERSAL, AT: 942 comment = fn.Prog.Names[arg] 943 case FREE: 944 comment = fn.Freevars[arg].Name 945 case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: 946 comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) 947 default: 948 // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: 949 // arg is just a number 950 } 951 var buf bytes.Buffer 952 fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) 953 if comment != "" { 954 fmt.Fprint(&buf, "\t; ", comment) 955 } 956 fmt.Fprintln(&buf) 957 os.Stderr.Write(buf.Bytes()) 958 } 959 960 // newBlock returns a new block. 961 func (fcomp) newBlock() *block { 962 return &block{index: -1, initialstack: -1} 963 } 964 965 // emit emits an instruction to the current block. 
966 func (fcomp *fcomp) emit(op Opcode) { 967 if op >= OpcodeArgMin { 968 panic("missing arg: " + op.String()) 969 } 970 insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col} 971 fcomp.block.insns = append(fcomp.block.insns, insn) 972 fcomp.pos.Line = 0 973 fcomp.pos.Col = 0 974 } 975 976 // emit1 emits an instruction with an immediate operand. 977 func (fcomp *fcomp) emit1(op Opcode, arg uint32) { 978 if op < OpcodeArgMin { 979 panic("unwanted arg: " + op.String()) 980 } 981 insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col} 982 fcomp.block.insns = append(fcomp.block.insns, insn) 983 fcomp.pos.Line = 0 984 fcomp.pos.Col = 0 985 } 986 987 // jump emits a jump to the specified block. 988 // On return, the current block is unset. 989 func (fcomp *fcomp) jump(b *block) { 990 if b == fcomp.block { 991 panic("self-jump") // unreachable: exprcore has no arbitrary looping constructs 992 } 993 fcomp.block.jmp = b 994 fcomp.block = nil 995 } 996 997 // condjump emits a conditional jump (CJMP or ITERJMP) 998 // to the specified true/false blocks. 999 // (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) 1000 // On return, the current block is unset. 1001 func (fcomp *fcomp) condjump(op Opcode, t, f *block) { 1002 if !(op == CJMP || op == ITERJMP) { 1003 panic("not a conditional jump: " + op.String()) 1004 } 1005 fcomp.emit1(op, 0) // fill in address later 1006 fcomp.block.cjmp = t 1007 fcomp.jump(f) 1008 } 1009 1010 // nameIndex returns the index of the specified name 1011 // within the name pool, adding it if necessary. 1012 func (pcomp *pcomp) nameIndex(name string) uint32 { 1013 index, ok := pcomp.names[name] 1014 if !ok { 1015 index = uint32(len(pcomp.prog.Names)) 1016 pcomp.names[name] = index 1017 pcomp.prog.Names = append(pcomp.prog.Names, name) 1018 } 1019 return index 1020 } 1021 1022 // constantIndex returns the index of the specified constant 1023 // within the constant pool, adding it if necessary. 
1024 func (pcomp *pcomp) constantIndex(v interface{}) uint32 { 1025 index, ok := pcomp.constants[v] 1026 if !ok { 1027 index = uint32(len(pcomp.prog.Constants)) 1028 pcomp.constants[v] = index 1029 pcomp.prog.Constants = append(pcomp.prog.Constants, v) 1030 } 1031 return index 1032 } 1033 1034 // functionIndex returns the index of the specified function 1035 // AST the nestedfun pool, adding it if necessary. 1036 func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { 1037 index, ok := pcomp.functions[fn] 1038 if !ok { 1039 index = uint32(len(pcomp.prog.Functions)) 1040 pcomp.functions[fn] = index 1041 pcomp.prog.Functions = append(pcomp.prog.Functions, fn) 1042 } 1043 return index 1044 } 1045 1046 // string emits code to push the specified string. 1047 func (fcomp *fcomp) string(s string) { 1048 idx := fcomp.pcomp.constantIndex(s) 1049 fcomp.constants[idx] = struct{}{} 1050 fcomp.emit1(CONSTANT, idx) 1051 } 1052 1053 // setPos sets the current source position. 1054 // It should be called prior to any operation that can fail dynamically. 1055 // All positions are assumed to belong to the same file. 1056 func (fcomp *fcomp) setPos(pos syntax.Position) { 1057 fcomp.pos = pos 1058 } 1059 1060 // set emits code to store the top-of-stack value 1061 // to the specified local, cell, or global variable. 1062 func (fcomp *fcomp) set(id *syntax.Ident) { 1063 bind := id.Binding.(*resolve.Binding) 1064 switch bind.Scope { 1065 case resolve.Local: 1066 fcomp.emit1(SETLOCAL, uint32(bind.Index)) 1067 case resolve.Cell: 1068 // TODO(adonovan): opt: make a single op for LOCAL<n>, SETCELL. 1069 fcomp.emit1(LOCAL, uint32(bind.Index)) 1070 fcomp.emit(SETCELL) 1071 case resolve.Global: 1072 fcomp.emit1(SETGLOBAL, uint32(bind.Index)) 1073 default: 1074 log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope) 1075 } 1076 } 1077 1078 // lookup emits code to push the value of the specified variable. 
func (fcomp *fcomp) lookup(id *syntax.Ident) {
	bind := id.Binding.(*resolve.Binding)
	if bind.Scope != resolve.Universal { // (universal lookup can't fail)
		fcomp.setPos(id.NamePos)
	}
	switch bind.Scope {
	case resolve.Local:
		fcomp.emit1(LOCAL, uint32(bind.Index))
	case resolve.Free:
		// Push the cell, then replace it with its contents.
		// TODO(adonovan): opt: make a single op for FREE<n>, CELL.
		fcomp.emit1(FREE, uint32(bind.Index))
		fcomp.emit(CELL)
	case resolve.Cell:
		// TODO(adonovan): opt: make a single op for LOCAL<n>, CELL.
		fcomp.emit1(LOCAL, uint32(bind.Index))
		fcomp.emit(CELL)
	case resolve.Global:
		fcomp.emit1(GLOBAL, uint32(bind.Index))
	case resolve.Predeclared:
		fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name))
	case resolve.Universal:
		fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name))
	default:
		log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope)
	}
}

// stmts emits code for a sequence of statements.
// A POP is emitted before every statement but the first, discarding the
// value left by its predecessor, so that only the value of the final
// statement remains on the stack.
func (fcomp *fcomp) stmts(stmts []syntax.Stmt) {
	for i, stmt := range stmts {
		if i != 0 {
			fcomp.emit(POP)
		}

		fcomp.stmt(stmt)
	}
}

// stmt emits code for a single statement.
// Because stmts pops one value between statements, each case is expected
// to leave one value on the operand stack.
// It panics on a statement type it does not recognise.
func (fcomp *fcomp) stmt(stmt syntax.Stmt) {
	switch stmt := stmt.(type) {
	case *syntax.ExprStmt:
		if _, ok := stmt.X.(*syntax.Literal); ok {
			// Opt: don't compile doc comments only to pop them.
			// NOTE(review): this path pushes nothing, yet stmts emits a POP
			// before the following statement — confirm the stack stays balanced.
			return
		}
		fcomp.expr(stmt.X)

	case *syntax.BranchStmt:
		// Resolver invariant: break/continue appear only within loops.
		switch stmt.Token {
		case syntax.PASS:
			// no-op
			// NOTE(review): nothing is pushed here, unlike the other
			// statement kinds — confirm this is intended.
		case syntax.BREAK:
			b := fcomp.loops[len(fcomp.loops)-1].break_
			fcomp.jump(b)
			fcomp.block = fcomp.newBlock() // dead code
		case syntax.CONTINUE:
			b := fcomp.loops[len(fcomp.loops)-1].continue_
			fcomp.jump(b)
			fcomp.block = fcomp.newBlock() // dead code
		}

	case *syntax.IfStmt:
		// Keep consistent with CondExpr.
		t := fcomp.newBlock()
		f := fcomp.newBlock()
		done := fcomp.newBlock()

		fcomp.ifelse(stmt.Cond, t, f)

		// An empty branch yields None so that both arms
		// reach done with the same stack depth.
		fcomp.block = t
		if len(stmt.True) == 0 {
			fcomp.emit(NONE)
		} else {
			fcomp.stmts(stmt.True)
		}
		fcomp.jump(done)

		fcomp.block = f
		if len(stmt.False) == 0 {
			fcomp.emit(NONE)
		} else {
			fcomp.stmts(stmt.False)
		}
		fcomp.jump(done)

		fcomp.block = done

	case *syntax.AssignStmt:
		switch stmt.Op {
		case syntax.EQ:
			// simple assignment: x = y
			fcomp.expr(stmt.RHS)
			fcomp.emit(DUP) // keep a copy as the statement's value
			fcomp.assign(stmt.OpPos, stmt.LHS)

		case syntax.PLUS_EQ,
			syntax.MINUS_EQ,
			syntax.STAR_EQ,
			syntax.SLASH_EQ,
			syntax.SLASHSLASH_EQ,
			syntax.PERCENT_EQ,
			syntax.AMP_EQ,
			syntax.PIPE_EQ,
			syntax.CIRCUMFLEX_EQ,
			syntax.LTLT_EQ,
			syntax.GTGT_EQ:
			// augmented assignment: x += y

			// set emits the store to the left-hand side once the
			// new value has been computed.
			var set func()

			// Evaluate "address" of x exactly once to avoid duplicate side-effects.
			switch lhs := unparen(stmt.LHS).(type) {
			case *syntax.Ident:
				// x = ...
				fcomp.lookup(lhs)
				set = func() {
					fcomp.set(lhs)
				}

			case *syntax.IndexExpr:
				// x[y] = ...
				fcomp.expr(lhs.X)
				fcomp.expr(lhs.Y)
				fcomp.emit(DUP2)
				fcomp.setPos(lhs.Lbrack)
				fcomp.emit(INDEX)
				set = func() {
					fcomp.setPos(lhs.Lbrack)
					fcomp.emit(SETINDEX)
				}

			case *syntax.DotExpr:
				// x.f = ...
				fcomp.expr(lhs.X)
				fcomp.emit(DUP)
				name := fcomp.pcomp.nameIndex(lhs.Name.Name)
				fcomp.setPos(lhs.Dot)
				fcomp.emit1(ATTR, name)
				set = func() {
					fcomp.setPos(lhs.Dot)
					fcomp.emit1(SETFIELD, name)
				}

			default:
				panic(lhs)
			}

			fcomp.expr(stmt.RHS)

			if stmt.Op == syntax.PLUS_EQ {
				// Allow the runtime to optimize list += iterable.
				fcomp.setPos(stmt.OpPos)
				fcomp.emit(INPLACE_ADD)
			} else {
				// Map the augmented token (e.g. MINUS_EQ) to its binary operator (MINUS).
				fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS)
			}
			fcomp.emit(DUP)
			// NOTE(review): for the index and field forms the duplicate sits
			// above the operands that SETINDEX/SETFIELD consume — confirm
			// the operand order against the interpreter.
			set()
		}

	case *syntax.DefStmt:
		fcomp.function(stmt.Function.(*resolve.Function))
		fcomp.emit(DUP) // keep a copy as the statement's value
		fcomp.set(stmt.Name)

	case *syntax.ForStmt:
		// Keep consistent with ForClause.
		head := fcomp.newBlock()
		body := fcomp.newBlock()
		tail := fcomp.newBlock()

		fcomp.expr(stmt.X)
		fcomp.setPos(stmt.For)
		fcomp.emit(ITERPUSH)
		fcomp.jump(head)

		// ITERJMP falls through to body with the next element,
		// or jumps to tail when the iterator is exhausted.
		fcomp.block = head
		fcomp.condjump(ITERJMP, tail, body)

		fcomp.block = body
		fcomp.assign(stmt.For, stmt.Vars)
		fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head})
		fcomp.stmts(stmt.Body)
		fcomp.emit(POP) // discard the value of the body's last statement
		fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
		fcomp.jump(head)

		fcomp.block = tail
		fcomp.emit(ITERPOP)
		fcomp.emit(NONE) // the loop statement itself yields None

	case *syntax.WhileStmt:
		head := fcomp.newBlock()
		body := fcomp.newBlock()
		done := fcomp.newBlock()

		fcomp.jump(head)
		fcomp.block = head
		fcomp.ifelse(stmt.Cond, body, done)

		fcomp.block = body
		fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head})
		fcomp.stmts(stmt.Body)
		// NOTE(review): unlike the for loop above, no POP follows the body
		// before jumping back to head — confirm the stack depth at head
		// matches on both entry paths.
		fcomp.loops = fcomp.loops[:len(fcomp.loops)-1]
		fcomp.jump(head)

		fcomp.block = done
		fcomp.emit(NONE)

	case *syntax.ReturnStmt:
		if stmt.Result != nil {
			fcomp.expr(stmt.Result)
		} else {
			fcomp.emit(NONE)
		}
		fcomp.emit(DUP)
		fcomp.emit(RETURN)
		fcomp.block = fcomp.newBlock() // dead code

	case *syntax.LoadStmt:
		// Push each source name, then the module name, as LOAD expects.
		for i := range stmt.From {
			fcomp.string(stmt.From[i].Name)
		}
		module := stmt.Module.Value.(string)
		fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{
			Name: module,
			Pos:  stmt.Module.TokenPos,
		})
		fcomp.string(module)
		fcomp.setPos(stmt.Load)
		fcomp.emit1(LOAD, uint32(len(stmt.From)))
		// Store the loaded values, taking the targets in reverse order
		// because the last value is on top of the stack.
		for i := range stmt.To {
			fcomp.set(stmt.To[len(stmt.To)-1-i])
		}
		fcomp.emit(NONE)

	case *syntax.ImportStmt:
		for _, i := range stmt.Imports {
			pkg := i.PackageName
			name := pkg.Value.(string)
			fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{
				Name: name,
				Pos:  pkg.TokenPos,
			})

			// Push the three IMPORT operands: namespace, name, args.
			// A missing namespace is passed as the empty string.
			if i.Namespace == nil {
				fcomp.string("")
			} else {
				fcomp.string(i.Namespace.Value.(string))
			}

			fcomp.string(name)
			fcomp.setPos(stmt.Load)
			if len(i.Args) == 0 {
				fcomp.emit(NONE)
			} else {
				// Build a dict of the named arguments.
				fcomp.emit(MAKEDICT)
				for _, arg := range i.Args {
					fcomp.emit(DUP)
					fcomp.string(arg.X.(*syntax.Ident).Name)
					fcomp.expr(arg.Y)
					fcomp.setPos(arg.OpPos)
					fcomp.emit(SETDICTUNIQ)
				}
			}
			fcomp.emit(IMPORT)
			fcomp.set(i.BindingName)
		}
		fcomp.emit(NONE)

	default:
		start, _ := stmt.Span()
		log.Panicf("%s: exec: unexpected statement %T", start, stmt)
	}
}

// assign implements lhs = rhs for arbitrary expressions lhs.
// RHS is on top of stack, consumed.
1357 func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { 1358 switch lhs := lhs.(type) { 1359 case *syntax.ParenExpr: 1360 // (lhs) = rhs 1361 fcomp.assign(pos, lhs.X) 1362 1363 case *syntax.Ident: 1364 // x = rhs 1365 fcomp.set(lhs) 1366 1367 case *syntax.TupleExpr: 1368 // x, y = rhs 1369 fcomp.assignSequence(pos, lhs.List) 1370 1371 case *syntax.ListExpr: 1372 // [x, y] = rhs 1373 fcomp.assignSequence(pos, lhs.List) 1374 1375 case *syntax.IndexExpr: 1376 // x[y] = rhs 1377 fcomp.expr(lhs.X) 1378 fcomp.emit(EXCH) 1379 fcomp.expr(lhs.Y) 1380 fcomp.emit(EXCH) 1381 fcomp.setPos(lhs.Lbrack) 1382 fcomp.emit(SETINDEX) 1383 1384 case *syntax.DotExpr: 1385 // x.f = rhs 1386 fcomp.expr(lhs.X) 1387 fcomp.emit(EXCH) 1388 fcomp.setPos(lhs.Dot) 1389 fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) 1390 1391 default: 1392 panic(lhs) 1393 } 1394 } 1395 1396 func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { 1397 fcomp.setPos(pos) 1398 fcomp.emit1(UNPACK, uint32(len(lhs))) 1399 for i := range lhs { 1400 fcomp.assign(pos, lhs[i]) 1401 } 1402 } 1403 1404 func (fcomp *fcomp) expr(e syntax.Expr) { 1405 switch e := e.(type) { 1406 case *syntax.ParenExpr: 1407 fcomp.expr(e.X) 1408 1409 case *syntax.Ident: 1410 fcomp.lookup(e) 1411 1412 case *syntax.Literal: 1413 // e.Value is int64, float64, *bigInt, or string. 1414 idx := fcomp.pcomp.constantIndex(e.Value) 1415 fcomp.constants[idx] = struct{}{} 1416 fcomp.emit1(CONSTANT, idx) 1417 1418 case *syntax.ListExpr: 1419 for _, x := range e.List { 1420 fcomp.expr(x) 1421 } 1422 fcomp.emit1(MAKELIST, uint32(len(e.List))) 1423 1424 case *syntax.CondExpr: 1425 // Keep consistent with IfStmt. 
1426 t := fcomp.newBlock() 1427 f := fcomp.newBlock() 1428 done := fcomp.newBlock() 1429 1430 fcomp.ifelse(e.Cond, t, f) 1431 1432 fcomp.block = t 1433 fcomp.expr(e.True) 1434 fcomp.jump(done) 1435 1436 fcomp.block = f 1437 fcomp.expr(e.False) 1438 fcomp.jump(done) 1439 1440 fcomp.block = done 1441 1442 case *syntax.IndexExpr: 1443 fcomp.expr(e.X) 1444 fcomp.expr(e.Y) 1445 fcomp.setPos(e.Lbrack) 1446 fcomp.emit(INDEX) 1447 1448 case *syntax.SliceExpr: 1449 fcomp.setPos(e.Lbrack) 1450 fcomp.expr(e.X) 1451 if e.Lo != nil { 1452 fcomp.expr(e.Lo) 1453 } else { 1454 fcomp.emit(NONE) 1455 } 1456 if e.Hi != nil { 1457 fcomp.expr(e.Hi) 1458 } else { 1459 fcomp.emit(NONE) 1460 } 1461 if e.Step != nil { 1462 fcomp.expr(e.Step) 1463 } else { 1464 fcomp.emit(NONE) 1465 } 1466 fcomp.emit(SLICE) 1467 1468 case *syntax.Comprehension: 1469 if e.Curly { 1470 fcomp.emit(MAKEDICT) 1471 } else { 1472 fcomp.emit1(MAKELIST, 0) 1473 } 1474 fcomp.comprehension(e, 0) 1475 1476 case *syntax.TupleExpr: 1477 fcomp.tuple(e.List) 1478 1479 case *syntax.DictExpr: 1480 fcomp.emit(MAKEDICT) 1481 for _, entry := range e.List { 1482 entry := entry.(*syntax.DictEntry) 1483 fcomp.emit(DUP) 1484 fcomp.expr(entry.Key) 1485 fcomp.expr(entry.Value) 1486 fcomp.setPos(entry.Colon) 1487 fcomp.emit(SETDICTUNIQ) 1488 } 1489 case *syntax.ProtoExpr: 1490 fcomp.emit(MAKEPROTO) 1491 for _, entry := range e.List { 1492 entry := entry.(*syntax.ProtoEntry) 1493 fcomp.emit(DUP) 1494 1495 idx := fcomp.pcomp.constantIndex(entry.Key.(*syntax.Ident).Name) 1496 fcomp.constants[idx] = struct{}{} 1497 1498 fcomp.emit1(CONSTANT, idx) 1499 1500 switch v := entry.Value.(type) { 1501 case *syntax.ExprStmt: 1502 fcomp.expr(v.X) 1503 case *syntax.DefStmt: 1504 fcomp.function(v.Function.(*resolve.Function)) 1505 default: 1506 start, _ := e.Span() 1507 log.Panicf("%s: unexpected proto value type: %T", start, v) 1508 } 1509 1510 fcomp.setPos(entry.Colon) 1511 fcomp.emit(SETPROTOKEY) 1512 } 1513 1514 case *syntax.UnaryExpr: 1515 
fcomp.expr(e.X) 1516 fcomp.setPos(e.OpPos) 1517 switch e.Op { 1518 case syntax.MINUS: 1519 fcomp.emit(UMINUS) 1520 case syntax.PLUS: 1521 fcomp.emit(UPLUS) 1522 case syntax.NOT: 1523 fcomp.emit(NOT) 1524 case syntax.TILDE: 1525 fcomp.emit(TILDE) 1526 default: 1527 log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op) 1528 } 1529 1530 case *syntax.BinaryExpr: 1531 switch e.Op { 1532 // short-circuit operators 1533 // TODO(adonovan): use ifelse to simplify conditions. 1534 case syntax.OR: 1535 // x or y => if x then x else y 1536 done := fcomp.newBlock() 1537 y := fcomp.newBlock() 1538 1539 fcomp.expr(e.X) 1540 fcomp.emit(DUP) 1541 fcomp.condjump(CJMP, done, y) 1542 1543 fcomp.block = y 1544 fcomp.emit(POP) // discard X 1545 fcomp.expr(e.Y) 1546 fcomp.jump(done) 1547 1548 fcomp.block = done 1549 1550 case syntax.AND: 1551 // x and y => if x then y else x 1552 done := fcomp.newBlock() 1553 y := fcomp.newBlock() 1554 1555 fcomp.expr(e.X) 1556 fcomp.emit(DUP) 1557 fcomp.condjump(CJMP, y, done) 1558 1559 fcomp.block = y 1560 fcomp.emit(POP) // discard X 1561 fcomp.expr(e.Y) 1562 fcomp.jump(done) 1563 1564 fcomp.block = done 1565 1566 case syntax.PLUS: 1567 fcomp.plus(e) 1568 1569 default: 1570 // all other strict binary operator (includes comparisons) 1571 fcomp.expr(e.X) 1572 fcomp.expr(e.Y) 1573 fcomp.binop(e.OpPos, e.Op) 1574 } 1575 1576 case *syntax.DotExpr: 1577 fcomp.expr(e.X) 1578 fcomp.setPos(e.Dot) 1579 fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) 1580 1581 case *syntax.CallExpr: 1582 fcomp.call(e) 1583 1584 case *syntax.LambdaExpr: 1585 fcomp.function(e.Function.(*resolve.Function)) 1586 1587 case *syntax.AtExpr: 1588 fcomp.setPos(e.OpPos) 1589 fcomp.emit1(AT, fcomp.pcomp.nameIndex(e.Name)) 1590 1591 case *syntax.ShellExpr: 1592 fcomp.shell(e) 1593 1594 default: 1595 start, _ := e.Span() 1596 log.Panicf("%s: unexpected expr %T", start, e) 1597 } 1598 } 1599 1600 type summand struct { 1601 x syntax.Expr 1602 plusPos syntax.Position 1603 } 1604 1605 
// plus emits optimized code for ((a+b)+...)+z that avoids naive 1606 // quadratic behavior for strings, tuples, and lists, 1607 // and folds together adjacent literals of the same type. 1608 func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { 1609 // Gather all the right operands of the left tree of plusses. 1610 // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. 1611 args := make([]summand, 0, 2) // common case: 2 operands 1612 for plus := e; ; { 1613 args = append(args, summand{unparen(plus.Y), plus.OpPos}) 1614 left := unparen(plus.X) 1615 x, ok := left.(*syntax.BinaryExpr) 1616 if !ok || x.Op != syntax.PLUS { 1617 args = append(args, summand{x: left}) 1618 break 1619 } 1620 plus = x 1621 } 1622 // Reverse args to syntactic order. 1623 for i, n := 0, len(args)/2; i < n; i++ { 1624 j := len(args) - 1 - i 1625 args[i], args[j] = args[j], args[i] 1626 } 1627 1628 // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). 1629 out := args[:0] // compact in situ 1630 for i := 0; i < len(args); { 1631 j := i + 1 1632 if code := addable(args[i].x); code != 0 { 1633 for j < len(args) && addable(args[j].x) == code { 1634 j++ 1635 } 1636 if j > i+1 { 1637 args[i].x = add(code, args[i:j]) 1638 } 1639 } 1640 out = append(out, args[i]) 1641 i = j 1642 } 1643 args = out 1644 1645 // Emit code for an n-ary sum (n > 0). 1646 fcomp.expr(args[0].x) 1647 for _, summand := range args[1:] { 1648 fcomp.expr(summand.x) 1649 fcomp.setPos(summand.plusPos) 1650 fcomp.emit(PLUS) 1651 } 1652 1653 // If len(args) > 2, use of an accumulator instead of a chain of 1654 // PLUS operations may be more efficient. 1655 // However, no gain was measured on a workload analogous to Bazel loading; 1656 // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. 
1657 // 1658 // We cannot use a single n-ary SUM operation 1659 // a b c SUM<3> 1660 // because we need to report a distinct error for each 1661 // individual '+' operation, so three additional operations are 1662 // needed: 1663 // 1664 // ACCSTART => create buffer and append to it 1665 // ACCUM => append to buffer 1666 // ACCEND => get contents of buffer 1667 // 1668 // For string, list, and tuple values, the interpreter can 1669 // optimize these operations by using a mutable buffer. 1670 // For all other types, ACCSTART and ACCEND would behave like 1671 // the identity function and ACCUM behaves like PLUS. 1672 // ACCUM must correctly support user-defined operations 1673 // such as list+foo. 1674 // 1675 // fcomp.emit(ACCSTART) 1676 // for _, summand := range args[1:] { 1677 // fcomp.expr(summand.x) 1678 // fcomp.setPos(summand.plusPos) 1679 // fcomp.emit(ACCUM) 1680 // } 1681 // fcomp.emit(ACCEND) 1682 } 1683 1684 // addable reports whether e is a statically addable 1685 // expression: a [s]tring, [l]ist, or [t]uple. 1686 func addable(e syntax.Expr) rune { 1687 switch e := e.(type) { 1688 case *syntax.Literal: 1689 // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding. 1690 switch e.Token { 1691 case syntax.STRING: 1692 return 's' 1693 } 1694 case *syntax.ListExpr: 1695 return 'l' 1696 case *syntax.TupleExpr: 1697 return 't' 1698 } 1699 return 0 1700 } 1701 1702 // add returns an expression denoting the sum of args, 1703 // which are all addable values of the type indicated by code. 1704 // The resulting syntax is degenerate, lacking position, etc. 
1705 func add(code rune, args []summand) syntax.Expr { 1706 switch code { 1707 case 's': 1708 var buf bytes.Buffer 1709 for _, arg := range args { 1710 buf.WriteString(arg.x.(*syntax.Literal).Value.(string)) 1711 } 1712 return &syntax.Literal{Token: syntax.STRING, Value: buf.String()} 1713 case 'l': 1714 var elems []syntax.Expr 1715 for _, arg := range args { 1716 elems = append(elems, arg.x.(*syntax.ListExpr).List...) 1717 } 1718 return &syntax.ListExpr{List: elems} 1719 case 't': 1720 var elems []syntax.Expr 1721 for _, arg := range args { 1722 elems = append(elems, arg.x.(*syntax.TupleExpr).List...) 1723 } 1724 return &syntax.TupleExpr{List: elems} 1725 } 1726 panic(code) 1727 } 1728 1729 func unparen(e syntax.Expr) syntax.Expr { 1730 if p, ok := e.(*syntax.ParenExpr); ok { 1731 return unparen(p.X) 1732 } 1733 return e 1734 } 1735 1736 func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { 1737 // TODO(adonovan): simplify by assuming syntax and compiler constants align. 1738 fcomp.setPos(pos) 1739 switch op { 1740 // arithmetic 1741 case syntax.PLUS: 1742 fcomp.emit(PLUS) 1743 case syntax.MINUS: 1744 fcomp.emit(MINUS) 1745 case syntax.STAR: 1746 fcomp.emit(STAR) 1747 case syntax.SLASH: 1748 fcomp.emit(SLASH) 1749 case syntax.SLASHSLASH: 1750 fcomp.emit(SLASHSLASH) 1751 case syntax.PERCENT: 1752 fcomp.emit(PERCENT) 1753 case syntax.AMP: 1754 fcomp.emit(AMP) 1755 case syntax.PIPE: 1756 fcomp.emit(PIPE) 1757 case syntax.CIRCUMFLEX: 1758 fcomp.emit(CIRCUMFLEX) 1759 case syntax.LTLT: 1760 fcomp.emit(LTLT) 1761 case syntax.GTGT: 1762 fcomp.emit(GTGT) 1763 case syntax.IN: 1764 fcomp.emit(IN) 1765 case syntax.NOT_IN: 1766 fcomp.emit(IN) 1767 fcomp.emit(NOT) 1768 1769 // comparisons 1770 case syntax.EQL, 1771 syntax.NEQ, 1772 syntax.GT, 1773 syntax.LT, 1774 syntax.LE, 1775 syntax.GE: 1776 fcomp.emit(Opcode(op-syntax.EQL) + EQL) 1777 1778 default: 1779 log.Panicf("%s: unexpected binary op: %s", pos, op) 1780 } 1781 } 1782 1783 func (fcomp *fcomp) shell(shell 
*syntax.ShellExpr) { 1784 for _, x := range shell.Content { 1785 fcomp.expr(x) 1786 } 1787 1788 fcomp.emit1(SHELL, uint32(len(shell.Content))) 1789 } 1790 1791 func (fcomp *fcomp) call(call *syntax.CallExpr) { 1792 // TODO(adonovan): opt: Use optimized path for calling methods 1793 // of built-ins: x.f(...) to avoid materializing a closure. 1794 // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { 1795 // fcomp.expr(dot.X) 1796 // fcomp.args(call) 1797 // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) 1798 // return 1799 // } 1800 1801 // usual case 1802 fcomp.expr(call.Fn) 1803 op, arg := fcomp.args(call) 1804 fcomp.setPos(call.Lparen) 1805 fcomp.emit1(op, arg) 1806 } 1807 1808 // args emits code to push a tuple of positional arguments 1809 // and a tuple of named arguments containing alternating keys and values. 1810 // Either or both tuples may be empty (TODO(adonovan): optimize). 1811 func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { 1812 var callmode int 1813 // Compute the number of each kind of parameter. 
1814 var p, n int // number of positional, named arguments 1815 var varargs, kwargs syntax.Expr 1816 for _, arg := range call.Args { 1817 if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { 1818 1819 // named argument (name, value) 1820 fcomp.string(binary.X.(*syntax.Ident).Name) 1821 fcomp.expr(binary.Y) 1822 n++ 1823 continue 1824 } 1825 if unary, ok := arg.(*syntax.UnaryExpr); ok { 1826 if unary.Op == syntax.STAR { 1827 callmode |= 1 1828 varargs = unary.X 1829 continue 1830 } else if unary.Op == syntax.STARSTAR { 1831 callmode |= 2 1832 kwargs = unary.X 1833 continue 1834 } 1835 } 1836 1837 // positional argument 1838 fcomp.expr(arg) 1839 p++ 1840 } 1841 1842 // Python2 and Python3 both permit named arguments 1843 // to appear both before and after a *args argument: 1844 // f(1, 2, x=3, *[4], y=5, **dict(z=6)) 1845 // 1846 // They also differ in their evaluation order: 1847 // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last) 1848 // Python3: 1 2 4 3 5 6 (positional args evaluated before named args) 1849 // exprcore-in-Java historically used a third order: 1850 // Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right) 1851 // 1852 // After discussion in github.com/bazelbuild/exprcore#13, the 1853 // spec now requires exprcore to statically reject named 1854 // arguments after *args (e.g. y=5), and to use Python2-style 1855 // evaluation order. This is both easy to implement and 1856 // consistent with lexical order: 1857 // 1858 // f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6 1859 1860 // *args 1861 if varargs != nil { 1862 fcomp.expr(varargs) 1863 } 1864 1865 // **kwargs 1866 if kwargs != nil { 1867 fcomp.expr(kwargs) 1868 } 1869 1870 // TODO(adonovan): avoid this with a more flexible encoding. 
1871 if p >= 256 || n >= 256 { 1872 // resolve already checked this; should be unreachable 1873 panic("too many arguments in call") 1874 } 1875 1876 return CALL + Opcode(callmode), uint32(p<<8 | n) 1877 } 1878 1879 func (fcomp *fcomp) tuple(elems []syntax.Expr) { 1880 for _, elem := range elems { 1881 fcomp.expr(elem) 1882 } 1883 fcomp.emit1(MAKETUPLE, uint32(len(elems))) 1884 } 1885 1886 func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { 1887 if clauseIndex == len(comp.Clauses) { 1888 fcomp.emit(DUP) // accumulator 1889 if comp.Curly { 1890 // dict: {k:v for ...} 1891 // Parser ensures that body is of form k:v. 1892 // Python-style set comprehensions {body for vars in x} 1893 // are not supported. 1894 entry := comp.Body.(*syntax.DictEntry) 1895 fcomp.expr(entry.Key) 1896 fcomp.expr(entry.Value) 1897 fcomp.setPos(entry.Colon) 1898 fcomp.emit(SETDICT) 1899 } else { 1900 // list: [body for vars in x] 1901 fcomp.expr(comp.Body) 1902 fcomp.emit(APPEND) 1903 } 1904 return 1905 } 1906 1907 clause := comp.Clauses[clauseIndex] 1908 switch clause := clause.(type) { 1909 case *syntax.IfClause: 1910 t := fcomp.newBlock() 1911 done := fcomp.newBlock() 1912 fcomp.ifelse(clause.Cond, t, done) 1913 1914 fcomp.block = t 1915 fcomp.comprehension(comp, clauseIndex+1) 1916 fcomp.jump(done) 1917 1918 fcomp.block = done 1919 return 1920 1921 case *syntax.ForClause: 1922 // Keep consistent with ForStmt. 
1923 head := fcomp.newBlock() 1924 body := fcomp.newBlock() 1925 tail := fcomp.newBlock() 1926 1927 fcomp.expr(clause.X) 1928 fcomp.setPos(clause.For) 1929 fcomp.emit(ITERPUSH) 1930 fcomp.jump(head) 1931 1932 fcomp.block = head 1933 fcomp.condjump(ITERJMP, tail, body) 1934 1935 fcomp.block = body 1936 fcomp.assign(clause.For, clause.Vars) 1937 fcomp.comprehension(comp, clauseIndex+1) 1938 fcomp.jump(head) 1939 1940 fcomp.block = tail 1941 fcomp.emit(ITERPOP) 1942 return 1943 } 1944 1945 start, _ := clause.Span() 1946 log.Panicf("%s: unexpected comprehension clause %T", start, clause) 1947 } 1948 1949 func (fcomp *fcomp) function(f *resolve.Function) { 1950 // Evaluation of the defaults may fail, so record the position. 1951 fcomp.setPos(f.Pos) 1952 1953 // To reduce allocation, we emit a combined tuple 1954 // for the defaults and the freevars. 1955 // The function knows where to split it at run time. 1956 1957 // Generate tuple of parameter defaults. For: 1958 // def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs) 1959 // the tuple is: 1960 // (dp2, dp3, MANDATORY, dk2, MANDATORY). 1961 ndefaults := 0 1962 seenStar := false 1963 for _, param := range f.Params { 1964 switch param := param.(type) { 1965 case *syntax.BinaryExpr: 1966 fcomp.expr(param.Y) 1967 ndefaults++ 1968 case *syntax.UnaryExpr: 1969 seenStar = true // * or *args (also **kwargs) 1970 case *syntax.Ident: 1971 if seenStar { 1972 fcomp.emit(MANDATORY) 1973 ndefaults++ 1974 } 1975 default: 1976 log.Panicf("%s: unexpected param type %T", f.Pos, param) 1977 } 1978 } 1979 1980 // Capture the cells of the function's 1981 // free variables from the lexical environment. 1982 for _, freevar := range f.FreeVars { 1983 // Don't call fcomp.lookup because we want 1984 // the cell itself, not its content. 
1985 switch freevar.Scope { 1986 case resolve.Free: 1987 fcomp.emit1(FREE, uint32(freevar.Index)) 1988 case resolve.Cell: 1989 fcomp.emit1(LOCAL, uint32(freevar.Index)) 1990 } 1991 } 1992 1993 fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars))) 1994 1995 funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars) 1996 1997 if debug { 1998 // TODO(adonovan): do compilations sequentially not as a tree, 1999 // to make the log easier to read. 2000 // Simplify by identifying Toplevel and functionIndex 0. 2001 fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) 2002 } 2003 2004 // def f(a, *, b=1) has only 2 parameters. 2005 numParams := len(f.Params) 2006 if f.NumKwonlyParams > 0 && !f.HasVarargs { 2007 numParams-- 2008 } 2009 2010 funcode.NumParams = numParams 2011 funcode.NumKwonlyParams = f.NumKwonlyParams 2012 funcode.HasVarargs = f.HasVarargs 2013 funcode.HasKwargs = f.HasKwargs 2014 2015 subfn := fcomp.pcomp.functionIndex(funcode) 2016 2017 fcomp.fn.SubFunctions = append(fcomp.fn.SubFunctions, subfn) 2018 2019 fcomp.emit1(MAKEFUNC, subfn) 2020 } 2021 2022 // ifelse emits a Boolean control flow decision. 2023 // On return, the current block is unset. 
2024 func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { 2025 switch cond := cond.(type) { 2026 case *syntax.UnaryExpr: 2027 if cond.Op == syntax.NOT { 2028 // if not x then goto t else goto f 2029 // => 2030 // if x then goto f else goto t 2031 fcomp.ifelse(cond.X, f, t) 2032 return 2033 } 2034 2035 case *syntax.BinaryExpr: 2036 switch cond.Op { 2037 case syntax.AND: 2038 // if x and y then goto t else goto f 2039 // => 2040 // if x then ifelse(y, t, f) else goto f 2041 fcomp.expr(cond.X) 2042 y := fcomp.newBlock() 2043 fcomp.condjump(CJMP, y, f) 2044 2045 fcomp.block = y 2046 fcomp.ifelse(cond.Y, t, f) 2047 return 2048 2049 case syntax.OR: 2050 // if x or y then goto t else goto f 2051 // => 2052 // if x then goto t else ifelse(y, t, f) 2053 fcomp.expr(cond.X) 2054 y := fcomp.newBlock() 2055 fcomp.condjump(CJMP, t, y) 2056 2057 fcomp.block = y 2058 fcomp.ifelse(cond.Y, t, f) 2059 return 2060 case syntax.NOT_IN: 2061 // if x not in y then goto t else goto f 2062 // => 2063 // if x in y then goto f else goto t 2064 copy := *cond 2065 copy.Op = syntax.IN 2066 fcomp.expr(©) 2067 fcomp.condjump(CJMP, f, t) 2068 return 2069 } 2070 } 2071 2072 // general case 2073 fcomp.expr(cond) 2074 fcomp.condjump(CJMP, t, f) 2075 }