// Source: github.com/k14s/starlark-go@v0.0.0-20200720175618-3a5c849cc368/internal/compile/compile.go

// Package compile defines the Starlark bytecode compiler.
// It is an internal package of the Starlark interpreter and is not directly accessible to clients.
//
// The compiler generates byte code with optional uint32 operands for a
// virtual machine with the following components:
//   - a program counter, which is an index into the byte code array.
//   - an operand stack, whose maximum size is computed for each function by the compiler.
//   - a stack of active iterators.
//   - an array of local variables.
//     The number of local variables and their indices are computed by the resolver.
//     Locals (possibly including parameters) that are shared with nested functions
//     are 'cells': their locals array slot will contain a value of type 'cell',
//     an indirect value in a box that is explicitly read/updated by instructions.
//   - an array of free variables, for nested functions.
//     Free variables are a subset of the ancestors' cell variables.
//     As with locals and cells, these are computed by the resolver.
//   - an array of global variables, shared among all functions in the same module.
//     All elements are initially nil.
//   - two maps of predeclared and universal identifiers.
//
// Each function has a line number table that maps each program counter
// offset to a source position, including the column number.
//
// Operands, logically uint32s, are encoded using little-endian 7-bit
// varints, the top bit indicating that more bytes follow.
package compile // import "github.com/k14s/starlark-go/internal/compile"

import (
	"bytes"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strconv"
	"sync"

	"github.com/k14s/starlark-go/resolve"
	"github.com/k14s/starlark-go/syntax"
)

// Disassemble causes the assembly code for each function
// to be printed to stderr as it is generated.
var Disassemble = false

const debug = false // make code generation verbose, for debugging the compiler

// Version must be incremented to force recompilation of saved bytecode files.
const Version = 10

// An Opcode identifies one bytecode instruction.
// Opcodes at or above OpcodeArgMin carry an immediate operand;
// those below it are emitted as a single byte.
type Opcode uint8

// "x DUP x x" is a "stack picture" that describes the state of the
// stack before and after execution of the instruction.
//
// OP<index> indicates an immediate operand that is an index into the
// specified table: locals, names, freevars, constants.
//
// The numeric values are assigned by iota, so the order of the
// declarations below is significant: inserting or reordering an
// opcode changes the encoding of every opcode after it.
const (
	NOP Opcode = iota // - NOP -

	// stack operations
	DUP  //   x DUP x x
	DUP2 // x y DUP2 x y x y
	POP  //   x POP -
	EXCH // x y EXCH y x

	// binary comparisons
	// (order must match Token)
	LT
	GT
	GE
	LE
	EQL
	NEQ

	// binary arithmetic
	// (order must match Token)
	PLUS
	MINUS
	STAR
	SLASH
	SLASHSLASH
	PERCENT
	AMP
	PIPE
	CIRCUMFLEX
	LTLT
	GTGT

	IN

	// unary operators
	UPLUS  // x UPLUS x
	UMINUS // x UMINUS -x
	TILDE  // x TILDE ~x

	NONE      // - NONE None
	TRUE      // - TRUE True
	FALSE     // - FALSE False
	MANDATORY // - MANDATORY Mandatory [sentinel value for required kwonly args]

	ITERPUSH    // iterable ITERPUSH - [pushes the iterator stack]
	ITERPOP     // - ITERPOP - [pops the iterator stack]
	NOT         // value NOT bool
	RETURN      // value RETURN -
	SETINDEX    // a i new SETINDEX -
	INDEX       // a i INDEX elem
	SETDICT     // dict key value SETDICT -
	SETDICTUNIQ // dict key value SETDICTUNIQ -
	APPEND      // list elem APPEND -
	SLICE       // x lo hi step SLICE slice
	INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y)
	MAKEDICT    // - MAKEDICT dict
	SETCELL     // value cell SETCELL -
	CELL        // cell CELL value

	// --- opcodes with an argument must go below this line ---

	// control flow
	JMP     // - JMP<addr> -
	CJMP    // cond CJMP<addr> -
	ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator]
	// or:     - ITERJMP<addr> - (and jump)

	CONSTANT    // - CONSTANT<constant> value
	MAKETUPLE   // x1 ... xn MAKETUPLE<n> tuple
	MAKELIST    // x1 ... xn MAKELIST<n> list
	MAKEFUNC    // defaults+freevars MAKEFUNC<func> fn
	LOAD        // from1 ... fromN module LOAD<n> v1 ... vN
	SETLOCAL    // value SETLOCAL<local> -
	SETGLOBAL   // value SETGLOBAL<global> -
	LOCAL       // - LOCAL<local> value
	FREE        // - FREE<freevar> cell
	GLOBAL      // - GLOBAL<global> value
	PREDECLARED // - PREDECLARED<name> value
	UNIVERSAL   // - UNIVERSAL<name> value
	ATTR        // x ATTR<name> y           y = x.name
	SETFIELD    // x y SETFIELD<name> -     x.name = y
	UNPACK      // iterable UNPACK<n> vn ... v1

	// n>>8 is #positional args and n&0xff is #named args (pairs).
	CALL        // fn positional named CALL<n> result
	CALL_VAR    // fn positional named *args CALL_VAR<n> result
	CALL_KW     // fn positional named **kwargs CALL_KW<n> result
	CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result

	OpcodeArgMin = JMP         // lowest opcode that takes an operand
	OpcodeMax    = CALL_VAR_KW // highest valid opcode (inclusive)
)

// TODO(adonovan): add dynamic checks for missing opcodes in the tables below.
// opcodeNames holds the lower-case mnemonic of each opcode, indexed by
// Opcode. It is consulted by Opcode.String; an opcode with no entry
// (empty string) is reported there as illegal.
//
// NOTE(review): the CALL_KW mnemonic carries a trailing space ("call_kw ").
// That looks unintentional, but it is part of the printed output, so
// confirm before changing it.
var opcodeNames = [...]string{
	AMP:         "amp",
	APPEND:      "append",
	ATTR:        "attr",
	CALL:        "call",
	CALL_KW:     "call_kw ",
	CALL_VAR:    "call_var",
	CALL_VAR_KW: "call_var_kw",
	CELL:        "cell",
	CIRCUMFLEX:  "circumflex",
	CJMP:        "cjmp",
	CONSTANT:    "constant",
	DUP2:        "dup2",
	DUP:         "dup",
	EQL:         "eql",
	EXCH:        "exch",
	FALSE:       "false",
	FREE:        "free",
	GE:          "ge",
	GLOBAL:      "global",
	GT:          "gt",
	GTGT:        "gtgt",
	IN:          "in",
	INDEX:       "index",
	INPLACE_ADD: "inplace_add",
	ITERJMP:     "iterjmp",
	ITERPOP:     "iterpop",
	ITERPUSH:    "iterpush",
	JMP:         "jmp",
	LE:          "le",
	LOAD:        "load",
	LOCAL:       "local",
	LT:          "lt",
	LTLT:        "ltlt",
	MAKEDICT:    "makedict",
	MAKEFUNC:    "makefunc",
	MAKELIST:    "makelist",
	MAKETUPLE:   "maketuple",
	MANDATORY:   "mandatory",
	MINUS:       "minus",
	NEQ:         "neq",
	NONE:        "none",
	NOP:         "nop",
	NOT:         "not",
	PERCENT:     "percent",
	PIPE:        "pipe",
	PLUS:        "plus",
	POP:         "pop",
	PREDECLARED: "predeclared",
	RETURN:      "return",
	SETCELL:     "setcell",
	SETDICT:     "setdict",
	SETDICTUNIQ: "setdictuniq",
	SETFIELD:    "setfield",
	SETGLOBAL:   "setglobal",
	SETINDEX:    "setindex",
	SETLOCAL:    "setlocal",
	SLASH:       "slash",
	SLASHSLASH:  "slashslash",
	SLICE:       "slice",
	STAR:        "star",
	TILDE:       "tilde",
	TRUE:        "true",
	UMINUS:      "uminus",
	UNIVERSAL:   "universal",
	UNPACK:      "unpack",
	UPLUS:       "uplus",
}

// variableStackEffect is the sentinel stored in stackEffect for
// instructions whose effect on the stack depends on their operand;
// insn.stackeffect computes the real value for those.
const variableStackEffect = 0x7f

// stackEffect records the effect on the size of the operand stack of
// each kind of instruction. For some instructions this requires computation.
226 var stackEffect = [...]int8{ 227 AMP: -1, 228 APPEND: -2, 229 ATTR: 0, 230 CALL: variableStackEffect, 231 CALL_KW: variableStackEffect, 232 CALL_VAR: variableStackEffect, 233 CALL_VAR_KW: variableStackEffect, 234 CELL: 0, 235 CIRCUMFLEX: -1, 236 CJMP: -1, 237 CONSTANT: +1, 238 DUP2: +2, 239 DUP: +1, 240 EQL: -1, 241 FALSE: +1, 242 FREE: +1, 243 GE: -1, 244 GLOBAL: +1, 245 GT: -1, 246 GTGT: -1, 247 IN: -1, 248 INDEX: -1, 249 INPLACE_ADD: -1, 250 ITERJMP: variableStackEffect, 251 ITERPOP: 0, 252 ITERPUSH: -1, 253 JMP: 0, 254 LE: -1, 255 LOAD: -1, 256 LOCAL: +1, 257 LT: -1, 258 LTLT: -1, 259 MAKEDICT: +1, 260 MAKEFUNC: 0, 261 MAKELIST: variableStackEffect, 262 MAKETUPLE: variableStackEffect, 263 MANDATORY: +1, 264 MINUS: -1, 265 NEQ: -1, 266 NONE: +1, 267 NOP: 0, 268 NOT: 0, 269 PERCENT: -1, 270 PIPE: -1, 271 PLUS: -1, 272 POP: -1, 273 PREDECLARED: +1, 274 RETURN: -1, 275 SETCELL: -2, 276 SETDICT: -3, 277 SETDICTUNIQ: -3, 278 SETFIELD: -2, 279 SETGLOBAL: -1, 280 SETINDEX: -3, 281 SETLOCAL: -1, 282 SLASH: -1, 283 SLASHSLASH: -1, 284 SLICE: -3, 285 STAR: -1, 286 TRUE: +1, 287 UMINUS: 0, 288 UNIVERSAL: +1, 289 UNPACK: variableStackEffect, 290 UPLUS: 0, 291 } 292 293 func (op Opcode) String() string { 294 if op < OpcodeMax { 295 if name := opcodeNames[op]; name != "" { 296 return name 297 } 298 } 299 return fmt.Sprintf("illegal op (%d)", op) 300 } 301 302 // A Program is a Starlark file in executable form. 303 // 304 // Programs are serialized by the Program.Encode method, 305 // which must be updated whenever this declaration is changed. 
type Program struct {
	Loads     []Binding     // name (really, string) and position of each load stmt
	Names     []string      // names of attributes and predeclared/universal variables; indexed by <name> operands
	Constants []interface{} // = string | int64 | float64 | *big.Int; indexed by the CONSTANT operand
	Functions []*Funcode    // function pool; indexed by the MAKEFUNC operand
	Globals   []Binding     // for error messages and tracing
	Toplevel  *Funcode      // module initialization function
}

// A Funcode is the code of a compiled Starlark function.
//
// Funcodes are serialized by the encoder.function method,
// which must be updated whenever this declaration is changed.
type Funcode struct {
	Prog      *Program        // the program to which this function belongs
	Pos       syntax.Position // position of def or lambda token
	Name      string          // name of this function
	Doc       string          // docstring of this function
	Code      []byte          // the byte code
	pclinetab []uint16        // mapping from pc to linenum (packed, delta-encoded; see decodeLNT)
	Locals    []Binding       // locals, parameters first
	Cells     []int           // indices of Locals that require cells
	Freevars  []Binding       // for tracing
	MaxStack  int             // maximum operand stack depth, computed by the compiler

	// NOTE(review): the four fields below are not assigned anywhere in
	// the visible part of this file; presumably they are filled in where
	// nested function definitions are compiled. Confirm before relying on it.
	NumParams             int
	NumKwonlyParams       int
	HasVarargs, HasKwargs bool

	// -- transient state --

	lntOnce sync.Once   // guards the one-time decoding of pclinetab into lnt
	lnt     []pclinecol // decoded line number table
}

// A pclinecol is one decoded row of a function's line number table:
// the source line and column in effect from program counter pc onward.
type pclinecol struct {
	pc        uint32
	line, col int32
}

// A Binding is the name and position of a binding identifier.
type Binding struct {
	Name string
	Pos  syntax.Position
}

// A pcomp holds the compiler state for a Program.
type pcomp struct {
	prog *Program // what we're building

	// Each map records the pool index already assigned to a value,
	// so that repeated uses share a single pool entry.
	names     map[string]uint32      // index of each name in prog.Names
	constants map[interface{}]uint32 // index of each constant in prog.Constants
	functions map[*Funcode]uint32    // index of each function in prog.Functions
}

// An fcomp holds the compiler state for a Funcode.
361 type fcomp struct { 362 fn *Funcode // what we're building 363 364 pcomp *pcomp 365 pos syntax.Position // current position of generated code 366 loops []loop 367 block *block 368 } 369 370 type loop struct { 371 break_, continue_ *block 372 } 373 374 type block struct { 375 insns []insn 376 377 // If the last insn is a RETURN, jmp and cjmp are nil. 378 // If the last insn is a CJMP or ITERJMP, 379 // cjmp and jmp are the "true" and "false" successors. 380 // Otherwise, jmp is the sole successor. 381 jmp, cjmp *block 382 383 initialstack int // for stack depth computation 384 385 // Used during encoding 386 index int // -1 => not encoded yet 387 addr uint32 388 } 389 390 type insn struct { 391 op Opcode 392 arg uint32 393 line, col int32 394 } 395 396 // Position returns the source position for program counter pc. 397 func (fn *Funcode) Position(pc uint32) syntax.Position { 398 fn.lntOnce.Do(fn.decodeLNT) 399 400 // Binary search to find last LNT entry not greater than pc. 401 // To avoid dynamic dispatch, this is a specialization of 402 // sort.Search using this predicate: 403 // !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc) 404 n := len(fn.lnt) 405 i, j := 0, n 406 for i < j { 407 h := int(uint(i+j) >> 1) 408 if !(h >= n-1 || fn.lnt[h+1].pc > pc) { 409 i = h + 1 410 } else { 411 j = h 412 } 413 } 414 415 var line, col int32 416 if i < n { 417 line = fn.lnt[i].line 418 col = fn.lnt[i].col 419 } 420 421 pos := fn.Pos // copy the (annoyingly inaccessible) filename 422 pos.Col = col 423 pos.Line = line 424 return pos 425 } 426 427 // decodeLNT decodes the line number table and populates fn.lnt. 428 // It is called at most once. 429 func (fn *Funcode) decodeLNT() { 430 // Conceptually the table contains rows of the form 431 // (pc uint32, line int32, col int32), sorted by pc. 
432 // We use a delta encoding, since the differences 433 // between successive pc, line, and column values 434 // are typically small and positive (though line and 435 // especially column differences may be negative). 436 // The delta encoding starts from 437 // {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}. 438 // 439 // Each entry is packed into one or more 16-bit values: 440 // Δpc uint4 441 // Δline int5 442 // Δcol int6 443 // incomplete uint1 444 // The top 4 bits are the unsigned delta pc. 445 // The next 5 bits are the signed line number delta. 446 // The next 6 bits are the signed column number delta. 447 // The bottom bit indicates that more rows follow because 448 // one of the deltas was maxed out. 449 // These field widths were chosen from a sample of real programs, 450 // and allow >97% of rows to be encoded in a single uint16. 451 452 fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation 453 entry := pclinecol{ 454 pc: 0, 455 line: fn.Pos.Line, 456 col: fn.Pos.Col, 457 } 458 for _, x := range fn.pclinetab { 459 entry.pc += uint32(x) >> 12 460 entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline 461 entry.col += int32((int16(x) << 9) >> (16 - 6)) // sign extend Δcol 462 if (x & 1) == 0 { 463 fn.lnt = append(fn.lnt, entry) 464 } 465 } 466 } 467 468 // bindings converts resolve.Bindings to compiled form. 469 func bindings(bindings []*resolve.Binding) []Binding { 470 res := make([]Binding, len(bindings)) 471 for i, bind := range bindings { 472 res[i].Name = bind.First.Name 473 res[i].Pos = bind.First.NamePos 474 } 475 return res 476 } 477 478 // Expr compiles an expression to a program whose toplevel function evaluates it. 479 func Expr(expr syntax.Expr, name string, locals []*resolve.Binding) *Program { 480 pos := syntax.Start(expr) 481 stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} 482 return File(stmts, pos, name, locals, nil) 483 } 484 485 // File compiles the statements of a file into a program. 
486 func File(stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program { 487 pcomp := &pcomp{ 488 prog: &Program{ 489 Globals: bindings(globals), 490 }, 491 names: make(map[string]uint32), 492 constants: make(map[interface{}]uint32), 493 functions: make(map[*Funcode]uint32), 494 } 495 pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil) 496 497 return pcomp.prog 498 } 499 500 func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode { 501 fcomp := &fcomp{ 502 pcomp: pcomp, 503 pos: pos, 504 fn: &Funcode{ 505 Prog: pcomp.prog, 506 Pos: pos, 507 Name: name, 508 Doc: docStringFromBody(stmts), 509 Locals: bindings(locals), 510 Freevars: bindings(freevars), 511 }, 512 } 513 514 // Record indices of locals that require cells. 515 for i, local := range locals { 516 if local.Scope == resolve.Cell { 517 fcomp.fn.Cells = append(fcomp.fn.Cells, i) 518 } 519 } 520 521 if debug { 522 fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) 523 } 524 525 // Convert AST to a CFG of instructions. 526 entry := fcomp.newBlock() 527 fcomp.block = entry 528 fcomp.stmts(stmts) 529 if fcomp.block != nil { 530 fcomp.emit(NONE) 531 fcomp.emit(RETURN) 532 } 533 534 var oops bool // something bad happened 535 536 setinitialstack := func(b *block, depth int) { 537 if b.initialstack == -1 { 538 b.initialstack = depth 539 } else if b.initialstack != depth { 540 fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", 541 b.index, b.initialstack, depth) 542 oops = true 543 } 544 } 545 546 // Linearize the CFG: 547 // compute order, address, and initial 548 // stack depth of each reachable block. 
549 var pc uint32 550 var blocks []*block 551 var maxstack int 552 var visit func(b *block) 553 visit = func(b *block) { 554 if b.index >= 0 { 555 return // already visited 556 } 557 b.index = len(blocks) 558 b.addr = pc 559 blocks = append(blocks, b) 560 561 stack := b.initialstack 562 if debug { 563 fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) 564 } 565 var cjmpAddr *uint32 566 var isiterjmp int 567 for i, insn := range b.insns { 568 pc++ 569 570 // Compute size of argument. 571 if insn.op >= OpcodeArgMin { 572 switch insn.op { 573 case ITERJMP: 574 isiterjmp = 1 575 fallthrough 576 case CJMP: 577 cjmpAddr = &b.insns[i].arg 578 pc += 4 579 default: 580 pc += uint32(argLen(insn.arg)) 581 } 582 } 583 584 // Compute effect on stack. 585 se := insn.stackeffect() 586 if debug { 587 fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) 588 } 589 stack += se 590 if stack < 0 { 591 fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) 592 oops = true 593 } 594 if stack+isiterjmp > maxstack { 595 maxstack = stack + isiterjmp 596 } 597 } 598 599 if debug { 600 fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", 601 b.addr, b.index) 602 if b.jmp != nil { 603 fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) 604 } 605 if b.cjmp != nil { 606 fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) 607 } 608 } 609 610 // Place the jmp block next. 611 if b.jmp != nil { 612 // jump threading (empty cycles are impossible) 613 for b.jmp.insns == nil { 614 b.jmp = b.jmp.jmp 615 } 616 617 setinitialstack(b.jmp, stack+isiterjmp) 618 if b.jmp.index < 0 { 619 // Successor is not yet visited: 620 // place it next and fall through. 621 visit(b.jmp) 622 } else { 623 // Successor already visited; 624 // explicit backward jump required. 625 pc += 5 626 } 627 } 628 629 // Then the cjmp block. 
630 if b.cjmp != nil { 631 // jump threading (empty cycles are impossible) 632 for b.cjmp.insns == nil { 633 b.cjmp = b.cjmp.jmp 634 } 635 636 setinitialstack(b.cjmp, stack) 637 visit(b.cjmp) 638 639 // Patch the CJMP/ITERJMP, if present. 640 if cjmpAddr != nil { 641 *cjmpAddr = b.cjmp.addr 642 } 643 } 644 } 645 setinitialstack(entry, 0) 646 visit(entry) 647 648 fn := fcomp.fn 649 fn.MaxStack = maxstack 650 651 // Emit bytecode (and position table). 652 if Disassemble { 653 fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) 654 } 655 fcomp.generate(blocks, pc) 656 657 if debug { 658 fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) 659 } 660 661 // Don't panic until we've completed printing of the function. 662 if oops { 663 panic("internal error") 664 } 665 666 if debug { 667 fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) 668 } 669 670 return fn 671 } 672 673 func docStringFromBody(body []syntax.Stmt) string { 674 if len(body) == 0 { 675 return "" 676 } 677 expr, ok := body[0].(*syntax.ExprStmt) 678 if !ok { 679 return "" 680 } 681 lit, ok := expr.X.(*syntax.Literal) 682 if !ok { 683 return "" 684 } 685 if lit.Token != syntax.STRING { 686 return "" 687 } 688 return lit.Value.(string) 689 } 690 691 func (insn *insn) stackeffect() int { 692 se := int(stackEffect[insn.op]) 693 if se == variableStackEffect { 694 arg := int(insn.arg) 695 switch insn.op { 696 case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: 697 se = -int(2*(insn.arg&0xff) + insn.arg>>8) 698 if insn.op != CALL { 699 se-- 700 } 701 if insn.op == CALL_VAR_KW { 702 se-- 703 } 704 case ITERJMP: 705 // Stack effect differs by successor: 706 // +1 for jmp/false/ok 707 // 0 for cjmp/true/exhausted 708 // Handled specially in caller. 
709 se = 0 710 case MAKELIST, MAKETUPLE: 711 se = 1 - arg 712 case UNPACK: 713 se = arg - 1 714 default: 715 panic(insn.op) 716 } 717 } 718 return se 719 } 720 721 // generate emits the linear instruction stream from the CFG, 722 // and builds the PC-to-line number table. 723 func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { 724 code := make([]byte, 0, codelen) 725 var pclinetab []uint16 726 prev := pclinecol{ 727 pc: 0, 728 line: fcomp.fn.Pos.Line, 729 col: fcomp.fn.Pos.Col, 730 } 731 732 for _, b := range blocks { 733 if Disassemble { 734 fmt.Fprintf(os.Stderr, "%d:\n", b.index) 735 } 736 pc := b.addr 737 for _, insn := range b.insns { 738 if insn.line != 0 { 739 // Instruction has a source position. Delta-encode it. 740 // See Funcode.Position for the encoding. 741 for { 742 var incomplete uint16 743 744 // Δpc, uint4 745 deltapc := pc - prev.pc 746 if deltapc > 0x0f { 747 deltapc = 0x0f 748 incomplete = 1 749 } 750 prev.pc += deltapc 751 752 // Δline, int5 753 deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f) 754 if !ok { 755 incomplete = 1 756 } 757 prev.line += deltaline 758 759 // Δcol, int6 760 deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f) 761 if !ok { 762 incomplete = 1 763 } 764 prev.col += deltacol 765 766 entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete 767 pclinetab = append(pclinetab, entry) 768 if incomplete == 0 { 769 break 770 } 771 } 772 773 if Disassemble { 774 fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n", 775 filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col) 776 } 777 } 778 if Disassemble { 779 PrintOp(fcomp.fn, pc, insn.op, insn.arg) 780 } 781 code = append(code, byte(insn.op)) 782 pc++ 783 if insn.op >= OpcodeArgMin { 784 if insn.op == CJMP || insn.op == ITERJMP { 785 code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes 786 } else { 787 code = addUint32(code, insn.arg, 0) 788 } 789 pc = uint32(len(code)) 790 } 791 } 792 793 if b.jmp != nil && 
b.jmp.index != b.index+1 { 794 addr := b.jmp.addr 795 if Disassemble { 796 fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", 797 pc, addr, b.jmp.index) 798 } 799 code = append(code, byte(JMP)) 800 code = addUint32(code, addr, 4) 801 } 802 } 803 if len(code) != int(codelen) { 804 panic("internal error: wrong code length") 805 } 806 807 fcomp.fn.pclinetab = pclinetab 808 fcomp.fn.Code = code 809 } 810 811 // clip returns the value nearest x in the range [min...max], 812 // and whether it equals x. 813 func clip(x, min, max int32) (int32, bool) { 814 if x > max { 815 return max, false 816 } else if x < min { 817 return min, false 818 } else { 819 return x, true 820 } 821 } 822 823 // addUint32 encodes x as 7-bit little-endian varint. 824 // TODO(adonovan): opt: steal top two bits of opcode 825 // to encode the number of complete bytes that follow. 826 func addUint32(code []byte, x uint32, min int) []byte { 827 end := len(code) + min 828 for x >= 0x80 { 829 code = append(code, byte(x)|0x80) 830 x >>= 7 831 } 832 code = append(code, byte(x)) 833 // Pad the operand with NOPs to exactly min bytes. 834 for len(code) < end { 835 code = append(code, byte(NOP)) 836 } 837 return code 838 } 839 840 func argLen(x uint32) int { 841 n := 0 842 for x >= 0x80 { 843 n++ 844 x >>= 7 845 } 846 return n + 1 847 } 848 849 // PrintOp prints an instruction. 850 // It is provided for debugging. 
851 func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { 852 if op < OpcodeArgMin { 853 fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) 854 return 855 } 856 857 var comment string 858 switch op { 859 case CONSTANT: 860 switch x := fn.Prog.Constants[arg].(type) { 861 case string: 862 comment = strconv.Quote(x) 863 default: 864 comment = fmt.Sprint(x) 865 } 866 case MAKEFUNC: 867 comment = fn.Prog.Functions[arg].Name 868 case SETLOCAL, LOCAL: 869 comment = fn.Locals[arg].Name 870 case SETGLOBAL, GLOBAL: 871 comment = fn.Prog.Globals[arg].Name 872 case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: 873 comment = fn.Prog.Names[arg] 874 case FREE: 875 comment = fn.Freevars[arg].Name 876 case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: 877 comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) 878 default: 879 // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: 880 // arg is just a number 881 } 882 var buf bytes.Buffer 883 fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) 884 if comment != "" { 885 fmt.Fprint(&buf, "\t; ", comment) 886 } 887 fmt.Fprintln(&buf) 888 os.Stderr.Write(buf.Bytes()) 889 } 890 891 // newBlock returns a new block. 892 func (fcomp) newBlock() *block { 893 return &block{index: -1, initialstack: -1} 894 } 895 896 // emit emits an instruction to the current block. 897 func (fcomp *fcomp) emit(op Opcode) { 898 if op >= OpcodeArgMin { 899 panic("missing arg: " + op.String()) 900 } 901 insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col} 902 fcomp.block.insns = append(fcomp.block.insns, insn) 903 fcomp.pos.Line = 0 904 fcomp.pos.Col = 0 905 } 906 907 // emit1 emits an instruction with an immediate operand. 
908 func (fcomp *fcomp) emit1(op Opcode, arg uint32) { 909 if op < OpcodeArgMin { 910 panic("unwanted arg: " + op.String()) 911 } 912 insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col} 913 fcomp.block.insns = append(fcomp.block.insns, insn) 914 fcomp.pos.Line = 0 915 fcomp.pos.Col = 0 916 } 917 918 // jump emits a jump to the specified block. 919 // On return, the current block is unset. 920 func (fcomp *fcomp) jump(b *block) { 921 if b == fcomp.block { 922 panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs 923 } 924 fcomp.block.jmp = b 925 fcomp.block = nil 926 } 927 928 // condjump emits a conditional jump (CJMP or ITERJMP) 929 // to the specified true/false blocks. 930 // (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) 931 // On return, the current block is unset. 932 func (fcomp *fcomp) condjump(op Opcode, t, f *block) { 933 if !(op == CJMP || op == ITERJMP) { 934 panic("not a conditional jump: " + op.String()) 935 } 936 fcomp.emit1(op, 0) // fill in address later 937 fcomp.block.cjmp = t 938 fcomp.jump(f) 939 } 940 941 // nameIndex returns the index of the specified name 942 // within the name pool, adding it if necessary. 943 func (pcomp *pcomp) nameIndex(name string) uint32 { 944 index, ok := pcomp.names[name] 945 if !ok { 946 index = uint32(len(pcomp.prog.Names)) 947 pcomp.names[name] = index 948 pcomp.prog.Names = append(pcomp.prog.Names, name) 949 } 950 return index 951 } 952 953 // constantIndex returns the index of the specified constant 954 // within the constant pool, adding it if necessary. 955 func (pcomp *pcomp) constantIndex(v interface{}) uint32 { 956 index, ok := pcomp.constants[v] 957 if !ok { 958 index = uint32(len(pcomp.prog.Constants)) 959 pcomp.constants[v] = index 960 pcomp.prog.Constants = append(pcomp.prog.Constants, v) 961 } 962 return index 963 } 964 965 // functionIndex returns the index of the specified function 966 // AST the nestedfun pool, adding it if necessary. 
967 func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { 968 index, ok := pcomp.functions[fn] 969 if !ok { 970 index = uint32(len(pcomp.prog.Functions)) 971 pcomp.functions[fn] = index 972 pcomp.prog.Functions = append(pcomp.prog.Functions, fn) 973 } 974 return index 975 } 976 977 // string emits code to push the specified string. 978 func (fcomp *fcomp) string(s string) { 979 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s)) 980 } 981 982 // setPos sets the current source position. 983 // It should be called prior to any operation that can fail dynamically. 984 // All positions are assumed to belong to the same file. 985 func (fcomp *fcomp) setPos(pos syntax.Position) { 986 fcomp.pos = pos 987 } 988 989 // set emits code to store the top-of-stack value 990 // to the specified local, cell, or global variable. 991 func (fcomp *fcomp) set(id *syntax.Ident) { 992 bind := id.Binding.(*resolve.Binding) 993 switch bind.Scope { 994 case resolve.Local: 995 fcomp.emit1(SETLOCAL, uint32(bind.Index)) 996 case resolve.Cell: 997 // TODO(adonovan): opt: make a single op for LOCAL<n>, SETCELL. 998 fcomp.emit1(LOCAL, uint32(bind.Index)) 999 fcomp.emit(SETCELL) 1000 case resolve.Global: 1001 fcomp.emit1(SETGLOBAL, uint32(bind.Index)) 1002 default: 1003 log.Fatalf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope) 1004 } 1005 } 1006 1007 // lookup emits code to push the value of the specified variable. 1008 func (fcomp *fcomp) lookup(id *syntax.Ident) { 1009 bind := id.Binding.(*resolve.Binding) 1010 if bind.Scope != resolve.Universal { // (universal lookup can't fail) 1011 fcomp.setPos(id.NamePos) 1012 } 1013 switch bind.Scope { 1014 case resolve.Local: 1015 fcomp.emit1(LOCAL, uint32(bind.Index)) 1016 case resolve.Free: 1017 // TODO(adonovan): opt: make a single op for FREE<n>, CELL. 1018 fcomp.emit1(FREE, uint32(bind.Index)) 1019 fcomp.emit(CELL) 1020 case resolve.Cell: 1021 // TODO(adonovan): opt: make a single op for LOCAL<n>, CELL. 
1022 fcomp.emit1(LOCAL, uint32(bind.Index)) 1023 fcomp.emit(CELL) 1024 case resolve.Global: 1025 fcomp.emit1(GLOBAL, uint32(bind.Index)) 1026 case resolve.Predeclared: 1027 fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name)) 1028 case resolve.Universal: 1029 fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name)) 1030 default: 1031 log.Fatalf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope) 1032 } 1033 } 1034 1035 func (fcomp *fcomp) stmts(stmts []syntax.Stmt) { 1036 for _, stmt := range stmts { 1037 fcomp.stmt(stmt) 1038 } 1039 } 1040 1041 func (fcomp *fcomp) stmt(stmt syntax.Stmt) { 1042 switch stmt := stmt.(type) { 1043 case *syntax.ExprStmt: 1044 if _, ok := stmt.X.(*syntax.Literal); ok { 1045 // Opt: don't compile doc comments only to pop them. 1046 return 1047 } 1048 fcomp.expr(stmt.X) 1049 fcomp.emit(POP) 1050 1051 case *syntax.BranchStmt: 1052 // Resolver invariant: break/continue appear only within loops. 1053 switch stmt.Token { 1054 case syntax.PASS: 1055 // no-op 1056 case syntax.BREAK: 1057 b := fcomp.loops[len(fcomp.loops)-1].break_ 1058 fcomp.jump(b) 1059 fcomp.block = fcomp.newBlock() // dead code 1060 case syntax.CONTINUE: 1061 b := fcomp.loops[len(fcomp.loops)-1].continue_ 1062 fcomp.jump(b) 1063 fcomp.block = fcomp.newBlock() // dead code 1064 } 1065 1066 case *syntax.IfStmt: 1067 // Keep consistent with CondExpr. 
1068 t := fcomp.newBlock() 1069 f := fcomp.newBlock() 1070 done := fcomp.newBlock() 1071 1072 fcomp.ifelse(stmt.Cond, t, f) 1073 1074 fcomp.block = t 1075 fcomp.stmts(stmt.True) 1076 fcomp.jump(done) 1077 1078 fcomp.block = f 1079 fcomp.stmts(stmt.False) 1080 fcomp.jump(done) 1081 1082 fcomp.block = done 1083 1084 case *syntax.AssignStmt: 1085 switch stmt.Op { 1086 case syntax.EQ: 1087 // simple assignment: x = y 1088 fcomp.expr(stmt.RHS) 1089 fcomp.assign(stmt.OpPos, stmt.LHS) 1090 1091 case syntax.PLUS_EQ, 1092 syntax.MINUS_EQ, 1093 syntax.STAR_EQ, 1094 syntax.SLASH_EQ, 1095 syntax.SLASHSLASH_EQ, 1096 syntax.PERCENT_EQ, 1097 syntax.AMP_EQ, 1098 syntax.PIPE_EQ, 1099 syntax.CIRCUMFLEX_EQ, 1100 syntax.LTLT_EQ, 1101 syntax.GTGT_EQ: 1102 // augmented assignment: x += y 1103 1104 var set func() 1105 1106 // Evaluate "address" of x exactly once to avoid duplicate side-effects. 1107 switch lhs := unparen(stmt.LHS).(type) { 1108 case *syntax.Ident: 1109 // x = ... 1110 fcomp.lookup(lhs) 1111 set = func() { 1112 fcomp.set(lhs) 1113 } 1114 1115 case *syntax.IndexExpr: 1116 // x[y] = ... 1117 fcomp.expr(lhs.X) 1118 fcomp.expr(lhs.Y) 1119 fcomp.emit(DUP2) 1120 fcomp.setPos(lhs.Lbrack) 1121 fcomp.emit(INDEX) 1122 set = func() { 1123 fcomp.setPos(lhs.Lbrack) 1124 fcomp.emit(SETINDEX) 1125 } 1126 1127 case *syntax.DotExpr: 1128 // x.f = ... 1129 fcomp.expr(lhs.X) 1130 fcomp.emit(DUP) 1131 name := fcomp.pcomp.nameIndex(lhs.Name.Name) 1132 fcomp.setPos(lhs.Dot) 1133 fcomp.emit1(ATTR, name) 1134 set = func() { 1135 fcomp.setPos(lhs.Dot) 1136 fcomp.emit1(SETFIELD, name) 1137 } 1138 1139 default: 1140 panic(lhs) 1141 } 1142 1143 fcomp.expr(stmt.RHS) 1144 1145 if stmt.Op == syntax.PLUS_EQ { 1146 // Allow the runtime to optimize list += iterable. 
1147 fcomp.setPos(stmt.OpPos) 1148 fcomp.emit(INPLACE_ADD) 1149 } else { 1150 fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS) 1151 } 1152 set() 1153 } 1154 1155 case *syntax.DefStmt: 1156 fcomp.function(stmt.Function.(*resolve.Function)) 1157 fcomp.set(stmt.Name) 1158 1159 case *syntax.ForStmt: 1160 // Keep consistent with ForClause. 1161 head := fcomp.newBlock() 1162 body := fcomp.newBlock() 1163 tail := fcomp.newBlock() 1164 1165 fcomp.expr(stmt.X) 1166 fcomp.setPos(stmt.For) 1167 fcomp.emit(ITERPUSH) 1168 fcomp.jump(head) 1169 1170 fcomp.block = head 1171 fcomp.condjump(ITERJMP, tail, body) 1172 1173 fcomp.block = body 1174 fcomp.assign(stmt.For, stmt.Vars) 1175 fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head}) 1176 fcomp.stmts(stmt.Body) 1177 fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] 1178 fcomp.jump(head) 1179 1180 fcomp.block = tail 1181 fcomp.emit(ITERPOP) 1182 1183 case *syntax.WhileStmt: 1184 head := fcomp.newBlock() 1185 body := fcomp.newBlock() 1186 done := fcomp.newBlock() 1187 1188 fcomp.jump(head) 1189 fcomp.block = head 1190 fcomp.ifelse(stmt.Cond, body, done) 1191 1192 fcomp.block = body 1193 fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head}) 1194 fcomp.stmts(stmt.Body) 1195 fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] 1196 fcomp.jump(head) 1197 1198 fcomp.block = done 1199 1200 case *syntax.ReturnStmt: 1201 if stmt.Result != nil { 1202 fcomp.expr(stmt.Result) 1203 } else { 1204 fcomp.emit(NONE) 1205 } 1206 fcomp.emit(RETURN) 1207 fcomp.block = fcomp.newBlock() // dead code 1208 1209 case *syntax.LoadStmt: 1210 for i := range stmt.From { 1211 fcomp.string(stmt.From[i].Name) 1212 } 1213 module := stmt.Module.Value.(string) 1214 fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{ 1215 Name: module, 1216 Pos: stmt.Module.TokenPos, 1217 }) 1218 fcomp.string(module) 1219 fcomp.setPos(stmt.Load) 1220 fcomp.emit1(LOAD, uint32(len(stmt.From))) 1221 for i := range stmt.To { 1222 
fcomp.set(stmt.To[len(stmt.To)-1-i]) 1223 } 1224 1225 default: 1226 start, _ := stmt.Span() 1227 log.Fatalf("%s: exec: unexpected statement %T", start, stmt) 1228 } 1229 } 1230 1231 // assign implements lhs = rhs for arbitrary expressions lhs. 1232 // RHS is on top of stack, consumed. 1233 func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { 1234 switch lhs := lhs.(type) { 1235 case *syntax.ParenExpr: 1236 // (lhs) = rhs 1237 fcomp.assign(pos, lhs.X) 1238 1239 case *syntax.Ident: 1240 // x = rhs 1241 fcomp.set(lhs) 1242 1243 case *syntax.TupleExpr: 1244 // x, y = rhs 1245 fcomp.assignSequence(pos, lhs.List) 1246 1247 case *syntax.ListExpr: 1248 // [x, y] = rhs 1249 fcomp.assignSequence(pos, lhs.List) 1250 1251 case *syntax.IndexExpr: 1252 // x[y] = rhs 1253 fcomp.expr(lhs.X) 1254 fcomp.emit(EXCH) 1255 fcomp.expr(lhs.Y) 1256 fcomp.emit(EXCH) 1257 fcomp.setPos(lhs.Lbrack) 1258 fcomp.emit(SETINDEX) 1259 1260 case *syntax.DotExpr: 1261 // x.f = rhs 1262 fcomp.expr(lhs.X) 1263 fcomp.emit(EXCH) 1264 fcomp.setPos(lhs.Dot) 1265 fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) 1266 1267 default: 1268 panic(lhs) 1269 } 1270 } 1271 1272 func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { 1273 fcomp.setPos(pos) 1274 fcomp.emit1(UNPACK, uint32(len(lhs))) 1275 for i := range lhs { 1276 fcomp.assign(pos, lhs[i]) 1277 } 1278 } 1279 1280 func (fcomp *fcomp) expr(e syntax.Expr) { 1281 switch e := e.(type) { 1282 case *syntax.ParenExpr: 1283 fcomp.expr(e.X) 1284 1285 case *syntax.Ident: 1286 fcomp.lookup(e) 1287 1288 case *syntax.Literal: 1289 // e.Value is int64, float64, *bigInt, or string. 1290 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(e.Value)) 1291 1292 case *syntax.ListExpr: 1293 for _, x := range e.List { 1294 fcomp.expr(x) 1295 } 1296 fcomp.emit1(MAKELIST, uint32(len(e.List))) 1297 1298 case *syntax.CondExpr: 1299 // Keep consistent with IfStmt. 
1300 t := fcomp.newBlock() 1301 f := fcomp.newBlock() 1302 done := fcomp.newBlock() 1303 1304 fcomp.ifelse(e.Cond, t, f) 1305 1306 fcomp.block = t 1307 fcomp.expr(e.True) 1308 fcomp.jump(done) 1309 1310 fcomp.block = f 1311 fcomp.expr(e.False) 1312 fcomp.jump(done) 1313 1314 fcomp.block = done 1315 1316 case *syntax.IndexExpr: 1317 fcomp.expr(e.X) 1318 fcomp.expr(e.Y) 1319 fcomp.setPos(e.Lbrack) 1320 fcomp.emit(INDEX) 1321 1322 case *syntax.SliceExpr: 1323 fcomp.setPos(e.Lbrack) 1324 fcomp.expr(e.X) 1325 if e.Lo != nil { 1326 fcomp.expr(e.Lo) 1327 } else { 1328 fcomp.emit(NONE) 1329 } 1330 if e.Hi != nil { 1331 fcomp.expr(e.Hi) 1332 } else { 1333 fcomp.emit(NONE) 1334 } 1335 if e.Step != nil { 1336 fcomp.expr(e.Step) 1337 } else { 1338 fcomp.emit(NONE) 1339 } 1340 fcomp.emit(SLICE) 1341 1342 case *syntax.Comprehension: 1343 if e.Curly { 1344 fcomp.emit(MAKEDICT) 1345 } else { 1346 fcomp.emit1(MAKELIST, 0) 1347 } 1348 fcomp.comprehension(e, 0) 1349 1350 case *syntax.TupleExpr: 1351 fcomp.tuple(e.List) 1352 1353 case *syntax.DictExpr: 1354 fcomp.emit(MAKEDICT) 1355 for _, entry := range e.List { 1356 entry := entry.(*syntax.DictEntry) 1357 fcomp.emit(DUP) 1358 fcomp.expr(entry.Key) 1359 fcomp.expr(entry.Value) 1360 fcomp.setPos(entry.Colon) 1361 fcomp.emit(SETDICTUNIQ) 1362 } 1363 1364 case *syntax.UnaryExpr: 1365 fcomp.expr(e.X) 1366 fcomp.setPos(e.OpPos) 1367 switch e.Op { 1368 case syntax.MINUS: 1369 fcomp.emit(UMINUS) 1370 case syntax.PLUS: 1371 fcomp.emit(UPLUS) 1372 case syntax.NOT: 1373 fcomp.emit(NOT) 1374 case syntax.TILDE: 1375 fcomp.emit(TILDE) 1376 default: 1377 log.Fatalf("%s: unexpected unary op: %s", e.OpPos, e.Op) 1378 } 1379 1380 case *syntax.BinaryExpr: 1381 switch e.Op { 1382 // short-circuit operators 1383 // TODO(adonovan): use ifelse to simplify conditions. 
1384 case syntax.OR: 1385 // x or y => if x then x else y 1386 done := fcomp.newBlock() 1387 y := fcomp.newBlock() 1388 1389 fcomp.expr(e.X) 1390 fcomp.emit(DUP) 1391 fcomp.condjump(CJMP, done, y) 1392 1393 fcomp.block = y 1394 fcomp.emit(POP) // discard X 1395 fcomp.expr(e.Y) 1396 fcomp.jump(done) 1397 1398 fcomp.block = done 1399 1400 case syntax.AND: 1401 // x and y => if x then y else x 1402 done := fcomp.newBlock() 1403 y := fcomp.newBlock() 1404 1405 fcomp.expr(e.X) 1406 fcomp.emit(DUP) 1407 fcomp.condjump(CJMP, y, done) 1408 1409 fcomp.block = y 1410 fcomp.emit(POP) // discard X 1411 fcomp.expr(e.Y) 1412 fcomp.jump(done) 1413 1414 fcomp.block = done 1415 1416 case syntax.PLUS: 1417 fcomp.plus(e) 1418 1419 default: 1420 // all other strict binary operator (includes comparisons) 1421 fcomp.expr(e.X) 1422 fcomp.expr(e.Y) 1423 fcomp.binop(e.OpPos, e.Op) 1424 } 1425 1426 case *syntax.DotExpr: 1427 fcomp.expr(e.X) 1428 fcomp.setPos(e.Dot) 1429 fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) 1430 1431 case *syntax.CallExpr: 1432 fcomp.call(e) 1433 1434 case *syntax.LambdaExpr: 1435 fcomp.function(e.Function.(*resolve.Function)) 1436 1437 default: 1438 start, _ := e.Span() 1439 log.Fatalf("%s: unexpected expr %T", start, e) 1440 } 1441 } 1442 1443 type summand struct { 1444 x syntax.Expr 1445 plusPos syntax.Position 1446 } 1447 1448 // plus emits optimized code for ((a+b)+...)+z that avoids naive 1449 // quadratic behavior for strings, tuples, and lists, 1450 // and folds together adjacent literals of the same type. 1451 func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { 1452 // Gather all the right operands of the left tree of plusses. 1453 // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. 
1454 args := make([]summand, 0, 2) // common case: 2 operands 1455 for plus := e; ; { 1456 args = append(args, summand{unparen(plus.Y), plus.OpPos}) 1457 left := unparen(plus.X) 1458 x, ok := left.(*syntax.BinaryExpr) 1459 if !ok || x.Op != syntax.PLUS { 1460 args = append(args, summand{x: left}) 1461 break 1462 } 1463 plus = x 1464 } 1465 // Reverse args to syntactic order. 1466 for i, n := 0, len(args)/2; i < n; i++ { 1467 j := len(args) - 1 - i 1468 args[i], args[j] = args[j], args[i] 1469 } 1470 1471 // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). 1472 out := args[:0] // compact in situ 1473 for i := 0; i < len(args); { 1474 j := i + 1 1475 if code := addable(args[i].x); code != 0 { 1476 for j < len(args) && addable(args[j].x) == code { 1477 j++ 1478 } 1479 if j > i+1 { 1480 args[i].x = add(code, args[i:j]) 1481 } 1482 } 1483 out = append(out, args[i]) 1484 i = j 1485 } 1486 args = out 1487 1488 // Emit code for an n-ary sum (n > 0). 1489 fcomp.expr(args[0].x) 1490 for _, summand := range args[1:] { 1491 fcomp.expr(summand.x) 1492 fcomp.setPos(summand.plusPos) 1493 fcomp.emit(PLUS) 1494 } 1495 1496 // If len(args) > 2, use of an accumulator instead of a chain of 1497 // PLUS operations may be more efficient. 1498 // However, no gain was measured on a workload analogous to Bazel loading; 1499 // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. 1500 // 1501 // We cannot use a single n-ary SUM operation 1502 // a b c SUM<3> 1503 // because we need to report a distinct error for each 1504 // individual '+' operation, so three additional operations are 1505 // needed: 1506 // 1507 // ACCSTART => create buffer and append to it 1508 // ACCUM => append to buffer 1509 // ACCEND => get contents of buffer 1510 // 1511 // For string, list, and tuple values, the interpreter can 1512 // optimize these operations by using a mutable buffer. 
1513 // For all other types, ACCSTART and ACCEND would behave like 1514 // the identity function and ACCUM behaves like PLUS. 1515 // ACCUM must correctly support user-defined operations 1516 // such as list+foo. 1517 // 1518 // fcomp.emit(ACCSTART) 1519 // for _, summand := range args[1:] { 1520 // fcomp.expr(summand.x) 1521 // fcomp.setPos(summand.plusPos) 1522 // fcomp.emit(ACCUM) 1523 // } 1524 // fcomp.emit(ACCEND) 1525 } 1526 1527 // addable reports whether e is a statically addable 1528 // expression: a [s]tring, [l]ist, or [t]uple. 1529 func addable(e syntax.Expr) rune { 1530 switch e := e.(type) { 1531 case *syntax.Literal: 1532 // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding. 1533 switch e.Token { 1534 case syntax.STRING: 1535 return 's' 1536 } 1537 case *syntax.ListExpr: 1538 return 'l' 1539 case *syntax.TupleExpr: 1540 return 't' 1541 } 1542 return 0 1543 } 1544 1545 // add returns an expression denoting the sum of args, 1546 // which are all addable values of the type indicated by code. 1547 // The resulting syntax is degenerate, lacking position, etc. 1548 func add(code rune, args []summand) syntax.Expr { 1549 switch code { 1550 case 's': 1551 var buf bytes.Buffer 1552 for _, arg := range args { 1553 buf.WriteString(arg.x.(*syntax.Literal).Value.(string)) 1554 } 1555 return &syntax.Literal{Token: syntax.STRING, Value: buf.String()} 1556 case 'l': 1557 var elems []syntax.Expr 1558 for _, arg := range args { 1559 elems = append(elems, arg.x.(*syntax.ListExpr).List...) 1560 } 1561 return &syntax.ListExpr{List: elems} 1562 case 't': 1563 var elems []syntax.Expr 1564 for _, arg := range args { 1565 elems = append(elems, arg.x.(*syntax.TupleExpr).List...) 
1566 } 1567 return &syntax.TupleExpr{List: elems} 1568 } 1569 panic(code) 1570 } 1571 1572 func unparen(e syntax.Expr) syntax.Expr { 1573 if p, ok := e.(*syntax.ParenExpr); ok { 1574 return unparen(p.X) 1575 } 1576 return e 1577 } 1578 1579 func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { 1580 // TODO(adonovan): simplify by assuming syntax and compiler constants align. 1581 fcomp.setPos(pos) 1582 switch op { 1583 // arithmetic 1584 case syntax.PLUS: 1585 fcomp.emit(PLUS) 1586 case syntax.MINUS: 1587 fcomp.emit(MINUS) 1588 case syntax.STAR: 1589 fcomp.emit(STAR) 1590 case syntax.SLASH: 1591 fcomp.emit(SLASH) 1592 case syntax.SLASHSLASH: 1593 fcomp.emit(SLASHSLASH) 1594 case syntax.PERCENT: 1595 fcomp.emit(PERCENT) 1596 case syntax.AMP: 1597 fcomp.emit(AMP) 1598 case syntax.PIPE: 1599 fcomp.emit(PIPE) 1600 case syntax.CIRCUMFLEX: 1601 fcomp.emit(CIRCUMFLEX) 1602 case syntax.LTLT: 1603 fcomp.emit(LTLT) 1604 case syntax.GTGT: 1605 fcomp.emit(GTGT) 1606 case syntax.IN: 1607 fcomp.emit(IN) 1608 case syntax.NOT_IN: 1609 fcomp.emit(IN) 1610 fcomp.emit(NOT) 1611 1612 // comparisons 1613 case syntax.EQL, 1614 syntax.NEQ, 1615 syntax.GT, 1616 syntax.LT, 1617 syntax.LE, 1618 syntax.GE: 1619 fcomp.emit(Opcode(op-syntax.EQL) + EQL) 1620 1621 default: 1622 log.Fatalf("%s: unexpected binary op: %s", pos, op) 1623 } 1624 } 1625 1626 func (fcomp *fcomp) call(call *syntax.CallExpr) { 1627 // TODO(adonovan): opt: Use optimized path for calling methods 1628 // of built-ins: x.f(...) to avoid materializing a closure. 
1629 // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { 1630 // fcomp.expr(dot.X) 1631 // fcomp.args(call) 1632 // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) 1633 // return 1634 // } 1635 1636 // usual case 1637 fcomp.expr(call.Fn) 1638 op, arg := fcomp.args(call) 1639 fcomp.setPos(call.Lparen) 1640 fcomp.emit1(op, arg) 1641 } 1642 1643 // args emits code to push a tuple of positional arguments 1644 // and a tuple of named arguments containing alternating keys and values. 1645 // Either or both tuples may be empty (TODO(adonovan): optimize). 1646 func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { 1647 var callmode int 1648 // Compute the number of each kind of parameter. 1649 var p, n int // number of positional, named arguments 1650 var varargs, kwargs syntax.Expr 1651 for _, arg := range call.Args { 1652 if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { 1653 1654 // named argument (name, value) 1655 fcomp.string(binary.X.(*syntax.Ident).Name) 1656 fcomp.expr(binary.Y) 1657 n++ 1658 continue 1659 } 1660 if unary, ok := arg.(*syntax.UnaryExpr); ok { 1661 if unary.Op == syntax.STAR { 1662 callmode |= 1 1663 varargs = unary.X 1664 continue 1665 } else if unary.Op == syntax.STARSTAR { 1666 callmode |= 2 1667 kwargs = unary.X 1668 continue 1669 } 1670 } 1671 1672 // positional argument 1673 fcomp.expr(arg) 1674 p++ 1675 } 1676 1677 // Python2 and Python3 both permit named arguments 1678 // to appear both before and after a *args argument: 1679 // f(1, 2, x=3, *[4], y=5, **dict(z=6)) 1680 // 1681 // They also differ in their evaluation order: 1682 // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last) 1683 // Python3: 1 2 4 3 5 6 (positional args evaluated before named args) 1684 // Starlark-in-Java historically used a third order: 1685 // Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right) 1686 // 1687 // After discussion in github.com/bazelbuild/starlark#13, the 1688 // spec now requires Starlark to 
statically reject named 1689 // arguments after *args (e.g. y=5), and to use Python2-style 1690 // evaluation order. This is both easy to implement and 1691 // consistent with lexical order: 1692 // 1693 // f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6 1694 1695 // *args 1696 if varargs != nil { 1697 fcomp.expr(varargs) 1698 } 1699 1700 // **kwargs 1701 if kwargs != nil { 1702 fcomp.expr(kwargs) 1703 } 1704 1705 // TODO(adonovan): avoid this with a more flexible encoding. 1706 if p >= 256 || n >= 256 { 1707 // resolve already checked this; should be unreachable 1708 panic("too many arguments in call") 1709 } 1710 1711 return CALL + Opcode(callmode), uint32(p<<8 | n) 1712 } 1713 1714 func (fcomp *fcomp) tuple(elems []syntax.Expr) { 1715 for _, elem := range elems { 1716 fcomp.expr(elem) 1717 } 1718 fcomp.emit1(MAKETUPLE, uint32(len(elems))) 1719 } 1720 1721 func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { 1722 if clauseIndex == len(comp.Clauses) { 1723 fcomp.emit(DUP) // accumulator 1724 if comp.Curly { 1725 // dict: {k:v for ...} 1726 // Parser ensures that body is of form k:v. 1727 // Python-style set comprehensions {body for vars in x} 1728 // are not supported. 1729 entry := comp.Body.(*syntax.DictEntry) 1730 fcomp.expr(entry.Key) 1731 fcomp.expr(entry.Value) 1732 fcomp.setPos(entry.Colon) 1733 fcomp.emit(SETDICT) 1734 } else { 1735 // list: [body for vars in x] 1736 fcomp.expr(comp.Body) 1737 fcomp.emit(APPEND) 1738 } 1739 return 1740 } 1741 1742 clause := comp.Clauses[clauseIndex] 1743 switch clause := clause.(type) { 1744 case *syntax.IfClause: 1745 t := fcomp.newBlock() 1746 done := fcomp.newBlock() 1747 fcomp.ifelse(clause.Cond, t, done) 1748 1749 fcomp.block = t 1750 fcomp.comprehension(comp, clauseIndex+1) 1751 fcomp.jump(done) 1752 1753 fcomp.block = done 1754 return 1755 1756 case *syntax.ForClause: 1757 // Keep consistent with ForStmt. 
1758 head := fcomp.newBlock() 1759 body := fcomp.newBlock() 1760 tail := fcomp.newBlock() 1761 1762 fcomp.expr(clause.X) 1763 fcomp.setPos(clause.For) 1764 fcomp.emit(ITERPUSH) 1765 fcomp.jump(head) 1766 1767 fcomp.block = head 1768 fcomp.condjump(ITERJMP, tail, body) 1769 1770 fcomp.block = body 1771 fcomp.assign(clause.For, clause.Vars) 1772 fcomp.comprehension(comp, clauseIndex+1) 1773 fcomp.jump(head) 1774 1775 fcomp.block = tail 1776 fcomp.emit(ITERPOP) 1777 return 1778 } 1779 1780 start, _ := clause.Span() 1781 log.Fatalf("%s: unexpected comprehension clause %T", start, clause) 1782 } 1783 1784 func (fcomp *fcomp) function(f *resolve.Function) { 1785 // Evaluation of the defaults may fail, so record the position. 1786 fcomp.setPos(f.Pos) 1787 1788 // To reduce allocation, we emit a combined tuple 1789 // for the defaults and the freevars. 1790 // The function knows where to split it at run time. 1791 1792 // Generate tuple of parameter defaults. For: 1793 // def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs) 1794 // the tuple is: 1795 // (dp2, dp3, MANDATORY, dk2, MANDATORY). 1796 ndefaults := 0 1797 seenStar := false 1798 for _, param := range f.Params { 1799 switch param := param.(type) { 1800 case *syntax.BinaryExpr: 1801 fcomp.expr(param.Y) 1802 ndefaults++ 1803 case *syntax.UnaryExpr: 1804 seenStar = true // * or *args (also **kwargs) 1805 case *syntax.Ident: 1806 if seenStar { 1807 fcomp.emit(MANDATORY) 1808 ndefaults++ 1809 } 1810 } 1811 } 1812 1813 // Capture the cells of the function's 1814 // free variables from the lexical environment. 1815 for _, freevar := range f.FreeVars { 1816 // Don't call fcomp.lookup because we want 1817 // the cell itself, not its content. 
1818 switch freevar.Scope { 1819 case resolve.Free: 1820 fcomp.emit1(FREE, uint32(freevar.Index)) 1821 case resolve.Cell: 1822 fcomp.emit1(LOCAL, uint32(freevar.Index)) 1823 } 1824 } 1825 1826 fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars))) 1827 1828 funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars) 1829 1830 if debug { 1831 // TODO(adonovan): do compilations sequentially not as a tree, 1832 // to make the log easier to read. 1833 // Simplify by identifying Toplevel and functionIndex 0. 1834 fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) 1835 } 1836 1837 // def f(a, *, b=1) has only 2 parameters. 1838 numParams := len(f.Params) 1839 if f.NumKwonlyParams > 0 && !f.HasVarargs { 1840 numParams-- 1841 } 1842 1843 funcode.NumParams = numParams 1844 funcode.NumKwonlyParams = f.NumKwonlyParams 1845 funcode.HasVarargs = f.HasVarargs 1846 funcode.HasKwargs = f.HasKwargs 1847 fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode)) 1848 } 1849 1850 // ifelse emits a Boolean control flow decision. 1851 // On return, the current block is unset. 
1852 func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { 1853 switch cond := cond.(type) { 1854 case *syntax.UnaryExpr: 1855 if cond.Op == syntax.NOT { 1856 // if not x then goto t else goto f 1857 // => 1858 // if x then goto f else goto t 1859 fcomp.ifelse(cond.X, f, t) 1860 return 1861 } 1862 1863 case *syntax.BinaryExpr: 1864 switch cond.Op { 1865 case syntax.AND: 1866 // if x and y then goto t else goto f 1867 // => 1868 // if x then ifelse(y, t, f) else goto f 1869 fcomp.expr(cond.X) 1870 y := fcomp.newBlock() 1871 fcomp.condjump(CJMP, y, f) 1872 1873 fcomp.block = y 1874 fcomp.ifelse(cond.Y, t, f) 1875 return 1876 1877 case syntax.OR: 1878 // if x or y then goto t else goto f 1879 // => 1880 // if x then goto t else ifelse(y, t, f) 1881 fcomp.expr(cond.X) 1882 y := fcomp.newBlock() 1883 fcomp.condjump(CJMP, t, y) 1884 1885 fcomp.block = y 1886 fcomp.ifelse(cond.Y, t, f) 1887 return 1888 case syntax.NOT_IN: 1889 // if x not in y then goto t else goto f 1890 // => 1891 // if x in y then goto f else goto t 1892 copy := *cond 1893 copy.Op = syntax.IN 1894 fcomp.expr(©) 1895 fcomp.condjump(CJMP, f, t) 1896 return 1897 } 1898 } 1899 1900 // general case 1901 fcomp.expr(cond) 1902 fcomp.condjump(CJMP, t, f) 1903 }