// Source: go.starlark.net@v0.0.0-20231101134539-556fd59b42f6/internal/compile/compile.go

// Package compile defines the Starlark bytecode compiler.
// It is an internal package of the Starlark interpreter and is not directly accessible to clients.
//
// The compiler generates byte code with optional uint32 operands for a
// virtual machine with the following components:
//   - a program counter, which is an index into the byte code array.
//   - an operand stack, whose maximum size is computed for each function by the compiler.
//   - a stack of active iterators.
//   - an array of local variables.
//     The number of local variables and their indices are computed by the resolver.
//     Locals (possibly including parameters) that are shared with nested functions
//     are 'cells': their locals array slot will contain a value of type 'cell',
//     an indirect value in a box that is explicitly read/updated by instructions.
//   - an array of free variables, for nested functions.
//     Free variables are a subset of the ancestors' cell variables.
//     As with locals and cells, these are computed by the resolver.
//   - an array of global variables, shared among all functions in the same module.
//     All elements are initially nil.
//   - two maps of predeclared and universal identifiers.
//
// Each function has a line number table that maps each program counter
// offset to a source position, including the column number.
//
// Operands, logically uint32s, are encoded using little-endian 7-bit
// varints, the top bit indicating that more bytes follow.
package compile // import "go.starlark.net/internal/compile"

import (
	"bytes"
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"

	"go.starlark.net/resolve"
	"go.starlark.net/syntax"
)

// Disassemble causes the assembly code for each function
// to be printed to stderr as it is generated.
var Disassemble = false

const debug = false // make code generation verbose, for debugging the compiler

// Increment this to force recompilation of saved bytecode files.
const Version = 14

// An Opcode identifies one instruction of the Starlark virtual machine.
// Opcodes numerically >= OpcodeArgMin are followed by an immediate operand.
type Opcode uint8

// "x DUP x x" is a "stack picture" that describes the state of the
// stack before and after execution of the instruction.
//
// OP<index> indicates an immediate operand that is an index into the
// specified table: locals, names, freevars, constants.
const (
	NOP Opcode = iota // - NOP -

	// stack operations
	DUP  //   x DUP x x
	DUP2 // x y DUP2 x y x y
	POP  //   x POP -
	EXCH // x y EXCH y x

	// binary comparisons
	// (order must match Token)
	LT
	GT
	GE
	LE
	EQL
	NEQ

	// binary arithmetic
	// (order must match Token)
	PLUS
	MINUS
	STAR
	SLASH
	SLASHSLASH
	PERCENT
	AMP
	PIPE
	CIRCUMFLEX
	LTLT
	GTGT

	IN

	// unary operators
	UPLUS  // x UPLUS x
	UMINUS // x UMINUS -x
	TILDE  // x TILDE ~x

	NONE      // - NONE None
	TRUE      // - TRUE True
	FALSE     // - FALSE False
	MANDATORY // - MANDATORY Mandatory	     [sentinel value for required kwonly args]

	ITERPUSH     //       iterable ITERPUSH    -  [pushes the iterator stack]
	ITERPOP      //              - ITERPOP     -    [pops the iterator stack]
	NOT          //          value NOT         bool
	RETURN       //          value RETURN      -
	SETINDEX     //        a i new SETINDEX    -
	INDEX        //            a i INDEX       elem
	SETDICT      // dict key value SETDICT     -
	SETDICTUNIQ  // dict key value SETDICTUNIQ -
	APPEND       //      list elem APPEND      -
	SLICE        //   x lo hi step SLICE       slice
	INPLACE_ADD  //            x y INPLACE_ADD z      where z is x+y or x.extend(y)
	INPLACE_PIPE //            x y INPLACE_PIPE z      where z is x|y
	MAKEDICT     //              - MAKEDICT    dict

	// --- opcodes with an argument must go below this line ---

	// control flow
	JMP     //            - JMP<addr>     -
	CJMP    //         cond CJMP<addr>    -
	ITERJMP //            - ITERJMP<addr> elem   (and fall through) [acts on topmost iterator]
	//       or:          - ITERJMP<addr> -      (and jump)

	CONSTANT     //                 - CONSTANT<constant>  value
	MAKETUPLE    //         x1 ... xn MAKETUPLE<n>        tuple
	MAKELIST     //         x1 ... xn MAKELIST<n>         list
	MAKEFUNC     // defaults+freevars MAKEFUNC<func>      fn
	LOAD         //  from1 ... fromN module LOAD<n>       v1 ... vN
	SETLOCAL     //             value SETLOCAL<local>     -
	SETGLOBAL    //             value SETGLOBAL<global>   -
	LOCAL        //                 - LOCAL<local>        value
	FREE         //                 - FREE<freevar>       cell
	FREECELL     //                 - FREECELL<freevar>   value       (content of FREE cell)
	LOCALCELL    //                 - LOCALCELL<local>    value       (content of LOCAL cell)
	SETLOCALCELL //             value SETLOCALCELL<local> -           (set content of LOCAL cell)
	GLOBAL       //                 - GLOBAL<global>      value
	PREDECLARED  //                 - PREDECLARED<name>   value
	UNIVERSAL    //                 - UNIVERSAL<name>     value
	ATTR         //                 x ATTR<name>          y           y = x.name
	SETFIELD     //               x y SETFIELD<name>      -           x.name = y
	UNPACK       //          iterable UNPACK<n>           vn ... v1

	// n>>8 is #positional args and n&0xff is #named args (pairs).
	CALL        // fn positional named                CALL<n>        result
	CALL_VAR    // fn positional named *args          CALL_VAR<n>    result
	CALL_KW     // fn positional named       **kwargs CALL_KW<n>     result
	CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result

	OpcodeArgMin = JMP         // first opcode that carries an operand
	OpcodeMax    = CALL_VAR_KW // last valid opcode (inclusive)
)

// TODO(adonovan): add dynamic checks for missing opcodes in the tables below.

// opcodeNames maps each opcode to its mnemonic, as returned by Opcode.String.
// The array is indexed by opcode, so its length is OpcodeMax+1.
var opcodeNames = [...]string{
	AMP:          "amp",
	APPEND:       "append",
	ATTR:         "attr",
	CALL:         "call",
	CALL_KW:      "call_kw",
	CALL_VAR:     "call_var",
	CALL_VAR_KW:  "call_var_kw",
	CIRCUMFLEX:   "circumflex",
	CJMP:         "cjmp",
	CONSTANT:     "constant",
	DUP2:         "dup2",
	DUP:          "dup",
	EQL:          "eql",
	EXCH:         "exch",
	FALSE:        "false",
	FREE:         "free",
	FREECELL:     "freecell",
	GE:           "ge",
	GLOBAL:       "global",
	GT:           "gt",
	GTGT:         "gtgt",
	IN:           "in",
	INDEX:        "index",
	INPLACE_ADD:  "inplace_add",
	INPLACE_PIPE: "inplace_pipe",
	ITERJMP:      "iterjmp",
	ITERPOP:      "iterpop",
	ITERPUSH:     "iterpush",
	JMP:          "jmp",
	LE:           "le",
	LOAD:         "load",
	LOCAL:        "local",
	LOCALCELL:    "localcell",
	LT:           "lt",
	LTLT:         "ltlt",
	MAKEDICT:     "makedict",
	MAKEFUNC:     "makefunc",
	MAKELIST:     "makelist",
	MAKETUPLE:    "maketuple",
	MANDATORY:    "mandatory",
	MINUS:        "minus",
	NEQ:          "neq",
	NONE:         "none",
	NOP:          "nop",
	NOT:          "not",
	PERCENT:      "percent",
	PIPE:         "pipe",
	PLUS:         "plus",
	POP:          "pop",
	PREDECLARED:  "predeclared",
	RETURN:       "return",
	SETDICT:      "setdict",
	SETDICTUNIQ:  "setdictuniq",
	SETFIELD:     "setfield",
	SETGLOBAL:    "setglobal",
	SETINDEX:     "setindex",
	SETLOCAL:     "setlocal",
	SETLOCALCELL: "setlocalcell",
	SLASH:        "slash",
	SLASHSLASH:   "slashslash",
	SLICE:        "slice",
	STAR:         "star",
	TILDE:        "tilde",
	TRUE:         "true",
	UMINUS:       "uminus",
	UNIVERSAL:    "universal",
	UNPACK:       "unpack",
	UPLUS:        "uplus",
}

// variableStackEffect is the stackEffect table entry for opcodes whose
// effect depends on the operand (or successor) and must be computed;
// see insn.stackeffect.
const variableStackEffect = 0x7f

// stackEffect records the effect on the size of the operand stack of
// each kind of instruction. For some instructions this requires computation.
var stackEffect = [...]int8{
	AMP:          -1,
	APPEND:       -2,
	ATTR:         0,
	CALL:         variableStackEffect,
	CALL_KW:      variableStackEffect,
	CALL_VAR:     variableStackEffect,
	CALL_VAR_KW:  variableStackEffect,
	CIRCUMFLEX:   -1,
	CJMP:         -1,
	CONSTANT:     +1,
	DUP2:         +2,
	DUP:          +1,
	EQL:          -1,
	FALSE:        +1,
	FREE:         +1,
	FREECELL:     +1,
	GE:           -1,
	GLOBAL:       +1,
	GT:           -1,
	GTGT:         -1,
	IN:           -1,
	INDEX:        -1,
	INPLACE_ADD:  -1,
	INPLACE_PIPE: -1,
	ITERJMP:      variableStackEffect,
	ITERPOP:      0,
	ITERPUSH:     -1,
	JMP:          0,
	LE:           -1,
	LOAD:         -1,
	LOCAL:        +1,
	LOCALCELL:    +1,
	LT:           -1,
	LTLT:         -1,
	MAKEDICT:     +1,
	MAKEFUNC:     0,
	MAKELIST:     variableStackEffect,
	MAKETUPLE:    variableStackEffect,
	MANDATORY:    +1,
	MINUS:        -1,
	NEQ:          -1,
	NONE:         +1,
	NOP:          0,
	NOT:          0,
	PERCENT:      -1,
	PIPE:         -1,
	PLUS:         -1,
	POP:          -1,
	PREDECLARED:  +1,
	RETURN:       -1,
	SETLOCALCELL: -1,
	SETDICT:      -3,
	SETDICTUNIQ:  -3,
	SETFIELD:     -2,
	SETGLOBAL:    -1,
	SETINDEX:     -3,
	SETLOCAL:     -1,
	SLASH:        -1,
	SLASHSLASH:   -1,
	SLICE:        -3,
	STAR:         -1,
	TRUE:         +1,
	UMINUS:       0,
	UNIVERSAL:    +1,
	UNPACK:       variableStackEffect,
	UPLUS:        0,
}

// String returns the mnemonic of op, or "illegal op (N)" if op is
// outside the opcode range or has no entry in opcodeNames.
func (op Opcode) String() string {
	// Bound the lookup by the table itself: OpcodeMax is the last
	// valid opcode, so it must be included in the range.
	if int(op) < len(opcodeNames) {
		if name := opcodeNames[op]; name != "" {
			return name
		}
	}
	return fmt.Sprintf("illegal op (%d)", op)
}

// A Program is a Starlark file in executable form.
//
// Programs are serialized by the Program.Encode method,
// which must be updated whenever this declaration is changed.
type Program struct {
	Loads     []Binding     // name (really, string) and position of each load stmt
	Names     []string      // name pool: attribute/field names and predeclared/universal identifiers (operands of ATTR, SETFIELD, PREDECLARED, UNIVERSAL)
	Constants []interface{} // = string | int64 | float64 | *big.Int | Bytes
	Functions []*Funcode    // function pool, indexed by the operand of MAKEFUNC
	Globals   []Binding     // for error messages and tracing
	Toplevel  *Funcode      // module initialization function
	Recursion bool          // disable recursion check for functions in this file
}

// The type of a bytes literal value, to distinguish from text string.
type Bytes string

// A Funcode is the code of a compiled Starlark function.
//
// Funcodes are serialized by the encoder.function method,
// which must be updated whenever this declaration is changed.
type Funcode struct {
	Prog                  *Program        // the program this function belongs to
	Pos                   syntax.Position // position of def or lambda token
	Name                  string          // name of this function
	Doc                   string          // docstring of this function
	Code                  []byte          // the byte code
	pclinetab             []uint16        // mapping from pc to linenum (delta-encoded; see decodeLNT)
	Locals                []Binding       // locals, parameters first
	Cells                 []int           // indices of Locals that require cells
	Freevars              []Binding       // for tracing
	MaxStack              int             // maximum operand stack depth, computed by the compiler
	NumParams             int
	NumKwonlyParams       int
	HasVarargs, HasKwargs bool

	// -- transient state --

	lntOnce sync.Once   // guards the one-time decoding of pclinetab into lnt
	lnt     []pclinecol // decoded line number table
}

// A pclinecol is one row of the decoded line number table:
// the source line and column in effect from program counter pc onward.
type pclinecol struct {
	pc        uint32
	line, col int32
}

// A Binding is the name and position of a binding identifier.
type Binding struct {
	Name string
	Pos  syntax.Position
}

// A pcomp holds the compiler state for a Program.
type pcomp struct {
	prog *Program // what we're building

	// Reverse indices of the program's pools (value -> pool index),
	// used to deduplicate entries as they are added.
	names     map[string]uint32
	constants map[interface{}]uint32
	functions map[*Funcode]uint32
}

// An fcomp holds the compiler state for a Funcode.
type fcomp struct {
	fn *Funcode // what we're building

	pcomp *pcomp          // state of the enclosing program compilation
	pos   syntax.Position // current position of generated code
	loops []loop          // stack of enclosing loops; innermost last
	block *block          // current block; nil after a jump until a new block is selected
}

// A loop records the jump targets of break and continue
// statements within one enclosing loop.
type loop struct {
	break_, continue_ *block
}

// A block is a node of the control-flow graph:
// a straight-line run of instructions plus its successors.
type block struct {
	insns []insn

	// If the last insn is a RETURN, jmp and cjmp are nil.
	// If the last insn is a CJMP or ITERJMP,
	//  cjmp and jmp are the "true" and "false" successors.
	// Otherwise, jmp is the sole successor.
	jmp, cjmp *block

	initialstack int // for stack depth computation; -1 => not yet known

	// Used during encoding
	index int    // -1 => not encoded yet
	addr  uint32 // address of the block's first instruction in the byte code
}

// An insn is a single instruction: an opcode, its operand (meaningful
// only for opcodes >= OpcodeArgMin), and an optional source position.
// A zero line means the instruction has no recorded position.
type insn struct {
	op        Opcode
	arg       uint32
	line, col int32
}

// Position returns the source position for program counter pc.
// The line number table is decoded lazily on the first call.
// If the table is empty, the result has zero line and column.
func (fn *Funcode) Position(pc uint32) syntax.Position {
	fn.lntOnce.Do(fn.decodeLNT)

	// Binary search to find last LNT entry not greater than pc.
	// To avoid dynamic dispatch, this is a specialization of
	// sort.Search using this predicate:
	//   !(i < len(fn.lnt)-1 && fn.lnt[i+1].pc <= pc)
	n := len(fn.lnt)
	i, j := 0, n
	for i < j {
		h := int(uint(i+j) >> 1) // midpoint, computed without overflow
		if !(h >= n-1 || fn.lnt[h+1].pc > pc) {
			i = h + 1
		} else {
			j = h
		}
	}

	var line, col int32
	if i < n {
		line = fn.lnt[i].line
		col = fn.lnt[i].col
	}

	pos := fn.Pos // copy the (annoyingly inaccessible) filename
	pos.Col = col
	pos.Line = line
	return pos
}

// decodeLNT decodes the line number table and populates fn.lnt.
// It is called at most once.
func (fn *Funcode) decodeLNT() {
	// Conceptually the table contains rows of the form
	// (pc uint32, line int32, col int32), sorted by pc.
	// We use a delta encoding, since the differences
	// between successive pc, line, and column values
	// are typically small and positive (though line and
	// especially column differences may be negative).
	// The delta encoding starts from
	// {pc: 0, line: fn.Pos.Line, col: fn.Pos.Col}.
	//
	// Each entry is packed into one or more 16-bit values:
	//    Δpc        uint4
	//    Δline      int5
	//    Δcol       int6
	//    incomplete uint1
	// The top 4 bits are the unsigned delta pc.
	// The next 5 bits are the signed line number delta.
	// The next 6 bits are the signed column number delta.
	// The bottom bit indicates that more rows follow because
	// one of the deltas was maxed out.
	// These field widths were chosen from a sample of real programs,
	// and allow >97% of rows to be encoded in a single uint16.

	fn.lnt = make([]pclinecol, 0, len(fn.pclinetab)) // a minor overapproximation
	entry := pclinecol{
		pc:   0,
		line: fn.Pos.Line,
		col:  fn.Pos.Col,
	}
	for _, x := range fn.pclinetab {
		// Accumulate the deltas. Each signed field is shifted to the
		// top of an int16 and arithmetically shifted back down.
		entry.pc += uint32(x) >> 12
		entry.line += int32((int16(x) << 4) >> (16 - 5)) // sign extend Δline
		entry.col += int32((int16(x) << 9) >> (16 - 6))  // sign extend Δcol
		// Emit a row only once its final (complete) word has been applied.
		if (x & 1) == 0 {
			fn.lnt = append(fn.lnt, entry)
		}
	}
}

// bindings converts resolve.Bindings to compiled form.
// Each result records the name and position of the binding's first identifier.
func bindings(bindings []*resolve.Binding) []Binding {
	res := make([]Binding, len(bindings))
	for i, bind := range bindings {
		res[i].Name = bind.First.Name
		res[i].Pos = bind.First.NamePos
	}
	return res
}

// Expr compiles an expression to a program whose toplevel function evaluates it.
// The options must be consistent with those used when parsing expr.
490 func Expr(opts *syntax.FileOptions, expr syntax.Expr, name string, locals []*resolve.Binding) *Program { 491 pos := syntax.Start(expr) 492 stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} 493 return File(opts, stmts, pos, name, locals, nil) 494 } 495 496 // File compiles the statements of a file into a program. 497 // The options must be consistent with those used when parsing stmts. 498 func File(opts *syntax.FileOptions, stmts []syntax.Stmt, pos syntax.Position, name string, locals, globals []*resolve.Binding) *Program { 499 pcomp := &pcomp{ 500 prog: &Program{ 501 Globals: bindings(globals), 502 Recursion: opts.Recursion, 503 }, 504 names: make(map[string]uint32), 505 constants: make(map[interface{}]uint32), 506 functions: make(map[*Funcode]uint32), 507 } 508 pcomp.prog.Toplevel = pcomp.function(name, pos, stmts, locals, nil) 509 510 return pcomp.prog 511 } 512 513 func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*resolve.Binding) *Funcode { 514 fcomp := &fcomp{ 515 pcomp: pcomp, 516 pos: pos, 517 fn: &Funcode{ 518 Prog: pcomp.prog, 519 Pos: pos, 520 Name: name, 521 Doc: docStringFromBody(stmts), 522 Locals: bindings(locals), 523 Freevars: bindings(freevars), 524 }, 525 } 526 527 // Record indices of locals that require cells. 528 for i, local := range locals { 529 if local.Scope == resolve.Cell { 530 fcomp.fn.Cells = append(fcomp.fn.Cells, i) 531 } 532 } 533 534 if debug { 535 fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) 536 } 537 538 // Convert AST to a CFG of instructions. 
539 entry := fcomp.newBlock() 540 fcomp.block = entry 541 fcomp.stmts(stmts) 542 if fcomp.block != nil { 543 fcomp.emit(NONE) 544 fcomp.emit(RETURN) 545 } 546 547 var oops bool // something bad happened 548 549 setinitialstack := func(b *block, depth int) { 550 if b.initialstack == -1 { 551 b.initialstack = depth 552 } else if b.initialstack != depth { 553 fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", 554 b.index, b.initialstack, depth) 555 oops = true 556 } 557 } 558 559 // Linearize the CFG: 560 // compute order, address, and initial 561 // stack depth of each reachable block. 562 var pc uint32 563 var blocks []*block 564 var maxstack int 565 var visit func(b *block) 566 visit = func(b *block) { 567 if b.index >= 0 { 568 return // already visited 569 } 570 b.index = len(blocks) 571 b.addr = pc 572 blocks = append(blocks, b) 573 574 stack := b.initialstack 575 if debug { 576 fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) 577 } 578 var cjmpAddr *uint32 579 var isiterjmp int 580 for i, insn := range b.insns { 581 pc++ 582 583 // Compute size of argument. 584 if insn.op >= OpcodeArgMin { 585 switch insn.op { 586 case ITERJMP: 587 isiterjmp = 1 588 fallthrough 589 case CJMP: 590 cjmpAddr = &b.insns[i].arg 591 pc += 4 592 default: 593 pc += uint32(argLen(insn.arg)) 594 } 595 } 596 597 // Compute effect on stack. 
598 se := insn.stackeffect() 599 if debug { 600 fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) 601 } 602 stack += se 603 if stack < 0 { 604 fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) 605 oops = true 606 } 607 if stack+isiterjmp > maxstack { 608 maxstack = stack + isiterjmp 609 } 610 } 611 612 if debug { 613 fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", 614 b.addr, b.index) 615 if b.jmp != nil { 616 fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) 617 } 618 if b.cjmp != nil { 619 fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) 620 } 621 } 622 623 // Place the jmp block next. 624 if b.jmp != nil { 625 // jump threading (empty cycles are impossible) 626 for b.jmp.insns == nil { 627 b.jmp = b.jmp.jmp 628 } 629 630 setinitialstack(b.jmp, stack+isiterjmp) 631 if b.jmp.index < 0 { 632 // Successor is not yet visited: 633 // place it next and fall through. 634 visit(b.jmp) 635 } else { 636 // Successor already visited; 637 // explicit backward jump required. 638 pc += 5 639 } 640 } 641 642 // Then the cjmp block. 643 if b.cjmp != nil { 644 // jump threading (empty cycles are impossible) 645 for b.cjmp.insns == nil { 646 b.cjmp = b.cjmp.jmp 647 } 648 649 setinitialstack(b.cjmp, stack) 650 visit(b.cjmp) 651 652 // Patch the CJMP/ITERJMP, if present. 653 if cjmpAddr != nil { 654 *cjmpAddr = b.cjmp.addr 655 } 656 } 657 } 658 setinitialstack(entry, 0) 659 visit(entry) 660 661 fn := fcomp.fn 662 fn.MaxStack = maxstack 663 664 // Emit bytecode (and position table). 665 if Disassemble { 666 fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) 667 } 668 fcomp.generate(blocks, pc) 669 670 if debug { 671 fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) 672 } 673 674 // Don't panic until we've completed printing of the function. 
675 if oops { 676 panic("internal error") 677 } 678 679 if debug { 680 fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) 681 } 682 683 return fn 684 } 685 686 func docStringFromBody(body []syntax.Stmt) string { 687 if len(body) == 0 { 688 return "" 689 } 690 expr, ok := body[0].(*syntax.ExprStmt) 691 if !ok { 692 return "" 693 } 694 lit, ok := expr.X.(*syntax.Literal) 695 if !ok { 696 return "" 697 } 698 if lit.Token != syntax.STRING { 699 return "" 700 } 701 return lit.Value.(string) 702 } 703 704 func (insn *insn) stackeffect() int { 705 se := int(stackEffect[insn.op]) 706 if se == variableStackEffect { 707 arg := int(insn.arg) 708 switch insn.op { 709 case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: 710 se = -int(2*(insn.arg&0xff) + insn.arg>>8) 711 if insn.op != CALL { 712 se-- 713 } 714 if insn.op == CALL_VAR_KW { 715 se-- 716 } 717 case ITERJMP: 718 // Stack effect differs by successor: 719 // +1 for jmp/false/ok 720 // 0 for cjmp/true/exhausted 721 // Handled specially in caller. 722 se = 0 723 case MAKELIST, MAKETUPLE: 724 se = 1 - arg 725 case UNPACK: 726 se = arg - 1 727 default: 728 panic(insn.op) 729 } 730 } 731 return se 732 } 733 734 // generate emits the linear instruction stream from the CFG, 735 // and builds the PC-to-line number table. 736 func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { 737 code := make([]byte, 0, codelen) 738 var pclinetab []uint16 739 prev := pclinecol{ 740 pc: 0, 741 line: fcomp.fn.Pos.Line, 742 col: fcomp.fn.Pos.Col, 743 } 744 745 for _, b := range blocks { 746 if Disassemble { 747 fmt.Fprintf(os.Stderr, "%d:\n", b.index) 748 } 749 pc := b.addr 750 for _, insn := range b.insns { 751 if insn.line != 0 { 752 // Instruction has a source position. Delta-encode it. 753 // See Funcode.Position for the encoding. 
754 for { 755 var incomplete uint16 756 757 // Δpc, uint4 758 deltapc := pc - prev.pc 759 if deltapc > 0x0f { 760 deltapc = 0x0f 761 incomplete = 1 762 } 763 prev.pc += deltapc 764 765 // Δline, int5 766 deltaline, ok := clip(insn.line-prev.line, -0x10, 0x0f) 767 if !ok { 768 incomplete = 1 769 } 770 prev.line += deltaline 771 772 // Δcol, int6 773 deltacol, ok := clip(insn.col-prev.col, -0x20, 0x1f) 774 if !ok { 775 incomplete = 1 776 } 777 prev.col += deltacol 778 779 entry := uint16(deltapc<<12) | uint16(deltaline&0x1f)<<7 | uint16(deltacol&0x3f)<<1 | incomplete 780 pclinetab = append(pclinetab, entry) 781 if incomplete == 0 { 782 break 783 } 784 } 785 786 if Disassemble { 787 fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s:%d:%d\n", 788 filepath.Base(fcomp.fn.Pos.Filename()), insn.line, insn.col) 789 } 790 } 791 if Disassemble { 792 PrintOp(fcomp.fn, pc, insn.op, insn.arg) 793 } 794 code = append(code, byte(insn.op)) 795 pc++ 796 if insn.op >= OpcodeArgMin { 797 if insn.op == CJMP || insn.op == ITERJMP { 798 code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes 799 } else { 800 code = addUint32(code, insn.arg, 0) 801 } 802 pc = uint32(len(code)) 803 } 804 } 805 806 if b.jmp != nil && b.jmp.index != b.index+1 { 807 addr := b.jmp.addr 808 if Disassemble { 809 fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", 810 pc, addr, b.jmp.index) 811 } 812 code = append(code, byte(JMP)) 813 code = addUint32(code, addr, 4) 814 } 815 } 816 if len(code) != int(codelen) { 817 panic("internal error: wrong code length") 818 } 819 820 fcomp.fn.pclinetab = pclinetab 821 fcomp.fn.Code = code 822 } 823 824 // clip returns the value nearest x in the range [min...max], 825 // and whether it equals x. 826 func clip(x, min, max int32) (int32, bool) { 827 if x > max { 828 return max, false 829 } else if x < min { 830 return min, false 831 } else { 832 return x, true 833 } 834 } 835 836 // addUint32 encodes x as 7-bit little-endian varint. 
837 // TODO(adonovan): opt: steal top two bits of opcode 838 // to encode the number of complete bytes that follow. 839 func addUint32(code []byte, x uint32, min int) []byte { 840 end := len(code) + min 841 for x >= 0x80 { 842 code = append(code, byte(x)|0x80) 843 x >>= 7 844 } 845 code = append(code, byte(x)) 846 // Pad the operand with NOPs to exactly min bytes. 847 for len(code) < end { 848 code = append(code, byte(NOP)) 849 } 850 return code 851 } 852 853 func argLen(x uint32) int { 854 n := 0 855 for x >= 0x80 { 856 n++ 857 x >>= 7 858 } 859 return n + 1 860 } 861 862 // PrintOp prints an instruction. 863 // It is provided for debugging. 864 func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { 865 if op < OpcodeArgMin { 866 fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) 867 return 868 } 869 870 var comment string 871 switch op { 872 case CONSTANT: 873 switch x := fn.Prog.Constants[arg].(type) { 874 case string: 875 comment = strconv.Quote(x) 876 case Bytes: 877 comment = "b" + strconv.Quote(string(x)) 878 default: 879 comment = fmt.Sprint(x) 880 } 881 case MAKEFUNC: 882 comment = fn.Prog.Functions[arg].Name 883 case SETLOCAL, LOCAL: 884 comment = fn.Locals[arg].Name 885 case SETGLOBAL, GLOBAL: 886 comment = fn.Prog.Globals[arg].Name 887 case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: 888 comment = fn.Prog.Names[arg] 889 case FREE: 890 comment = fn.Freevars[arg].Name 891 case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: 892 comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) 893 default: 894 // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: 895 // arg is just a number 896 } 897 var buf bytes.Buffer 898 fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) 899 if comment != "" { 900 fmt.Fprint(&buf, "\t; ", comment) 901 } 902 fmt.Fprintln(&buf) 903 os.Stderr.Write(buf.Bytes()) 904 } 905 906 // newBlock returns a new block. 
907 func (fcomp) newBlock() *block { 908 return &block{index: -1, initialstack: -1} 909 } 910 911 // emit emits an instruction to the current block. 912 func (fcomp *fcomp) emit(op Opcode) { 913 if op >= OpcodeArgMin { 914 panic("missing arg: " + op.String()) 915 } 916 insn := insn{op: op, line: fcomp.pos.Line, col: fcomp.pos.Col} 917 fcomp.block.insns = append(fcomp.block.insns, insn) 918 fcomp.pos.Line = 0 919 fcomp.pos.Col = 0 920 } 921 922 // emit1 emits an instruction with an immediate operand. 923 func (fcomp *fcomp) emit1(op Opcode, arg uint32) { 924 if op < OpcodeArgMin { 925 panic("unwanted arg: " + op.String()) 926 } 927 insn := insn{op: op, arg: arg, line: fcomp.pos.Line, col: fcomp.pos.Col} 928 fcomp.block.insns = append(fcomp.block.insns, insn) 929 fcomp.pos.Line = 0 930 fcomp.pos.Col = 0 931 } 932 933 // jump emits a jump to the specified block. 934 // On return, the current block is unset. 935 func (fcomp *fcomp) jump(b *block) { 936 if b == fcomp.block { 937 panic("self-jump") // unreachable: Starlark has no arbitrary looping constructs 938 } 939 fcomp.block.jmp = b 940 fcomp.block = nil 941 } 942 943 // condjump emits a conditional jump (CJMP or ITERJMP) 944 // to the specified true/false blocks. 945 // (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) 946 // On return, the current block is unset. 947 func (fcomp *fcomp) condjump(op Opcode, t, f *block) { 948 if !(op == CJMP || op == ITERJMP) { 949 panic("not a conditional jump: " + op.String()) 950 } 951 fcomp.emit1(op, 0) // fill in address later 952 fcomp.block.cjmp = t 953 fcomp.jump(f) 954 } 955 956 // nameIndex returns the index of the specified name 957 // within the name pool, adding it if necessary. 
958 func (pcomp *pcomp) nameIndex(name string) uint32 { 959 index, ok := pcomp.names[name] 960 if !ok { 961 index = uint32(len(pcomp.prog.Names)) 962 pcomp.names[name] = index 963 pcomp.prog.Names = append(pcomp.prog.Names, name) 964 } 965 return index 966 } 967 968 // constantIndex returns the index of the specified constant 969 // within the constant pool, adding it if necessary. 970 func (pcomp *pcomp) constantIndex(v interface{}) uint32 { 971 index, ok := pcomp.constants[v] 972 if !ok { 973 index = uint32(len(pcomp.prog.Constants)) 974 pcomp.constants[v] = index 975 pcomp.prog.Constants = append(pcomp.prog.Constants, v) 976 } 977 return index 978 } 979 980 // functionIndex returns the index of the specified function 981 // AST the nestedfun pool, adding it if necessary. 982 func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { 983 index, ok := pcomp.functions[fn] 984 if !ok { 985 index = uint32(len(pcomp.prog.Functions)) 986 pcomp.functions[fn] = index 987 pcomp.prog.Functions = append(pcomp.prog.Functions, fn) 988 } 989 return index 990 } 991 992 // string emits code to push the specified string. 993 func (fcomp *fcomp) string(s string) { 994 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s)) 995 } 996 997 // setPos sets the current source position. 998 // It should be called prior to any operation that can fail dynamically. 999 // All positions are assumed to belong to the same file. 1000 func (fcomp *fcomp) setPos(pos syntax.Position) { 1001 fcomp.pos = pos 1002 } 1003 1004 // set emits code to store the top-of-stack value 1005 // to the specified local, cell, or global variable. 
1006 func (fcomp *fcomp) set(id *syntax.Ident) { 1007 bind := id.Binding.(*resolve.Binding) 1008 switch bind.Scope { 1009 case resolve.Local: 1010 fcomp.emit1(SETLOCAL, uint32(bind.Index)) 1011 case resolve.Cell: 1012 fcomp.emit1(SETLOCALCELL, uint32(bind.Index)) 1013 case resolve.Global: 1014 fcomp.emit1(SETGLOBAL, uint32(bind.Index)) 1015 default: 1016 log.Panicf("%s: set(%s): not global/local/cell (%d)", id.NamePos, id.Name, bind.Scope) 1017 } 1018 } 1019 1020 // lookup emits code to push the value of the specified variable. 1021 func (fcomp *fcomp) lookup(id *syntax.Ident) { 1022 bind := id.Binding.(*resolve.Binding) 1023 if bind.Scope != resolve.Universal { // (universal lookup can't fail) 1024 fcomp.setPos(id.NamePos) 1025 } 1026 switch bind.Scope { 1027 case resolve.Local: 1028 fcomp.emit1(LOCAL, uint32(bind.Index)) 1029 case resolve.Free: 1030 fcomp.emit1(FREECELL, uint32(bind.Index)) 1031 case resolve.Cell: 1032 fcomp.emit1(LOCALCELL, uint32(bind.Index)) 1033 case resolve.Global: 1034 fcomp.emit1(GLOBAL, uint32(bind.Index)) 1035 case resolve.Predeclared: 1036 fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name)) 1037 case resolve.Universal: 1038 fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name)) 1039 default: 1040 log.Panicf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, bind.Scope) 1041 } 1042 } 1043 1044 func (fcomp *fcomp) stmts(stmts []syntax.Stmt) { 1045 for _, stmt := range stmts { 1046 fcomp.stmt(stmt) 1047 } 1048 } 1049 1050 func (fcomp *fcomp) stmt(stmt syntax.Stmt) { 1051 switch stmt := stmt.(type) { 1052 case *syntax.ExprStmt: 1053 if _, ok := stmt.X.(*syntax.Literal); ok { 1054 // Opt: don't compile doc comments only to pop them. 1055 return 1056 } 1057 fcomp.expr(stmt.X) 1058 fcomp.emit(POP) 1059 1060 case *syntax.BranchStmt: 1061 // Resolver invariant: break/continue appear only within loops. 
1062 switch stmt.Token { 1063 case syntax.PASS: 1064 // no-op 1065 case syntax.BREAK: 1066 b := fcomp.loops[len(fcomp.loops)-1].break_ 1067 fcomp.jump(b) 1068 fcomp.block = fcomp.newBlock() // dead code 1069 case syntax.CONTINUE: 1070 b := fcomp.loops[len(fcomp.loops)-1].continue_ 1071 fcomp.jump(b) 1072 fcomp.block = fcomp.newBlock() // dead code 1073 } 1074 1075 case *syntax.IfStmt: 1076 // Keep consistent with CondExpr. 1077 t := fcomp.newBlock() 1078 f := fcomp.newBlock() 1079 done := fcomp.newBlock() 1080 1081 fcomp.ifelse(stmt.Cond, t, f) 1082 1083 fcomp.block = t 1084 fcomp.stmts(stmt.True) 1085 fcomp.jump(done) 1086 1087 fcomp.block = f 1088 fcomp.stmts(stmt.False) 1089 fcomp.jump(done) 1090 1091 fcomp.block = done 1092 1093 case *syntax.AssignStmt: 1094 switch stmt.Op { 1095 case syntax.EQ: 1096 // simple assignment: x = y 1097 fcomp.expr(stmt.RHS) 1098 fcomp.assign(stmt.OpPos, stmt.LHS) 1099 1100 case syntax.PLUS_EQ, 1101 syntax.MINUS_EQ, 1102 syntax.STAR_EQ, 1103 syntax.SLASH_EQ, 1104 syntax.SLASHSLASH_EQ, 1105 syntax.PERCENT_EQ, 1106 syntax.AMP_EQ, 1107 syntax.PIPE_EQ, 1108 syntax.CIRCUMFLEX_EQ, 1109 syntax.LTLT_EQ, 1110 syntax.GTGT_EQ: 1111 // augmented assignment: x += y 1112 1113 var set func() 1114 1115 // Evaluate "address" of x exactly once to avoid duplicate side-effects. 1116 switch lhs := unparen(stmt.LHS).(type) { 1117 case *syntax.Ident: 1118 // x = ... 1119 fcomp.lookup(lhs) 1120 set = func() { 1121 fcomp.set(lhs) 1122 } 1123 1124 case *syntax.IndexExpr: 1125 // x[y] = ... 1126 fcomp.expr(lhs.X) 1127 fcomp.expr(lhs.Y) 1128 fcomp.emit(DUP2) 1129 fcomp.setPos(lhs.Lbrack) 1130 fcomp.emit(INDEX) 1131 set = func() { 1132 fcomp.setPos(lhs.Lbrack) 1133 fcomp.emit(SETINDEX) 1134 } 1135 1136 case *syntax.DotExpr: 1137 // x.f = ... 
1138 fcomp.expr(lhs.X) 1139 fcomp.emit(DUP) 1140 name := fcomp.pcomp.nameIndex(lhs.Name.Name) 1141 fcomp.setPos(lhs.Dot) 1142 fcomp.emit1(ATTR, name) 1143 set = func() { 1144 fcomp.setPos(lhs.Dot) 1145 fcomp.emit1(SETFIELD, name) 1146 } 1147 1148 default: 1149 panic(lhs) 1150 } 1151 1152 fcomp.expr(stmt.RHS) 1153 1154 // In-place x+=y and x|=y have special semantics: 1155 // the resulting x aliases the original x. 1156 switch stmt.Op { 1157 case syntax.PLUS_EQ: 1158 fcomp.setPos(stmt.OpPos) 1159 fcomp.emit(INPLACE_ADD) 1160 case syntax.PIPE_EQ: 1161 fcomp.setPos(stmt.OpPos) 1162 fcomp.emit(INPLACE_PIPE) 1163 default: 1164 fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS) 1165 } 1166 set() 1167 } 1168 1169 case *syntax.DefStmt: 1170 fcomp.function(stmt.Function.(*resolve.Function)) 1171 fcomp.set(stmt.Name) 1172 1173 case *syntax.ForStmt: 1174 // Keep consistent with ForClause. 1175 head := fcomp.newBlock() 1176 body := fcomp.newBlock() 1177 tail := fcomp.newBlock() 1178 1179 fcomp.expr(stmt.X) 1180 fcomp.setPos(stmt.For) 1181 fcomp.emit(ITERPUSH) 1182 fcomp.jump(head) 1183 1184 fcomp.block = head 1185 fcomp.condjump(ITERJMP, tail, body) 1186 1187 fcomp.block = body 1188 fcomp.assign(stmt.For, stmt.Vars) 1189 fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head}) 1190 fcomp.stmts(stmt.Body) 1191 fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] 1192 fcomp.jump(head) 1193 1194 fcomp.block = tail 1195 fcomp.emit(ITERPOP) 1196 1197 case *syntax.WhileStmt: 1198 head := fcomp.newBlock() 1199 body := fcomp.newBlock() 1200 done := fcomp.newBlock() 1201 1202 fcomp.jump(head) 1203 fcomp.block = head 1204 fcomp.ifelse(stmt.Cond, body, done) 1205 1206 fcomp.block = body 1207 fcomp.loops = append(fcomp.loops, loop{break_: done, continue_: head}) 1208 fcomp.stmts(stmt.Body) 1209 fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] 1210 fcomp.jump(head) 1211 1212 fcomp.block = done 1213 1214 case *syntax.ReturnStmt: 1215 if stmt.Result != nil { 1216 
fcomp.expr(stmt.Result) 1217 } else { 1218 fcomp.emit(NONE) 1219 } 1220 fcomp.emit(RETURN) 1221 fcomp.block = fcomp.newBlock() // dead code 1222 1223 case *syntax.LoadStmt: 1224 for i := range stmt.From { 1225 fcomp.string(stmt.From[i].Name) 1226 } 1227 module := stmt.Module.Value.(string) 1228 fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Binding{ 1229 Name: module, 1230 Pos: stmt.Module.TokenPos, 1231 }) 1232 fcomp.string(module) 1233 fcomp.setPos(stmt.Load) 1234 fcomp.emit1(LOAD, uint32(len(stmt.From))) 1235 for i := range stmt.To { 1236 fcomp.set(stmt.To[len(stmt.To)-1-i]) 1237 } 1238 1239 default: 1240 start, _ := stmt.Span() 1241 log.Panicf("%s: exec: unexpected statement %T", start, stmt) 1242 } 1243 } 1244 1245 // assign implements lhs = rhs for arbitrary expressions lhs. 1246 // RHS is on top of stack, consumed. 1247 func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { 1248 switch lhs := lhs.(type) { 1249 case *syntax.ParenExpr: 1250 // (lhs) = rhs 1251 fcomp.assign(pos, lhs.X) 1252 1253 case *syntax.Ident: 1254 // x = rhs 1255 fcomp.set(lhs) 1256 1257 case *syntax.TupleExpr: 1258 // x, y = rhs 1259 fcomp.assignSequence(pos, lhs.List) 1260 1261 case *syntax.ListExpr: 1262 // [x, y] = rhs 1263 fcomp.assignSequence(pos, lhs.List) 1264 1265 case *syntax.IndexExpr: 1266 // x[y] = rhs 1267 fcomp.expr(lhs.X) 1268 fcomp.emit(EXCH) 1269 fcomp.expr(lhs.Y) 1270 fcomp.emit(EXCH) 1271 fcomp.setPos(lhs.Lbrack) 1272 fcomp.emit(SETINDEX) 1273 1274 case *syntax.DotExpr: 1275 // x.f = rhs 1276 fcomp.expr(lhs.X) 1277 fcomp.emit(EXCH) 1278 fcomp.setPos(lhs.Dot) 1279 fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) 1280 1281 default: 1282 panic(lhs) 1283 } 1284 } 1285 1286 func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { 1287 fcomp.setPos(pos) 1288 fcomp.emit1(UNPACK, uint32(len(lhs))) 1289 for i := range lhs { 1290 fcomp.assign(pos, lhs[i]) 1291 } 1292 } 1293 1294 func (fcomp *fcomp) expr(e syntax.Expr) { 1295 
switch e := e.(type) { 1296 case *syntax.ParenExpr: 1297 fcomp.expr(e.X) 1298 1299 case *syntax.Ident: 1300 fcomp.lookup(e) 1301 1302 case *syntax.Literal: 1303 // e.Value is int64, float64, *bigInt, string 1304 v := e.Value 1305 if e.Token == syntax.BYTES { 1306 v = Bytes(v.(string)) 1307 } 1308 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(v)) 1309 1310 case *syntax.ListExpr: 1311 for _, x := range e.List { 1312 fcomp.expr(x) 1313 } 1314 fcomp.emit1(MAKELIST, uint32(len(e.List))) 1315 1316 case *syntax.CondExpr: 1317 // Keep consistent with IfStmt. 1318 t := fcomp.newBlock() 1319 f := fcomp.newBlock() 1320 done := fcomp.newBlock() 1321 1322 fcomp.ifelse(e.Cond, t, f) 1323 1324 fcomp.block = t 1325 fcomp.expr(e.True) 1326 fcomp.jump(done) 1327 1328 fcomp.block = f 1329 fcomp.expr(e.False) 1330 fcomp.jump(done) 1331 1332 fcomp.block = done 1333 1334 case *syntax.IndexExpr: 1335 fcomp.expr(e.X) 1336 fcomp.expr(e.Y) 1337 fcomp.setPos(e.Lbrack) 1338 fcomp.emit(INDEX) 1339 1340 case *syntax.SliceExpr: 1341 fcomp.setPos(e.Lbrack) 1342 fcomp.expr(e.X) 1343 if e.Lo != nil { 1344 fcomp.expr(e.Lo) 1345 } else { 1346 fcomp.emit(NONE) 1347 } 1348 if e.Hi != nil { 1349 fcomp.expr(e.Hi) 1350 } else { 1351 fcomp.emit(NONE) 1352 } 1353 if e.Step != nil { 1354 fcomp.expr(e.Step) 1355 } else { 1356 fcomp.emit(NONE) 1357 } 1358 fcomp.emit(SLICE) 1359 1360 case *syntax.Comprehension: 1361 if e.Curly { 1362 fcomp.emit(MAKEDICT) 1363 } else { 1364 fcomp.emit1(MAKELIST, 0) 1365 } 1366 fcomp.comprehension(e, 0) 1367 1368 case *syntax.TupleExpr: 1369 fcomp.tuple(e.List) 1370 1371 case *syntax.DictExpr: 1372 fcomp.emit(MAKEDICT) 1373 for _, entry := range e.List { 1374 entry := entry.(*syntax.DictEntry) 1375 fcomp.emit(DUP) 1376 fcomp.expr(entry.Key) 1377 fcomp.expr(entry.Value) 1378 fcomp.setPos(entry.Colon) 1379 fcomp.emit(SETDICTUNIQ) 1380 } 1381 1382 case *syntax.UnaryExpr: 1383 fcomp.expr(e.X) 1384 fcomp.setPos(e.OpPos) 1385 switch e.Op { 1386 case syntax.MINUS: 1387 
fcomp.emit(UMINUS) 1388 case syntax.PLUS: 1389 fcomp.emit(UPLUS) 1390 case syntax.NOT: 1391 fcomp.emit(NOT) 1392 case syntax.TILDE: 1393 fcomp.emit(TILDE) 1394 default: 1395 log.Panicf("%s: unexpected unary op: %s", e.OpPos, e.Op) 1396 } 1397 1398 case *syntax.BinaryExpr: 1399 switch e.Op { 1400 // short-circuit operators 1401 // TODO(adonovan): use ifelse to simplify conditions. 1402 case syntax.OR: 1403 // x or y => if x then x else y 1404 done := fcomp.newBlock() 1405 y := fcomp.newBlock() 1406 1407 fcomp.expr(e.X) 1408 fcomp.emit(DUP) 1409 fcomp.condjump(CJMP, done, y) 1410 1411 fcomp.block = y 1412 fcomp.emit(POP) // discard X 1413 fcomp.expr(e.Y) 1414 fcomp.jump(done) 1415 1416 fcomp.block = done 1417 1418 case syntax.AND: 1419 // x and y => if x then y else x 1420 done := fcomp.newBlock() 1421 y := fcomp.newBlock() 1422 1423 fcomp.expr(e.X) 1424 fcomp.emit(DUP) 1425 fcomp.condjump(CJMP, y, done) 1426 1427 fcomp.block = y 1428 fcomp.emit(POP) // discard X 1429 fcomp.expr(e.Y) 1430 fcomp.jump(done) 1431 1432 fcomp.block = done 1433 1434 case syntax.PLUS: 1435 fcomp.plus(e) 1436 1437 default: 1438 // all other strict binary operator (includes comparisons) 1439 fcomp.expr(e.X) 1440 fcomp.expr(e.Y) 1441 fcomp.binop(e.OpPos, e.Op) 1442 } 1443 1444 case *syntax.DotExpr: 1445 fcomp.expr(e.X) 1446 fcomp.setPos(e.Dot) 1447 fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) 1448 1449 case *syntax.CallExpr: 1450 fcomp.call(e) 1451 1452 case *syntax.LambdaExpr: 1453 fcomp.function(e.Function.(*resolve.Function)) 1454 1455 default: 1456 start, _ := e.Span() 1457 log.Panicf("%s: unexpected expr %T", start, e) 1458 } 1459 } 1460 1461 type summand struct { 1462 x syntax.Expr 1463 plusPos syntax.Position 1464 } 1465 1466 // plus emits optimized code for ((a+b)+...)+z that avoids naive 1467 // quadratic behavior for strings, tuples, and lists, 1468 // and folds together adjacent literals of the same type. 
1469 func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { 1470 // Gather all the right operands of the left tree of plusses. 1471 // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. 1472 args := make([]summand, 0, 2) // common case: 2 operands 1473 for plus := e; ; { 1474 args = append(args, summand{unparen(plus.Y), plus.OpPos}) 1475 left := unparen(plus.X) 1476 x, ok := left.(*syntax.BinaryExpr) 1477 if !ok || x.Op != syntax.PLUS { 1478 args = append(args, summand{x: left}) 1479 break 1480 } 1481 plus = x 1482 } 1483 // Reverse args to syntactic order. 1484 for i, n := 0, len(args)/2; i < n; i++ { 1485 j := len(args) - 1 - i 1486 args[i], args[j] = args[j], args[i] 1487 } 1488 1489 // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). 1490 out := args[:0] // compact in situ 1491 for i := 0; i < len(args); { 1492 j := i + 1 1493 if code := addable(args[i].x); code != 0 { 1494 for j < len(args) && addable(args[j].x) == code { 1495 j++ 1496 } 1497 if j > i+1 { 1498 args[i].x = add(code, args[i:j]) 1499 } 1500 } 1501 out = append(out, args[i]) 1502 i = j 1503 } 1504 args = out 1505 1506 // Emit code for an n-ary sum (n > 0). 1507 fcomp.expr(args[0].x) 1508 for _, summand := range args[1:] { 1509 fcomp.expr(summand.x) 1510 fcomp.setPos(summand.plusPos) 1511 fcomp.emit(PLUS) 1512 } 1513 1514 // If len(args) > 2, use of an accumulator instead of a chain of 1515 // PLUS operations may be more efficient. 1516 // However, no gain was measured on a workload analogous to Bazel loading; 1517 // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. 
1518 // 1519 // We cannot use a single n-ary SUM operation 1520 // a b c SUM<3> 1521 // because we need to report a distinct error for each 1522 // individual '+' operation, so three additional operations are 1523 // needed: 1524 // 1525 // ACCSTART => create buffer and append to it 1526 // ACCUM => append to buffer 1527 // ACCEND => get contents of buffer 1528 // 1529 // For string, list, and tuple values, the interpreter can 1530 // optimize these operations by using a mutable buffer. 1531 // For all other types, ACCSTART and ACCEND would behave like 1532 // the identity function and ACCUM behaves like PLUS. 1533 // ACCUM must correctly support user-defined operations 1534 // such as list+foo. 1535 // 1536 // fcomp.emit(ACCSTART) 1537 // for _, summand := range args[1:] { 1538 // fcomp.expr(summand.x) 1539 // fcomp.setPos(summand.plusPos) 1540 // fcomp.emit(ACCUM) 1541 // } 1542 // fcomp.emit(ACCEND) 1543 } 1544 1545 // addable reports whether e is a statically addable 1546 // expression: a [s]tring, [b]ytes, [l]ist, or [t]uple. 1547 func addable(e syntax.Expr) rune { 1548 switch e := e.(type) { 1549 case *syntax.Literal: 1550 // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding. 1551 switch e.Token { 1552 case syntax.STRING: 1553 return 's' 1554 case syntax.BYTES: 1555 return 'b' 1556 } 1557 case *syntax.ListExpr: 1558 return 'l' 1559 case *syntax.TupleExpr: 1560 return 't' 1561 } 1562 return 0 1563 } 1564 1565 // add returns an expression denoting the sum of args, 1566 // which are all addable values of the type indicated by code. 1567 // The resulting syntax is degenerate, lacking position, etc. 
1568 func add(code rune, args []summand) syntax.Expr { 1569 switch code { 1570 case 's', 'b': 1571 var buf strings.Builder 1572 for _, arg := range args { 1573 buf.WriteString(arg.x.(*syntax.Literal).Value.(string)) 1574 } 1575 tok := syntax.STRING 1576 if code == 'b' { 1577 tok = syntax.BYTES 1578 } 1579 return &syntax.Literal{Token: tok, Value: buf.String()} 1580 case 'l': 1581 var elems []syntax.Expr 1582 for _, arg := range args { 1583 elems = append(elems, arg.x.(*syntax.ListExpr).List...) 1584 } 1585 return &syntax.ListExpr{List: elems} 1586 case 't': 1587 var elems []syntax.Expr 1588 for _, arg := range args { 1589 elems = append(elems, arg.x.(*syntax.TupleExpr).List...) 1590 } 1591 return &syntax.TupleExpr{List: elems} 1592 } 1593 panic(code) 1594 } 1595 1596 func unparen(e syntax.Expr) syntax.Expr { 1597 if p, ok := e.(*syntax.ParenExpr); ok { 1598 return unparen(p.X) 1599 } 1600 return e 1601 } 1602 1603 func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { 1604 // TODO(adonovan): simplify by assuming syntax and compiler constants align. 
1605 fcomp.setPos(pos) 1606 switch op { 1607 // arithmetic 1608 case syntax.PLUS: 1609 fcomp.emit(PLUS) 1610 case syntax.MINUS: 1611 fcomp.emit(MINUS) 1612 case syntax.STAR: 1613 fcomp.emit(STAR) 1614 case syntax.SLASH: 1615 fcomp.emit(SLASH) 1616 case syntax.SLASHSLASH: 1617 fcomp.emit(SLASHSLASH) 1618 case syntax.PERCENT: 1619 fcomp.emit(PERCENT) 1620 case syntax.AMP: 1621 fcomp.emit(AMP) 1622 case syntax.PIPE: 1623 fcomp.emit(PIPE) 1624 case syntax.CIRCUMFLEX: 1625 fcomp.emit(CIRCUMFLEX) 1626 case syntax.LTLT: 1627 fcomp.emit(LTLT) 1628 case syntax.GTGT: 1629 fcomp.emit(GTGT) 1630 case syntax.IN: 1631 fcomp.emit(IN) 1632 case syntax.NOT_IN: 1633 fcomp.emit(IN) 1634 fcomp.emit(NOT) 1635 1636 // comparisons 1637 case syntax.EQL, 1638 syntax.NEQ, 1639 syntax.GT, 1640 syntax.LT, 1641 syntax.LE, 1642 syntax.GE: 1643 fcomp.emit(Opcode(op-syntax.EQL) + EQL) 1644 1645 default: 1646 log.Panicf("%s: unexpected binary op: %s", pos, op) 1647 } 1648 } 1649 1650 func (fcomp *fcomp) call(call *syntax.CallExpr) { 1651 // TODO(adonovan): opt: Use optimized path for calling methods 1652 // of built-ins: x.f(...) to avoid materializing a closure. 1653 // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { 1654 // fcomp.expr(dot.X) 1655 // fcomp.args(call) 1656 // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) 1657 // return 1658 // } 1659 1660 // usual case 1661 fcomp.expr(call.Fn) 1662 op, arg := fcomp.args(call) 1663 fcomp.setPos(call.Lparen) 1664 fcomp.emit1(op, arg) 1665 } 1666 1667 // args emits code to push a tuple of positional arguments 1668 // and a tuple of named arguments containing alternating keys and values. 1669 // Either or both tuples may be empty (TODO(adonovan): optimize). 1670 func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { 1671 var callmode int 1672 // Compute the number of each kind of parameter. 
1673 var p, n int // number of positional, named arguments 1674 var varargs, kwargs syntax.Expr 1675 for _, arg := range call.Args { 1676 if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { 1677 1678 // named argument (name, value) 1679 fcomp.string(binary.X.(*syntax.Ident).Name) 1680 fcomp.expr(binary.Y) 1681 n++ 1682 continue 1683 } 1684 if unary, ok := arg.(*syntax.UnaryExpr); ok { 1685 if unary.Op == syntax.STAR { 1686 callmode |= 1 1687 varargs = unary.X 1688 continue 1689 } else if unary.Op == syntax.STARSTAR { 1690 callmode |= 2 1691 kwargs = unary.X 1692 continue 1693 } 1694 } 1695 1696 // positional argument 1697 fcomp.expr(arg) 1698 p++ 1699 } 1700 1701 // Python2 and Python3 both permit named arguments 1702 // to appear both before and after a *args argument: 1703 // f(1, 2, x=3, *[4], y=5, **dict(z=6)) 1704 // 1705 // They also differ in their evaluation order: 1706 // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last) 1707 // Python3: 1 2 4 3 5 6 (positional args evaluated before named args) 1708 // Starlark-in-Java historically used a third order: 1709 // Lexical: 1 2 3 4 5 6 (all args evaluated left-to-right) 1710 // 1711 // After discussion in github.com/bazelbuild/starlark#13, the 1712 // spec now requires Starlark to statically reject named 1713 // arguments after *args (e.g. y=5), and to use Python2-style 1714 // evaluation order. This is both easy to implement and 1715 // consistent with lexical order: 1716 // 1717 // f(1, 2, x=3, *[4], **dict(z=6)) # 1 2 3 4 6 1718 1719 // *args 1720 if varargs != nil { 1721 fcomp.expr(varargs) 1722 } 1723 1724 // **kwargs 1725 if kwargs != nil { 1726 fcomp.expr(kwargs) 1727 } 1728 1729 // TODO(adonovan): avoid this with a more flexible encoding. 
1730 if p >= 256 || n >= 256 { 1731 // resolve already checked this; should be unreachable 1732 panic("too many arguments in call") 1733 } 1734 1735 return CALL + Opcode(callmode), uint32(p<<8 | n) 1736 } 1737 1738 func (fcomp *fcomp) tuple(elems []syntax.Expr) { 1739 for _, elem := range elems { 1740 fcomp.expr(elem) 1741 } 1742 fcomp.emit1(MAKETUPLE, uint32(len(elems))) 1743 } 1744 1745 func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { 1746 if clauseIndex == len(comp.Clauses) { 1747 fcomp.emit(DUP) // accumulator 1748 if comp.Curly { 1749 // dict: {k:v for ...} 1750 // Parser ensures that body is of form k:v. 1751 // Python-style set comprehensions {body for vars in x} 1752 // are not supported. 1753 entry := comp.Body.(*syntax.DictEntry) 1754 fcomp.expr(entry.Key) 1755 fcomp.expr(entry.Value) 1756 fcomp.setPos(entry.Colon) 1757 fcomp.emit(SETDICT) 1758 } else { 1759 // list: [body for vars in x] 1760 fcomp.expr(comp.Body) 1761 fcomp.emit(APPEND) 1762 } 1763 return 1764 } 1765 1766 clause := comp.Clauses[clauseIndex] 1767 switch clause := clause.(type) { 1768 case *syntax.IfClause: 1769 t := fcomp.newBlock() 1770 done := fcomp.newBlock() 1771 fcomp.ifelse(clause.Cond, t, done) 1772 1773 fcomp.block = t 1774 fcomp.comprehension(comp, clauseIndex+1) 1775 fcomp.jump(done) 1776 1777 fcomp.block = done 1778 return 1779 1780 case *syntax.ForClause: 1781 // Keep consistent with ForStmt. 
1782 head := fcomp.newBlock() 1783 body := fcomp.newBlock() 1784 tail := fcomp.newBlock() 1785 1786 fcomp.expr(clause.X) 1787 fcomp.setPos(clause.For) 1788 fcomp.emit(ITERPUSH) 1789 fcomp.jump(head) 1790 1791 fcomp.block = head 1792 fcomp.condjump(ITERJMP, tail, body) 1793 1794 fcomp.block = body 1795 fcomp.assign(clause.For, clause.Vars) 1796 fcomp.comprehension(comp, clauseIndex+1) 1797 fcomp.jump(head) 1798 1799 fcomp.block = tail 1800 fcomp.emit(ITERPOP) 1801 return 1802 } 1803 1804 start, _ := clause.Span() 1805 log.Panicf("%s: unexpected comprehension clause %T", start, clause) 1806 } 1807 1808 func (fcomp *fcomp) function(f *resolve.Function) { 1809 // Evaluation of the defaults may fail, so record the position. 1810 fcomp.setPos(f.Pos) 1811 1812 // To reduce allocation, we emit a combined tuple 1813 // for the defaults and the freevars. 1814 // The function knows where to split it at run time. 1815 1816 // Generate tuple of parameter defaults. For: 1817 // def f(p1, p2=dp2, p3=dp3, *, k1, k2=dk2, k3, **kwargs) 1818 // the tuple is: 1819 // (dp2, dp3, MANDATORY, dk2, MANDATORY). 1820 ndefaults := 0 1821 seenStar := false 1822 for _, param := range f.Params { 1823 switch param := param.(type) { 1824 case *syntax.BinaryExpr: 1825 fcomp.expr(param.Y) 1826 ndefaults++ 1827 case *syntax.UnaryExpr: 1828 seenStar = true // * or *args (also **kwargs) 1829 case *syntax.Ident: 1830 if seenStar { 1831 fcomp.emit(MANDATORY) 1832 ndefaults++ 1833 } 1834 } 1835 } 1836 1837 // Capture the cells of the function's 1838 // free variables from the lexical environment. 1839 for _, freevar := range f.FreeVars { 1840 // Don't call fcomp.lookup because we want 1841 // the cell itself, not its content. 
1842 switch freevar.Scope { 1843 case resolve.Free: 1844 fcomp.emit1(FREE, uint32(freevar.Index)) 1845 case resolve.Cell: 1846 fcomp.emit1(LOCAL, uint32(freevar.Index)) 1847 } 1848 } 1849 1850 fcomp.emit1(MAKETUPLE, uint32(ndefaults+len(f.FreeVars))) 1851 1852 funcode := fcomp.pcomp.function(f.Name, f.Pos, f.Body, f.Locals, f.FreeVars) 1853 1854 if debug { 1855 // TODO(adonovan): do compilations sequentially not as a tree, 1856 // to make the log easier to read. 1857 // Simplify by identifying Toplevel and functionIndex 0. 1858 fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) 1859 } 1860 1861 // def f(a, *, b=1) has only 2 parameters. 1862 numParams := len(f.Params) 1863 if f.NumKwonlyParams > 0 && !f.HasVarargs { 1864 numParams-- 1865 } 1866 1867 funcode.NumParams = numParams 1868 funcode.NumKwonlyParams = f.NumKwonlyParams 1869 funcode.HasVarargs = f.HasVarargs 1870 funcode.HasKwargs = f.HasKwargs 1871 fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode)) 1872 } 1873 1874 // ifelse emits a Boolean control flow decision. 1875 // On return, the current block is unset. 
1876 func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { 1877 switch cond := cond.(type) { 1878 case *syntax.UnaryExpr: 1879 if cond.Op == syntax.NOT { 1880 // if not x then goto t else goto f 1881 // => 1882 // if x then goto f else goto t 1883 fcomp.ifelse(cond.X, f, t) 1884 return 1885 } 1886 1887 case *syntax.BinaryExpr: 1888 switch cond.Op { 1889 case syntax.AND: 1890 // if x and y then goto t else goto f 1891 // => 1892 // if x then ifelse(y, t, f) else goto f 1893 fcomp.expr(cond.X) 1894 y := fcomp.newBlock() 1895 fcomp.condjump(CJMP, y, f) 1896 1897 fcomp.block = y 1898 fcomp.ifelse(cond.Y, t, f) 1899 return 1900 1901 case syntax.OR: 1902 // if x or y then goto t else goto f 1903 // => 1904 // if x then goto t else ifelse(y, t, f) 1905 fcomp.expr(cond.X) 1906 y := fcomp.newBlock() 1907 fcomp.condjump(CJMP, t, y) 1908 1909 fcomp.block = y 1910 fcomp.ifelse(cond.Y, t, f) 1911 return 1912 case syntax.NOT_IN: 1913 // if x not in y then goto t else goto f 1914 // => 1915 // if x in y then goto f else goto t 1916 copy := *cond 1917 copy.Op = syntax.IN 1918 fcomp.expr(©) 1919 fcomp.condjump(CJMP, f, t) 1920 return 1921 } 1922 } 1923 1924 // general case 1925 fcomp.expr(cond) 1926 fcomp.condjump(CJMP, t, f) 1927 }