github.com/google/skylark@v0.0.0-20181101142754-a5f7082aabed/internal/compile/compile.go (about) 1 // The compile package defines the Skylark bytecode compiler. 2 // It is an internal package of the Skylark interpreter and is not directly accessible to clients. 3 // 4 // The compiler generates byte code with optional uint32 operands for a 5 // virtual machine with the following components: 6 // - a program counter, which is an index into the byte code array. 7 // - an operand stack, whose maximum size is computed for each function by the compiler. 8 // - a stack of active iterators. 9 // - an array of local variables. 10 // The number of local variables and their indices are computed by the resolver. 11 // - an array of free variables, for nested functions. 12 // As with locals, these are computed by the resolver. 13 // - an array of global variables, shared among all functions in the same module. 14 // All elements are initially nil. 15 // - two maps of predeclared and universal identifiers. 16 // 17 // A line number table maps each program counter value to a source position; 18 // these source positions do not currently record column information. 19 // 20 // Operands, logically uint32s, are encoded using little-endian 7-bit 21 // varints, the top bit indicating that more bytes follow. 22 // 23 package compile 24 25 import ( 26 "bytes" 27 "fmt" 28 "log" 29 "os" 30 "path/filepath" 31 "strconv" 32 33 "github.com/google/skylark/resolve" 34 "github.com/google/skylark/syntax" 35 ) 36 37 const debug = false // TODO(adonovan): use a bitmap of options; and regexp to match files 38 39 // Increment this to force recompilation of saved bytecode files. 40 const Version = 3 41 42 type Opcode uint8 43 44 // "x DUP x x" is a "stack picture" that describes the state of the 45 // stack before and after execution of the instruction. 46 // 47 // OP<index> indicates an immediate operand that is an index into the 48 // specified table: locals, names, freevars, constants.
49 const ( 50 NOP Opcode = iota // - NOP - 51 52 // stack operations 53 DUP // x DUP x x 54 DUP2 // x y DUP2 x y x y 55 POP // x POP - 56 EXCH // x y EXCH y x 57 58 // binary comparisons 59 // (order must match Token) 60 LT 61 GT 62 GE 63 LE 64 EQL 65 NEQ 66 67 // binary arithmetic 68 // (order must match Token) 69 PLUS 70 MINUS 71 STAR 72 SLASH 73 SLASHSLASH 74 PERCENT 75 AMP 76 PIPE 77 CIRCUMFLEX 78 LTLT 79 GTGT 80 81 IN 82 83 // unary operators 84 UPLUS // x UPLUS x 85 UMINUS // x UMINUS -x 86 TILDE // x TILDE ~x 87 88 NONE // - NONE None 89 TRUE // - TRUE True 90 FALSE // - FALSE False 91 92 ITERPUSH // iterable ITERPUSH - [pushes the iterator stack] 93 ITERPOP // - ITERPOP - [pops the iterator stack] 94 NOT // value NOT bool 95 RETURN // value RETURN - 96 SETINDEX // a i new SETINDEX - 97 INDEX // a i INDEX elem 98 SETDICT // dict key value SETDICT - 99 SETDICTUNIQ // dict key value SETDICTUNIQ - 100 APPEND // list elem APPEND - 101 SLICE // x lo hi step SLICE slice 102 INPLACE_ADD // x y INPLACE_ADD z where z is x+y or x.extend(y) 103 MAKEDICT // - MAKEDICT dict 104 105 // --- opcodes with an argument must go below this line --- 106 107 // control flow 108 JMP // - JMP<addr> - 109 CJMP // cond CJMP<addr> - 110 ITERJMP // - ITERJMP<addr> elem (and fall through) [acts on topmost iterator] 111 // or: - ITERJMP<addr> - (and jump) 112 113 CONSTANT // - CONSTANT<constant> value 114 MAKETUPLE // x1 ... xn MAKETUPLE<n> tuple 115 MAKELIST // x1 ... xn MAKELIST<n> list 116 MAKEFUNC // args kwargs MAKEFUNC<func> fn 117 LOAD // from1 ... fromN module LOAD<n> v1 ... vN 118 SETLOCAL // value SETLOCAL<local> - 119 SETGLOBAL // value SETGLOBAL<global> - 120 LOCAL // - LOCAL<local> value 121 FREE // - FREE<freevar> value 122 GLOBAL // - GLOBAL<global> value 123 PREDECLARED // - PREDECLARED<name> value 124 UNIVERSAL // - UNIVERSAL<name> value 125 ATTR // x ATTR<name> y y = x.name 126 SETFIELD // x y SETFIELD<name> - x.name = y 127 UNPACK // iterable UNPACK<n> vn ... 
v1 128 129 // n>>8 is #positional args and n&0xff is #named args (pairs). 130 CALL // fn positional named CALL<n> result 131 CALL_VAR // fn positional named *args CALL_VAR<n> result 132 CALL_KW // fn positional named **kwargs CALL_KW<n> result 133 CALL_VAR_KW // fn positional named *args **kwargs CALL_VAR_KW<n> result 134 135 OpcodeArgMin = JMP 136 OpcodeMax = CALL_VAR_KW 137 ) 138 139 // TODO(adonovan): add dynamic checks for missing opcodes in the tables below. 140 141 var opcodeNames = [...]string{ 142 AMP: "amp", 143 APPEND: "append", 144 ATTR: "attr", 145 CALL: "call", 146 CALL_KW: "call_kw ", 147 CALL_VAR: "call_var", 148 CALL_VAR_KW: "call_var_kw", 149 CIRCUMFLEX: "circumflex", 150 CJMP: "cjmp", 151 CONSTANT: "constant", 152 DUP2: "dup2", 153 DUP: "dup", 154 EQL: "eql", 155 FALSE: "false", 156 FREE: "free", 157 GE: "ge", 158 GLOBAL: "global", 159 GT: "gt", 160 GTGT: "gtgt", 161 IN: "in", 162 INDEX: "index", 163 INPLACE_ADD: "inplace_add", 164 ITERJMP: "iterjmp", 165 ITERPOP: "iterpop", 166 ITERPUSH: "iterpush", 167 JMP: "jmp", 168 LE: "le", 169 LOAD: "load", 170 LOCAL: "local", 171 LT: "lt", 172 LTLT: "ltlt", 173 MAKEDICT: "makedict", 174 MAKEFUNC: "makefunc", 175 MAKELIST: "makelist", 176 MAKETUPLE: "maketuple", 177 MINUS: "minus", 178 NEQ: "neq", 179 NONE: "none", 180 NOP: "nop", 181 NOT: "not", 182 PERCENT: "percent", 183 PIPE: "pipe", 184 PLUS: "plus", 185 POP: "pop", 186 PREDECLARED: "predeclared", 187 RETURN: "return", 188 SETDICT: "setdict", 189 SETDICTUNIQ: "setdictuniq", 190 SETFIELD: "setfield", 191 SETGLOBAL: "setglobal", 192 SETINDEX: "setindex", 193 SETLOCAL: "setlocal", 194 SLASH: "slash", 195 SLASHSLASH: "slashslash", 196 SLICE: "slice", 197 STAR: "star", 198 TILDE: "tilde", 199 TRUE: "true", 200 UMINUS: "uminus", 201 UNIVERSAL: "universal", 202 UNPACK: "unpack", 203 UPLUS: "uplus", 204 } 205 206 const variableStackEffect = 0x7f 207 208 // stackEffect records the effect on the size of the operand stack of 209 // each kind of instruction. 
For some instructions this requires computation. 210 var stackEffect = [...]int8{ 211 AMP: -1, 212 APPEND: -2, 213 ATTR: 0, 214 CALL: variableStackEffect, 215 CALL_KW: variableStackEffect, 216 CALL_VAR: variableStackEffect, 217 CALL_VAR_KW: variableStackEffect, 218 CIRCUMFLEX: -1, 219 CJMP: -1, 220 CONSTANT: +1, 221 DUP2: +2, 222 DUP: +1, 223 EQL: -1, 224 FALSE: +1, 225 FREE: +1, 226 GE: -1, 227 GLOBAL: +1, 228 GT: -1, 229 GTGT: -1, 230 IN: -1, 231 INDEX: -1, 232 INPLACE_ADD: -1, 233 ITERJMP: variableStackEffect, 234 ITERPOP: 0, 235 ITERPUSH: -1, 236 JMP: 0, 237 LE: -1, 238 LOAD: -1, 239 LOCAL: +1, 240 LT: -1, 241 LTLT: -1, 242 MAKEDICT: +1, 243 MAKEFUNC: -1, 244 MAKELIST: variableStackEffect, 245 MAKETUPLE: variableStackEffect, 246 MINUS: -1, 247 NEQ: -1, 248 NONE: +1, 249 NOP: 0, 250 NOT: 0, 251 PERCENT: -1, 252 PIPE: -1, 253 PLUS: -1, 254 POP: -1, 255 PREDECLARED: +1, 256 RETURN: -1, 257 SETDICT: -3, 258 SETDICTUNIQ: -3, 259 SETFIELD: -2, 260 SETGLOBAL: -1, 261 SETINDEX: -3, 262 SETLOCAL: -1, 263 SLASH: -1, 264 SLASHSLASH: -1, 265 SLICE: -3, 266 STAR: -1, 267 TRUE: +1, 268 UNIVERSAL: +1, 269 UNPACK: variableStackEffect, 270 } 271 272 func (op Opcode) String() string { 273 if op < OpcodeMax { 274 return opcodeNames[op] 275 } 276 return fmt.Sprintf("illegal op (%d)", op) 277 } 278 279 // A Program is a Skylark file in executable form. 280 // 281 // Programs are serialized by the gobProgram function, 282 // which must be updated whenever this declaration is changed. 283 type Program struct { 284 Loads []Ident // name (really, string) and position of each load stmt 285 Names []string // names of attributes and predeclared variables 286 Constants []interface{} // = string | int64 | float64 | *big.Int 287 Functions []*Funcode 288 Globals []Ident // for error messages and tracing 289 Toplevel *Funcode // module initialization function 290 } 291 292 // A Funcode is the code of a compiled Skylark function. 
293 // 294 // Funcodes are serialized by the gobFunc function, 295 // which must be updated whenever this declaration is changed. 296 type Funcode struct { 297 Prog *Program 298 Pos syntax.Position // position of def or lambda token 299 Name string // name of this function 300 Code []byte // the byte code 301 pclinetab []uint16 // mapping from pc to linenum 302 Locals []Ident // for error messages and tracing 303 Freevars []Ident // for tracing 304 MaxStack int 305 NumParams int 306 HasVarargs, HasKwargs bool 307 } 308 309 // An Ident is the name and position of an identifier. 310 type Ident struct { 311 Name string 312 Pos syntax.Position 313 } 314 315 // A pcomp holds the compiler state for a Program. 316 type pcomp struct { 317 prog *Program // what we're building 318 319 names map[string]uint32 320 constants map[interface{}]uint32 321 functions map[*Funcode]uint32 322 } 323 324 // An fcomp holds the compiler state for a Funcode. 325 type fcomp struct { 326 fn *Funcode // what we're building 327 328 pcomp *pcomp 329 pos syntax.Position // current position of generated code 330 loops []loop 331 block *block 332 } 333 334 type loop struct { 335 break_, continue_ *block 336 } 337 338 type block struct { 339 insns []insn 340 341 // If the last insn is a RETURN, jmp and cjmp are nil. 342 // If the last insn is a CJMP or ITERJMP, 343 // cjmp and jmp are the "true" and "false" successors. 344 // Otherwise, jmp is the sole successor. 345 jmp, cjmp *block 346 347 initialstack int // for stack depth computation 348 349 // Used during encoding 350 index int // -1 => not encoded yet 351 addr uint32 352 } 353 354 type insn struct { 355 op Opcode 356 arg uint32 357 line int32 358 } 359 360 func (fn *Funcode) Position(pc uint32) syntax.Position { 361 // Conceptually the table contains rows of the form (pc uint32, 362 // line int32). 
Since the pc always increases, usually by a 363 // small amount, and the line number typically also does too 364 // although it may decrease, again typically by a small amount, 365 // we use delta encoding, starting from {pc: 0, line: 0}. 366 // 367 // Each entry is encoded in 16 bits. 368 // The top 8 bits are the unsigned delta pc; the next 7 bits are 369 // the signed line number delta; and the bottom bit indicates 370 // that more rows follow because one of the deltas was maxed out. 371 // 372 // TODO(adonovan): opt: improve the encoding; include the column. 373 374 pos := fn.Pos // copy the (annoyingly inaccessible) filename 375 pos.Line = 0 376 pos.Col = 0 377 378 // Position returns the record for the 379 // largest PC value not greater than 'pc'. 380 var prevpc uint32 381 complete := true 382 for _, x := range fn.pclinetab { 383 nextpc := prevpc + uint32(x>>8) 384 if complete && nextpc > pc { 385 return pos 386 } 387 prevpc = nextpc 388 pos.Line += int32(int8(x) >> 1) // sign extend Ξ”line from 7 to 32 bits 389 complete = (x & 1) == 0 390 } 391 return pos 392 } 393 394 // idents convert syntactic identifiers to compiled form. 395 func idents(ids []*syntax.Ident) []Ident { 396 res := make([]Ident, len(ids)) 397 for i, id := range ids { 398 res[i].Name = id.Name 399 res[i].Pos = id.NamePos 400 } 401 return res 402 } 403 404 // Expr compiles an expression to a program consisting of a single toplevel function. 405 func Expr(expr syntax.Expr, locals []*syntax.Ident) *Funcode { 406 stmts := []syntax.Stmt{&syntax.ReturnStmt{Result: expr}} 407 return File(stmts, locals, nil).Toplevel 408 } 409 410 // File compiles the statements of a file into a program. 
411 func File(stmts []syntax.Stmt, locals, globals []*syntax.Ident) *Program { 412 pcomp := &pcomp{ 413 prog: &Program{ 414 Globals: idents(globals), 415 }, 416 names: make(map[string]uint32), 417 constants: make(map[interface{}]uint32), 418 functions: make(map[*Funcode]uint32), 419 } 420 421 var pos syntax.Position 422 if len(stmts) > 0 { 423 pos = syntax.Start(stmts[0]) 424 } 425 426 pcomp.prog.Toplevel = pcomp.function("<toplevel>", pos, stmts, locals, nil) 427 428 return pcomp.prog 429 } 430 431 func (pcomp *pcomp) function(name string, pos syntax.Position, stmts []syntax.Stmt, locals, freevars []*syntax.Ident) *Funcode { 432 fcomp := &fcomp{ 433 pcomp: pcomp, 434 pos: pos, 435 fn: &Funcode{ 436 Prog: pcomp.prog, 437 Pos: pos, 438 Name: name, 439 Locals: idents(locals), 440 Freevars: idents(freevars), 441 }, 442 } 443 444 if debug { 445 fmt.Fprintf(os.Stderr, "start function(%s @ %s)\n", name, pos) 446 } 447 448 // Convert AST to a CFG of instructions. 449 entry := fcomp.newBlock() 450 fcomp.block = entry 451 fcomp.stmts(stmts) 452 if fcomp.block != nil { 453 fcomp.emit(NONE) 454 fcomp.emit(RETURN) 455 } 456 457 var oops bool // something bad happened 458 459 setinitialstack := func(b *block, depth int) { 460 if b.initialstack == -1 { 461 b.initialstack = depth 462 } else if b.initialstack != depth { 463 fmt.Fprintf(os.Stderr, "%d: setinitialstack: depth mismatch: %d vs %d\n", 464 b.index, b.initialstack, depth) 465 oops = true 466 } 467 } 468 469 // Linearize the CFG: 470 // compute order, address, and initial 471 // stack depth of each reachable block. 
472 var pc uint32 473 var blocks []*block 474 var maxstack int 475 var visit func(b *block) 476 visit = func(b *block) { 477 if b.index >= 0 { 478 return // already visited 479 } 480 b.index = len(blocks) 481 b.addr = pc 482 blocks = append(blocks, b) 483 484 stack := b.initialstack 485 if debug { 486 fmt.Fprintf(os.Stderr, "%s block %d: (stack = %d)\n", name, b.index, stack) 487 } 488 var cjmpAddr *uint32 489 var isiterjmp int 490 for i, insn := range b.insns { 491 pc++ 492 493 // Compute size of argument. 494 if insn.op >= OpcodeArgMin { 495 switch insn.op { 496 case ITERJMP: 497 isiterjmp = 1 498 fallthrough 499 case CJMP: 500 cjmpAddr = &b.insns[i].arg 501 pc += 4 502 default: 503 pc += uint32(argLen(insn.arg)) 504 } 505 } 506 507 // Compute effect on stack. 508 se := insn.stackeffect() 509 if debug { 510 fmt.Fprintln(os.Stderr, "\t", insn.op, stack, stack+se) 511 } 512 stack += se 513 if stack < 0 { 514 fmt.Fprintf(os.Stderr, "After pc=%d: stack underflow\n", pc) 515 oops = true 516 } 517 if stack+isiterjmp > maxstack { 518 maxstack = stack + isiterjmp 519 } 520 } 521 522 if debug { 523 fmt.Fprintf(os.Stderr, "successors of block %d (start=%d):\n", 524 b.addr, b.index) 525 if b.jmp != nil { 526 fmt.Fprintf(os.Stderr, "jmp to %d\n", b.jmp.index) 527 } 528 if b.cjmp != nil { 529 fmt.Fprintf(os.Stderr, "cjmp to %d\n", b.cjmp.index) 530 } 531 } 532 533 // Place the jmp block next. 534 if b.jmp != nil { 535 // jump threading (empty cycles are impossible) 536 for b.jmp.insns == nil { 537 b.jmp = b.jmp.jmp 538 } 539 540 setinitialstack(b.jmp, stack+isiterjmp) 541 if b.jmp.index < 0 { 542 // Successor is not yet visited: 543 // place it next and fall through. 544 visit(b.jmp) 545 } else { 546 // Successor already visited; 547 // explicit backward jump required. 548 pc += 5 549 } 550 } 551 552 // Then the cjmp block. 
553 if b.cjmp != nil { 554 // jump threading (empty cycles are impossible) 555 for b.cjmp.insns == nil { 556 b.cjmp = b.cjmp.jmp 557 } 558 559 setinitialstack(b.cjmp, stack) 560 visit(b.cjmp) 561 562 // Patch the CJMP/ITERJMP, if present. 563 if cjmpAddr != nil { 564 *cjmpAddr = b.cjmp.addr 565 } 566 } 567 } 568 setinitialstack(entry, 0) 569 visit(entry) 570 571 fn := fcomp.fn 572 fn.MaxStack = maxstack 573 574 // Emit bytecode (and position table). 575 if debug { 576 fmt.Fprintf(os.Stderr, "Function %s: (%d blocks, %d bytes)\n", name, len(blocks), pc) 577 } 578 fcomp.generate(blocks, pc) 579 580 if debug { 581 fmt.Fprintf(os.Stderr, "code=%d maxstack=%d\n", fn.Code, fn.MaxStack) 582 } 583 584 // Don't panic until we've completed printing of the function. 585 if oops { 586 panic("internal error") 587 } 588 589 if debug { 590 fmt.Fprintf(os.Stderr, "end function(%s @ %s)\n", name, pos) 591 } 592 593 return fn 594 } 595 596 func (insn *insn) stackeffect() int { 597 se := int(stackEffect[insn.op]) 598 if se == variableStackEffect { 599 arg := int(insn.arg) 600 switch insn.op { 601 case CALL, CALL_KW, CALL_VAR, CALL_VAR_KW: 602 se = -int(2*(insn.arg&0xff) + insn.arg>>8) 603 if insn.op != CALL { 604 se-- 605 } 606 if insn.op == CALL_VAR_KW { 607 se-- 608 } 609 case ITERJMP: 610 // Stack effect differs by successor: 611 // +1 for jmp/false/ok 612 // 0 for cjmp/true/exhausted 613 // Handled specially in caller. 614 se = 0 615 case MAKELIST, MAKETUPLE: 616 se = 1 - arg 617 case UNPACK: 618 se = arg - 1 619 default: 620 panic(insn.op) 621 } 622 } 623 return se 624 } 625 626 // generate emits the linear instruction stream from the CFG, 627 // and builds the PC-to-line number table. 
628 func (fcomp *fcomp) generate(blocks []*block, codelen uint32) { 629 code := make([]byte, 0, codelen) 630 var pclinetab []uint16 631 var prev struct { 632 pc uint32 633 line int32 634 } 635 636 for _, b := range blocks { 637 if debug { 638 fmt.Fprintf(os.Stderr, "%d:\n", b.index) 639 } 640 pc := b.addr 641 for _, insn := range b.insns { 642 if insn.line != 0 { 643 // Instruction has a source position. Delta-encode it. 644 // See Funcode.Position for the encoding. 645 for { 646 var incomplete uint16 647 648 deltapc := pc - prev.pc 649 if deltapc > 0xff { 650 deltapc = 0xff 651 incomplete = 1 652 } 653 prev.pc += deltapc 654 655 deltaline := insn.line - prev.line 656 if deltaline > 0x3f { 657 deltaline = 0x3f 658 incomplete = 1 659 } else if deltaline < -0x40 { 660 deltaline = -0x40 661 incomplete = 1 662 } 663 prev.line += deltaline 664 665 entry := uint16(deltapc<<8) | uint16(uint8(deltaline<<1)) | incomplete 666 pclinetab = append(pclinetab, entry) 667 if incomplete == 0 { 668 break 669 } 670 } 671 672 if debug { 673 fmt.Fprintf(os.Stderr, "\t\t\t\t\t; %s %d\n", 674 filepath.Base(fcomp.fn.Pos.Filename()), insn.line) 675 } 676 } 677 if debug { 678 PrintOp(fcomp.fn, pc, insn.op, insn.arg) 679 } 680 code = append(code, byte(insn.op)) 681 pc++ 682 if insn.op >= OpcodeArgMin { 683 if insn.op == CJMP || insn.op == ITERJMP { 684 code = addUint32(code, insn.arg, 4) // pad arg to 4 bytes 685 } else { 686 code = addUint32(code, insn.arg, 0) 687 } 688 pc = uint32(len(code)) 689 } 690 } 691 692 if b.jmp != nil && b.jmp.index != b.index+1 { 693 addr := b.jmp.addr 694 if debug { 695 fmt.Fprintf(os.Stderr, "\t%d\tjmp\t\t%d\t; block %d\n", 696 pc, addr, b.jmp.index) 697 } 698 code = append(code, byte(JMP)) 699 code = addUint32(code, addr, 4) 700 } 701 } 702 if len(code) != int(codelen) { 703 panic("internal error: wrong code length") 704 } 705 706 fcomp.fn.pclinetab = pclinetab 707 fcomp.fn.Code = code 708 } 709 710 // addUint32 encodes x as 7-bit little-endian varint. 
711 // TODO(adonovan): opt: steal top two bits of opcode 712 // to encode the number of complete bytes that follow. 713 func addUint32(code []byte, x uint32, min int) []byte { 714 end := len(code) + min 715 for x >= 0x80 { 716 code = append(code, byte(x)|0x80) 717 x >>= 7 718 } 719 code = append(code, byte(x)) 720 // Pad the operand with NOPs to exactly min bytes. 721 for len(code) < end { 722 code = append(code, byte(NOP)) 723 } 724 return code 725 } 726 727 func argLen(x uint32) int { 728 n := 0 729 for x >= 0x80 { 730 n++ 731 x >>= 7 732 } 733 return n + 1 734 } 735 736 // PrintOp prints an instruction. 737 // It is provided for debugging. 738 func PrintOp(fn *Funcode, pc uint32, op Opcode, arg uint32) { 739 if op < OpcodeArgMin { 740 fmt.Fprintf(os.Stderr, "\t%d\t%s\n", pc, op) 741 return 742 } 743 744 var comment string 745 switch op { 746 case CONSTANT: 747 switch x := fn.Prog.Constants[arg].(type) { 748 case string: 749 comment = strconv.Quote(x) 750 default: 751 comment = fmt.Sprint(x) 752 } 753 case MAKEFUNC: 754 comment = fn.Prog.Functions[arg].Name 755 case SETLOCAL, LOCAL: 756 comment = fn.Locals[arg].Name 757 case SETGLOBAL, GLOBAL: 758 comment = fn.Prog.Globals[arg].Name 759 case ATTR, SETFIELD, PREDECLARED, UNIVERSAL: 760 comment = fn.Prog.Names[arg] 761 case FREE: 762 comment = fn.Freevars[arg].Name 763 case CALL, CALL_VAR, CALL_KW, CALL_VAR_KW: 764 comment = fmt.Sprintf("%d pos, %d named", arg>>8, arg&0xff) 765 default: 766 // JMP, CJMP, ITERJMP, MAKETUPLE, MAKELIST, LOAD, UNPACK: 767 // arg is just a number 768 } 769 var buf bytes.Buffer 770 fmt.Fprintf(&buf, "\t%d\t%-10s\t%d", pc, op, arg) 771 if comment != "" { 772 fmt.Fprint(&buf, "\t; ", comment) 773 } 774 fmt.Fprintln(&buf) 775 os.Stderr.Write(buf.Bytes()) 776 } 777 778 // newBlock returns a new block. 779 func (fcomp) newBlock() *block { 780 return &block{index: -1, initialstack: -1} 781 } 782 783 // emit emits an instruction to the current block. 
784 func (fcomp *fcomp) emit(op Opcode) { 785 if op >= OpcodeArgMin { 786 panic("missing arg: " + op.String()) 787 } 788 insn := insn{op: op, line: fcomp.pos.Line} 789 fcomp.block.insns = append(fcomp.block.insns, insn) 790 fcomp.pos.Line = 0 791 } 792 793 // emit1 emits an instruction with an immediate operand. 794 func (fcomp *fcomp) emit1(op Opcode, arg uint32) { 795 if op < OpcodeArgMin { 796 panic("unwanted arg: " + op.String()) 797 } 798 insn := insn{op: op, arg: arg, line: fcomp.pos.Line} 799 fcomp.block.insns = append(fcomp.block.insns, insn) 800 fcomp.pos.Line = 0 801 } 802 803 // jump emits a jump to the specified block. 804 // On return, the current block is unset. 805 func (fcomp *fcomp) jump(b *block) { 806 if b == fcomp.block { 807 panic("self-jump") // unreachable: Skylark has no arbitrary looping constructs 808 } 809 fcomp.block.jmp = b 810 fcomp.block = nil 811 } 812 813 // condjump emits a conditional jump (CJMP or ITERJMP) 814 // to the specified true/false blocks. 815 // (For ITERJMP, the cases are jmp/f/ok and cjmp/t/exhausted.) 816 // On return, the current block is unset. 817 func (fcomp *fcomp) condjump(op Opcode, t, f *block) { 818 if !(op == CJMP || op == ITERJMP) { 819 panic("not a conditional jump: " + op.String()) 820 } 821 fcomp.emit1(op, 0) // fill in address later 822 fcomp.block.cjmp = t 823 fcomp.jump(f) 824 } 825 826 // nameIndex returns the index of the specified name 827 // within the name pool, adding it if necessary. 828 func (pcomp *pcomp) nameIndex(name string) uint32 { 829 index, ok := pcomp.names[name] 830 if !ok { 831 index = uint32(len(pcomp.prog.Names)) 832 pcomp.names[name] = index 833 pcomp.prog.Names = append(pcomp.prog.Names, name) 834 } 835 return index 836 } 837 838 // constantIndex returns the index of the specified constant 839 // within the constant pool, adding it if necessary. 
840 func (pcomp *pcomp) constantIndex(v interface{}) uint32 { 841 index, ok := pcomp.constants[v] 842 if !ok { 843 index = uint32(len(pcomp.prog.Constants)) 844 pcomp.constants[v] = index 845 pcomp.prog.Constants = append(pcomp.prog.Constants, v) 846 } 847 return index 848 } 849 850 // functionIndex returns the index of the specified function 851 // AST the nestedfun pool, adding it if necessary. 852 func (pcomp *pcomp) functionIndex(fn *Funcode) uint32 { 853 index, ok := pcomp.functions[fn] 854 if !ok { 855 index = uint32(len(pcomp.prog.Functions)) 856 pcomp.functions[fn] = index 857 pcomp.prog.Functions = append(pcomp.prog.Functions, fn) 858 } 859 return index 860 } 861 862 // string emits code to push the specified string. 863 func (fcomp *fcomp) string(s string) { 864 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(s)) 865 } 866 867 // setPos sets the current source position. 868 // It should be called prior to any operation that can fail dynamically. 869 // All positions are assumed to belong to the same file. 870 func (fcomp *fcomp) setPos(pos syntax.Position) { 871 fcomp.pos = pos 872 } 873 874 // set emits code to store the top-of-stack value 875 // to the specified local or global variable. 876 func (fcomp *fcomp) set(id *syntax.Ident) { 877 switch resolve.Scope(id.Scope) { 878 case resolve.Local: 879 fcomp.emit1(SETLOCAL, uint32(id.Index)) 880 case resolve.Global: 881 fcomp.emit1(SETGLOBAL, uint32(id.Index)) 882 default: 883 log.Fatalf("%s: set(%s): neither global nor local (%d)", id.NamePos, id.Name, id.Scope) 884 } 885 } 886 887 // lookup emits code to push the value of the specified variable. 
888 func (fcomp *fcomp) lookup(id *syntax.Ident) { 889 switch resolve.Scope(id.Scope) { 890 case resolve.Local: 891 fcomp.setPos(id.NamePos) 892 fcomp.emit1(LOCAL, uint32(id.Index)) 893 case resolve.Free: 894 fcomp.emit1(FREE, uint32(id.Index)) 895 case resolve.Global: 896 fcomp.setPos(id.NamePos) 897 fcomp.emit1(GLOBAL, uint32(id.Index)) 898 case resolve.Predeclared: 899 fcomp.setPos(id.NamePos) 900 fcomp.emit1(PREDECLARED, fcomp.pcomp.nameIndex(id.Name)) 901 case resolve.Universal: 902 fcomp.emit1(UNIVERSAL, fcomp.pcomp.nameIndex(id.Name)) 903 default: 904 log.Fatalf("%s: compiler.lookup(%s): scope = %d", id.NamePos, id.Name, id.Scope) 905 } 906 } 907 908 func (fcomp *fcomp) stmts(stmts []syntax.Stmt) { 909 for _, stmt := range stmts { 910 fcomp.stmt(stmt) 911 } 912 } 913 914 func (fcomp *fcomp) stmt(stmt syntax.Stmt) { 915 switch stmt := stmt.(type) { 916 case *syntax.ExprStmt: 917 if _, ok := stmt.X.(*syntax.Literal); ok { 918 // Opt: don't compile doc comments only to pop them. 919 return 920 } 921 fcomp.expr(stmt.X) 922 fcomp.emit(POP) 923 924 case *syntax.BranchStmt: 925 // Resolver invariant: break/continue appear only within loops. 926 switch stmt.Token { 927 case syntax.PASS: 928 // no-op 929 case syntax.BREAK: 930 b := fcomp.loops[len(fcomp.loops)-1].break_ 931 fcomp.jump(b) 932 fcomp.block = fcomp.newBlock() // dead code 933 case syntax.CONTINUE: 934 b := fcomp.loops[len(fcomp.loops)-1].continue_ 935 fcomp.jump(b) 936 fcomp.block = fcomp.newBlock() // dead code 937 } 938 939 case *syntax.IfStmt: 940 // Keep consistent with CondExpr. 
941 t := fcomp.newBlock() 942 f := fcomp.newBlock() 943 done := fcomp.newBlock() 944 945 fcomp.ifelse(stmt.Cond, t, f) 946 947 fcomp.block = t 948 fcomp.stmts(stmt.True) 949 fcomp.jump(done) 950 951 fcomp.block = f 952 fcomp.stmts(stmt.False) 953 fcomp.jump(done) 954 955 fcomp.block = done 956 957 case *syntax.AssignStmt: 958 switch stmt.Op { 959 case syntax.EQ: 960 // simple assignment: x = y 961 fcomp.expr(stmt.RHS) 962 fcomp.assign(stmt.OpPos, stmt.LHS) 963 964 case syntax.PLUS_EQ, 965 syntax.MINUS_EQ, 966 syntax.STAR_EQ, 967 syntax.SLASH_EQ, 968 syntax.SLASHSLASH_EQ, 969 syntax.PERCENT_EQ, 970 syntax.AMP_EQ, 971 syntax.PIPE_EQ, 972 syntax.CIRCUMFLEX_EQ, 973 syntax.LTLT_EQ, 974 syntax.GTGT_EQ: 975 // augmented assignment: x += y 976 977 var set func() 978 979 // Evaluate "address" of x exactly once to avoid duplicate side-effects. 980 switch lhs := stmt.LHS.(type) { 981 case *syntax.Ident: 982 // x = ... 983 fcomp.lookup(lhs) 984 set = func() { 985 fcomp.set(lhs) 986 } 987 988 case *syntax.IndexExpr: 989 // x[y] = ... 990 fcomp.expr(lhs.X) 991 fcomp.expr(lhs.Y) 992 fcomp.emit(DUP2) 993 fcomp.setPos(lhs.Lbrack) 994 fcomp.emit(INDEX) 995 set = func() { 996 fcomp.setPos(lhs.Lbrack) 997 fcomp.emit(SETINDEX) 998 } 999 1000 case *syntax.DotExpr: 1001 // x.f = ... 1002 fcomp.expr(lhs.X) 1003 fcomp.emit(DUP) 1004 name := fcomp.pcomp.nameIndex(lhs.Name.Name) 1005 fcomp.setPos(lhs.Dot) 1006 fcomp.emit1(ATTR, name) 1007 set = func() { 1008 fcomp.setPos(lhs.Dot) 1009 fcomp.emit1(SETFIELD, name) 1010 } 1011 1012 default: 1013 panic(lhs) 1014 } 1015 1016 fcomp.expr(stmt.RHS) 1017 1018 if stmt.Op == syntax.PLUS_EQ { 1019 // Allow the runtime to optimize list += iterable. 
1020 fcomp.setPos(stmt.OpPos) 1021 fcomp.emit(INPLACE_ADD) 1022 } else { 1023 fcomp.binop(stmt.OpPos, stmt.Op-syntax.PLUS_EQ+syntax.PLUS) 1024 } 1025 set() 1026 } 1027 1028 case *syntax.DefStmt: 1029 fcomp.function(stmt.Def, stmt.Name.Name, &stmt.Function) 1030 fcomp.set(stmt.Name) 1031 1032 case *syntax.ForStmt: 1033 // Keep consistent with ForClause. 1034 head := fcomp.newBlock() 1035 body := fcomp.newBlock() 1036 tail := fcomp.newBlock() 1037 1038 fcomp.expr(stmt.X) 1039 fcomp.setPos(stmt.For) 1040 fcomp.emit(ITERPUSH) 1041 fcomp.jump(head) 1042 1043 fcomp.block = head 1044 fcomp.condjump(ITERJMP, tail, body) 1045 1046 fcomp.block = body 1047 fcomp.assign(stmt.For, stmt.Vars) 1048 fcomp.loops = append(fcomp.loops, loop{break_: tail, continue_: head}) 1049 fcomp.stmts(stmt.Body) 1050 fcomp.loops = fcomp.loops[:len(fcomp.loops)-1] 1051 fcomp.jump(head) 1052 1053 fcomp.block = tail 1054 fcomp.emit(ITERPOP) 1055 1056 case *syntax.ReturnStmt: 1057 if stmt.Result != nil { 1058 fcomp.expr(stmt.Result) 1059 } else { 1060 fcomp.emit(NONE) 1061 } 1062 fcomp.emit(RETURN) 1063 fcomp.block = fcomp.newBlock() // dead code 1064 1065 case *syntax.LoadStmt: 1066 for i := range stmt.From { 1067 fcomp.string(stmt.From[i].Name) 1068 } 1069 module := stmt.Module.Value.(string) 1070 fcomp.pcomp.prog.Loads = append(fcomp.pcomp.prog.Loads, Ident{ 1071 Name: module, 1072 Pos: stmt.Module.TokenPos, 1073 }) 1074 fcomp.string(module) 1075 fcomp.setPos(stmt.Load) 1076 fcomp.emit1(LOAD, uint32(len(stmt.From))) 1077 for i := range stmt.To { 1078 fcomp.emit1(SETGLOBAL, uint32(stmt.To[len(stmt.To)-1-i].Index)) 1079 } 1080 1081 default: 1082 start, _ := stmt.Span() 1083 log.Fatalf("%s: exec: unexpected statement %T", start, stmt) 1084 } 1085 } 1086 1087 // assign implements lhs = rhs for arbitrary expressions lhs. 1088 // RHS is on top of stack, consumed. 
1089 func (fcomp *fcomp) assign(pos syntax.Position, lhs syntax.Expr) { 1090 switch lhs := lhs.(type) { 1091 case *syntax.ParenExpr: 1092 // (lhs) = rhs 1093 fcomp.assign(pos, lhs.X) 1094 1095 case *syntax.Ident: 1096 // x = rhs 1097 fcomp.set(lhs) 1098 1099 case *syntax.TupleExpr: 1100 // x, y = rhs 1101 fcomp.assignSequence(pos, lhs.List) 1102 1103 case *syntax.ListExpr: 1104 // [x, y] = rhs 1105 fcomp.assignSequence(pos, lhs.List) 1106 1107 case *syntax.IndexExpr: 1108 // x[y] = rhs 1109 fcomp.expr(lhs.X) 1110 fcomp.emit(EXCH) 1111 fcomp.expr(lhs.Y) 1112 fcomp.emit(EXCH) 1113 fcomp.setPos(lhs.Lbrack) 1114 fcomp.emit(SETINDEX) 1115 1116 case *syntax.DotExpr: 1117 // x.f = rhs 1118 fcomp.expr(lhs.X) 1119 fcomp.emit(EXCH) 1120 fcomp.setPos(lhs.Dot) 1121 fcomp.emit1(SETFIELD, fcomp.pcomp.nameIndex(lhs.Name.Name)) 1122 1123 default: 1124 panic(lhs) 1125 } 1126 } 1127 1128 func (fcomp *fcomp) assignSequence(pos syntax.Position, lhs []syntax.Expr) { 1129 fcomp.setPos(pos) 1130 fcomp.emit1(UNPACK, uint32(len(lhs))) 1131 for i := range lhs { 1132 fcomp.assign(pos, lhs[i]) 1133 } 1134 } 1135 1136 func (fcomp *fcomp) expr(e syntax.Expr) { 1137 switch e := e.(type) { 1138 case *syntax.ParenExpr: 1139 fcomp.expr(e.X) 1140 1141 case *syntax.Ident: 1142 fcomp.lookup(e) 1143 1144 case *syntax.Literal: 1145 // e.Value is int64, float64, *bigInt, or string. 1146 fcomp.emit1(CONSTANT, fcomp.pcomp.constantIndex(e.Value)) 1147 1148 case *syntax.ListExpr: 1149 for _, x := range e.List { 1150 fcomp.expr(x) 1151 } 1152 fcomp.emit1(MAKELIST, uint32(len(e.List))) 1153 1154 case *syntax.CondExpr: 1155 // Keep consistent with IfStmt. 
1156 t := fcomp.newBlock() 1157 f := fcomp.newBlock() 1158 done := fcomp.newBlock() 1159 1160 fcomp.ifelse(e.Cond, t, f) 1161 1162 fcomp.block = t 1163 fcomp.expr(e.True) 1164 fcomp.jump(done) 1165 1166 fcomp.block = f 1167 fcomp.expr(e.False) 1168 fcomp.jump(done) 1169 1170 fcomp.block = done 1171 1172 case *syntax.IndexExpr: 1173 fcomp.expr(e.X) 1174 fcomp.expr(e.Y) 1175 fcomp.setPos(e.Lbrack) 1176 fcomp.emit(INDEX) 1177 1178 case *syntax.SliceExpr: 1179 fcomp.setPos(e.Lbrack) 1180 fcomp.expr(e.X) 1181 if e.Lo != nil { 1182 fcomp.expr(e.Lo) 1183 } else { 1184 fcomp.emit(NONE) 1185 } 1186 if e.Hi != nil { 1187 fcomp.expr(e.Hi) 1188 } else { 1189 fcomp.emit(NONE) 1190 } 1191 if e.Step != nil { 1192 fcomp.expr(e.Step) 1193 } else { 1194 fcomp.emit(NONE) 1195 } 1196 fcomp.emit(SLICE) 1197 1198 case *syntax.Comprehension: 1199 if e.Curly { 1200 fcomp.emit(MAKEDICT) 1201 } else { 1202 fcomp.emit1(MAKELIST, 0) 1203 } 1204 fcomp.comprehension(e, 0) 1205 1206 case *syntax.TupleExpr: 1207 fcomp.tuple(e.List) 1208 1209 case *syntax.DictExpr: 1210 fcomp.emit(MAKEDICT) 1211 for _, entry := range e.List { 1212 entry := entry.(*syntax.DictEntry) 1213 fcomp.emit(DUP) 1214 fcomp.expr(entry.Key) 1215 fcomp.expr(entry.Value) 1216 fcomp.setPos(entry.Colon) 1217 fcomp.emit(SETDICTUNIQ) 1218 } 1219 1220 case *syntax.UnaryExpr: 1221 fcomp.expr(e.X) 1222 fcomp.setPos(e.OpPos) 1223 switch e.Op { 1224 case syntax.MINUS: 1225 fcomp.emit(UMINUS) 1226 case syntax.PLUS: 1227 fcomp.emit(UPLUS) 1228 case syntax.NOT: 1229 fcomp.emit(NOT) 1230 case syntax.TILDE: 1231 fcomp.emit(TILDE) 1232 default: 1233 log.Fatalf("%s: unexpected unary op: %s", e.OpPos, e.Op) 1234 } 1235 1236 case *syntax.BinaryExpr: 1237 switch e.Op { 1238 // short-circuit operators 1239 // TODO(adonovan): use ifelse to simplify conditions. 
1240 case syntax.OR: 1241 // x or y => if x then x else y 1242 done := fcomp.newBlock() 1243 y := fcomp.newBlock() 1244 1245 fcomp.expr(e.X) 1246 fcomp.emit(DUP) 1247 fcomp.condjump(CJMP, done, y) 1248 1249 fcomp.block = y 1250 fcomp.emit(POP) // discard X 1251 fcomp.expr(e.Y) 1252 fcomp.jump(done) 1253 1254 fcomp.block = done 1255 1256 case syntax.AND: 1257 // x and y => if x then y else x 1258 done := fcomp.newBlock() 1259 y := fcomp.newBlock() 1260 1261 fcomp.expr(e.X) 1262 fcomp.emit(DUP) 1263 fcomp.condjump(CJMP, y, done) 1264 1265 fcomp.block = y 1266 fcomp.emit(POP) // discard X 1267 fcomp.expr(e.Y) 1268 fcomp.jump(done) 1269 1270 fcomp.block = done 1271 1272 case syntax.PLUS: 1273 fcomp.plus(e) 1274 1275 default: 1276 // all other strict binary operator (includes comparisons) 1277 fcomp.expr(e.X) 1278 fcomp.expr(e.Y) 1279 fcomp.binop(e.OpPos, e.Op) 1280 } 1281 1282 case *syntax.DotExpr: 1283 fcomp.expr(e.X) 1284 fcomp.setPos(e.Dot) 1285 fcomp.emit1(ATTR, fcomp.pcomp.nameIndex(e.Name.Name)) 1286 1287 case *syntax.CallExpr: 1288 fcomp.call(e) 1289 1290 case *syntax.LambdaExpr: 1291 fcomp.function(e.Lambda, "lambda", &e.Function) 1292 1293 default: 1294 start, _ := e.Span() 1295 log.Fatalf("%s: unexpected expr %T", start, e) 1296 } 1297 } 1298 1299 type summand struct { 1300 x syntax.Expr 1301 plusPos syntax.Position 1302 } 1303 1304 // plus emits optimized code for ((a+b)+...)+z that avoids naive 1305 // quadratic behavior for strings, tuples, and lists, 1306 // and folds together adjacent literals of the same type. 1307 func (fcomp *fcomp) plus(e *syntax.BinaryExpr) { 1308 // Gather all the right operands of the left tree of plusses. 1309 // A tree (((a+b)+c)+d) becomes args=[a +b +c +d]. 
1310 args := make([]summand, 0, 2) // common case: 2 operands 1311 for plus := e; ; { 1312 args = append(args, summand{unparen(plus.Y), plus.OpPos}) 1313 left := unparen(plus.X) 1314 x, ok := left.(*syntax.BinaryExpr) 1315 if !ok || x.Op != syntax.PLUS { 1316 args = append(args, summand{x: left}) 1317 break 1318 } 1319 plus = x 1320 } 1321 // Reverse args to syntactic order. 1322 for i, n := 0, len(args)/2; i < n; i++ { 1323 j := len(args) - 1 - i 1324 args[i], args[j] = args[j], args[i] 1325 } 1326 1327 // Fold sums of adjacent literals of the same type: ""+"", []+[], ()+(). 1328 out := args[:0] // compact in situ 1329 for i := 0; i < len(args); { 1330 j := i + 1 1331 if code := addable(args[i].x); code != 0 { 1332 for j < len(args) && addable(args[j].x) == code { 1333 j++ 1334 } 1335 if j > i+1 { 1336 args[i].x = add(code, args[i:j]) 1337 } 1338 } 1339 out = append(out, args[i]) 1340 i = j 1341 } 1342 args = out 1343 1344 // Emit code for an n-ary sum (n > 0). 1345 fcomp.expr(args[0].x) 1346 for _, summand := range args[1:] { 1347 fcomp.expr(summand.x) 1348 fcomp.setPos(summand.plusPos) 1349 fcomp.emit(PLUS) 1350 } 1351 1352 // If len(args) > 2, use of an accumulator instead of a chain of 1353 // PLUS operations may be more efficient. 1354 // However, no gain was measured on a workload analogous to Bazel loading; 1355 // TODO(adonovan): opt: re-evaluate on a Bazel analysis-like workload. 1356 // 1357 // We cannot use a single n-ary SUM operation 1358 // a b c SUM<3> 1359 // because we need to report a distinct error for each 1360 // individual '+' operation, so three additional operations are 1361 // needed: 1362 // 1363 // ACCSTART => create buffer and append to it 1364 // ACCUM => append to buffer 1365 // ACCEND => get contents of buffer 1366 // 1367 // For string, list, and tuple values, the interpreter can 1368 // optimize these operations by using a mutable buffer. 
1369 // For all other types, ACCSTART and ACCEND would behave like 1370 // the identity function and ACCUM behaves like PLUS. 1371 // ACCUM must correctly support user-defined operations 1372 // such as list+foo. 1373 // 1374 // fcomp.emit(ACCSTART) 1375 // for _, summand := range args[1:] { 1376 // fcomp.expr(summand.x) 1377 // fcomp.setPos(summand.plusPos) 1378 // fcomp.emit(ACCUM) 1379 // } 1380 // fcomp.emit(ACCEND) 1381 } 1382 1383 // addable reports whether e is a statically addable 1384 // expression: a [s]tring, [l]ist, or [t]uple. 1385 func addable(e syntax.Expr) rune { 1386 switch e := e.(type) { 1387 case *syntax.Literal: 1388 // TODO(adonovan): opt: support INT/FLOAT/BIGINT constant folding. 1389 switch e.Token { 1390 case syntax.STRING: 1391 return 's' 1392 } 1393 case *syntax.ListExpr: 1394 return 'l' 1395 case *syntax.TupleExpr: 1396 return 't' 1397 } 1398 return 0 1399 } 1400 1401 // add returns an expression denoting the sum of args, 1402 // which are all addable values of the type indicated by code. 1403 // The resulting syntax is degenerate, lacking position, etc. 1404 func add(code rune, args []summand) syntax.Expr { 1405 switch code { 1406 case 's': 1407 var buf bytes.Buffer 1408 for _, arg := range args { 1409 buf.WriteString(arg.x.(*syntax.Literal).Value.(string)) 1410 } 1411 return &syntax.Literal{Token: syntax.STRING, Value: buf.String()} 1412 case 'l': 1413 var elems []syntax.Expr 1414 for _, arg := range args { 1415 elems = append(elems, arg.x.(*syntax.ListExpr).List...) 1416 } 1417 return &syntax.ListExpr{List: elems} 1418 case 't': 1419 var elems []syntax.Expr 1420 for _, arg := range args { 1421 elems = append(elems, arg.x.(*syntax.TupleExpr).List...) 
1422 } 1423 return &syntax.TupleExpr{List: elems} 1424 } 1425 panic(code) 1426 } 1427 1428 func unparen(e syntax.Expr) syntax.Expr { 1429 if p, ok := e.(*syntax.ParenExpr); ok { 1430 return unparen(p.X) 1431 } 1432 return e 1433 } 1434 1435 func (fcomp *fcomp) binop(pos syntax.Position, op syntax.Token) { 1436 // TODO(adonovan): simplify by assuming syntax and compiler constants align. 1437 fcomp.setPos(pos) 1438 switch op { 1439 // arithmetic 1440 case syntax.PLUS: 1441 fcomp.emit(PLUS) 1442 case syntax.MINUS: 1443 fcomp.emit(MINUS) 1444 case syntax.STAR: 1445 fcomp.emit(STAR) 1446 case syntax.SLASH: 1447 fcomp.emit(SLASH) 1448 case syntax.SLASHSLASH: 1449 fcomp.emit(SLASHSLASH) 1450 case syntax.PERCENT: 1451 fcomp.emit(PERCENT) 1452 case syntax.AMP: 1453 fcomp.emit(AMP) 1454 case syntax.PIPE: 1455 fcomp.emit(PIPE) 1456 case syntax.CIRCUMFLEX: 1457 fcomp.emit(CIRCUMFLEX) 1458 case syntax.LTLT: 1459 fcomp.emit(LTLT) 1460 case syntax.GTGT: 1461 fcomp.emit(GTGT) 1462 case syntax.IN: 1463 fcomp.emit(IN) 1464 case syntax.NOT_IN: 1465 fcomp.emit(IN) 1466 fcomp.emit(NOT) 1467 1468 // comparisons 1469 case syntax.EQL, 1470 syntax.NEQ, 1471 syntax.GT, 1472 syntax.LT, 1473 syntax.LE, 1474 syntax.GE: 1475 fcomp.emit(Opcode(op-syntax.EQL) + EQL) 1476 1477 default: 1478 log.Fatalf("%s: unexpected binary op: %s", pos, op) 1479 } 1480 } 1481 1482 func (fcomp *fcomp) call(call *syntax.CallExpr) { 1483 // TODO(adonovan): opt: Use optimized path for calling methods 1484 // of built-ins: x.f(...) to avoid materializing a closure. 
1485 // if dot, ok := call.Fcomp.(*syntax.DotExpr); ok { 1486 // fcomp.expr(dot.X) 1487 // fcomp.args(call) 1488 // fcomp.emit1(CALL_ATTR, fcomp.name(dot.Name.Name)) 1489 // return 1490 // } 1491 1492 // usual case 1493 fcomp.expr(call.Fn) 1494 op, arg := fcomp.args(call) 1495 fcomp.setPos(call.Lparen) 1496 fcomp.emit1(op, arg) 1497 } 1498 1499 // args emits code to push a tuple of positional arguments 1500 // and a tuple of named arguments containing alternating keys and values. 1501 // Either or both tuples may be empty (TODO(adonovan): optimize). 1502 func (fcomp *fcomp) args(call *syntax.CallExpr) (op Opcode, arg uint32) { 1503 var callmode int 1504 // Compute the number of each kind of parameter. 1505 var p, n int // number of positional, named arguments 1506 var varargs, kwargs syntax.Expr 1507 for _, arg := range call.Args { 1508 if binary, ok := arg.(*syntax.BinaryExpr); ok && binary.Op == syntax.EQ { 1509 1510 // named argument (name, value) 1511 fcomp.string(binary.X.(*syntax.Ident).Name) 1512 fcomp.expr(binary.Y) 1513 n++ 1514 continue 1515 } 1516 if unary, ok := arg.(*syntax.UnaryExpr); ok { 1517 if unary.Op == syntax.STAR { 1518 callmode |= 1 1519 varargs = unary.X 1520 continue 1521 } else if unary.Op == syntax.STARSTAR { 1522 callmode |= 2 1523 kwargs = unary.X 1524 continue 1525 } 1526 } 1527 1528 // positional argument 1529 fcomp.expr(arg) 1530 p++ 1531 } 1532 1533 // Python2, Python3, and Skylark-in-Java all permit named arguments 1534 // to appear both before and after a *args argument: 1535 // f(1, 2, x=3, *[4], y=5, **dict(z=6)) 1536 // 1537 // However all three implement different argument evaluation orders: 1538 // Python2: 1 2 3 5 4 6 (*args and **kwargs evaluated last) 1539 // Python3: 1 2 4 3 5 6 (positional args evaluated before named args) 1540 // Skylark-in-Java: 1 2 3 4 5 6 (lexical order) 1541 // 1542 // The Skylark-in-Java semantics are clean but hostile to a 1543 // compiler-based implementation because they require that the 1544 // 
compiler emit code for positional, named, *args, more named, 1545 // and *kwargs arguments and provide the callee with a map of 1546 // the terrain. 1547 // 1548 // For now we implement the Python2 semantics, but 1549 // the spec needs to clarify the correct approach. 1550 // Perhaps it would be best if we statically rejected 1551 // named arguments after *args (e.g. y=5) so that the 1552 // Python2 implementation strategy matches lexical order. 1553 // Discussion in github.com/bazelbuild/starlark#13. 1554 1555 // *args 1556 if varargs != nil { 1557 fcomp.expr(varargs) 1558 } 1559 1560 // **kwargs 1561 if kwargs != nil { 1562 fcomp.expr(kwargs) 1563 } 1564 1565 // TODO(adonovan): avoid this with a more flexible encoding. 1566 if p >= 256 || n >= 256 { 1567 log.Fatalf("%s: compiler error: too many arguments in call", call.Lparen) 1568 } 1569 1570 return CALL + Opcode(callmode), uint32(p<<8 | n) 1571 } 1572 1573 func (fcomp *fcomp) tuple(elems []syntax.Expr) { 1574 for _, elem := range elems { 1575 fcomp.expr(elem) 1576 } 1577 fcomp.emit1(MAKETUPLE, uint32(len(elems))) 1578 } 1579 1580 func (fcomp *fcomp) comprehension(comp *syntax.Comprehension, clauseIndex int) { 1581 if clauseIndex == len(comp.Clauses) { 1582 fcomp.emit(DUP) // accumulator 1583 if comp.Curly { 1584 // dict: {k:v for ...} 1585 // Parser ensures that body is of form k:v. 1586 // Python-style set comprehensions {body for vars in x} 1587 // are not supported. 
1588 entry := comp.Body.(*syntax.DictEntry) 1589 fcomp.expr(entry.Key) 1590 fcomp.expr(entry.Value) 1591 fcomp.setPos(entry.Colon) 1592 fcomp.emit(SETDICT) 1593 } else { 1594 // list: [body for vars in x] 1595 fcomp.expr(comp.Body) 1596 fcomp.emit(APPEND) 1597 } 1598 return 1599 } 1600 1601 clause := comp.Clauses[clauseIndex] 1602 switch clause := clause.(type) { 1603 case *syntax.IfClause: 1604 t := fcomp.newBlock() 1605 done := fcomp.newBlock() 1606 fcomp.ifelse(clause.Cond, t, done) 1607 1608 fcomp.block = t 1609 fcomp.comprehension(comp, clauseIndex+1) 1610 fcomp.jump(done) 1611 1612 fcomp.block = done 1613 return 1614 1615 case *syntax.ForClause: 1616 // Keep consistent with ForStmt. 1617 head := fcomp.newBlock() 1618 body := fcomp.newBlock() 1619 tail := fcomp.newBlock() 1620 1621 fcomp.expr(clause.X) 1622 fcomp.setPos(clause.For) 1623 fcomp.emit(ITERPUSH) 1624 fcomp.jump(head) 1625 1626 fcomp.block = head 1627 fcomp.condjump(ITERJMP, tail, body) 1628 1629 fcomp.block = body 1630 fcomp.assign(clause.For, clause.Vars) 1631 fcomp.comprehension(comp, clauseIndex+1) 1632 fcomp.jump(head) 1633 1634 fcomp.block = tail 1635 fcomp.emit(ITERPOP) 1636 return 1637 } 1638 1639 start, _ := clause.Span() 1640 log.Fatalf("%s: unexpected comprehension clause %T", start, clause) 1641 } 1642 1643 func (fcomp *fcomp) function(pos syntax.Position, name string, f *syntax.Function) { 1644 // Evalution of the elements of both MAKETUPLEs may fail, 1645 // so record the position. 1646 fcomp.setPos(pos) 1647 1648 // Generate tuple of parameter defaults. 1649 n := 0 1650 for _, param := range f.Params { 1651 if binary, ok := param.(*syntax.BinaryExpr); ok { 1652 fcomp.expr(binary.Y) 1653 n++ 1654 } 1655 } 1656 fcomp.emit1(MAKETUPLE, uint32(n)) 1657 1658 // Capture the values of the function's 1659 // free variables from the lexical environment. 
1660 for _, freevar := range f.FreeVars { 1661 fcomp.lookup(freevar) 1662 } 1663 fcomp.emit1(MAKETUPLE, uint32(len(f.FreeVars))) 1664 1665 funcode := fcomp.pcomp.function(name, pos, f.Body, f.Locals, f.FreeVars) 1666 1667 if debug { 1668 // TODO(adonovan): do compilations sequentially not as a tree, 1669 // to make the log easier to read. 1670 // Simplify by identifying Toplevel and functionIndex 0. 1671 fmt.Fprintf(os.Stderr, "resuming %s @ %s\n", fcomp.fn.Name, fcomp.pos) 1672 } 1673 1674 funcode.NumParams = len(f.Params) 1675 funcode.HasVarargs = f.HasVarargs 1676 funcode.HasKwargs = f.HasKwargs 1677 fcomp.emit1(MAKEFUNC, fcomp.pcomp.functionIndex(funcode)) 1678 } 1679 1680 // ifelse emits a Boolean control flow decision. 1681 // On return, the current block is unset. 1682 func (fcomp *fcomp) ifelse(cond syntax.Expr, t, f *block) { 1683 switch cond := cond.(type) { 1684 case *syntax.UnaryExpr: 1685 if cond.Op == syntax.NOT { 1686 // if not x then goto t else goto f 1687 // => 1688 // if x then goto f else goto t 1689 fcomp.ifelse(cond.X, f, t) 1690 return 1691 } 1692 1693 case *syntax.BinaryExpr: 1694 switch cond.Op { 1695 case syntax.AND: 1696 // if x and y then goto t else goto f 1697 // => 1698 // if x then ifelse(y, t, f) else goto f 1699 fcomp.expr(cond.X) 1700 y := fcomp.newBlock() 1701 fcomp.condjump(CJMP, y, f) 1702 1703 fcomp.block = y 1704 fcomp.ifelse(cond.Y, t, f) 1705 return 1706 1707 case syntax.OR: 1708 // if x or y then goto t else goto f 1709 // => 1710 // if x then goto t else ifelse(y, t, f) 1711 fcomp.expr(cond.X) 1712 y := fcomp.newBlock() 1713 fcomp.condjump(CJMP, t, y) 1714 1715 fcomp.block = y 1716 fcomp.ifelse(cond.Y, t, f) 1717 return 1718 case syntax.NOT_IN: 1719 // if x not in y then goto t else goto f 1720 // => 1721 // if x in y then goto f else goto t 1722 copy := *cond 1723 copy.Op = syntax.IN 1724 fcomp.expr(©) 1725 fcomp.condjump(CJMP, f, t) 1726 return 1727 } 1728 } 1729 1730 // general case 1731 fcomp.expr(cond) 1732 
fcomp.condjump(CJMP, t, f) 1733 }