github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/interp/compiler.go (about) 1 package interp 2 3 // This file compiles the LLVM IR to a form that's easy to efficiently 4 // interpret. 5 6 import ( 7 "strings" 8 9 "tinygo.org/x/go-llvm" 10 ) 11 12 // A function is a compiled LLVM function, which means that interpreting it 13 // avoids most CGo calls necessary. This is done in a separate step so the 14 // result can be cached. 15 // Functions are in SSA form, just like the LLVM version if it. The first block 16 // (blocks[0]) is the entry block. 17 type function struct { 18 llvmFn llvm.Value 19 name string // precalculated llvmFn.Name() 20 params []llvm.Value // precalculated llvmFn.Params() 21 blocks []*basicBlock 22 locals map[llvm.Value]int 23 } 24 25 // basicBlock represents a LLVM basic block and contains a slice of 26 // instructions. The last instruction must be a terminator instruction. 27 type basicBlock struct { 28 phiNodes []instruction 29 instructions []instruction 30 } 31 32 // instruction is a precompiled LLVM IR instruction. The operands can be either 33 // an already known value (such as literalValue or pointerValue) but can also be 34 // the special localValue, which means that the value is a function parameter or 35 // is produced by another instruction in the function. In that case, the 36 // interpreter will replace the operand with that local value. 37 type instruction struct { 38 opcode llvm.Opcode 39 localIndex int 40 operands []value 41 llvmInst llvm.Value 42 name string 43 } 44 45 // String returns a nice human-readable version of this instruction. 46 func (inst *instruction) String() string { 47 operands := make([]string, len(inst.operands)) 48 for i, op := range inst.operands { 49 operands[i] = op.String() 50 } 51 52 name := "" 53 if int(inst.opcode) < len(instructionNameMap) { 54 name = instructionNameMap[inst.opcode] 55 } 56 if name == "" { 57 name = "<unknown op>" 58 } 59 return name + " " + strings.Join(operands, " ") 60 } 61 62 // compileFunction compiles a given LLVM function to an easier to interpret 63 // version of the function. As far as possible, all operands are preprocessed so 64 // that the interpreter doesn't have to call into LLVM. 65 func (r *runner) compileFunction(llvmFn llvm.Value) *function { 66 fn := &function{ 67 llvmFn: llvmFn, 68 name: llvmFn.Name(), 69 params: llvmFn.Params(), 70 locals: make(map[llvm.Value]int), 71 } 72 if llvmFn.IsDeclaration() { 73 // Nothing to do. 74 return fn 75 } 76 77 for i, param := range fn.params { 78 fn.locals[param] = i 79 } 80 81 // Make a map of all the blocks, to quickly find the block number for a 82 // given branch instruction. 83 blockIndices := make(map[llvm.Value]int) 84 for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) { 85 index := len(blockIndices) 86 blockIndices[llvmBB.AsValue()] = index 87 } 88 89 // Compile every block. 90 for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) { 91 bb := &basicBlock{} 92 fn.blocks = append(fn.blocks, bb) 93 94 // Compile every instruction in the block. 95 for llvmInst := llvmBB.FirstInstruction(); !llvmInst.IsNil(); llvmInst = llvm.NextInstruction(llvmInst) { 96 // Create instruction skeleton. 97 opcode := llvmInst.InstructionOpcode() 98 inst := instruction{ 99 opcode: opcode, 100 localIndex: len(fn.locals), 101 llvmInst: llvmInst, 102 } 103 fn.locals[llvmInst] = len(fn.locals) 104 105 // Add operands specific for this instruction. 106 switch opcode { 107 case llvm.Ret: 108 // Return instruction, which can either be a `ret void` (no 109 // return value) or return a value. 110 numOperands := llvmInst.OperandsCount() 111 if numOperands != 0 { 112 inst.operands = []value{ 113 r.getValue(llvmInst.Operand(0)), 114 } 115 } 116 case llvm.Br: 117 // Branch instruction. Can be either a conditional branch (with 118 // 3 operands) or unconditional branch (with just one basic 119 // block operand). 120 numOperands := llvmInst.OperandsCount() 121 switch numOperands { 122 case 3: 123 // Conditional jump to one of two blocks. Comparable to an 124 // if/else in procedural languages. 125 thenBB := llvmInst.Operand(2) 126 elseBB := llvmInst.Operand(1) 127 inst.operands = []value{ 128 r.getValue(llvmInst.Operand(0)), 129 literalValue{uint32(blockIndices[thenBB])}, 130 literalValue{uint32(blockIndices[elseBB])}, 131 } 132 case 1: 133 // Unconditional jump to a target basic block. Comparable to 134 // a jump in C and Go. 135 jumpBB := llvmInst.Operand(0) 136 inst.operands = []value{ 137 literalValue{uint32(blockIndices[jumpBB])}, 138 } 139 default: 140 panic("unknown number of operands") 141 } 142 case llvm.Switch: 143 // A switch is an array of (value, label) pairs, of which the 144 // first one indicates the to-switch value and the default 145 // label. 146 numOperands := llvmInst.OperandsCount() 147 for i := 0; i < numOperands; i += 2 { 148 inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i))) 149 inst.operands = append(inst.operands, literalValue{uint32(blockIndices[llvmInst.Operand(i+1)])}) 150 } 151 case llvm.PHI: 152 inst.name = llvmInst.Name() 153 incomingCount := inst.llvmInst.IncomingCount() 154 for i := 0; i < incomingCount; i++ { 155 incomingBB := inst.llvmInst.IncomingBlock(i) 156 incomingValue := inst.llvmInst.IncomingValue(i) 157 inst.operands = append(inst.operands, 158 literalValue{uint32(blockIndices[incomingBB.AsValue()])}, 159 r.getValue(incomingValue), 160 ) 161 } 162 case llvm.Select: 163 // Select is a special instruction that is much like a ternary 164 // operator. It produces operand 1 or 2 based on the boolean 165 // that is operand 0. 166 inst.name = llvmInst.Name() 167 inst.operands = []value{ 168 r.getValue(llvmInst.Operand(0)), 169 r.getValue(llvmInst.Operand(1)), 170 r.getValue(llvmInst.Operand(2)), 171 } 172 case llvm.Call: 173 // Call is a regular function call but could also be a runtime 174 // intrinsic. Some runtime intrinsics are treated specially by 175 // the interpreter, such as runtime.alloc. We don't 176 // differentiate between them here because these calls may also 177 // need to be run at runtime, in which case they should all be 178 // created in the same way. 179 llvmCalledValue := llvmInst.CalledValue() 180 if !llvmCalledValue.IsAFunction().IsNil() { 181 name := llvmCalledValue.Name() 182 if name == "llvm.dbg.value" || strings.HasPrefix(name, "llvm.lifetime.") { 183 // These intrinsics should not be interpreted, they are not 184 // relevant to the execution of this function. 185 continue 186 } 187 } 188 inst.name = llvmInst.Name() 189 numOperands := llvmInst.OperandsCount() 190 inst.operands = append(inst.operands, r.getValue(llvmCalledValue)) 191 for i := 0; i < numOperands-1; i++ { 192 inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i))) 193 } 194 case llvm.Load: 195 // Load instruction. The interpreter will load from the 196 // appropriate memory view. 197 // Also provide the memory size to be loaded, which is necessary 198 // with a lack of type information. 199 inst.name = llvmInst.Name() 200 inst.operands = []value{ 201 r.getValue(llvmInst.Operand(0)), 202 literalValue{r.targetData.TypeAllocSize(llvmInst.Type())}, 203 } 204 case llvm.Store: 205 // Store instruction. The interpreter will create a new object 206 // in the memory view of the function invocation and store to 207 // that, to make it possible to roll back this store. 208 inst.operands = []value{ 209 r.getValue(llvmInst.Operand(0)), 210 r.getValue(llvmInst.Operand(1)), 211 } 212 case llvm.Alloca: 213 // Alloca allocates stack space for local variables. 214 numElements := r.getValue(inst.llvmInst.Operand(0)).(literalValue).value.(uint32) 215 elementSize := r.targetData.TypeAllocSize(inst.llvmInst.AllocatedType()) 216 inst.operands = []value{ 217 literalValue{elementSize * uint64(numElements)}, 218 } 219 case llvm.GetElementPtr: 220 // GetElementPtr does pointer arithmetic. 221 inst.name = llvmInst.Name() 222 ptr := llvmInst.Operand(0) 223 n := llvmInst.OperandsCount() 224 elementType := llvmInst.GEPSourceElementType() 225 // gep: [source ptr, dest value size, pairs of indices...] 226 inst.operands = []value{ 227 r.getValue(ptr), 228 r.getValue(llvmInst.Operand(1)), 229 literalValue{r.targetData.TypeAllocSize(elementType)}, 230 } 231 for i := 2; i < n; i++ { 232 operand := r.getValue(llvmInst.Operand(i)) 233 switch elementType.TypeKind() { 234 case llvm.StructTypeKind: 235 index := operand.(literalValue).value.(uint32) 236 elementOffset := r.targetData.ElementOffset(elementType, int(index)) 237 // Encode operands in a special way. The elementOffset 238 // is just the offset in bytes. The elementSize is a 239 // negative number (when cast to a int64) by flipping 240 // all the bits. This allows the interpreter to detect 241 // this is a struct field and that it should not 242 // multiply it with the elementOffset to get the offset. 243 // It is important for the interpreter to know the 244 // struct field index for when the GEP must be done at 245 // runtime. 246 inst.operands = append(inst.operands, literalValue{elementOffset}, literalValue{^uint64(index)}) 247 elementType = elementType.StructElementTypes()[index] 248 case llvm.ArrayTypeKind: 249 elementType = elementType.ElementType() 250 elementSize := r.targetData.TypeAllocSize(elementType) 251 elementSizeOperand := literalValue{elementSize} 252 // Add operand * elementSizeOperand bytes to the pointer. 253 inst.operands = append(inst.operands, operand, elementSizeOperand) 254 default: 255 // This should be unreachable. 256 panic("unknown type: " + elementType.String()) 257 } 258 } 259 case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt: 260 // Bitcasts are usually used to cast a pointer from one type to 261 // another leaving the pointer itself intact. 262 inst.name = llvmInst.Name() 263 inst.operands = []value{ 264 r.getValue(llvmInst.Operand(0)), 265 } 266 case llvm.ExtractValue: 267 inst.name = llvmInst.Name() 268 agg := llvmInst.Operand(0) 269 var offset uint64 270 indexingType := agg.Type() 271 for _, index := range inst.llvmInst.Indices() { 272 switch indexingType.TypeKind() { 273 case llvm.StructTypeKind: 274 offset += r.targetData.ElementOffset(indexingType, int(index)) 275 indexingType = indexingType.StructElementTypes()[index] 276 case llvm.ArrayTypeKind: 277 indexingType = indexingType.ElementType() 278 elementSize := r.targetData.TypeAllocSize(indexingType) 279 offset += elementSize * uint64(index) 280 default: 281 panic("unknown type kind") // unreachable 282 } 283 } 284 size := r.targetData.TypeAllocSize(inst.llvmInst.Type()) 285 // extractvalue [agg, byteOffset, byteSize] 286 inst.operands = []value{ 287 r.getValue(agg), 288 literalValue{offset}, 289 literalValue{size}, 290 } 291 case llvm.InsertValue: 292 inst.name = llvmInst.Name() 293 agg := llvmInst.Operand(0) 294 var offset uint64 295 indexingType := agg.Type() 296 for _, index := range inst.llvmInst.Indices() { 297 switch indexingType.TypeKind() { 298 case llvm.StructTypeKind: 299 offset += r.targetData.ElementOffset(indexingType, int(index)) 300 indexingType = indexingType.StructElementTypes()[index] 301 case llvm.ArrayTypeKind: 302 indexingType = indexingType.ElementType() 303 elementSize := r.targetData.TypeAllocSize(indexingType) 304 offset += elementSize * uint64(index) 305 default: 306 panic("unknown type kind") // unreachable 307 } 308 } 309 // insertvalue [agg, elt, byteOffset] 310 inst.operands = []value{ 311 r.getValue(agg), 312 r.getValue(llvmInst.Operand(1)), 313 literalValue{offset}, 314 } 315 case llvm.ICmp: 316 inst.name = llvmInst.Name() 317 inst.operands = []value{ 318 r.getValue(llvmInst.Operand(0)), 319 r.getValue(llvmInst.Operand(1)), 320 literalValue{uint8(llvmInst.IntPredicate())}, 321 } 322 case llvm.FCmp: 323 inst.name = llvmInst.Name() 324 inst.operands = []value{ 325 r.getValue(llvmInst.Operand(0)), 326 r.getValue(llvmInst.Operand(1)), 327 literalValue{uint8(llvmInst.FloatPredicate())}, 328 } 329 case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor: 330 // Integer binary operations. 331 inst.name = llvmInst.Name() 332 inst.operands = []value{ 333 r.getValue(llvmInst.Operand(0)), 334 r.getValue(llvmInst.Operand(1)), 335 } 336 case llvm.SExt, llvm.ZExt, llvm.Trunc: 337 // Extend or shrink an integer size. 338 // No sign extension going on so easy to do. 339 // zext: [value, bitwidth] 340 // trunc: [value, bitwidth] 341 inst.name = llvmInst.Name() 342 inst.operands = []value{ 343 r.getValue(llvmInst.Operand(0)), 344 literalValue{uint64(llvmInst.Type().IntTypeWidth())}, 345 } 346 case llvm.SIToFP, llvm.UIToFP: 347 // Convert an integer to a floating point instruction. 348 // opcode: [value, bitwidth] 349 inst.name = llvmInst.Name() 350 inst.operands = []value{ 351 r.getValue(llvmInst.Operand(0)), 352 literalValue{uint64(r.targetData.TypeAllocSize(llvmInst.Type()) * 8)}, 353 } 354 default: 355 // Unknown instruction, which is already set in inst.opcode so 356 // is detectable. 357 // This error is handled when actually trying to interpret this 358 // instruction (to not trigger on code that won't be executed). 359 } 360 if inst.opcode == llvm.PHI { 361 // PHI nodes need to be treated specially, see the comment in 362 // interpreter.go for an explanation. 363 bb.phiNodes = append(bb.phiNodes, inst) 364 } else { 365 bb.instructions = append(bb.instructions, inst) 366 } 367 } 368 } 369 return fn 370 } 371 372 // instructionNameMap maps from instruction opcodes to instruction names. This 373 // can be useful for debug logging. 374 var instructionNameMap = [...]string{ 375 llvm.Ret: "ret", 376 llvm.Br: "br", 377 llvm.Switch: "switch", 378 llvm.IndirectBr: "indirectbr", 379 llvm.Invoke: "invoke", 380 llvm.Unreachable: "unreachable", 381 382 // Standard Binary Operators 383 llvm.Add: "add", 384 llvm.FAdd: "fadd", 385 llvm.Sub: "sub", 386 llvm.FSub: "fsub", 387 llvm.Mul: "mul", 388 llvm.FMul: "fmul", 389 llvm.UDiv: "udiv", 390 llvm.SDiv: "sdiv", 391 llvm.FDiv: "fdiv", 392 llvm.URem: "urem", 393 llvm.SRem: "srem", 394 llvm.FRem: "frem", 395 396 // Logical Operators 397 llvm.Shl: "shl", 398 llvm.LShr: "lshr", 399 llvm.AShr: "ashr", 400 llvm.And: "and", 401 llvm.Or: "or", 402 llvm.Xor: "xor", 403 404 // Memory Operators 405 llvm.Alloca: "alloca", 406 llvm.Load: "load", 407 llvm.Store: "store", 408 llvm.GetElementPtr: "getelementptr", 409 410 // Cast Operators 411 llvm.Trunc: "trunc", 412 llvm.ZExt: "zext", 413 llvm.SExt: "sext", 414 llvm.FPToUI: "fptoui", 415 llvm.FPToSI: "fptosi", 416 llvm.UIToFP: "uitofp", 417 llvm.SIToFP: "sitofp", 418 llvm.FPTrunc: "fptrunc", 419 llvm.FPExt: "fpext", 420 llvm.PtrToInt: "ptrtoint", 421 llvm.IntToPtr: "inttoptr", 422 llvm.BitCast: "bitcast", 423 424 // Other Operators 425 llvm.ICmp: "icmp", 426 llvm.FCmp: "fcmp", 427 llvm.PHI: "phi", 428 llvm.Call: "call", 429 llvm.Select: "select", 430 llvm.VAArg: "vaarg", 431 llvm.ExtractElement: "extractelement", 432 llvm.InsertElement: "insertelement", 433 llvm.ShuffleVector: "shufflevector", 434 llvm.ExtractValue: "extractvalue", 435 llvm.InsertValue: "insertvalue", 436 }