github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/interp/compiler.go (about)

     1  package interp
     2  
     3  // This file compiles the LLVM IR to a form that's easy to efficiently
     4  // interpret.
     5  
     6  import (
     7  	"strings"
     8  
     9  	"tinygo.org/x/go-llvm"
    10  )
    11  
// A function is a compiled LLVM function, which means that interpreting it
// avoids most CGo calls necessary. This is done in a separate step so the
// result can be cached.
// Functions are in SSA form, just like the LLVM version of it. The first block
// (blocks[0]) is the entry block.
type function struct {
	// llvmFn is the LLVM function this function was compiled from.
	llvmFn llvm.Value
	name   string       // precalculated llvmFn.Name()
	params []llvm.Value // precalculated llvmFn.Params()
	// blocks holds the compiled basic blocks in the order in which they
	// appear in the LLVM function. It is empty for function declarations.
	blocks []*basicBlock
	// locals maps every parameter and every instruction of the function to
	// its local index. Parameters get the first indices (in parameter
	// order), instructions follow in the order they are compiled. It is
	// left empty for function declarations.
	locals map[llvm.Value]int
}
    24  
// basicBlock represents a LLVM basic block and contains a slice of
// instructions. The last instruction must be a terminator instruction.
// PHI nodes are not part of the instructions slice: compileFunction stores
// them separately in phiNodes.
type basicBlock struct {
	phiNodes     []instruction // the PHI instructions of this block
	instructions []instruction // all other instructions, in block order
}
    31  
// instruction is a precompiled LLVM IR instruction. The operands can be either
// an already known value (such as literalValue or pointerValue) but can also be
// the special localValue, which means that the value is a function parameter or
// is produced by another instruction in the function. In that case, the
// interpreter will replace the operand with that local value.
type instruction struct {
	opcode     llvm.Opcode // LLVM opcode of this instruction
	localIndex int         // index of this instruction in function.locals
	operands   []value     // preprocessed operands, layout depends on opcode
	llvmInst   llvm.Value  // the LLVM instruction this was compiled from
	name       string      // llvmInst.Name(), only filled in for some opcodes
}
    44  
    45  // String returns a nice human-readable version of this instruction.
    46  func (inst *instruction) String() string {
    47  	operands := make([]string, len(inst.operands))
    48  	for i, op := range inst.operands {
    49  		operands[i] = op.String()
    50  	}
    51  
    52  	name := ""
    53  	if int(inst.opcode) < len(instructionNameMap) {
    54  		name = instructionNameMap[inst.opcode]
    55  	}
    56  	if name == "" {
    57  		name = "<unknown op>"
    58  	}
    59  	return name + " " + strings.Join(operands, " ")
    60  }
    61  
    62  // compileFunction compiles a given LLVM function to an easier to interpret
    63  // version of the function. As far as possible, all operands are preprocessed so
    64  // that the interpreter doesn't have to call into LLVM.
    65  func (r *runner) compileFunction(llvmFn llvm.Value) *function {
    66  	fn := &function{
    67  		llvmFn: llvmFn,
    68  		name:   llvmFn.Name(),
    69  		params: llvmFn.Params(),
    70  		locals: make(map[llvm.Value]int),
    71  	}
    72  	if llvmFn.IsDeclaration() {
    73  		// Nothing to do.
    74  		return fn
    75  	}
    76  
    77  	for i, param := range fn.params {
    78  		fn.locals[param] = i
    79  	}
    80  
    81  	// Make a map of all the blocks, to quickly find the block number for a
    82  	// given branch instruction.
    83  	blockIndices := make(map[llvm.Value]int)
    84  	for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) {
    85  		index := len(blockIndices)
    86  		blockIndices[llvmBB.AsValue()] = index
    87  	}
    88  
    89  	// Compile every block.
    90  	for llvmBB := llvmFn.FirstBasicBlock(); !llvmBB.IsNil(); llvmBB = llvm.NextBasicBlock(llvmBB) {
    91  		bb := &basicBlock{}
    92  		fn.blocks = append(fn.blocks, bb)
    93  
    94  		// Compile every instruction in the block.
    95  		for llvmInst := llvmBB.FirstInstruction(); !llvmInst.IsNil(); llvmInst = llvm.NextInstruction(llvmInst) {
    96  			// Create instruction skeleton.
    97  			opcode := llvmInst.InstructionOpcode()
    98  			inst := instruction{
    99  				opcode:     opcode,
   100  				localIndex: len(fn.locals),
   101  				llvmInst:   llvmInst,
   102  			}
   103  			fn.locals[llvmInst] = len(fn.locals)
   104  
   105  			// Add operands specific for this instruction.
   106  			switch opcode {
   107  			case llvm.Ret:
   108  				// Return instruction, which can either be a `ret void` (no
   109  				// return value) or return a value.
   110  				numOperands := llvmInst.OperandsCount()
   111  				if numOperands != 0 {
   112  					inst.operands = []value{
   113  						r.getValue(llvmInst.Operand(0)),
   114  					}
   115  				}
   116  			case llvm.Br:
   117  				// Branch instruction. Can be either a conditional branch (with
   118  				// 3 operands) or unconditional branch (with just one basic
   119  				// block operand).
   120  				numOperands := llvmInst.OperandsCount()
   121  				switch numOperands {
   122  				case 3:
   123  					// Conditional jump to one of two blocks. Comparable to an
   124  					// if/else in procedural languages.
   125  					thenBB := llvmInst.Operand(2)
   126  					elseBB := llvmInst.Operand(1)
   127  					inst.operands = []value{
   128  						r.getValue(llvmInst.Operand(0)),
   129  						literalValue{uint32(blockIndices[thenBB])},
   130  						literalValue{uint32(blockIndices[elseBB])},
   131  					}
   132  				case 1:
   133  					// Unconditional jump to a target basic block. Comparable to
   134  					// a jump in C and Go.
   135  					jumpBB := llvmInst.Operand(0)
   136  					inst.operands = []value{
   137  						literalValue{uint32(blockIndices[jumpBB])},
   138  					}
   139  				default:
   140  					panic("unknown number of operands")
   141  				}
   142  			case llvm.Switch:
   143  				// A switch is an array of (value, label) pairs, of which the
   144  				// first one indicates the to-switch value and the default
   145  				// label.
   146  				numOperands := llvmInst.OperandsCount()
   147  				for i := 0; i < numOperands; i += 2 {
   148  					inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i)))
   149  					inst.operands = append(inst.operands, literalValue{uint32(blockIndices[llvmInst.Operand(i+1)])})
   150  				}
   151  			case llvm.PHI:
   152  				inst.name = llvmInst.Name()
   153  				incomingCount := inst.llvmInst.IncomingCount()
   154  				for i := 0; i < incomingCount; i++ {
   155  					incomingBB := inst.llvmInst.IncomingBlock(i)
   156  					incomingValue := inst.llvmInst.IncomingValue(i)
   157  					inst.operands = append(inst.operands,
   158  						literalValue{uint32(blockIndices[incomingBB.AsValue()])},
   159  						r.getValue(incomingValue),
   160  					)
   161  				}
   162  			case llvm.Select:
   163  				// Select is a special instruction that is much like a ternary
   164  				// operator. It produces operand 1 or 2 based on the boolean
   165  				// that is operand 0.
   166  				inst.name = llvmInst.Name()
   167  				inst.operands = []value{
   168  					r.getValue(llvmInst.Operand(0)),
   169  					r.getValue(llvmInst.Operand(1)),
   170  					r.getValue(llvmInst.Operand(2)),
   171  				}
   172  			case llvm.Call:
   173  				// Call is a regular function call but could also be a runtime
   174  				// intrinsic. Some runtime intrinsics are treated specially by
   175  				// the interpreter, such as runtime.alloc. We don't
   176  				// differentiate between them here because these calls may also
   177  				// need to be run at runtime, in which case they should all be
   178  				// created in the same way.
   179  				llvmCalledValue := llvmInst.CalledValue()
   180  				if !llvmCalledValue.IsAFunction().IsNil() {
   181  					name := llvmCalledValue.Name()
   182  					if name == "llvm.dbg.value" || strings.HasPrefix(name, "llvm.lifetime.") {
   183  						// These intrinsics should not be interpreted, they are not
   184  						// relevant to the execution of this function.
   185  						continue
   186  					}
   187  				}
   188  				inst.name = llvmInst.Name()
   189  				numOperands := llvmInst.OperandsCount()
   190  				inst.operands = append(inst.operands, r.getValue(llvmCalledValue))
   191  				for i := 0; i < numOperands-1; i++ {
   192  					inst.operands = append(inst.operands, r.getValue(llvmInst.Operand(i)))
   193  				}
   194  			case llvm.Load:
   195  				// Load instruction. The interpreter will load from the
   196  				// appropriate memory view.
   197  				// Also provide the memory size to be loaded, which is necessary
   198  				// with a lack of type information.
   199  				inst.name = llvmInst.Name()
   200  				inst.operands = []value{
   201  					r.getValue(llvmInst.Operand(0)),
   202  					literalValue{r.targetData.TypeAllocSize(llvmInst.Type())},
   203  				}
   204  			case llvm.Store:
   205  				// Store instruction. The interpreter will create a new object
   206  				// in the memory view of the function invocation and store to
   207  				// that, to make it possible to roll back this store.
   208  				inst.operands = []value{
   209  					r.getValue(llvmInst.Operand(0)),
   210  					r.getValue(llvmInst.Operand(1)),
   211  				}
   212  			case llvm.Alloca:
   213  				// Alloca allocates stack space for local variables.
   214  				numElements := r.getValue(inst.llvmInst.Operand(0)).(literalValue).value.(uint32)
   215  				elementSize := r.targetData.TypeAllocSize(inst.llvmInst.AllocatedType())
   216  				inst.operands = []value{
   217  					literalValue{elementSize * uint64(numElements)},
   218  				}
   219  			case llvm.GetElementPtr:
   220  				// GetElementPtr does pointer arithmetic.
   221  				inst.name = llvmInst.Name()
   222  				ptr := llvmInst.Operand(0)
   223  				n := llvmInst.OperandsCount()
   224  				elementType := llvmInst.GEPSourceElementType()
   225  				// gep: [source ptr, dest value size, pairs of indices...]
   226  				inst.operands = []value{
   227  					r.getValue(ptr),
   228  					r.getValue(llvmInst.Operand(1)),
   229  					literalValue{r.targetData.TypeAllocSize(elementType)},
   230  				}
   231  				for i := 2; i < n; i++ {
   232  					operand := r.getValue(llvmInst.Operand(i))
   233  					switch elementType.TypeKind() {
   234  					case llvm.StructTypeKind:
   235  						index := operand.(literalValue).value.(uint32)
   236  						elementOffset := r.targetData.ElementOffset(elementType, int(index))
   237  						// Encode operands in a special way. The elementOffset
   238  						// is just the offset in bytes. The elementSize is a
   239  						// negative number (when cast to a int64) by flipping
   240  						// all the bits. This allows the interpreter to detect
   241  						// this is a struct field and that it should not
   242  						// multiply it with the elementOffset to get the offset.
   243  						// It is important for the interpreter to know the
   244  						// struct field index for when the GEP must be done at
   245  						// runtime.
   246  						inst.operands = append(inst.operands, literalValue{elementOffset}, literalValue{^uint64(index)})
   247  						elementType = elementType.StructElementTypes()[index]
   248  					case llvm.ArrayTypeKind:
   249  						elementType = elementType.ElementType()
   250  						elementSize := r.targetData.TypeAllocSize(elementType)
   251  						elementSizeOperand := literalValue{elementSize}
   252  						// Add operand * elementSizeOperand bytes to the pointer.
   253  						inst.operands = append(inst.operands, operand, elementSizeOperand)
   254  					default:
   255  						// This should be unreachable.
   256  						panic("unknown type: " + elementType.String())
   257  					}
   258  				}
   259  			case llvm.BitCast, llvm.IntToPtr, llvm.PtrToInt:
   260  				// Bitcasts are usually used to cast a pointer from one type to
   261  				// another leaving the pointer itself intact.
   262  				inst.name = llvmInst.Name()
   263  				inst.operands = []value{
   264  					r.getValue(llvmInst.Operand(0)),
   265  				}
   266  			case llvm.ExtractValue:
   267  				inst.name = llvmInst.Name()
   268  				agg := llvmInst.Operand(0)
   269  				var offset uint64
   270  				indexingType := agg.Type()
   271  				for _, index := range inst.llvmInst.Indices() {
   272  					switch indexingType.TypeKind() {
   273  					case llvm.StructTypeKind:
   274  						offset += r.targetData.ElementOffset(indexingType, int(index))
   275  						indexingType = indexingType.StructElementTypes()[index]
   276  					case llvm.ArrayTypeKind:
   277  						indexingType = indexingType.ElementType()
   278  						elementSize := r.targetData.TypeAllocSize(indexingType)
   279  						offset += elementSize * uint64(index)
   280  					default:
   281  						panic("unknown type kind") // unreachable
   282  					}
   283  				}
   284  				size := r.targetData.TypeAllocSize(inst.llvmInst.Type())
   285  				// extractvalue [agg, byteOffset, byteSize]
   286  				inst.operands = []value{
   287  					r.getValue(agg),
   288  					literalValue{offset},
   289  					literalValue{size},
   290  				}
   291  			case llvm.InsertValue:
   292  				inst.name = llvmInst.Name()
   293  				agg := llvmInst.Operand(0)
   294  				var offset uint64
   295  				indexingType := agg.Type()
   296  				for _, index := range inst.llvmInst.Indices() {
   297  					switch indexingType.TypeKind() {
   298  					case llvm.StructTypeKind:
   299  						offset += r.targetData.ElementOffset(indexingType, int(index))
   300  						indexingType = indexingType.StructElementTypes()[index]
   301  					case llvm.ArrayTypeKind:
   302  						indexingType = indexingType.ElementType()
   303  						elementSize := r.targetData.TypeAllocSize(indexingType)
   304  						offset += elementSize * uint64(index)
   305  					default:
   306  						panic("unknown type kind") // unreachable
   307  					}
   308  				}
   309  				// insertvalue [agg, elt, byteOffset]
   310  				inst.operands = []value{
   311  					r.getValue(agg),
   312  					r.getValue(llvmInst.Operand(1)),
   313  					literalValue{offset},
   314  				}
   315  			case llvm.ICmp:
   316  				inst.name = llvmInst.Name()
   317  				inst.operands = []value{
   318  					r.getValue(llvmInst.Operand(0)),
   319  					r.getValue(llvmInst.Operand(1)),
   320  					literalValue{uint8(llvmInst.IntPredicate())},
   321  				}
   322  			case llvm.FCmp:
   323  				inst.name = llvmInst.Name()
   324  				inst.operands = []value{
   325  					r.getValue(llvmInst.Operand(0)),
   326  					r.getValue(llvmInst.Operand(1)),
   327  					literalValue{uint8(llvmInst.FloatPredicate())},
   328  				}
   329  			case llvm.Add, llvm.Sub, llvm.Mul, llvm.UDiv, llvm.SDiv, llvm.URem, llvm.SRem, llvm.Shl, llvm.LShr, llvm.AShr, llvm.And, llvm.Or, llvm.Xor:
   330  				// Integer binary operations.
   331  				inst.name = llvmInst.Name()
   332  				inst.operands = []value{
   333  					r.getValue(llvmInst.Operand(0)),
   334  					r.getValue(llvmInst.Operand(1)),
   335  				}
   336  			case llvm.SExt, llvm.ZExt, llvm.Trunc:
   337  				// Extend or shrink an integer size.
   338  				// No sign extension going on so easy to do.
   339  				// zext: [value, bitwidth]
   340  				// trunc: [value, bitwidth]
   341  				inst.name = llvmInst.Name()
   342  				inst.operands = []value{
   343  					r.getValue(llvmInst.Operand(0)),
   344  					literalValue{uint64(llvmInst.Type().IntTypeWidth())},
   345  				}
   346  			case llvm.SIToFP, llvm.UIToFP:
   347  				// Convert an integer to a floating point instruction.
   348  				// opcode: [value, bitwidth]
   349  				inst.name = llvmInst.Name()
   350  				inst.operands = []value{
   351  					r.getValue(llvmInst.Operand(0)),
   352  					literalValue{uint64(r.targetData.TypeAllocSize(llvmInst.Type()) * 8)},
   353  				}
   354  			default:
   355  				// Unknown instruction, which is already set in inst.opcode so
   356  				// is detectable.
   357  				// This error is handled when actually trying to interpret this
   358  				// instruction (to not trigger on code that won't be executed).
   359  			}
   360  			if inst.opcode == llvm.PHI {
   361  				// PHI nodes need to be treated specially, see the comment in
   362  				// interpreter.go for an explanation.
   363  				bb.phiNodes = append(bb.phiNodes, inst)
   364  			} else {
   365  				bb.instructions = append(bb.instructions, inst)
   366  			}
   367  		}
   368  	}
   369  	return fn
   370  }
   371  
   372  // instructionNameMap maps from instruction opcodes to instruction names. This
   373  // can be useful for debug logging.
   374  var instructionNameMap = [...]string{
   375  	llvm.Ret:         "ret",
   376  	llvm.Br:          "br",
   377  	llvm.Switch:      "switch",
   378  	llvm.IndirectBr:  "indirectbr",
   379  	llvm.Invoke:      "invoke",
   380  	llvm.Unreachable: "unreachable",
   381  
   382  	// Standard Binary Operators
   383  	llvm.Add:  "add",
   384  	llvm.FAdd: "fadd",
   385  	llvm.Sub:  "sub",
   386  	llvm.FSub: "fsub",
   387  	llvm.Mul:  "mul",
   388  	llvm.FMul: "fmul",
   389  	llvm.UDiv: "udiv",
   390  	llvm.SDiv: "sdiv",
   391  	llvm.FDiv: "fdiv",
   392  	llvm.URem: "urem",
   393  	llvm.SRem: "srem",
   394  	llvm.FRem: "frem",
   395  
   396  	// Logical Operators
   397  	llvm.Shl:  "shl",
   398  	llvm.LShr: "lshr",
   399  	llvm.AShr: "ashr",
   400  	llvm.And:  "and",
   401  	llvm.Or:   "or",
   402  	llvm.Xor:  "xor",
   403  
   404  	// Memory Operators
   405  	llvm.Alloca:        "alloca",
   406  	llvm.Load:          "load",
   407  	llvm.Store:         "store",
   408  	llvm.GetElementPtr: "getelementptr",
   409  
   410  	// Cast Operators
   411  	llvm.Trunc:    "trunc",
   412  	llvm.ZExt:     "zext",
   413  	llvm.SExt:     "sext",
   414  	llvm.FPToUI:   "fptoui",
   415  	llvm.FPToSI:   "fptosi",
   416  	llvm.UIToFP:   "uitofp",
   417  	llvm.SIToFP:   "sitofp",
   418  	llvm.FPTrunc:  "fptrunc",
   419  	llvm.FPExt:    "fpext",
   420  	llvm.PtrToInt: "ptrtoint",
   421  	llvm.IntToPtr: "inttoptr",
   422  	llvm.BitCast:  "bitcast",
   423  
   424  	// Other Operators
   425  	llvm.ICmp:           "icmp",
   426  	llvm.FCmp:           "fcmp",
   427  	llvm.PHI:            "phi",
   428  	llvm.Call:           "call",
   429  	llvm.Select:         "select",
   430  	llvm.VAArg:          "vaarg",
   431  	llvm.ExtractElement: "extractelement",
   432  	llvm.InsertElement:  "insertelement",
   433  	llvm.ShuffleVector:  "shufflevector",
   434  	llvm.ExtractValue:   "extractvalue",
   435  	llvm.InsertValue:    "insertvalue",
   436  }