github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/stacksize/dwarf.go (about)

     1  package stacksize
     2  
     3  // This file implements parsing DWARF call frame information and interpreting
     4  // the CFI bytecode, or enough of it for most practical code.
     5  
     6  import (
     7  	"bytes"
     8  	"debug/elf"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"io"
    12  )
    13  
    14  // dwarfCIE represents one DWARF Call Frame Information structure.
    15  type dwarfCIE struct {
    16  	bytecode            []byte
    17  	codeAlignmentFactor uint64
    18  }
    19  
    20  // parseFrames parses all call frame information from a .debug_frame section and
    21  // provides the passed in symbols map with frame size information.
    22  func parseFrames(f *elf.File, data []byte, symbols map[uint64]*CallNode) error {
    23  	if f.Class != elf.ELFCLASS32 {
    24  		// TODO: ELF64
    25  		return fmt.Errorf("expected ELF32")
    26  	}
    27  	cies := make(map[uint32]*dwarfCIE)
    28  
    29  	// Read each entity.
    30  	r := bytes.NewBuffer(data)
    31  	for {
    32  		start := len(data) - r.Len()
    33  		var length uint32
    34  		err := binary.Read(r, binary.LittleEndian, &length)
    35  		if err == io.EOF {
    36  			return nil
    37  		}
    38  		if err != nil {
    39  			return err
    40  		}
    41  		var cie uint32
    42  		err = binary.Read(r, binary.LittleEndian, &cie)
    43  		if err != nil {
    44  			return err
    45  		}
    46  		if cie == 0xffffffff {
    47  			// This is a CIE.
    48  			var fields struct {
    49  				Version      uint8
    50  				Augmentation uint8
    51  				AddressSize  uint8
    52  				SegmentSize  uint8
    53  			}
    54  			err = binary.Read(r, binary.LittleEndian, &fields)
    55  			if err != nil {
    56  				return err
    57  			}
    58  			if fields.Version != 4 {
    59  				return fmt.Errorf("unimplemented: .debug_frame version %d", fields.Version)
    60  			}
    61  			if fields.Augmentation != 0 {
    62  				return fmt.Errorf("unimplemented: .debug_frame with augmentation")
    63  			}
    64  			if fields.SegmentSize != 0 {
    65  				return fmt.Errorf("unimplemented: .debug_frame with segment size")
    66  			}
    67  			codeAlignmentFactor, err := readULEB128(r)
    68  			if err != nil {
    69  				return err
    70  			}
    71  			_, err = readSLEB128(r) // data alignment factor
    72  			if err != nil {
    73  				return err
    74  			}
    75  			_, err = readULEB128(r) // return address register
    76  			if err != nil {
    77  				return err
    78  			}
    79  			rest := (start + int(length) + 4) - (len(data) - r.Len())
    80  			bytecode := r.Next(rest)
    81  			cies[uint32(start)] = &dwarfCIE{
    82  				codeAlignmentFactor: codeAlignmentFactor,
    83  				bytecode:            bytecode,
    84  			}
    85  		} else {
    86  			// This is a FDE.
    87  			var fields struct {
    88  				InitialLocation uint32
    89  				AddressRange    uint32
    90  			}
    91  			err = binary.Read(r, binary.LittleEndian, &fields)
    92  			if err != nil {
    93  				return err
    94  			}
    95  			if _, ok := cies[cie]; !ok {
    96  				return fmt.Errorf("could not find CIE 0x%x in .debug_frame section", cie)
    97  			}
    98  			frame := frameInfo{
    99  				cie:    cies[cie],
   100  				start:  uint64(fields.InitialLocation),
   101  				loc:    uint64(fields.InitialLocation),
   102  				length: uint64(fields.AddressRange),
   103  			}
   104  			rest := (start + int(length) + 4) - (len(data) - r.Len())
   105  			bytecode := r.Next(rest)
   106  
   107  			if frame.start == 0 {
   108  				// Not sure where these come from but they don't seem to be
   109  				// important.
   110  				continue
   111  			}
   112  
   113  			_, err = frame.exec(frame.cie.bytecode)
   114  			if err != nil {
   115  				return err
   116  			}
   117  			entries, err := frame.exec(bytecode)
   118  			if err != nil {
   119  				return err
   120  			}
   121  			var maxFrameSize uint64
   122  			for _, entry := range entries {
   123  				switch f.Machine {
   124  				case elf.EM_ARM:
   125  					if entry.cfaRegister != 13 { // r13 or sp
   126  						// something other than a stack pointer (on ARM)
   127  						return fmt.Errorf("%08x..%08x: unknown CFA register number %d", frame.start, frame.start+frame.length, entry.cfaRegister)
   128  					}
   129  				default:
   130  					return fmt.Errorf("unknown architecture: %s", f.Machine)
   131  				}
   132  				if entry.cfaOffset > maxFrameSize {
   133  					maxFrameSize = entry.cfaOffset
   134  				}
   135  			}
   136  			node := symbols[frame.start]
   137  			if node.Size != frame.length {
   138  				return fmt.Errorf("%s: symtab gives symbol length %d while DWARF gives symbol length %d", node, node.Size, frame.length)
   139  			}
   140  			node.FrameSize = maxFrameSize
   141  			node.FrameSizeType = Bounded
   142  			if debugPrint {
   143  				fmt.Printf("%08x..%08x: frame size %4d %s\n", frame.start, frame.start+frame.length, maxFrameSize, node)
   144  			}
   145  		}
   146  	}
   147  }
   148  
   149  // frameInfo contains the state of executing call frame information bytecode.
   150  type frameInfo struct {
   151  	cie         *dwarfCIE
   152  	start       uint64
   153  	loc         uint64
   154  	length      uint64
   155  	cfaRegister uint64
   156  	cfaOffset   uint64
   157  }
   158  
   159  // frameInfoLine represents one line in the frame table (.debug_frame) at one
   160  // point in the execution of the bytecode.
   161  type frameInfoLine struct {
   162  	loc         uint64
   163  	cfaRegister uint64
   164  	cfaOffset   uint64
   165  }
   166  
   167  func (fi *frameInfo) newLine() frameInfoLine {
   168  	return frameInfoLine{
   169  		loc:         fi.loc,
   170  		cfaRegister: fi.cfaRegister,
   171  		cfaOffset:   fi.cfaOffset,
   172  	}
   173  }
   174  
   175  // exec executes the given bytecode in the CFI. Most CFI bytecode is actually
   176  // very simple and provides a way to determine the maximum call frame size.
   177  //
   178  // The frame size often changes multiple times in a function, for example the
   179  // frame size may be adjusted in the prologue and epilogue. Each frameInfoLine
   180  // may contain such a change.
   181  func (fi *frameInfo) exec(bytecode []byte) ([]frameInfoLine, error) {
   182  	var entries []frameInfoLine
   183  	r := bytes.NewBuffer(bytecode)
   184  	for {
   185  		op, err := r.ReadByte()
   186  		if err != nil {
   187  			if err == io.EOF {
   188  				entries = append(entries, fi.newLine())
   189  				return entries, nil
   190  			}
   191  			return nil, err
   192  		}
   193  		// For details on the various opcodes, see:
   194  		// http://dwarfstd.org/doc/DWARF5.pdf (page 239)
   195  		highBits := op >> 6 // high order 2 bits
   196  		lowBits := op & 0x1f
   197  		switch highBits {
   198  		case 1: // DW_CFA_advance_loc
   199  			fi.loc += uint64(lowBits) * fi.cie.codeAlignmentFactor
   200  			entries = append(entries, fi.newLine())
   201  		case 2: // DW_CFA_offset
   202  			// This indicates where a register is saved on the stack in the
   203  			// prologue. We can ignore that for our purposes.
   204  			_, err := readULEB128(r)
   205  			if err != nil {
   206  				return nil, err
   207  			}
   208  		case 3: // DW_CFA_restore
   209  			// Restore a register. Used after an outlined function call.
   210  			// It should be possible to ignore this.
   211  			// TODO: check that this is not the stack pointer.
   212  		case 0:
   213  			switch lowBits {
   214  			case 0: // DW_CFA_nop
   215  				// no operation
   216  			case 0x02: // DW_CFA_advance_loc1
   217  				// Very similar to DW_CFA_advance_loc but allows for a slightly
   218  				// larger range.
   219  				offset, err := r.ReadByte()
   220  				if err != nil {
   221  					return nil, err
   222  				}
   223  				fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
   224  				entries = append(entries, fi.newLine())
   225  			case 0x03: // DW_CFA_advance_loc2
   226  				var offset uint16
   227  				err := binary.Read(r, binary.LittleEndian, &offset)
   228  				if err != nil {
   229  					return nil, err
   230  				}
   231  				fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
   232  				entries = append(entries, fi.newLine())
   233  			case 0x04: // DW_CFA_advance_loc4
   234  				var offset uint32
   235  				err := binary.Read(r, binary.LittleEndian, &offset)
   236  				if err != nil {
   237  					return nil, err
   238  				}
   239  				fi.loc += uint64(offset) * fi.cie.codeAlignmentFactor
   240  				entries = append(entries, fi.newLine())
   241  			case 0x05: // DW_CFA_offset_extended
   242  				// Semantics are the same as DW_CFA_offset, but the encoding is
   243  				// different. Ignore it just like DW_CFA_offset.
   244  				_, err := readULEB128(r) // ULEB128 register
   245  				if err != nil {
   246  					return nil, err
   247  				}
   248  				_, err = readULEB128(r) // ULEB128 offset
   249  				if err != nil {
   250  					return nil, err
   251  				}
   252  			case 0x07: // DW_CFA_undefined
   253  				// Marks a single register as undefined. This is used to stop
   254  				// unwinding in tinygo_startTask using:
   255  				//     .cfi_undefined lr
   256  				// Ignore this directive.
   257  				_, err := readULEB128(r)
   258  				if err != nil {
   259  					return nil, err
   260  				}
   261  			case 0x09: // DW_CFA_register
   262  				// Copies a register. Emitted by the machine outliner, for example.
   263  				// It should be possible to ignore this.
   264  				// TODO: check that the stack pointer is not affected.
   265  				_, err := readULEB128(r)
   266  				if err != nil {
   267  					return nil, err
   268  				}
   269  				_, err = readULEB128(r)
   270  				if err != nil {
   271  					return nil, err
   272  				}
   273  			case 0x0c: // DW_CFA_def_cfa
   274  				register, err := readULEB128(r)
   275  				if err != nil {
   276  					return nil, err
   277  				}
   278  				offset, err := readULEB128(r)
   279  				if err != nil {
   280  					return nil, err
   281  				}
   282  				fi.cfaRegister = register
   283  				fi.cfaOffset = offset
   284  			case 0x0e: // DW_CFA_def_cfa_offset
   285  				offset, err := readULEB128(r)
   286  				if err != nil {
   287  					return nil, err
   288  				}
   289  				fi.cfaOffset = offset
   290  			default:
   291  				return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x (for address 0x%x)", op, fi.loc)
   292  			}
   293  		default:
   294  			return nil, fmt.Errorf("could not decode .debug_frame bytecode op 0x%x (for address 0x%x)", op, fi.loc)
   295  		}
   296  	}
   297  }
   298  
   299  // Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
   300  func readULEB128(r *bytes.Buffer) (result uint64, err error) {
   301  	// TODO: guard against overflowing 64-bit integers.
   302  	var shift uint8
   303  	for {
   304  		b, err := r.ReadByte()
   305  		if err != nil {
   306  			return 0, err
   307  		}
   308  		result |= uint64(b&0x7f) << shift
   309  		if b&0x80 == 0 {
   310  			break
   311  		}
   312  		shift += 7
   313  	}
   314  	return
   315  }
   316  
   317  // Source: https://en.wikipedia.org/wiki/LEB128#Decode_signed_integer
   318  func readSLEB128(r *bytes.Buffer) (result int64, err error) {
   319  	var shift uint8
   320  
   321  	var b byte
   322  	var rawResult uint64
   323  	for {
   324  		b, err = r.ReadByte()
   325  		if err != nil {
   326  			return 0, err
   327  		}
   328  		rawResult |= uint64(b&0x7f) << shift
   329  		shift += 7
   330  		if b&0x80 == 0 {
   331  			break
   332  		}
   333  	}
   334  
   335  	// sign bit of byte is second high order bit (0x40)
   336  	if shift < 64 && b&0x40 != 0 {
   337  		// sign extend
   338  		rawResult |= ^uint64(0) << shift
   339  	}
   340  	result = int64(rawResult)
   341  
   342  	return
   343  }