github.com/wasilibs/wazerox@v0.0.0-20240124024944-4923be63ab5f/internal/wasmdebug/dwarf.go (about)

     1  package wasmdebug
     2  
     3  import (
     4  	"debug/dwarf"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  )
    12  
    13  // DWARFLines is used to retrieve source code line information from the DWARF data.
    14  type DWARFLines struct {
    15  	// d is created by DWARF custom sections.
    16  	d *dwarf.Data
    17  	// linesPerEntry maps dwarf.Offset for dwarf.Entry to the list of lines contained by the entry.
    18  	// The value is sorted in the increasing order by the address.
    19  	linesPerEntry map[dwarf.Offset][]line
    20  	mux           sync.Mutex
    21  }
    22  
    23  type line struct {
    24  	addr uint64
    25  	pos  dwarf.LineReaderPos
    26  }
    27  
    28  // NewDWARFLines returns DWARFLines for the given *dwarf.Data.
    29  func NewDWARFLines(d *dwarf.Data) *DWARFLines {
    30  	if d == nil {
    31  		return nil
    32  	}
    33  	return &DWARFLines{d: d, linesPerEntry: map[dwarf.Offset][]line{}}
    34  }
    35  
    36  // isTombstoneAddr returns true if the given address is invalid a.k.a tombstone address which was made no longer valid
    37  // by linker. According to the DWARF spec[1], the value is encoded as 0xffffffff for Wasm (as 32-bit target),
    38  // but some tools encode it either in -1, -2 [2] or 1<<32 (This might not be by tools, but by debug/dwarf package's bug).
    39  //
    40  // [1] https://dwarfstd.org/issues/200609.1.html
    41  // [2] https://github.com/WebAssembly/binaryen/blob/97178d08d4a20d2a5e3a6be813fc6a7079ef86e1/src/wasm/wasm-debug.cpp#L651-L660
    42  // [3] https://reviews.llvm.org/D81784
    43  func isTombstoneAddr(addr uint64) bool {
    44  	addr32 := int32(addr)
    45  	return addr32 == -1 || addr32 == -2 ||
    46  		addr32 == 0 // This covers 1 <<32.
    47  }
    48  
    49  // Line returns the line information for the given instructionOffset which is an offset in
    50  // the code section of the original Wasm binary. Returns empty string if the info is not found.
    51  func (d *DWARFLines) Line(instructionOffset uint64) (ret []string) {
    52  	if d == nil {
    53  		return
    54  	}
    55  
    56  	// DWARFLines is created per Wasm binary, so there's a possibility that multiple instances
    57  	// created from a same binary face runtime error at the same time, and that results in
    58  	// concurrent access to this function.
    59  	d.mux.Lock()
    60  	defer d.mux.Unlock()
    61  
    62  	r := d.d.Reader()
    63  
    64  	var inlinedRoutines []*dwarf.Entry
    65  	var cu *dwarf.Entry
    66  	var inlinedDone bool
    67  entry:
    68  	for {
    69  		ent, err := r.Next()
    70  		if err != nil || ent == nil {
    71  			break
    72  		}
    73  
    74  		// If we already found the compilation unit and relevant inlined routines, we can stop searching entries.
    75  		if cu != nil && inlinedDone {
    76  			break
    77  		}
    78  
    79  		switch ent.Tag {
    80  		case dwarf.TagCompileUnit, dwarf.TagInlinedSubroutine:
    81  		default:
    82  			// Only CompileUnit and InlinedSubroutines are relevant.
    83  			continue
    84  		}
    85  
    86  		// Check if the entry spans the range which contains the target instruction.
    87  		ranges, err := d.d.Ranges(ent)
    88  		if err != nil {
    89  			continue
    90  		}
    91  		for _, pcs := range ranges {
    92  			start, end := pcs[0], pcs[1]
    93  			if isTombstoneAddr(start) || isTombstoneAddr(end) {
    94  				continue
    95  			}
    96  			if start <= instructionOffset && instructionOffset < end {
    97  				switch ent.Tag {
    98  				case dwarf.TagCompileUnit:
    99  					cu = ent
   100  				case dwarf.TagInlinedSubroutine:
   101  					inlinedRoutines = append(inlinedRoutines, ent)
   102  					// Search inlined subroutines until all the children.
   103  					inlinedDone = !ent.Children
   104  					// Not that "children" in the DWARF spec is defined as the next entry to this entry.
   105  					// See "2.3 Relationship of Debugging Information Entries" in https://dwarfstd.org/doc/DWARF4.pdf
   106  				}
   107  				continue entry
   108  			}
   109  		}
   110  	}
   111  
   112  	// If the relevant compilation unit is not found, nothing we can do with this DWARF info.
   113  	if cu == nil {
   114  		return
   115  	}
   116  
   117  	lineReader, err := d.d.LineReader(cu)
   118  	if err != nil || lineReader == nil {
   119  		return
   120  	}
   121  	var lines []line
   122  	var ok bool
   123  	var le dwarf.LineEntry
   124  	// Get the lines inside the entry.
   125  	if lines, ok = d.linesPerEntry[cu.Offset]; !ok {
   126  		// If not found, we create the list of lines by reading all the LineEntries in the Entry.
   127  		//
   128  		// Note that the dwarf.LineEntry.SeekPC API shouldn't be used because the Go's dwarf package assumes that
   129  		// all the line entries in an Entry are sorted in increasing order which *might not* be true
   130  		// for some languages. Such order requirement is not a part of DWARF specification,
   131  		// and in fact Zig language tends to emit interleaved line information.
   132  		//
   133  		// Thus, here we read all line entries here, and sort them in the increasing order wrt addresses.
   134  		for {
   135  			pos := lineReader.Tell()
   136  			err = lineReader.Next(&le)
   137  			if errors.Is(err, io.EOF) {
   138  				break
   139  			} else if err != nil {
   140  				return
   141  			}
   142  			// TODO: Maybe we should ignore tombstone addresses by using isTombstoneAddr,
   143  			//  but not sure if that would be an issue in practice.
   144  			lines = append(lines, line{addr: le.Address, pos: pos})
   145  		}
   146  		sort.Slice(lines, func(i, j int) bool { return lines[i].addr < lines[j].addr })
   147  		d.linesPerEntry[cu.Offset] = lines // Caches for the future inquiries for the same Entry.
   148  	}
   149  
   150  	// Now we have the lines for this entry. We can find the corresponding source line for instructionOffset
   151  	// via binary search on the list.
   152  	n := len(lines)
   153  	index := sort.Search(n, func(i int) bool { return lines[i].addr >= instructionOffset })
   154  
   155  	if index == n { // This case the address is not found. See the doc sort.Search.
   156  		return
   157  	}
   158  
   159  	ln := lines[index]
   160  	if ln.addr != instructionOffset {
   161  		// If the address doesn't match exactly, the previous entry is the one that contains the instruction.
   162  		// That can happen anytime as the DWARF spec allows it, and other tools can handle it in this way conventionally
   163  		// https://github.com/gimli-rs/addr2line/blob/3a2dbaf84551a06a429f26e9c96071bb409b371f/src/lib.rs#L236-L242
   164  		// https://github.com/kateinoigakukun/wasminspect/blob/f29f052f1b03104da9f702508ac0c1bbc3530ae4/crates/debugger/src/dwarf/mod.rs#L453-L459
   165  		if index-1 < 0 {
   166  			return
   167  		}
   168  		ln = lines[index-1]
   169  	}
   170  
   171  	// Advance the line reader for the found position.
   172  	lineReader.Seek(ln.pos)
   173  	err = lineReader.Next(&le)
   174  
   175  	if err != nil {
   176  		// If we reach this block, that means there's a bug in the []line creation logic above.
   177  		panic("BUG: stored dwarf.LineReaderPos is invalid")
   178  	}
   179  
   180  	// In the inlined case, the line info is the innermost inlined function call.
   181  	inlined := len(inlinedRoutines) != 0
   182  	prefix := fmt.Sprintf("%#x: ", instructionOffset)
   183  	ret = append(ret, formatLine(prefix, le.File.Name, int64(le.Line), int64(le.Column), inlined))
   184  
   185  	if inlined {
   186  		prefix = strings.Repeat(" ", len(prefix))
   187  		files := lineReader.Files()
   188  		// inlinedRoutines contain the inlined call information in the reverse order (children is higher than parent),
   189  		// so we traverse the reverse order and emit the inlined calls.
   190  		for i := len(inlinedRoutines) - 1; i >= 0; i-- {
   191  			inlined := inlinedRoutines[i]
   192  			fileIndex, ok := inlined.Val(dwarf.AttrCallFile).(int64)
   193  			if !ok {
   194  				return
   195  			} else if fileIndex >= int64(len(files)) {
   196  				// This in theory shouldn't happen according to the spec, but guard against ill-formed DWARF info.
   197  				return
   198  			}
   199  			fileName := files[fileIndex]
   200  			line, _ := inlined.Val(dwarf.AttrCallLine).(int64)
   201  			col, _ := inlined.Val(dwarf.AttrCallColumn).(int64)
   202  			ret = append(ret, formatLine(prefix, fileName.Name, line, col,
   203  				// Last one is the origin of the inlined function calls.
   204  				i != 0))
   205  		}
   206  	}
   207  	return
   208  }
   209  
   210  func formatLine(prefix, fileName string, line, col int64, inlined bool) string {
   211  	builder := strings.Builder{}
   212  	builder.WriteString(prefix)
   213  	builder.WriteString(fileName)
   214  
   215  	if line != 0 {
   216  		builder.WriteString(fmt.Sprintf(":%d", line))
   217  		if col != 0 {
   218  			builder.WriteString(fmt.Sprintf(":%d", col))
   219  		}
   220  	}
   221  
   222  	if inlined {
   223  		builder.WriteString(" (inlined)")
   224  	}
   225  	return builder.String()
   226  }