// DWARFLines is used to retrieve source code line information from the DWARF data.
type DWARFLines struct {
	// d is created by DWARF custom sections.
	d *dwarf.Data
	// linesPerEntry maps dwarf.Offset for dwarf.Entry to the list of lines contained by the entry.
	// The value is sorted in the increasing order by the address.
	linesPerEntry map[dwarf.Offset][]line
	// mux serializes calls to Line, which both reads and populates linesPerEntry.
	mux sync.Mutex
}

// line pairs the address of one line-table row with the reader position
// from which that row can be read again.
type line struct {
	addr uint64
	pos  dwarf.LineReaderPos
}

// NewDWARFLines returns DWARFLines for the given *dwarf.Data.
// It returns nil when d is nil; calling Line on a nil *DWARFLines is safe and yields no lines.
func NewDWARFLines(d *dwarf.Data) *DWARFLines {
	if d == nil {
		return nil
	}
	return &DWARFLines{d: d, linesPerEntry: map[dwarf.Offset][]line{}}
}

// isTombstoneAddr returns true if the given address is invalid a.k.a tombstone address which was made no longer valid
// by linker. According to the DWARF spec[1], the value is encoded as 0xffffffff for Wasm (as 32-bit target),
// but some tools encode it either in -1, -2 [2] or 1<<32 (This might not be by tools, but by debug/dwarf package's bug).
// See also [3].
//
// Only the lower 32 bits of addr are examined, so an address whose lower 32 bits are all zero
// (including plain 0) is reported as a tombstone as well.
//
// [1] https://dwarfstd.org/issues/200609.1.html
// [2] https://github.com/WebAssembly/binaryen/blob/97178d08d4a20d2a5e3a6be813fc6a7079ef86e1/src/wasm/wasm-debug.cpp#L651-L660
// [3] https://reviews.llvm.org/D81784
func isTombstoneAddr(addr uint64) bool {
	switch int32(addr) {
	case -1, -2,
		0: // This covers 1<<32.
		return true
	}
	return false
}

// Line returns the line information for the given instructionOffset which is an offset in
// the code section of the original Wasm binary. Returns nil if the info is not found.
//
// The first element describes the source position of the instruction itself. When the instruction
// belongs to inlined subroutines, the following elements describe the call sites, and every element
// except the last one is marked with " (inlined)".
//
// Line is safe to call on a nil receiver and from multiple goroutines.
func (d *DWARFLines) Line(instructionOffset uint64) (ret []string) {
	if d == nil {
		return
	}

	// DWARFLines is created per Wasm binary, so there's a possibility that multiple instances
	// created from a same binary face runtime error at the same time, and that results in
	// concurrent access to this function.
	d.mux.Lock()
	defer d.mux.Unlock()

	cu, inlinedRoutines := d.findEntries(instructionOffset)
	// If the relevant compilation unit is not found, nothing we can do with this DWARF info.
	if cu == nil {
		return
	}

	lineReader, err := d.d.LineReader(cu)
	if err != nil || lineReader == nil {
		return
	}

	lines, ok := d.linesOf(cu, lineReader)
	if !ok {
		return
	}

	// Now we have the lines for this entry. We can find the corresponding source line for instructionOffset
	// via binary search on the list.
	n := len(lines)
	index := sort.Search(n, func(i int) bool { return lines[i].addr >= instructionOffset })
	if index == n { // This case the address is not found. See the doc sort.Search.
		return
	}

	ln := lines[index]
	if ln.addr != instructionOffset {
		// If the address doesn't match exactly, the previous entry is the one that contains the instruction.
		// That can happen anytime as the DWARF spec allows it, and other tools can handle it in this way conventionally
		// https://github.com/gimli-rs/addr2line/blob/3a2dbaf84551a06a429f26e9c96071bb409b371f/src/lib.rs#L236-L242
		// https://github.com/kateinoigakukun/wasminspect/blob/f29f052f1b03104da9f702508ac0c1bbc3530ae4/crates/debugger/src/dwarf/mod.rs#L453-L459
		if index == 0 {
			return
		}
		ln = lines[index-1]
	}

	// Advance the line reader for the found position.
	lineReader.Seek(ln.pos)
	var le dwarf.LineEntry
	if err = lineReader.Next(&le); err != nil {
		// If we reach this block, that means there's a bug in the []line creation logic above.
		panic("BUG: stored dwarf.LineReaderPos is invalid")
	}
	if le.File == nil {
		// LineEntry.File is a pointer and may be nil for ill-formed line tables.
		// Without a file there is nothing useful to report, so avoid dereferencing it.
		return
	}

	// In the inlined case, the line info is the innermost inlined function call.
	inlined := len(inlinedRoutines) != 0
	prefix := fmt.Sprintf("%#x: ", instructionOffset)
	ret = append(ret, formatLine(prefix, le.File.Name, int64(le.Line), int64(le.Column), inlined))
	if !inlined {
		return
	}

	// Call sites are aligned under the first line instead of repeating the address.
	prefix = strings.Repeat(" ", len(prefix))
	files := lineReader.Files()
	// inlinedRoutines contain the inlined call information in the reverse order (children is higher than parent),
	// so we traverse the reverse order and emit the inlined calls.
	for i := len(inlinedRoutines) - 1; i >= 0; i-- {
		routine := inlinedRoutines[i]
		fileIndex, ok := routine.Val(dwarf.AttrCallFile).(int64)
		if !ok {
			return
		}
		file := lineFileAt(files, fileIndex)
		if file == nil {
			// This in theory shouldn't happen according to the spec, but guard against ill-formed DWARF info.
			return
		}
		// Missing call line/column attributes are reported as zero, which formatLine omits.
		callLine, _ := routine.Val(dwarf.AttrCallLine).(int64)
		callCol, _ := routine.Val(dwarf.AttrCallColumn).(int64)
		ret = append(ret, formatLine(prefix, file.Name, callLine, callCol,
			// Last one is the origin of the inlined function calls.
			i != 0))
	}
	return
}

// findEntries scans the DWARF entries and returns the compilation unit whose ranges contain
// instructionOffset (nil if none is found), together with the inlined subroutine entries
// containing it, in the order they are encountered.
func (d *DWARFLines) findEntries(instructionOffset uint64) (cu *dwarf.Entry, inlinedRoutines []*dwarf.Entry) {
	r := d.d.Reader()
	var inlinedDone bool
	for {
		ent, err := r.Next()
		if err != nil || ent == nil {
			break
		}

		// If we already found the compilation unit and relevant inlined routines, we can stop searching entries.
		if cu != nil && inlinedDone {
			break
		}

		// Only CompileUnit and InlinedSubroutines are relevant.
		if ent.Tag != dwarf.TagCompileUnit && ent.Tag != dwarf.TagInlinedSubroutine {
			continue
		}

		// Check if the entry spans the range which contains the target instruction.
		ranges, err := d.d.Ranges(ent)
		if err != nil || !rangesContain(ranges, instructionOffset) {
			continue
		}

		if ent.Tag == dwarf.TagCompileUnit {
			cu = ent
			continue
		}
		inlinedRoutines = append(inlinedRoutines, ent)
		// Search inlined subroutines until all the children.
		// Note that "children" in the DWARF spec is defined as the next entry to this entry.
		// See "2.3 Relationship of Debugging Information Entries" in https://dwarfstd.org/doc/DWARF4.pdf
		inlinedDone = !ent.Children
	}
	return cu, inlinedRoutines
}

// rangesContain reports whether addr falls into any of the half-open [start, end) ranges.
// Ranges whose start or end is a tombstone address are ignored.
func rangesContain(ranges [][2]uint64, addr uint64) bool {
	for _, pcs := range ranges {
		start, end := pcs[0], pcs[1]
		if isTombstoneAddr(start) || isTombstoneAddr(end) {
			continue
		}
		if start <= addr && addr < end {
			return true
		}
	}
	return false
}

// linesOf returns the address-sorted line list of the compilation unit cu, reading it from
// lineReader and caching it in linesPerEntry on the first request. The boolean is false when
// the line table could not be read, in which case nothing is cached.
func (d *DWARFLines) linesOf(cu *dwarf.Entry, lineReader *dwarf.LineReader) ([]line, bool) {
	if cached, ok := d.linesPerEntry[cu.Offset]; ok {
		return cached, true
	}

	// If not found, we create the list of lines by reading all the LineEntries in the Entry.
	//
	// Note that the dwarf.LineReader.SeekPC API shouldn't be used because the Go's dwarf package assumes that
	// all the line entries in an Entry are sorted in increasing order which *might not* be true
	// for some languages. Such order requirement is not a part of DWARF specification,
	// and in fact Zig language tends to emit interleaved line information.
	//
	// Thus, we read all line entries here, and sort them in the increasing order wrt addresses.
	var lines []line
	var le dwarf.LineEntry
	for {
		pos := lineReader.Tell()
		err := lineReader.Next(&le)
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return nil, false
		}
		// TODO: Maybe we should ignore tombstone addresses by using isTombstoneAddr,
		// but not sure if that would be an issue in practice.
		lines = append(lines, line{addr: le.Address, pos: pos})
	}
	sort.Slice(lines, func(i, j int) bool { return lines[i].addr < lines[j].addr })
	d.linesPerEntry[cu.Offset] = lines // Caches for the future inquiries for the same Entry.
	return lines, true
}

// lineFileAt returns files[index], or nil when index is negative, beyond the table, or refers to
// a nil entry. The file table returned by dwarf.LineReader.Files can contain nil entries
// (debug/dwarf documents entry 0 as nil), so callers must check the result before use.
func lineFileAt(files []*dwarf.LineFile, index int64) *dwarf.LineFile {
	if index < 0 || index >= int64(len(files)) {
		return nil
	}
	return files[index]
}

// formatLine renders one source position as prefix + fileName, followed by ":line" when line is
// non-zero, ":col" when both line and col are non-zero, and " (inlined)" when inlined is true.
func formatLine(prefix, fileName string, line, col int64, inlined bool) string {
	var builder strings.Builder
	builder.WriteString(prefix)
	builder.WriteString(fileName)

	if line != 0 {
		fmt.Fprintf(&builder, ":%d", line)
		if col != 0 {
			fmt.Fprintf(&builder, ":%d", col)
		}
	}

	if inlined {
		builder.WriteString(" (inlined)")
	}
	return builder.String()
}