github.com/stealthrocket/wzprof@v0.2.1-0.20230830205924-5fa86be5e5b3/dwarf.go (about) 1 package wzprof 2 3 import ( 4 "debug/dwarf" 5 "errors" 6 "fmt" 7 "io" 8 "log" 9 "math" 10 "sort" 11 "sync" 12 13 "github.com/tetratelabs/wazero" 14 "github.com/tetratelabs/wazero/experimental" 15 ) 16 17 // buildDwarfSymbolizer constructs a Symbolizer instance from the DWARF sections 18 // of the given WebAssembly module. 19 func buildDwarfSymbolizer(parser dwarfparser) symbolizer { 20 return newDwarfmapper(parser) 21 } 22 23 type sourceOffsetRange = [2]uint64 24 25 type subprogram struct { 26 Entry *dwarf.Entry 27 CU *dwarf.Entry 28 Inlines []entryRanges 29 Namespace string 30 } 31 32 type entryRanges struct { 33 entry *dwarf.Entry 34 ranges []sourceOffsetRange 35 } 36 37 type subprogramRange struct { 38 Range sourceOffsetRange 39 Subprogram *subprogram 40 } 41 42 type dwarfmapper struct { 43 d *dwarf.Data 44 subprograms []subprogramRange 45 // once value used to limit the logging output on error 46 onceSourceOffsetNotFound sync.Once 47 } 48 49 const ( 50 debugInfo = ".debug_info" 51 debugLine = ".debug_line" 52 debugStr = ".debug_str" 53 debugAbbrev = ".debug_abbrev" 54 debugRanges = ".debug_ranges" 55 ) 56 57 func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) { 58 sections := module.CustomSections() 59 60 var info, line, ranges, str, abbrev []byte 61 for _, section := range sections { 62 log.Printf("dwarf: found section %s", section.Name()) 63 switch section.Name() { 64 case debugInfo: 65 info = section.Data() 66 case debugLine: 67 line = section.Data() 68 case debugStr: 69 str = section.Data() 70 case debugAbbrev: 71 abbrev = section.Data() 72 case debugRanges: 73 ranges = section.Data() 74 } 75 } 76 77 d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str) 78 if err != nil { 79 return dwarfparser{}, fmt.Errorf("dwarf: %w", err) 80 } 81 82 r := d.Reader() 83 return dwarfparser{d: d, r: r}, nil 84 } 85 86 func newDwarfParserFromBin(wasmbin []byte) (dwarfparser, error) { 87 info := wasmCustomSection(wasmbin, debugInfo) 88 line := wasmCustomSection(wasmbin, debugLine) 89 ranges := wasmCustomSection(wasmbin, debugRanges) 90 str := wasmCustomSection(wasmbin, debugStr) 91 abbrev := wasmCustomSection(wasmbin, debugAbbrev) 92 93 d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str) 94 if err != nil { 95 return dwarfparser{}, fmt.Errorf("dwarf: %w", err) 96 } 97 98 r := d.Reader() 99 return dwarfparser{d: d, r: r}, nil 100 } 101 102 func newDwarfmapper(p dwarfparser) *dwarfmapper { 103 subprograms := p.Parse() 104 log.Printf("dwarf: parsed %d subprogramm ranges", len(subprograms)) 105 106 return &dwarfmapper{ 107 d: p.d, 108 subprograms: subprograms, 109 } 110 } 111 112 type dwarfparser struct { 113 d *dwarf.Data 114 r *dwarf.Reader 115 116 subprograms []subprogramRange 117 } 118 119 func (d *dwarfparser) Parse() []subprogramRange { 120 for { 121 ent, err := d.r.Next() 122 if err != nil || ent == nil { 123 break 124 } 125 if ent.Tag == dwarf.TagCompileUnit { 126 d.parseCompileUnit(ent, "") 127 } else { 128 d.r.SkipChildren() 129 } 130 } 131 return d.subprograms 132 } 133 134 func (d *dwarfparser) parseCompileUnit(cu *dwarf.Entry, ns string) { 135 // Assumption is that r has just read the top level entry of the CU (or 136 // possibly a namespace), that is cu. 137 d.parseAny(cu, ns, cu) 138 } 139 140 func (d *dwarfparser) parseAny(cu *dwarf.Entry, ns string, e *dwarf.Entry) { 141 // Assumption is that r has just read the top level entry e. 142 143 for e.Children { 144 ent, err := d.r.Next() 145 if err != nil || ent == nil { 146 return 147 } 148 149 switch ent.Tag { 150 case 0: 151 // end of block 152 return 153 case dwarf.TagSubprogram: 154 d.parseSubprogram(cu, ns, ent) 155 case dwarf.TagNamespace: 156 d.parseNamespace(cu, ns, ent) 157 default: 158 d.parseAny(cu, ns, ent) 159 } 160 } 161 } 162 163 func (d *dwarfparser) parseNamespace(cu *dwarf.Entry, ns string, e *dwarf.Entry) { 164 // Assumption is that r has just read the top level entry of this 165 // namespace, which is e. 166 name, ok := e.Val(dwarf.AttrName).(string) 167 if ok { 168 ns += name + ":" 169 } 170 d.parseCompileUnit(cu, ns) 171 } 172 173 func (d *dwarfparser) parseSubprogram(cu *dwarf.Entry, ns string, e *dwarf.Entry) { 174 // Assumption is r has just read the top entry of the subprogram, which 175 // is e. 176 177 var inlines []entryRanges 178 for e.Children { 179 ent, err := d.r.Next() 180 if err != nil || ent == nil { 181 break 182 } 183 if ent.Tag == 0 { 184 break 185 } 186 if ent.Tag != dwarf.TagInlinedSubroutine { 187 d.r.SkipChildren() 188 continue 189 } 190 ranges, err := d.d.Ranges(ent) 191 if err != nil { 192 d.r.SkipChildren() 193 continue 194 } 195 inlines = append(inlines, entryRanges{ent, ranges}) 196 // Inlines can have children that describe which variables were 197 // used during inlining. 198 d.r.SkipChildren() 199 } 200 201 ranges, err := d.d.Ranges(e) 202 if err != nil { 203 log.Printf("dwarf: failed to read ranges: %s\n", err) 204 return 205 } 206 207 spgm := &subprogram{ 208 Entry: e, 209 CU: cu, 210 Inlines: inlines, 211 Namespace: ns, 212 } 213 214 if len(ranges) == 0 { 215 // If there is no range provided by dwarf, attach this 216 // subprogram to an artificial empty range unlikely to be used. 217 // This is so that we still have a record of the function in the 218 // subprograms collection, as that's where the name resolution 219 // for inline functions searches for the inlined function. 220 // Notably, it's likely that a subprogram without range 221 // represent a function that has only been inlined. This 222 // situation is temporary until we rework the subprograms data 223 // structure. 224 ranges = append(ranges, sourceOffsetRange{math.MaxUint64, math.MaxUint64}) 225 } 226 227 for _, pcr := range ranges { 228 d.subprograms = append(d.subprograms, subprogramRange{ 229 Range: pcr, 230 Subprogram: spgm, 231 }) 232 } 233 } 234 235 func (d *dwarfmapper) Locations(fn experimental.InternalFunction, pc experimental.ProgramCounter) (uint64, []location) { 236 offset := fn.SourceOffsetForPC(pc) 237 if offset == 0 { 238 return offset, nil 239 } 240 241 // TODO: replace with binary search 242 243 var spgm *subprogram 244 245 for _, sr := range d.subprograms { 246 if sr.Range[0] <= offset && offset <= sr.Range[1] { 247 spgm = sr.Subprogram 248 break 249 } 250 } 251 252 if spgm == nil { 253 d.onceSourceOffsetNotFound.Do(func() { 254 log.Printf("dwarf: no subprogram ranges found for source offset %d (silencing similar errors now)", offset) 255 }) 256 return offset, nil 257 } 258 259 lr, err := d.d.LineReader(spgm.CU) 260 if err != nil || lr == nil { 261 log.Printf("dwarf: failed to read lines: %s\n", err) 262 return offset, nil 263 } 264 265 // TODO: cache this 266 var lines []line 267 var le dwarf.LineEntry 268 for { 269 pos := lr.Tell() 270 err = lr.Next(&le) 271 if errors.Is(err, io.EOF) { 272 break 273 } 274 if err != nil { 275 log.Printf("dwarf: failed to iterate on lines: %s\n", err) 276 break 277 } 278 lines = append(lines, line{Pos: pos, Address: le.Address}) 279 } 280 sort.Slice(lines, func(i, j int) bool { return lines[i].Address < lines[j].Address }) 281 282 i := sort.Search(len(lines), func(i int) bool { return lines[i].Address >= offset }) 283 if i == len(lines) { 284 // no line information for this source offset. 285 log.Printf("dwarf: no line information for source offset %d", offset) 286 return offset, nil 287 } 288 289 l := lines[i] 290 if l.Address != offset { 291 // https://github.com/stealthrocket/wazero/blob/867459d7d5ed988a55452d6317ff3cc8451b8ff0/internal/wasmdebug/dwarf.go#L141-L150 292 // If the address doesn't match exactly, the previous 293 // entry is the one that contains the instruction. 294 // That can happen anytime as the DWARF spec allows 295 // it, and other tools can handle it in this way 296 // conventionally 297 // https://github.com/gimli-rs/addr2line/blob/3a2dbaf84551a06a429f26e9c96071bb409b371f/src/lib.rs#L236-L242 298 // https://github.com/kateinoigakukun/wasminspect/blob/f29f052f1b03104da9f702508ac0c1bbc3530ae4/crates/debugger/src/dwarf/mod.rs#L453-L459 299 if i-1 < 0 { 300 log.Printf("dwarf: first line address does not match source (line=%d offset=%d)", l.Address, offset) 301 return offset, nil 302 } 303 l = lines[i-1] 304 } 305 306 lr.Seek(l.Pos) 307 err = lr.Next(&le) 308 if err != nil { 309 // l.Pos was created from parsing dwarf, should not 310 // happen. 311 panic("BUG: l.Pos was created from parsing dwarf but got error: " + err.Error()) 312 } 313 314 human, stable := d.namesForSubprogram(spgm.Entry, spgm) 315 locations := make([]location, 0, 1+len(spgm.Inlines)) 316 locations = append(locations, location{ 317 File: le.File.Name, 318 Line: int64(le.Line), 319 Column: int64(le.Column), 320 Inlined: false, 321 HumanName: human, 322 StableName: stable, 323 }) 324 325 if len(spgm.Inlines) > 0 { 326 files := lr.Files() 327 for i := len(spgm.Inlines) - 1; i >= 0; i-- { 328 er := spgm.Inlines[i] 329 fileIdx, ok := er.entry.Val(dwarf.AttrCallFile).(int64) 330 if !ok || fileIdx >= int64(len(files)) || !offsetInRanges(er.ranges, offset) { 331 continue 332 } 333 334 file := files[fileIdx] 335 line, _ := er.entry.Val(dwarf.AttrCallLine).(int64) 336 col, _ := er.entry.Val(dwarf.AttrCallLine).(int64) 337 human, stable := d.namesForSubprogram(er.entry, nil) 338 locations = append(locations, location{ 339 File: file.Name, 340 Line: line, 341 Column: col, 342 Inlined: true, 343 StableName: stable, 344 HumanName: human, 345 }) 346 } 347 } 348 349 return offset, locations 350 } 351 352 func offsetInRanges(ranges []sourceOffsetRange, offset uint64) bool { 353 for _, x := range ranges { 354 if x[0] <= offset && offset <= x[1] { 355 return true 356 } 357 } 358 return false 359 } 360 361 // line is used to cache line entries for a given compilation unit. 362 type line struct { 363 Pos dwarf.LineReaderPos 364 Address uint64 365 } 366 367 // Returns a human-readable name and the name the most likely to match the one 368 // used in the wasm module. Walks up the inlining chain. 369 // 370 // Subprogram is optional. This function will look for the associated subprogram 371 // if spgm is nil. 372 func (d *dwarfmapper) namesForSubprogram(e *dwarf.Entry, spgm *subprogram) (string, string) { 373 // If an inlined function, grab the name from the origin. 374 var err error 375 r := d.d.Reader() 376 for { 377 ao, ok := e.Val(dwarf.AttrAbstractOrigin).(dwarf.Offset) 378 if !ok { 379 break 380 } 381 r.Seek(ao) 382 e, err = r.Next() 383 if err != nil { 384 // malformed dwarf 385 break 386 } 387 } 388 389 // TODO: index 390 if spgm == nil { 391 for _, s := range d.subprograms { 392 if s.Subprogram.Entry.Offset == e.Offset { 393 spgm = s.Subprogram 394 break 395 } 396 } 397 } 398 399 var ns string 400 if spgm != nil { 401 ns = spgm.Namespace 402 // } else { 403 // panic("spgm not found") 404 } 405 406 name, _ := e.Val(dwarf.AttrName).(string) 407 name = ns + name 408 stableName, ok := e.Val(dwarf.AttrLinkageName).(string) 409 if !ok { 410 stableName = name 411 } 412 413 return name, stableName 414 }