github.com/stealthrocket/wzprof@v0.2.1-0.20230830205924-5fa86be5e5b3/dwarf.go (about)

     1  package wzprof
     2  
     3  import (
     4  	"debug/dwarf"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"log"
     9  	"math"
    10  	"sort"
    11  	"sync"
    12  
    13  	"github.com/tetratelabs/wazero"
    14  	"github.com/tetratelabs/wazero/experimental"
    15  )
    16  
    17  // buildDwarfSymbolizer constructs a Symbolizer instance from the DWARF sections
    18  // of the given WebAssembly module.
    19  func buildDwarfSymbolizer(parser dwarfparser) symbolizer {
    20  	return newDwarfmapper(parser)
    21  }
    22  
// sourceOffsetRange is a pair of source offsets delimiting a range: index 0 is
// the start and index 1 is the end. It is an alias of the element type
// returned by dwarf.Data.Ranges.
type sourceOffsetRange = [2]uint64
    24  
// subprogram describes one DWARF subprogram entry collected while parsing.
type subprogram struct {
	// Entry is the DWARF entry of the subprogram itself.
	Entry *dwarf.Entry
	// CU is the compile unit entry the subprogram was found in; it is used to
	// obtain the line reader when resolving locations.
	CU *dwarf.Entry
	// Inlines holds the inlined subroutine entries found among the direct
	// children of Entry, with their address ranges.
	Inlines []entryRanges
	// Namespace is the prefix built from the enclosing namespaces, each name
	// followed by ":". It is empty outside of any namespace.
	Namespace string
}
    31  
// entryRanges associates a DWARF entry with the address ranges reported for
// it by dwarf.Data.Ranges.
type entryRanges struct {
	entry  *dwarf.Entry
	ranges []sourceOffsetRange
}
    36  
// subprogramRange maps a single address range to the subprogram covering it.
// A subprogram with several ranges appears in several subprogramRange values.
type subprogramRange struct {
	Range      sourceOffsetRange
	Subprogram *subprogram
}
    41  
// dwarfmapper resolves source offsets to source locations using parsed DWARF
// data.
type dwarfmapper struct {
	// d is the DWARF data used to read line tables and follow entry
	// references.
	d *dwarf.Data
	// subprograms is the list of subprogram ranges produced by the parser.
	subprograms []subprogramRange
	// once value used to limit the logging output on error
	onceSourceOffsetNotFound sync.Once
}
    48  
// Names of the WebAssembly custom sections that are read to build the DWARF
// data.
const (
	debugInfo   = ".debug_info"
	debugLine   = ".debug_line"
	debugStr    = ".debug_str"
	debugAbbrev = ".debug_abbrev"
	debugRanges = ".debug_ranges"
)
    56  
    57  func newDwarfparser(module wazero.CompiledModule) (dwarfparser, error) {
    58  	sections := module.CustomSections()
    59  
    60  	var info, line, ranges, str, abbrev []byte
    61  	for _, section := range sections {
    62  		log.Printf("dwarf: found section %s", section.Name())
    63  		switch section.Name() {
    64  		case debugInfo:
    65  			info = section.Data()
    66  		case debugLine:
    67  			line = section.Data()
    68  		case debugStr:
    69  			str = section.Data()
    70  		case debugAbbrev:
    71  			abbrev = section.Data()
    72  		case debugRanges:
    73  			ranges = section.Data()
    74  		}
    75  	}
    76  
    77  	d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
    78  	if err != nil {
    79  		return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
    80  	}
    81  
    82  	r := d.Reader()
    83  	return dwarfparser{d: d, r: r}, nil
    84  }
    85  
    86  func newDwarfParserFromBin(wasmbin []byte) (dwarfparser, error) {
    87  	info := wasmCustomSection(wasmbin, debugInfo)
    88  	line := wasmCustomSection(wasmbin, debugLine)
    89  	ranges := wasmCustomSection(wasmbin, debugRanges)
    90  	str := wasmCustomSection(wasmbin, debugStr)
    91  	abbrev := wasmCustomSection(wasmbin, debugAbbrev)
    92  
    93  	d, err := dwarf.New(abbrev, nil, nil, info, line, nil, ranges, str)
    94  	if err != nil {
    95  		return dwarfparser{}, fmt.Errorf("dwarf: %w", err)
    96  	}
    97  
    98  	r := d.Reader()
    99  	return dwarfparser{d: d, r: r}, nil
   100  }
   101  
   102  func newDwarfmapper(p dwarfparser) *dwarfmapper {
   103  	subprograms := p.Parse()
   104  	log.Printf("dwarf: parsed %d subprogramm ranges", len(subprograms))
   105  
   106  	return &dwarfmapper{
   107  		d:           p.d,
   108  		subprograms: subprograms,
   109  	}
   110  }
   111  
// dwarfparser walks the DWARF entries of a module and collects the address
// ranges of its subprograms.
type dwarfparser struct {
	// d is the DWARF data being parsed; it is used to resolve entry ranges.
	d *dwarf.Data
	// r is the entry reader obtained from d; all parse methods advance it.
	r *dwarf.Reader

	// subprograms accumulates the ranges found so far; Parse returns it.
	subprograms []subprogramRange
}
   118  
   119  func (d *dwarfparser) Parse() []subprogramRange {
   120  	for {
   121  		ent, err := d.r.Next()
   122  		if err != nil || ent == nil {
   123  			break
   124  		}
   125  		if ent.Tag == dwarf.TagCompileUnit {
   126  			d.parseCompileUnit(ent, "")
   127  		} else {
   128  			d.r.SkipChildren()
   129  		}
   130  	}
   131  	return d.subprograms
   132  }
   133  
   134  func (d *dwarfparser) parseCompileUnit(cu *dwarf.Entry, ns string) {
   135  	// Assumption is that r has just read the top level entry of the CU (or
   136  	// possibly a namespace), that is cu.
   137  	d.parseAny(cu, ns, cu)
   138  }
   139  
   140  func (d *dwarfparser) parseAny(cu *dwarf.Entry, ns string, e *dwarf.Entry) {
   141  	// Assumption is that r has just read the top level entry e.
   142  
   143  	for e.Children {
   144  		ent, err := d.r.Next()
   145  		if err != nil || ent == nil {
   146  			return
   147  		}
   148  
   149  		switch ent.Tag {
   150  		case 0:
   151  			// end of block
   152  			return
   153  		case dwarf.TagSubprogram:
   154  			d.parseSubprogram(cu, ns, ent)
   155  		case dwarf.TagNamespace:
   156  			d.parseNamespace(cu, ns, ent)
   157  		default:
   158  			d.parseAny(cu, ns, ent)
   159  		}
   160  	}
   161  }
   162  
   163  func (d *dwarfparser) parseNamespace(cu *dwarf.Entry, ns string, e *dwarf.Entry) {
   164  	// Assumption is that r has just read the top level entry of this
   165  	// namespace, which is e.
   166  	name, ok := e.Val(dwarf.AttrName).(string)
   167  	if ok {
   168  		ns += name + ":"
   169  	}
   170  	d.parseCompileUnit(cu, ns)
   171  }
   172  
   173  func (d *dwarfparser) parseSubprogram(cu *dwarf.Entry, ns string, e *dwarf.Entry) {
   174  	// Assumption is r has just read the top entry of the subprogram, which
   175  	// is e.
   176  
   177  	var inlines []entryRanges
   178  	for e.Children {
   179  		ent, err := d.r.Next()
   180  		if err != nil || ent == nil {
   181  			break
   182  		}
   183  		if ent.Tag == 0 {
   184  			break
   185  		}
   186  		if ent.Tag != dwarf.TagInlinedSubroutine {
   187  			d.r.SkipChildren()
   188  			continue
   189  		}
   190  		ranges, err := d.d.Ranges(ent)
   191  		if err != nil {
   192  			d.r.SkipChildren()
   193  			continue
   194  		}
   195  		inlines = append(inlines, entryRanges{ent, ranges})
   196  		// Inlines can have children that describe which variables were
   197  		// used during inlining.
   198  		d.r.SkipChildren()
   199  	}
   200  
   201  	ranges, err := d.d.Ranges(e)
   202  	if err != nil {
   203  		log.Printf("dwarf: failed to read ranges: %s\n", err)
   204  		return
   205  	}
   206  
   207  	spgm := &subprogram{
   208  		Entry:     e,
   209  		CU:        cu,
   210  		Inlines:   inlines,
   211  		Namespace: ns,
   212  	}
   213  
   214  	if len(ranges) == 0 {
   215  		// If there is no range provided by dwarf, attach this
   216  		// subprogram to an artificial empty range unlikely to be used.
   217  		// This is so that we still have a record of the function in the
   218  		// subprograms collection, as that's where the name resolution
   219  		// for inline functions searches for the inlined function.
   220  		// Notably, it's likely that a subprogram without range
   221  		// represent a function that has only been inlined. This
   222  		// situation is temporary until we rework the subprograms data
   223  		// structure.
   224  		ranges = append(ranges, sourceOffsetRange{math.MaxUint64, math.MaxUint64})
   225  	}
   226  
   227  	for _, pcr := range ranges {
   228  		d.subprograms = append(d.subprograms, subprogramRange{
   229  			Range:      pcr,
   230  			Subprogram: spgm,
   231  		})
   232  	}
   233  }
   234  
   235  func (d *dwarfmapper) Locations(fn experimental.InternalFunction, pc experimental.ProgramCounter) (uint64, []location) {
   236  	offset := fn.SourceOffsetForPC(pc)
   237  	if offset == 0 {
   238  		return offset, nil
   239  	}
   240  
   241  	// TODO: replace with binary search
   242  
   243  	var spgm *subprogram
   244  
   245  	for _, sr := range d.subprograms {
   246  		if sr.Range[0] <= offset && offset <= sr.Range[1] {
   247  			spgm = sr.Subprogram
   248  			break
   249  		}
   250  	}
   251  
   252  	if spgm == nil {
   253  		d.onceSourceOffsetNotFound.Do(func() {
   254  			log.Printf("dwarf: no subprogram ranges found for source offset %d (silencing similar errors now)", offset)
   255  		})
   256  		return offset, nil
   257  	}
   258  
   259  	lr, err := d.d.LineReader(spgm.CU)
   260  	if err != nil || lr == nil {
   261  		log.Printf("dwarf: failed to read lines: %s\n", err)
   262  		return offset, nil
   263  	}
   264  
   265  	// TODO: cache this
   266  	var lines []line
   267  	var le dwarf.LineEntry
   268  	for {
   269  		pos := lr.Tell()
   270  		err = lr.Next(&le)
   271  		if errors.Is(err, io.EOF) {
   272  			break
   273  		}
   274  		if err != nil {
   275  			log.Printf("dwarf: failed to iterate on lines: %s\n", err)
   276  			break
   277  		}
   278  		lines = append(lines, line{Pos: pos, Address: le.Address})
   279  	}
   280  	sort.Slice(lines, func(i, j int) bool { return lines[i].Address < lines[j].Address })
   281  
   282  	i := sort.Search(len(lines), func(i int) bool { return lines[i].Address >= offset })
   283  	if i == len(lines) {
   284  		// no line information for this source offset.
   285  		log.Printf("dwarf: no line information for source offset %d", offset)
   286  		return offset, nil
   287  	}
   288  
   289  	l := lines[i]
   290  	if l.Address != offset {
   291  		// https://github.com/stealthrocket/wazero/blob/867459d7d5ed988a55452d6317ff3cc8451b8ff0/internal/wasmdebug/dwarf.go#L141-L150
   292  		// If the address doesn't match exactly, the previous
   293  		// entry is the one that contains the instruction.
   294  		// That can happen anytime as the DWARF spec allows
   295  		// it, and other tools can handle it in this way
   296  		// conventionally
   297  		// https://github.com/gimli-rs/addr2line/blob/3a2dbaf84551a06a429f26e9c96071bb409b371f/src/lib.rs#L236-L242
   298  		// https://github.com/kateinoigakukun/wasminspect/blob/f29f052f1b03104da9f702508ac0c1bbc3530ae4/crates/debugger/src/dwarf/mod.rs#L453-L459
   299  		if i-1 < 0 {
   300  			log.Printf("dwarf: first line address does not match source (line=%d offset=%d)", l.Address, offset)
   301  			return offset, nil
   302  		}
   303  		l = lines[i-1]
   304  	}
   305  
   306  	lr.Seek(l.Pos)
   307  	err = lr.Next(&le)
   308  	if err != nil {
   309  		// l.Pos was created from parsing dwarf, should not
   310  		// happen.
   311  		panic("BUG: l.Pos was created from parsing dwarf but got error: " + err.Error())
   312  	}
   313  
   314  	human, stable := d.namesForSubprogram(spgm.Entry, spgm)
   315  	locations := make([]location, 0, 1+len(spgm.Inlines))
   316  	locations = append(locations, location{
   317  		File:       le.File.Name,
   318  		Line:       int64(le.Line),
   319  		Column:     int64(le.Column),
   320  		Inlined:    false,
   321  		HumanName:  human,
   322  		StableName: stable,
   323  	})
   324  
   325  	if len(spgm.Inlines) > 0 {
   326  		files := lr.Files()
   327  		for i := len(spgm.Inlines) - 1; i >= 0; i-- {
   328  			er := spgm.Inlines[i]
   329  			fileIdx, ok := er.entry.Val(dwarf.AttrCallFile).(int64)
   330  			if !ok || fileIdx >= int64(len(files)) || !offsetInRanges(er.ranges, offset) {
   331  				continue
   332  			}
   333  
   334  			file := files[fileIdx]
   335  			line, _ := er.entry.Val(dwarf.AttrCallLine).(int64)
   336  			col, _ := er.entry.Val(dwarf.AttrCallLine).(int64)
   337  			human, stable := d.namesForSubprogram(er.entry, nil)
   338  			locations = append(locations, location{
   339  				File:       file.Name,
   340  				Line:       line,
   341  				Column:     col,
   342  				Inlined:    true,
   343  				StableName: stable,
   344  				HumanName:  human,
   345  			})
   346  		}
   347  	}
   348  
   349  	return offset, locations
   350  }
   351  
   352  func offsetInRanges(ranges []sourceOffsetRange, offset uint64) bool {
   353  	for _, x := range ranges {
   354  		if x[0] <= offset && offset <= x[1] {
   355  			return true
   356  		}
   357  	}
   358  	return false
   359  }
   360  
// line is used to cache line entries for a given compilation unit.
type line struct {
	// Pos is the line reader position taken just before the entry was read,
	// so that seeking to it and reading once yields the entry again.
	Pos dwarf.LineReaderPos
	// Address is the address of the line entry.
	Address uint64
}
   366  
   367  // Returns a human-readable name and the name the most likely to match the one
   368  // used in the wasm module. Walks up the inlining chain.
   369  //
   370  // Subprogram is optional. This function will look for the associated subprogram
   371  // if spgm is nil.
   372  func (d *dwarfmapper) namesForSubprogram(e *dwarf.Entry, spgm *subprogram) (string, string) {
   373  	// If an inlined function, grab the name from the origin.
   374  	var err error
   375  	r := d.d.Reader()
   376  	for {
   377  		ao, ok := e.Val(dwarf.AttrAbstractOrigin).(dwarf.Offset)
   378  		if !ok {
   379  			break
   380  		}
   381  		r.Seek(ao)
   382  		e, err = r.Next()
   383  		if err != nil {
   384  			// malformed dwarf
   385  			break
   386  		}
   387  	}
   388  
   389  	// TODO: index
   390  	if spgm == nil {
   391  		for _, s := range d.subprograms {
   392  			if s.Subprogram.Entry.Offset == e.Offset {
   393  				spgm = s.Subprogram
   394  				break
   395  			}
   396  		}
   397  	}
   398  
   399  	var ns string
   400  	if spgm != nil {
   401  		ns = spgm.Namespace
   402  		// } else {
   403  		//		panic("spgm not found")
   404  	}
   405  
   406  	name, _ := e.Val(dwarf.AttrName).(string)
   407  	name = ns + name
   408  	stableName, ok := e.Val(dwarf.AttrLinkageName).(string)
   409  	if !ok {
   410  		stableName = name
   411  	}
   412  
   413  	return name, stableName
   414  }