github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/cover/backend/dwarf.go (about)

     1  // Copyright 2021 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package backend
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"debug/dwarf"
    10  	"debug/elf"
    11  	"encoding/binary"
    12  	"fmt"
    13  	"io"
    14  	"path/filepath"
    15  	"regexp"
    16  	"runtime"
    17  	"sort"
    18  	"strconv"
    19  	"strings"
    20  
    21  	"github.com/google/syzkaller/pkg/log"
    22  	"github.com/google/syzkaller/pkg/mgrconfig"
    23  	"github.com/google/syzkaller/pkg/osutil"
    24  	"github.com/google/syzkaller/pkg/symbolizer"
    25  	"github.com/google/syzkaller/pkg/vminfo"
    26  	"github.com/google/syzkaller/sys/targets"
    27  )
    28  
// dwarfParams bundles the inputs and the pluggable readers consumed by makeDWARF.
//
//   - target: the target whose Arch selects the cover point discovery strategy
//     (direct machine code scan for architectures listed in arches, objdump otherwise).
//   - kernelDirs, splitBuildDelimiters: forwarded to CleanPath when normalizing
//     compile unit and frame paths.
//   - moduleObj: not referenced in this part of the file; presumably directories
//     searched for module object files — TODO confirm against the callers.
//   - hostModules: the binaries to process; an entry with an empty Name is handled
//     as the main kernel image rather than a loadable module.
//   - readSymbols: returns the symbols of a binary and fills the given symbolInfo.
//   - readTextData: returns the raw text bytes that readCoverPoints scans
//     (used only for the entry with an empty Name).
//   - readModuleCoverPoints: returns cover points for a named module.
//   - readTextRanges: returns compile unit PC ranges and the units themselves.
//   - getCompilerVersion: given a binary path, returns the version string that is
//     passed to isKcovBrokenInCompiler.
type dwarfParams struct {
	target                *targets.Target
	kernelDirs            *mgrconfig.KernelDirs
	splitBuildDelimiters  []string
	moduleObj             []string
	hostModules           []*vminfo.KernelModule
	readSymbols           func(*vminfo.KernelModule, *symbolInfo) ([]*Symbol, error)
	readTextData          func(*vminfo.KernelModule) ([]byte, error)
	readModuleCoverPoints func(*targets.Target, *vminfo.KernelModule, *symbolInfo) ([2][]uint64, error)
	readTextRanges        func(*vminfo.KernelModule) ([]pcRange, []*CompileUnit, error)
	getCompilerVersion    func(string) string
}
    41  
// Arch describes how call instructions are found and decoded in raw machine code
// of one architecture (see arches and nextCallTarget).
//
//   - scanSize: number of bytes nextCallTarget advances between candidate positions.
//   - callLen: size in bytes of the window handed to isCallInsn and callTarget.
//   - relaOffset, callRelocType: not used in this part of the file; presumably the
//     offset of the relocated operand inside the call and the ELF relocation type
//     of a call, consumed by the module relocation reader — TODO confirm.
//   - isCallInsn: reports whether insn (callLen bytes) is a call instruction.
//   - callTarget: returns the absolute destination of the call located at pc.
type Arch struct {
	scanSize      int
	callLen       int
	relaOffset    uint64
	callRelocType uint64
	isCallInsn    func(arch *Arch, insn []byte) bool
	callTarget    func(arch *Arch, insn []byte, pc uint64) uint64
}
    50  
    51  var arches = map[string]*Arch{
    52  	targets.AMD64: {
    53  		scanSize:      1,
    54  		callLen:       5,
    55  		relaOffset:    1,
    56  		callRelocType: uint64(elf.R_X86_64_PLT32),
    57  		isCallInsn: func(arch *Arch, insn []byte) bool {
    58  			return insn[0] == 0xe8
    59  		},
    60  		callTarget: func(arch *Arch, insn []byte, pc uint64) uint64 {
    61  			off := uint64(int64(int32(binary.LittleEndian.Uint32(insn[1:]))))
    62  			return pc + off + uint64(arch.callLen)
    63  		},
    64  	},
    65  	targets.ARM64: {
    66  		scanSize:      4,
    67  		callLen:       4,
    68  		callRelocType: uint64(elf.R_AARCH64_CALL26),
    69  		isCallInsn: func(arch *Arch, insn []byte) bool {
    70  			const mask = uint32(0xfc000000)
    71  			const opc = uint32(0x94000000)
    72  			return binary.LittleEndian.Uint32(insn)&mask == opc
    73  		},
    74  		callTarget: func(arch *Arch, insn []byte, pc uint64) uint64 {
    75  			off26 := binary.LittleEndian.Uint32(insn) & 0x3ffffff
    76  			sign := off26 >> 25
    77  			off := uint64(off26)
    78  			// Sign-extend the 26-bit offset stored in the instruction.
    79  			if sign == 1 {
    80  				off |= 0xfffffffffc000000
    81  			}
    82  			return pc + 4*off
    83  		},
    84  	},
    85  	targets.S390x: {
    86  		scanSize:      1,
    87  		callLen:       6,
    88  		callRelocType: uint64(elf.R_390_PLT32DBL),
    89  		isCallInsn: func(arch *Arch, insn []byte) bool {
    90  			return insn[0] == 0xc0 && insn[1] == 0xe5
    91  		},
    92  		callTarget: func(arch *Arch, insn []byte, pc uint64) uint64 {
    93  			off := uint64(int64(int32(binary.BigEndian.Uint32(insn[2:]))))
    94  			return pc + 2*off
    95  		},
    96  	},
    97  }
    98  
    99  func makeDWARF(params *dwarfParams) (impl *Impl, err error) {
   100  	defer func() {
   101  		// It turns out that the DWARF-parsing library in Go crashes while parsing DWARF 5 data.
   102  		// As GCC11 uses DWARF 5 by default, we can expect larger number of problems with
   103  		// syzkallers compiled using old go versions.
   104  		// So we just catch the panic and turn it into a meaningful error message.
   105  		if recErr := recover(); recErr != nil {
   106  			impl = nil
   107  			err = fmt.Errorf("panic occurred while parsing DWARF "+
   108  				"(possible remedy: use go1.16+ which support DWARF 5 debug data): %s", recErr)
   109  		}
   110  	}()
   111  	impl, err = makeDWARFUnsafe(params)
   112  	return
   113  }
   114  
// Result is what processModule extracts from a single binary.
//
//   - CoverPoints: PCs of callback call sites; index 0 holds coverage callbacks and
//     index 1 holds comparison callbacks (same convention as in makeDWARFUnsafe).
//   - Symbols: the symbols returned by dwarfParams.readSymbols for the binary.
type Result struct {
	CoverPoints [2][]uint64
	Symbols     []*Symbol
}
   119  
   120  func processModule(params *dwarfParams, module *vminfo.KernelModule, info *symbolInfo,
   121  	target *targets.Target) (*Result, error) {
   122  	symbols, err := params.readSymbols(module, info)
   123  	if err != nil {
   124  		return nil, err
   125  	}
   126  
   127  	var data []byte
   128  	var coverPoints [2][]uint64
   129  	if _, ok := arches[target.Arch]; !ok {
   130  		coverPoints, err = objdump(target, module)
   131  	} else if module.Name == "" {
   132  		data, err = params.readTextData(module)
   133  		if err != nil {
   134  			return nil, err
   135  		}
   136  		coverPoints, err = readCoverPoints(target, info, data)
   137  	} else {
   138  		coverPoints, err = params.readModuleCoverPoints(target, module, info)
   139  	}
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	result := &Result{
   145  		Symbols:     symbols,
   146  		CoverPoints: coverPoints,
   147  	}
   148  	return result, nil
   149  }
   150  
   151  func makeDWARFUnsafe(params *dwarfParams) (*Impl, error) {
   152  	target := params.target
   153  	kernelDirs := params.kernelDirs
   154  	splitBuildDelimiters := params.splitBuildDelimiters
   155  	modules := params.hostModules
   156  
   157  	// Here and below index 0 refers to coverage callbacks (__sanitizer_cov_trace_pc(_guard))
   158  	// and index 1 refers to comparison callbacks (__sanitizer_cov_trace_cmp*).
   159  	var allCoverPoints [2][]uint64
   160  	var allSymbols []*Symbol
   161  	var allRanges []pcRange
   162  	var allUnits []*CompileUnit
   163  	preciseCoverage := true
   164  	type binResult struct {
   165  		symbols     []*Symbol
   166  		coverPoints [2][]uint64
   167  		ranges      []pcRange
   168  		units       []*CompileUnit
   169  		err         error
   170  	}
   171  	binC := make(chan binResult, len(modules))
   172  	for _, module := range modules {
   173  		go func() {
   174  			info := &symbolInfo{
   175  				tracePC:     make(map[uint64]bool),
   176  				traceCmp:    make(map[uint64]bool),
   177  				tracePCIdx:  make(map[int]bool),
   178  				traceCmpIdx: make(map[int]bool),
   179  			}
   180  			result, err := processModule(params, module, info, target)
   181  			if err != nil {
   182  				binC <- binResult{err: err}
   183  				return
   184  			}
   185  			if module.Name == "" && len(result.CoverPoints[0]) == 0 {
   186  				err = fmt.Errorf("%v doesn't contain coverage callbacks (set CONFIG_KCOV=y on linux)", module.Path)
   187  				binC <- binResult{err: err}
   188  				return
   189  			}
   190  			ranges, units, err := params.readTextRanges(module)
   191  			if err != nil {
   192  				binC <- binResult{err: err}
   193  				return
   194  			}
   195  			binC <- binResult{symbols: result.Symbols, coverPoints: result.CoverPoints, ranges: ranges, units: units}
   196  		}()
   197  		if isKcovBrokenInCompiler(params.getCompilerVersion(module.Path)) {
   198  			preciseCoverage = false
   199  		}
   200  	}
   201  	for range modules {
   202  		result := <-binC
   203  		if err := result.err; err != nil {
   204  			return nil, err
   205  		}
   206  		allSymbols = append(allSymbols, result.symbols...)
   207  		allCoverPoints[0] = append(allCoverPoints[0], result.coverPoints[0]...)
   208  		allCoverPoints[1] = append(allCoverPoints[1], result.coverPoints[1]...)
   209  		allRanges = append(allRanges, result.ranges...)
   210  		allUnits = append(allUnits, result.units...)
   211  	}
   212  	log.Logf(1, "discovered %v source files, %v symbols", len(allUnits), len(allSymbols))
   213  	// TODO: need better way to remove symbols having the same Start
   214  	uniqSymbs := make(map[uint64]*Symbol)
   215  	for _, sym := range allSymbols {
   216  		if _, ok := uniqSymbs[sym.Start]; !ok {
   217  			uniqSymbs[sym.Start] = sym
   218  		}
   219  	}
   220  	allSymbols = []*Symbol{}
   221  	for _, sym := range uniqSymbs {
   222  		allSymbols = append(allSymbols, sym)
   223  	}
   224  	sort.Slice(allSymbols, func(i, j int) bool {
   225  		return allSymbols[i].Start < allSymbols[j].Start
   226  	})
   227  	sort.Slice(allRanges, func(i, j int) bool {
   228  		return allRanges[i].start < allRanges[j].start
   229  	})
   230  	for k := range allCoverPoints {
   231  		sort.Slice(allCoverPoints[k], func(i, j int) bool {
   232  			return allCoverPoints[k][i] < allCoverPoints[k][j]
   233  		})
   234  	}
   235  
   236  	allSymbols = buildSymbols(allSymbols, allRanges, allCoverPoints)
   237  	nunit := 0
   238  	for _, unit := range allUnits {
   239  		if len(unit.PCs) == 0 {
   240  			continue // drop the unit
   241  		}
   242  		// TODO: objDir won't work for out-of-tree modules.
   243  		unit.Name, unit.Path = CleanPath(unit.Name, kernelDirs, splitBuildDelimiters)
   244  		allUnits[nunit] = unit
   245  		nunit++
   246  	}
   247  	allUnits = allUnits[:nunit]
   248  	if len(allSymbols) == 0 || len(allUnits) == 0 {
   249  		return nil, fmt.Errorf("failed to parse DWARF (set CONFIG_DEBUG_INFO=y on linux)")
   250  	}
   251  	var interner symbolizer.Interner
   252  	impl := &Impl{
   253  		Units:   allUnits,
   254  		Symbols: allSymbols,
   255  		Symbolize: func(pcs map[*vminfo.KernelModule][]uint64) ([]*Frame, error) {
   256  			return symbolize(target, &interner, kernelDirs, splitBuildDelimiters, pcs)
   257  		},
   258  		CallbackPoints:  allCoverPoints[0],
   259  		PreciseCoverage: preciseCoverage,
   260  	}
   261  	return impl, nil
   262  }
   263  
   264  func buildSymbols(symbols []*Symbol, ranges []pcRange, coverPoints [2][]uint64) []*Symbol {
   265  	// Assign coverage point PCs to symbols.
   266  	// Both symbols and coverage points are sorted, so we do it one pass over both.
   267  	selectPCs := func(u *ObjectUnit, typ int) *[]uint64 {
   268  		return [2]*[]uint64{&u.PCs, &u.CMPs}[typ]
   269  	}
   270  	for pcType := range coverPoints {
   271  		pcs := coverPoints[pcType]
   272  		var curSymbol *Symbol
   273  		firstSymbolPC, symbolIdx := -1, 0
   274  		for i := 0; i < len(pcs); i++ {
   275  			pc := pcs[i]
   276  			for ; symbolIdx < len(symbols) && pc >= symbols[symbolIdx].End; symbolIdx++ {
   277  			}
   278  			var symb *Symbol
   279  			if symbolIdx < len(symbols) && pc >= symbols[symbolIdx].Start && pc < symbols[symbolIdx].End {
   280  				symb = symbols[symbolIdx]
   281  			}
   282  			if curSymbol != nil && curSymbol != symb {
   283  				*selectPCs(&curSymbol.ObjectUnit, pcType) = pcs[firstSymbolPC:i]
   284  				firstSymbolPC = -1
   285  			}
   286  			curSymbol = symb
   287  			if symb != nil && firstSymbolPC == -1 {
   288  				firstSymbolPC = i
   289  			}
   290  		}
   291  		if curSymbol != nil {
   292  			*selectPCs(&curSymbol.ObjectUnit, pcType) = pcs[firstSymbolPC:]
   293  		}
   294  	}
   295  	// Assign compile units to symbols based on unit pc ranges.
   296  	// Do it one pass as both are sorted.
   297  	nsymbol := 0
   298  	rangeIndex := 0
   299  	for _, s := range symbols {
   300  		for ; rangeIndex < len(ranges) && ranges[rangeIndex].end <= s.Start; rangeIndex++ {
   301  		}
   302  		if rangeIndex == len(ranges) || s.Start < ranges[rangeIndex].start || len(s.PCs) == 0 {
   303  			continue // drop the symbol
   304  		}
   305  		unit := ranges[rangeIndex].unit
   306  		s.Unit = unit
   307  		symbols[nsymbol] = s
   308  		nsymbol++
   309  	}
   310  	symbols = symbols[:nsymbol]
   311  
   312  	for pcType := range coverPoints {
   313  		for _, s := range symbols {
   314  			symbPCs := selectPCs(&s.ObjectUnit, pcType)
   315  			unitPCs := selectPCs(&s.Unit.ObjectUnit, pcType)
   316  			pos := len(*unitPCs)
   317  			*unitPCs = append(*unitPCs, *symbPCs...)
   318  			*symbPCs = (*unitPCs)[pos:]
   319  		}
   320  	}
   321  	return symbols
   322  }
   323  
   324  // Regexps to parse compiler version string in isKcovBrokenInCompiler.
   325  // Some targets (e.g. NetBSD) use g++ instead of gcc.
   326  var gccRE = regexp.MustCompile(`gcc|GCC|g\+\+`)
   327  var gccVersionRE = regexp.MustCompile(`(gcc|GCC|g\+\+).* ([0-9]{1,2})\.[0-9]+\.[0-9]+`)
   328  
   329  // GCC < 14 incorrectly tail-calls kcov callbacks, which does not let syzkaller
   330  // verify that collected coverage points have matching callbacks.
   331  // See https://github.com/google/syzkaller/issues/4447 for more information.
   332  func isKcovBrokenInCompiler(versionStr string) bool {
   333  	if !gccRE.MatchString(versionStr) {
   334  		return false
   335  	}
   336  	groups := gccVersionRE.FindStringSubmatch(versionStr)
   337  	if len(groups) > 0 {
   338  		version, err := strconv.Atoi(groups[2])
   339  		if err == nil {
   340  			return version < 14
   341  		}
   342  	}
   343  	return true
   344  }
   345  
// symbolInfo carries what is learned about the callback symbols of one binary
// while its symbol table is read; makeDWARFUnsafe allocates a fresh one per binary.
//
//   - textAddr: address corresponding to offset 0 of the text data scanned by
//     readCoverPoints (the PC of a call is textAddr + offset).
//   - tracePC, traceCmp: addresses that a call must target in order to be counted
//     as a coverage (index 0) or comparison (index 1) cover point.
//   - tracePCIdx, traceCmpIdx: not used in this part of the file; presumably symbol
//     table indexes of the same callbacks, used when resolving module
//     relocations — TODO confirm.
type symbolInfo struct {
	textAddr uint64
	// Set of addresses that correspond to __sanitizer_cov_trace_pc or its trampolines.
	tracePC     map[uint64]bool
	traceCmp    map[uint64]bool
	tracePCIdx  map[int]bool
	traceCmpIdx map[int]bool
}
   354  
// pcRange is a half-open range of PCs that belongs to a single compile unit.
// NOTE: readTextRanges builds values of this type with positional literals, so the
// field order must not be changed.
type pcRange struct {
	// [start; end)
	start uint64
	end   uint64
	unit  *CompileUnit
}
   361  
// pcFixFn is an optional hook applied by readTextRanges to every [start, end) PC
// range before it is recorded. It returns the (possibly adjusted) range and a flag;
// when the flag is true the range is filtered out and not recorded at all.
type pcFixFn = (func([2]uint64) ([2]uint64, bool))
   363  
   364  func readTextRanges(debugInfo *dwarf.Data, module *vminfo.KernelModule, pcFix pcFixFn) (
   365  	[]pcRange, []*CompileUnit, error) {
   366  	var ranges []pcRange
   367  	unitMap := map[string]*CompileUnit{}
   368  	addRange := func(r [2]uint64, fileName string) {
   369  		if pcFix != nil {
   370  			var filtered bool
   371  			r, filtered = pcFix(r)
   372  			if filtered {
   373  				return
   374  			}
   375  		}
   376  		unit, ok := unitMap[fileName]
   377  		if !ok {
   378  			unit = &CompileUnit{
   379  				ObjectUnit: ObjectUnit{
   380  					Name: fileName,
   381  				},
   382  				Module: module,
   383  			}
   384  			unitMap[fileName] = unit
   385  		}
   386  		if module.Name == "" {
   387  			ranges = append(ranges, pcRange{r[0], r[1], unit})
   388  		} else {
   389  			ranges = append(ranges, pcRange{r[0] + module.Addr, r[1] + module.Addr, unit})
   390  		}
   391  	}
   392  
   393  	for r := debugInfo.Reader(); ; {
   394  		ent, err := r.Next()
   395  		if err != nil {
   396  			return nil, nil, err
   397  		}
   398  		if ent == nil {
   399  			break
   400  		}
   401  		if ent.Tag != dwarf.TagCompileUnit {
   402  			return nil, nil, fmt.Errorf("found unexpected tag %v on top level", ent.Tag)
   403  		}
   404  		attrName, ok := ent.Val(dwarf.AttrName).(string)
   405  		if !ok {
   406  			continue
   407  		}
   408  		attrCompDir, _ := ent.Val(dwarf.AttrCompDir).(string)
   409  
   410  		const languageRust = 28
   411  		if language, ok := ent.Val(dwarf.AttrLanguage).(int64); ok && language == languageRust {
   412  			rawRanges, err := rustRanges(debugInfo, ent)
   413  			if err != nil {
   414  				return nil, nil, fmt.Errorf("failed to query Rust PC ranges: %w", err)
   415  			}
   416  			for _, r := range rawRanges {
   417  				addRange([2]uint64{r.start, r.end}, r.file)
   418  			}
   419  		} else {
   420  			// Compile unit names are relative to the compilation dir,
   421  			// while per-line info isn't.
   422  			// attrName could be an absolute path for out-of-tree modules.
   423  			unitName := attrName
   424  			if !filepath.IsAbs(attrName) {
   425  				unitName = filepath.Join(attrCompDir, attrName)
   426  			}
   427  			ranges1, err := debugInfo.Ranges(ent)
   428  			if err != nil {
   429  				return nil, nil, err
   430  			}
   431  			for _, r := range ranges1 {
   432  				addRange(r, unitName)
   433  			}
   434  		}
   435  		r.SkipChildren()
   436  	}
   437  	var units []*CompileUnit
   438  	for _, unit := range unitMap {
   439  		units = append(units, unit)
   440  	}
   441  	return ranges, units, nil
   442  }
   443  
   444  type rustRange struct {
   445  	// [start; end)
   446  	start uint64
   447  	end   uint64
   448  	file  string
   449  }
   450  
   451  func rustRanges(debugInfo *dwarf.Data, ent *dwarf.Entry) ([]rustRange, error) {
   452  	// For Rust, a single compilation unit may comprise all .rs files that belong to the crate.
   453  	// To properly render the coverage, we need to somehow infer the ranges that belong to
   454  	// those individual .rs files.
   455  	// For simplicity, let's create fake ranges by looking at the DWARF line information.
   456  	var ret []rustRange
   457  	lr, err := debugInfo.LineReader(ent)
   458  	if err != nil {
   459  		return nil, fmt.Errorf("failed to query line reader: %w", err)
   460  	}
   461  	var startPC uint64
   462  	var files []string
   463  	for {
   464  		var entry dwarf.LineEntry
   465  		if err = lr.Next(&entry); err != nil {
   466  			if err == io.EOF {
   467  				break
   468  			}
   469  			return nil, fmt.Errorf("failed to parse next line entry: %w", err)
   470  		}
   471  		if startPC == 0 || entry.Address != startPC {
   472  			for _, file := range files {
   473  				ret = append(ret, rustRange{
   474  					start: startPC,
   475  					end:   entry.Address,
   476  					file:  file,
   477  				})
   478  			}
   479  			files = files[:0]
   480  			startPC = entry.Address
   481  		}
   482  		// Keep on collecting file names that are covered by the range.
   483  		files = append(files, entry.File.Name)
   484  	}
   485  	if startPC != 0 {
   486  		// We don't know the end PC for these, but let's still add them to the ranges.
   487  		for _, file := range files {
   488  			ret = append(ret, rustRange{
   489  				start: startPC,
   490  				end:   startPC + 1,
   491  				file:  file,
   492  			})
   493  		}
   494  	}
   495  	return ret, nil
   496  }
   497  
   498  func symbolizeModule(target *targets.Target, interner *symbolizer.Interner, kernelDirs *mgrconfig.KernelDirs,
   499  	splitBuildDelimiters []string, mod *vminfo.KernelModule, pcs []uint64) ([]*Frame, error) {
   500  	procs := min(runtime.GOMAXPROCS(0)/2, len(pcs)/1000)
   501  	const (
   502  		minProcs = 1
   503  		maxProcs = 4
   504  	)
   505  	// addr2line on a beefy vmlinux takes up to 1.6GB of RAM, so don't create too many of them.
   506  	procs = min(procs, maxProcs)
   507  	procs = max(procs, minProcs)
   508  	type symbolizerResult struct {
   509  		frames []symbolizer.Frame
   510  		err    error
   511  	}
   512  	symbolizerC := make(chan symbolizerResult, procs)
   513  	pcchan := make(chan []uint64, procs)
   514  	for p := 0; p < procs; p++ {
   515  		go func() {
   516  			symb := symbolizer.Make(target)
   517  			defer symb.Close()
   518  			var res symbolizerResult
   519  			for pcs := range pcchan {
   520  				for i, pc := range pcs {
   521  					if mod.Name == "" {
   522  						pcs[i] = pc
   523  					} else {
   524  						pcs[i] = pc - mod.Addr
   525  					}
   526  				}
   527  				frames, err := symb.Symbolize(mod.Path, pcs...)
   528  				if err != nil {
   529  					res.err = fmt.Errorf("failed to symbolize: %w", err)
   530  				}
   531  				res.frames = append(res.frames, frames...)
   532  			}
   533  			symbolizerC <- res
   534  		}()
   535  	}
   536  	for i := 0; i < len(pcs); {
   537  		end := min(i+100, len(pcs))
   538  		pcchan <- pcs[i:end]
   539  		i = end
   540  	}
   541  	close(pcchan)
   542  	var err0 error
   543  	var frames []*Frame
   544  	for p := 0; p < procs; p++ {
   545  		res := <-symbolizerC
   546  		if res.err != nil {
   547  			err0 = res.err
   548  		}
   549  		for _, frame := range res.frames {
   550  			name, path := CleanPath(frame.File, kernelDirs, splitBuildDelimiters)
   551  			pc := frame.PC
   552  			if mod.Name != "" {
   553  				pc = frame.PC + mod.Addr
   554  			}
   555  			frames = append(frames, &Frame{
   556  				Module:   mod,
   557  				PC:       pc,
   558  				Name:     interner.Do(name),
   559  				FuncName: frame.Func,
   560  				Path:     interner.Do(path),
   561  				Inline:   frame.Inline,
   562  				Range: Range{
   563  					StartLine: frame.Line,
   564  					StartCol:  0,
   565  					EndLine:   frame.Line,
   566  					EndCol:    LineEnd,
   567  				},
   568  			})
   569  		}
   570  	}
   571  	if err0 != nil {
   572  		return nil, err0
   573  	}
   574  	return frames, nil
   575  }
   576  
   577  func symbolize(target *targets.Target, interner *symbolizer.Interner, kernelDirs *mgrconfig.KernelDirs,
   578  	splitBuildDelimiters []string, pcs map[*vminfo.KernelModule][]uint64) ([]*Frame, error) {
   579  	var frames []*Frame
   580  	type frameResult struct {
   581  		frames []*Frame
   582  		err    error
   583  	}
   584  	frameC := make(chan frameResult, len(pcs))
   585  	for mod, pcs1 := range pcs {
   586  		go func(mod *vminfo.KernelModule, pcs []uint64) {
   587  			frames, err := symbolizeModule(target, interner, kernelDirs, splitBuildDelimiters, mod, pcs)
   588  			frameC <- frameResult{frames: frames, err: err}
   589  		}(mod, pcs1)
   590  	}
   591  	for range pcs {
   592  		res := <-frameC
   593  		if res.err != nil {
   594  			return nil, res.err
   595  		}
   596  		frames = append(frames, res.frames...)
   597  	}
   598  	return frames, nil
   599  }
   600  
   601  // nextCallTarget finds the next call instruction in data[] starting at *pos and returns that
   602  // instruction's target and pc.
   603  func nextCallTarget(arch *Arch, textAddr uint64, data []byte, pos *int) (uint64, uint64) {
   604  	for *pos < len(data) {
   605  		i := *pos
   606  		if i+arch.callLen > len(data) {
   607  			break
   608  		}
   609  		*pos += arch.scanSize
   610  		insn := data[i : i+arch.callLen]
   611  		if !arch.isCallInsn(arch, insn) {
   612  			continue
   613  		}
   614  		pc := textAddr + uint64(i)
   615  		callTarget := arch.callTarget(arch, insn, pc)
   616  		*pos = i + arch.scanSize
   617  		return callTarget, pc
   618  	}
   619  	return 0, 0
   620  }
   621  
   622  // readCoverPoints finds all coverage points (calls of __sanitizer_cov_trace_*) in the object file.
   623  // Currently it is [amd64|arm64]-specific: looks for opcode and correct offset.
   624  // Running objdump on the whole object file is too slow.
   625  func readCoverPoints(target *targets.Target, info *symbolInfo, data []byte) ([2][]uint64, error) {
   626  	var pcs [2][]uint64
   627  	if len(info.tracePC) == 0 {
   628  		return pcs, fmt.Errorf("no __sanitizer_cov_trace_pc symbol in the object file")
   629  	}
   630  
   631  	i := 0
   632  	arch := arches[target.Arch]
   633  	for {
   634  		callTarget, pc := nextCallTarget(arch, info.textAddr, data, &i)
   635  		if callTarget == 0 {
   636  			break
   637  		}
   638  		if info.tracePC[callTarget] {
   639  			pcs[0] = append(pcs[0], pc)
   640  		} else if info.traceCmp[callTarget] {
   641  			pcs[1] = append(pcs[1], pc)
   642  		}
   643  	}
   644  	return pcs, nil
   645  }
   646  
   647  // Source files for Android may be split between two subdirectories: the common AOSP kernel
   648  // and the device-specific drivers: https://source.android.com/docs/setup/build/building-pixel-kernels.
   649  // Android build system references these subdirectories in various ways, which often results in
   650  // paths to non-existent files being recorded in the debug info.
   651  //
   652  // cleanPathAndroid() assumes that the subdirectories reside in `srcDir`, with their names being listed in
   653  // `delimiters`.
   654  // If one of the `delimiters` occurs in the `path`, it is stripped together with the path prefix, and the
   655  // remaining file path is appended to `srcDir + delimiter`.
   656  // If none of the `delimiters` occur in the `path`, `path` is treated as a relative path that needs to be
   657  // looked up in `srcDir + delimiters[i]`.
   658  func cleanPathAndroid(path, srcDir string, delimiters []string, existFn func(string) bool) (string, string) {
   659  	if len(delimiters) == 0 {
   660  		return "", ""
   661  	}
   662  	reStr := "(" + strings.Join(delimiters, "|") + ")(.*)"
   663  	re := regexp.MustCompile(reStr)
   664  	match := re.FindStringSubmatch(path)
   665  	if match != nil {
   666  		delimiter := match[1]
   667  		filename := match[2]
   668  		path := filepath.Clean(srcDir + delimiter + filename)
   669  		return filename, path
   670  	}
   671  	// None of the delimiters found in `path`: it is probably a relative path to the source file.
   672  	// Try to look it up in every subdirectory of srcDir.
   673  	for _, delimiter := range delimiters {
   674  		absPath := filepath.Clean(srcDir + delimiter + path)
   675  		if existFn(absPath) {
   676  			return path, absPath
   677  		}
   678  	}
   679  	return "", ""
   680  }
   681  
   682  func CleanPath(path string, kernelDirs *mgrconfig.KernelDirs, splitBuildDelimiters []string) (string, string) {
   683  	filename := ""
   684  
   685  	path = filepath.Clean(path)
   686  	aname, apath := cleanPathAndroid(path, kernelDirs.Src, splitBuildDelimiters, osutil.IsExist)
   687  	if aname != "" {
   688  		return aname, apath
   689  	}
   690  	absPath := osutil.Abs(path)
   691  	switch {
   692  	case strings.HasPrefix(absPath, kernelDirs.Obj):
   693  		// Assume the file was built there.
   694  		path = strings.TrimPrefix(absPath, kernelDirs.Obj)
   695  		filename = filepath.Join(kernelDirs.Obj, path)
   696  	case strings.HasPrefix(absPath, kernelDirs.BuildSrc):
   697  		// Assume the file was moved from buildDir to srcDir.
   698  		path = strings.TrimPrefix(absPath, kernelDirs.BuildSrc)
   699  		filename = filepath.Join(kernelDirs.Src, path)
   700  	default:
   701  		// Assume this is relative path.
   702  		filename = filepath.Join(kernelDirs.Src, path)
   703  	}
   704  	return strings.TrimLeft(filepath.Clean(path), "/\\"), filename
   705  }
   706  
   707  // objdump is an old, slow way of finding coverage points.
   708  // amd64 uses faster option of parsing binary directly (readCoverPoints).
   709  // TODO: use the faster approach for all other arches and drop this.
   710  func objdump(target *targets.Target, mod *vminfo.KernelModule) ([2][]uint64, error) {
   711  	var pcs [2][]uint64
   712  	cmd := osutil.Command(target.Objdump, "-d", "--no-show-raw-insn", mod.Path)
   713  	stdout, err := cmd.StdoutPipe()
   714  	if err != nil {
   715  		return pcs, err
   716  	}
   717  	defer stdout.Close()
   718  	stderr, err := cmd.StderrPipe()
   719  	if err != nil {
   720  		return pcs, err
   721  	}
   722  	defer stderr.Close()
   723  	if err := cmd.Start(); err != nil {
   724  		return pcs, fmt.Errorf("failed to run objdump on %v: %w", mod.Path, err)
   725  	}
   726  	defer func() {
   727  		cmd.Process.Kill()
   728  		cmd.Wait()
   729  	}()
   730  	s := bufio.NewScanner(stdout)
   731  	callInsns, traceFuncs := archCallInsn(target)
   732  	for s.Scan() {
   733  		if pc := parseLine(callInsns, traceFuncs, s.Bytes()); pc != 0 {
   734  			if mod.Name != "" {
   735  				pc = pc + mod.Addr
   736  			}
   737  			pcs[0] = append(pcs[0], pc)
   738  		}
   739  	}
   740  	stderrOut, _ := io.ReadAll(stderr)
   741  	if err := cmd.Wait(); err != nil {
   742  		return pcs, fmt.Errorf("failed to run objdump on %v: %w\n%s", mod.Path, err, stderrOut)
   743  	}
   744  	if err := s.Err(); err != nil {
   745  		return pcs, fmt.Errorf("failed to run objdump on %v: %w\n%s", mod.Path, err, stderrOut)
   746  	}
   747  	return pcs, nil
   748  }
   749  
   750  func parseLine(callInsns, traceFuncs [][]byte, ln []byte) uint64 {
   751  	pos := -1
   752  	for _, callInsn := range callInsns {
   753  		if pos = bytes.Index(ln, callInsn); pos != -1 {
   754  			break
   755  		}
   756  	}
   757  	if pos == -1 {
   758  		return 0
   759  	}
   760  	hasCall := false
   761  	for _, traceFunc := range traceFuncs {
   762  		if hasCall = bytes.Contains(ln[pos:], traceFunc); hasCall {
   763  			break
   764  		}
   765  	}
   766  	if !hasCall {
   767  		return 0
   768  	}
   769  	for len(ln) != 0 && ln[0] == ' ' {
   770  		ln = ln[1:]
   771  	}
   772  	colon := bytes.IndexByte(ln, ':')
   773  	if colon == -1 {
   774  		return 0
   775  	}
   776  	pc, err := strconv.ParseUint(string(ln[:colon]), 16, 64)
   777  	if err != nil {
   778  		return 0
   779  	}
   780  	return pc
   781  }
   782  
   783  func archCallInsn(target *targets.Target) ([][]byte, [][]byte) {
   784  	callName := [][]byte{[]byte(" <__sanitizer_cov_trace_pc>")}
   785  	switch target.Arch {
   786  	case targets.I386:
   787  		// c1000102:       call   c10001f0 <__sanitizer_cov_trace_pc>
   788  		return [][]byte{[]byte("\tcall ")}, callName
   789  	case targets.ARM64:
   790  		// ffff0000080d9cc0:       bl      ffff00000820f478 <__sanitizer_cov_trace_pc>
   791  		return [][]byte{[]byte("\tbl ")}, [][]byte{
   792  			[]byte("<__sanitizer_cov_trace_pc>"),
   793  			[]byte("<____sanitizer_cov_trace_pc_veneer>"),
   794  		}
   795  
   796  	case targets.ARM:
   797  		// 8010252c:       bl      801c3280 <__sanitizer_cov_trace_pc>
   798  		return [][]byte{[]byte("\tbl\t")}, callName
   799  	case targets.PPC64LE:
   800  		// c00000000006d904:       bl      c000000000350780 <.__sanitizer_cov_trace_pc>
   801  		// This is only known to occur in the test:
   802  		// 838:   bl      824 <__sanitizer_cov_trace_pc+0x8>
   803  		// This occurs on PPC64LE:
   804  		// c0000000001c21a8:       bl      c0000000002df4a0 <__sanitizer_cov_trace_pc>
   805  		return [][]byte{[]byte("\tbl ")}, [][]byte{
   806  			[]byte("<__sanitizer_cov_trace_pc>"),
   807  			[]byte("<__sanitizer_cov_trace_pc+0x8>"),
   808  			[]byte(" <.__sanitizer_cov_trace_pc>"),
   809  		}
   810  	case targets.MIPS64LE:
   811  		// ffffffff80100420:       jal     ffffffff80205880 <__sanitizer_cov_trace_pc>
   812  		// This is only known to occur in the test:
   813  		// b58:   bal     b30 <__sanitizer_cov_trace_pc>
   814  		return [][]byte{[]byte("\tjal\t"), []byte("\tbal\t")}, callName
   815  	case targets.S390x:
   816  		// 1001de:       brasl   %r14,2bc090 <__sanitizer_cov_trace_pc>
   817  		return [][]byte{[]byte("\tbrasl\t")}, callName
   818  	case targets.RiscV64:
   819  		// ffffffe000200018:       jal     ra,ffffffe0002935b0 <__sanitizer_cov_trace_pc>
   820  		// ffffffe0000010da:       jalr    1242(ra) # ffffffe0002935b0 <__sanitizer_cov_trace_pc>
   821  		return [][]byte{[]byte("\tjal\t"), []byte("\tjalr\t")}, callName
   822  	default:
   823  		panic(fmt.Sprintf("unknown arch %q", target.Arch))
   824  	}
   825  }