github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/kfuzztest/extractor.go (about)

     1  // Copyright 2025 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  package kfuzztest
     4  
     5  import (
     6  	"debug/dwarf"
     7  	"debug/elf"
     8  	"fmt"
     9  	"strings"
    10  )
    11  
// Extractor extracts all information relevant to KFuzzTest from a vmlinux
// binary. Instances are created with NewExtractor and hold an open ELF file
// that is released by Close.
type Extractor struct {
	// Path to the `vmlinux` being parsed.
	vmlinuxPath string
	// Open ELF handle for the file at vmlinuxPath.
	elfFile     *elf.File
	// DWARF debug information loaded from elfFile.
	dwarfData   *dwarf.Data

	// We use an index to avoid repeated sequential scans of the whole binary,
	// as this is by far the most expensive operation.
	// symbolsIndexInitialized tracks the lazy, one-time population of
	// symbolsIndex performed by getSymbol.
	symbolsIndexInitialized bool
	// symbolsIndex maps a symbol name to its entry in the ELF symbol table.
	symbolsIndex            map[string]elf.Symbol
}
    25  
    26  func NewExtractor(vmlinuxPath string) (*Extractor, error) {
    27  	elfFile, err := elf.Open(vmlinuxPath)
    28  	if err != nil {
    29  		return nil, err
    30  	}
    31  	dwarfData, err := elfFile.DWARF()
    32  	if err != nil {
    33  		elfFile.Close()
    34  		return nil, err
    35  	}
    36  	return &Extractor{vmlinuxPath, elfFile, dwarfData, false, make(map[string]elf.Symbol)}, nil
    37  }
    38  
// ExtractAllResult bundles everything Extractor.ExtractAll gathers from a
// single vmlinux image.
type ExtractAllResult struct {
	// Path of the vmlinux the data was extracted from.
	VMLinuxPath string
	// KFuzzTest targets discovered in the binary.
	Funcs       []SyzFunc
	// Input structures of the targets, including struct types reachable
	// from their fields.
	Structs     []SyzStruct
	// Domain constraints attached to input structure fields.
	Constraints []SyzConstraint
	// Annotations attached to input structure fields.
	Annotations []SyzAnnotation
}
    46  
    47  func (e *Extractor) ExtractAll() (ExtractAllResult, error) {
    48  	funcs, err := e.extractFuncs()
    49  	if err != nil {
    50  		return ExtractAllResult{}, err
    51  	}
    52  	structs, err := e.extractStructs(funcs)
    53  	if err != nil {
    54  		return ExtractAllResult{}, err
    55  	}
    56  	constraints, err := e.extractDomainConstraints()
    57  	if err != nil {
    58  		return ExtractAllResult{}, err
    59  	}
    60  	annotations, err := e.extractAnnotations()
    61  	if err != nil {
    62  		return ExtractAllResult{}, err
    63  	}
    64  
    65  	if len(structs) < len(funcs) {
    66  		return ExtractAllResult{}, fmt.Errorf("inconsistent KFuzzTest metadata found in vmlinux")
    67  	}
    68  	if len(funcs) == 0 {
    69  		return ExtractAllResult{}, nil
    70  	}
    71  
    72  	return ExtractAllResult{
    73  		VMLinuxPath: e.vmlinuxPath,
    74  		Funcs:       funcs,
    75  		Structs:     structs,
    76  		Constraints: constraints,
    77  		Annotations: annotations,
    78  	}, nil
    79  }
    80  
    81  func (e *Extractor) Close() {
    82  	e.elfFile.Close()
    83  }
    84  
    85  func (e *ExtractAllResult) String() string {
    86  	var builder strings.Builder
    87  
    88  	fmt.Fprint(&builder, "extraction result:\n")
    89  	fmt.Fprintf(&builder, "\tVMLinux image:   %s\n", e.VMLinuxPath)
    90  	fmt.Fprintf(&builder, "\tnum targets:     %d\n", len(e.Funcs))
    91  	fmt.Fprintf(&builder, "\tnum struct:      %d\n", len(e.Structs))
    92  	fmt.Fprintf(&builder, "\tnum constraints: %d\n", len(e.Constraints))
    93  	fmt.Fprintf(&builder, "\tnum annotations: %d\n", len(e.Annotations))
    94  
    95  	return builder.String()
    96  }
    97  
    98  // Given an address, returns the elf section that this address belongs to in
    99  // the Extractor's elf file.
   100  func (e *Extractor) elfSection(addr uint64) *elf.Section {
   101  	for _, section := range e.elfFile.Sections {
   102  		if addr >= section.Addr && addr < section.Addr+section.Size {
   103  			return section
   104  		}
   105  	}
   106  	return nil
   107  }
   108  
   109  // Reads a string of length at most 128 bytes from the Extractor's elf file.
   110  func (e *Extractor) readElfString(offset uint64) (string, error) {
   111  	strSection := e.elfSection(offset)
   112  	if strSection == nil {
   113  		return "", fmt.Errorf("unable to find section for offset 0x%X", offset)
   114  	}
   115  
   116  	// 128 bytes is longer than we expect to see in KFuzzTest metadata.
   117  	buffer := make([]byte, 128)
   118  	_, err := strSection.ReadAt(buffer, int64(offset-strSection.Addr))
   119  	if err != nil {
   120  		return "", err
   121  	}
   122  
   123  	var builder strings.Builder
   124  	for _, chr := range buffer {
   125  		if chr == 0 {
   126  			return builder.String(), nil
   127  		}
   128  		builder.WriteByte(chr)
   129  	}
   130  
   131  	return "", fmt.Errorf("could not find null-terminated string with length < 128")
   132  }
   133  
   134  func (e *Extractor) buildSymbolIndex() error {
   135  	symbols, err := e.elfFile.Symbols()
   136  	if err != nil {
   137  		return err
   138  	}
   139  	for _, sym := range symbols {
   140  		e.symbolsIndex[sym.Name] = sym
   141  	}
   142  	return nil
   143  }
   144  
   145  func (e *Extractor) getSymbol(symbolName string) (elf.Symbol, error) {
   146  	if !e.symbolsIndexInitialized {
   147  		err := e.buildSymbolIndex()
   148  		e.symbolsIndexInitialized = true
   149  		if err != nil {
   150  			return elf.Symbol{}, err
   151  		}
   152  	}
   153  
   154  	symbol, contains := e.symbolsIndex[symbolName]
   155  	if !contains {
   156  		return elf.Symbol{}, fmt.Errorf("symbol %s not found in %s", symbolName, e.vmlinuxPath)
   157  	}
   158  	return symbol, nil
   159  }
   160  
   161  func (e *Extractor) extractFuncs() ([]SyzFunc, error) {
   162  	var rawFuncs []*kfuzztestTarget
   163  	var err error
   164  
   165  	rawFuncs, err = parseKftfObjects[*kfuzztestTarget](e)
   166  	if err != nil {
   167  		return nil, err
   168  	}
   169  
   170  	fuzzTargets := make([]SyzFunc, len(rawFuncs))
   171  	for i, raw := range rawFuncs {
   172  		name, err := e.readElfString(raw.name)
   173  		if err != nil {
   174  			return []SyzFunc{}, err
   175  		}
   176  		argType, err := e.readElfString(raw.argType)
   177  		if err != nil {
   178  			return []SyzFunc{}, err
   179  		}
   180  		fuzzTargets[i] = SyzFunc{
   181  			Name:            name,
   182  			InputStructName: argType,
   183  		}
   184  	}
   185  
   186  	return fuzzTargets, nil
   187  }
   188  
   189  func (e *Extractor) extractDomainConstraints() ([]SyzConstraint, error) {
   190  	var rawConstraints []*kfuzztestConstraint
   191  	var err error
   192  
   193  	rawConstraints, err = parseKftfObjects[*kfuzztestConstraint](e)
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  
   198  	constraints := make([]SyzConstraint, len(rawConstraints))
   199  	for i, raw := range rawConstraints {
   200  		typeName, err := e.readElfString(raw.inputType)
   201  		if err != nil {
   202  			return []SyzConstraint{}, err
   203  		}
   204  		fieldName, err := e.readElfString(raw.fieldName)
   205  		if err != nil {
   206  			return []SyzConstraint{}, err
   207  		}
   208  
   209  		constraints[i] = SyzConstraint{
   210  			InputType:      typeName,
   211  			FieldName:      fieldName,
   212  			Value1:         raw.value1,
   213  			Value2:         raw.value2,
   214  			ConstraintType: ConstraintType(raw.constraintType),
   215  		}
   216  	}
   217  
   218  	return constraints, nil
   219  }
   220  
   221  func (e *Extractor) extractAnnotations() ([]SyzAnnotation, error) {
   222  	var rawAnnotations []*kfuzztestAnnotation
   223  	var err error
   224  
   225  	rawAnnotations, err = parseKftfObjects[*kfuzztestAnnotation](e)
   226  	if err != nil {
   227  		return nil, err
   228  	}
   229  
   230  	annotations := make([]SyzAnnotation, len(rawAnnotations))
   231  	for i, raw := range rawAnnotations {
   232  		typeName, err := e.readElfString(raw.inputType)
   233  		if err != nil {
   234  			return nil, err
   235  		}
   236  		fieldName, err := e.readElfString(raw.fieldName)
   237  		if err != nil {
   238  			return nil, err
   239  		}
   240  		linkedFieldName, err := e.readElfString(raw.linkedFieldName)
   241  		if err != nil {
   242  			return nil, err
   243  		}
   244  
   245  		annotations[i] = SyzAnnotation{
   246  			InputType:       typeName,
   247  			FieldName:       fieldName,
   248  			LinkedFieldName: linkedFieldName,
   249  			Attribute:       AnnotationAttribute(raw.annotationAttribute),
   250  		}
   251  	}
   252  
   253  	return annotations, nil
   254  }
   255  
   256  func (e *Extractor) dwarfGetType(entry *dwarf.Entry) (dwarf.Type, error) {
   257  	// Case 1: The entry is itself a type definition (e.g., TagStructType, TagBaseType).
   258  	// We use its own offset to get the dwarf.Type object.
   259  	switch entry.Tag {
   260  	case dwarf.TagStructType, dwarf.TagBaseType, dwarf.TagTypedef, dwarf.TagPointerType, dwarf.TagArrayType:
   261  		return e.dwarfData.Type(entry.Offset)
   262  	}
   263  
   264  	// Case 2: The entry refers to a type (e.g., TagMember, TagVariable).
   265  	// We use its AttrType field to find the offset of the type definition.
   266  	typeOffset, ok := entry.Val(dwarf.AttrType).(dwarf.Offset)
   267  	if !ok {
   268  		return nil, fmt.Errorf("entry (Tag: %s) has no AttrType field", entry.Tag)
   269  	}
   270  
   271  	return e.dwarfData.Type(typeOffset)
   272  }
   273  
   274  // extractStructs extracts input structure metadata from discovered KFuzzTest
   275  // targets (funcs).
   276  // Performs a tree-traversal as all struct types need to be defined in the
   277  // resulting description that is emitted by the builder.
   278  func (e *Extractor) extractStructs(funcs []SyzFunc) ([]SyzStruct, error) {
   279  	// Set of input map names so that we can skip over entries that aren't
   280  	// interesting.
   281  	inputStructs := make(map[string]bool)
   282  	for _, fn := range funcs {
   283  		inputStructs[fn.InputStructName] = true
   284  	}
   285  	// Unpacks nested types to find an underlying struct type, or return nil
   286  	// if nothing is found. For example, when called on `struct myStruct **`
   287  	// we return `struct myStruct`.
   288  	unpackNested := func(t dwarf.Type) *dwarf.StructType {
   289  		for {
   290  			switch concreteType := t.(type) {
   291  			case *dwarf.StructType:
   292  				return concreteType
   293  			case *dwarf.PtrType:
   294  				t = concreteType.Type
   295  			case *dwarf.QualType:
   296  				t = concreteType.Type
   297  			default:
   298  				return nil
   299  			}
   300  		}
   301  	}
   302  
   303  	structs := make([]SyzStruct, 0)
   304  
   305  	// Perform a DFS on discovered struct types in order to discover nested
   306  	// struct types that may be contained within them.
   307  	visited := make(map[string]bool)
   308  	var visitRecur func(*dwarf.StructType)
   309  	visitRecur = func(start *dwarf.StructType) {
   310  		newStruct := SyzStruct{dwarfType: start, Name: start.StructName, Fields: make([]SyzField, 0)}
   311  		for _, child := range start.Field {
   312  			newField := SyzField{Name: child.Name, dwarfType: child.Type}
   313  			newStruct.Fields = append(newStruct.Fields, newField)
   314  			switch childType := child.Type.(type) {
   315  			case *dwarf.StructType:
   316  				if _, contains := visited[childType.StructName]; !contains {
   317  					visited[childType.StructName] = true
   318  					visitRecur(childType)
   319  				}
   320  			case *dwarf.PtrType, *dwarf.QualType:
   321  				// If we hit a pointer or a qualifier, we unpack to see if we
   322  				// find a nested struct type so that we can visit it.
   323  				maybeStructType := unpackNested(childType)
   324  				if maybeStructType != nil {
   325  					if _, contains := visited[maybeStructType.StructName]; !contains {
   326  						visited[maybeStructType.StructName] = true
   327  						visitRecur(maybeStructType)
   328  					}
   329  				}
   330  			default:
   331  				continue
   332  			}
   333  		}
   334  		structs = append(structs, newStruct)
   335  	}
   336  
   337  	dwarfReader := e.dwarfData.Reader()
   338  	for {
   339  		entry, err := dwarfReader.Next()
   340  		if err != nil {
   341  			return nil, err
   342  		}
   343  		// EOF.
   344  		if entry == nil {
   345  			break
   346  		}
   347  		if entry.Tag != dwarf.TagStructType {
   348  			continue
   349  		}
   350  		// Skip over unnamed structures.
   351  		nameField := entry.AttrField(dwarf.AttrName)
   352  		if nameField == nil {
   353  			continue
   354  		}
   355  		name, ok := nameField.Val.(string)
   356  		if !ok {
   357  			fmt.Printf("unable to get name field\n")
   358  			continue
   359  		}
   360  		// Dwarf file prefixes structures with `struct` so we must prepend
   361  		// before lookup.
   362  		structName := "struct " + name
   363  		// Check whether or not this type is one that we parsed previously
   364  		// while traversing the .kftf section of the vmlinux binary, discarding
   365  		// if this is not the case.
   366  		if _, ok := inputStructs[structName]; !ok {
   367  			continue
   368  		}
   369  
   370  		t, err := e.dwarfGetType(entry)
   371  		if err != nil {
   372  			return nil, err
   373  		}
   374  
   375  		switch entryType := t.(type) {
   376  		case *dwarf.StructType:
   377  			visitRecur(entryType)
   378  		default:
   379  			// We shouldn't hit this branch if everything before this is
   380  			// correct.
   381  			panic("Error parsing dwarf - well-formed?")
   382  		}
   383  	}
   384  
   385  	return structs, nil
   386  }
   387  
   388  // Parses a slice of kftf objects contained within a dedicated section. This
   389  // function assumes that all entries are tightly packed, and that each section
   390  // contains only one type of statically-sized entry types.
   391  func parseKftfObjects[T interface {
   392  	*P
   393  	parsableFromBytes
   394  }, P any](e *Extractor) ([]T, error) {
   395  	var typeinfo T
   396  
   397  	startSymbol, err := e.getSymbol(typeinfo.startSymbol())
   398  	if err != nil {
   399  		return nil, err
   400  	} else if startSymbol.Value == 0 {
   401  		return nil, fmt.Errorf("failed to resolve %s", typeinfo.startSymbol())
   402  	}
   403  
   404  	endSymbol, err := e.getSymbol(typeinfo.endSymbol())
   405  	if err != nil {
   406  		return nil, err
   407  	} else if endSymbol.Value == 0 {
   408  		return nil, fmt.Errorf("failed to resolve %s", typeinfo.endSymbol())
   409  	}
   410  
   411  	out := make([]T, 0)
   412  	data := make([]byte, typeinfo.size())
   413  	for addr := startSymbol.Value; addr < endSymbol.Value; addr += typeinfo.size() {
   414  		section := e.elfSection(addr)
   415  		if section == nil {
   416  			return nil, fmt.Errorf("failed to locate section for addr=0x%x", addr)
   417  		}
   418  
   419  		n, err := section.ReadAt(data, int64(addr-section.Addr))
   420  		if err != nil || n < int(typeinfo.size()) {
   421  			// If n < sizeof(T), then err is non-nil as per the documentation
   422  			// of section.ReadAt.
   423  			return nil, err
   424  		}
   425  
   426  		obj := T(new(P))
   427  		err = obj.fromBytes(e.elfFile, data)
   428  		if err != nil {
   429  			return nil, err
   430  		}
   431  		out = append(out, obj)
   432  	}
   433  
   434  	return out, nil
   435  }