github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/kfuzztest/extractor.go (about) 1 // Copyright 2025 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 package kfuzztest 4 5 import ( 6 "debug/dwarf" 7 "debug/elf" 8 "fmt" 9 "strings" 10 ) 11 12 // Extractor's job is to extract all information relevant to KFuzzTest from a 13 // VMlinux binary. 14 type Extractor struct { 15 // Path to the `vmlinux` being parsed. 16 vmlinuxPath string 17 elfFile *elf.File 18 dwarfData *dwarf.Data 19 20 // We use an index to avoid repeated sequential scans of the whole binary, 21 // as this is by far the most expensive operation. 22 symbolsIndexInitialized bool 23 symbolsIndex map[string]elf.Symbol 24 } 25 26 func NewExtractor(vmlinuxPath string) (*Extractor, error) { 27 elfFile, err := elf.Open(vmlinuxPath) 28 if err != nil { 29 return nil, err 30 } 31 dwarfData, err := elfFile.DWARF() 32 if err != nil { 33 elfFile.Close() 34 return nil, err 35 } 36 return &Extractor{vmlinuxPath, elfFile, dwarfData, false, make(map[string]elf.Symbol)}, nil 37 } 38 39 type ExtractAllResult struct { 40 VMLinuxPath string 41 Funcs []SyzFunc 42 Structs []SyzStruct 43 Constraints []SyzConstraint 44 Annotations []SyzAnnotation 45 } 46 47 func (e *Extractor) ExtractAll() (ExtractAllResult, error) { 48 funcs, err := e.extractFuncs() 49 if err != nil { 50 return ExtractAllResult{}, err 51 } 52 structs, err := e.extractStructs(funcs) 53 if err != nil { 54 return ExtractAllResult{}, err 55 } 56 constraints, err := e.extractDomainConstraints() 57 if err != nil { 58 return ExtractAllResult{}, err 59 } 60 annotations, err := e.extractAnnotations() 61 if err != nil { 62 return ExtractAllResult{}, err 63 } 64 65 if len(structs) < len(funcs) { 66 return ExtractAllResult{}, fmt.Errorf("inconsistent KFuzzTest metadata found in vmlinux") 67 } 68 if len(funcs) == 0 { 69 return ExtractAllResult{}, nil 70 } 71 72 return ExtractAllResult{ 73 VMLinuxPath: e.vmlinuxPath, 74 Funcs: funcs, 75 Structs: structs, 76 Constraints: constraints, 77 Annotations: annotations, 78 }, nil 79 } 80 81 func (e *Extractor) Close() { 82 e.elfFile.Close() 83 } 84 85 func (e *ExtractAllResult) String() string { 86 var builder strings.Builder 87 88 fmt.Fprint(&builder, "extraction result:\n") 89 fmt.Fprintf(&builder, "\tVMLinux image: %s\n", e.VMLinuxPath) 90 fmt.Fprintf(&builder, "\tnum targets: %d\n", len(e.Funcs)) 91 fmt.Fprintf(&builder, "\tnum struct: %d\n", len(e.Structs)) 92 fmt.Fprintf(&builder, "\tnum constraints: %d\n", len(e.Constraints)) 93 fmt.Fprintf(&builder, "\tnum annotations: %d\n", len(e.Annotations)) 94 95 return builder.String() 96 } 97 98 // Given an address, returns the elf section that this address belongs to in 99 // the Extractor's elf file. 100 func (e *Extractor) elfSection(addr uint64) *elf.Section { 101 for _, section := range e.elfFile.Sections { 102 if addr >= section.Addr && addr < section.Addr+section.Size { 103 return section 104 } 105 } 106 return nil 107 } 108 109 // Reads a string of length at most 128 bytes from the Extractor's elf file. 110 func (e *Extractor) readElfString(offset uint64) (string, error) { 111 strSection := e.elfSection(offset) 112 if strSection == nil { 113 return "", fmt.Errorf("unable to find section for offset 0x%X", offset) 114 } 115 116 // 128 bytes is longer than we expect to see in KFuzzTest metadata. 117 buffer := make([]byte, 128) 118 _, err := strSection.ReadAt(buffer, int64(offset-strSection.Addr)) 119 if err != nil { 120 return "", err 121 } 122 123 var builder strings.Builder 124 for _, chr := range buffer { 125 if chr == 0 { 126 return builder.String(), nil 127 } 128 builder.WriteByte(chr) 129 } 130 131 return "", fmt.Errorf("could not find null-terminated string with length < 128") 132 } 133 134 func (e *Extractor) buildSymbolIndex() error { 135 symbols, err := e.elfFile.Symbols() 136 if err != nil { 137 return err 138 } 139 for _, sym := range symbols { 140 e.symbolsIndex[sym.Name] = sym 141 } 142 return nil 143 } 144 145 func (e *Extractor) getSymbol(symbolName string) (elf.Symbol, error) { 146 if !e.symbolsIndexInitialized { 147 err := e.buildSymbolIndex() 148 e.symbolsIndexInitialized = true 149 if err != nil { 150 return elf.Symbol{}, err 151 } 152 } 153 154 symbol, contains := e.symbolsIndex[symbolName] 155 if !contains { 156 return elf.Symbol{}, fmt.Errorf("symbol %s not found in %s", symbolName, e.vmlinuxPath) 157 } 158 return symbol, nil 159 } 160 161 func (e *Extractor) extractFuncs() ([]SyzFunc, error) { 162 var rawFuncs []*kfuzztestTarget 163 var err error 164 165 rawFuncs, err = parseKftfObjects[*kfuzztestTarget](e) 166 if err != nil { 167 return nil, err 168 } 169 170 fuzzTargets := make([]SyzFunc, len(rawFuncs)) 171 for i, raw := range rawFuncs { 172 name, err := e.readElfString(raw.name) 173 if err != nil { 174 return []SyzFunc{}, err 175 } 176 argType, err := e.readElfString(raw.argType) 177 if err != nil { 178 return []SyzFunc{}, err 179 } 180 fuzzTargets[i] = SyzFunc{ 181 Name: name, 182 InputStructName: argType, 183 } 184 } 185 186 return fuzzTargets, nil 187 } 188 189 func (e *Extractor) extractDomainConstraints() ([]SyzConstraint, error) { 190 var rawConstraints []*kfuzztestConstraint 191 var err error 192 193 rawConstraints, err = parseKftfObjects[*kfuzztestConstraint](e) 194 if err != nil { 195 return nil, err 196 } 197 198 constraints := make([]SyzConstraint, len(rawConstraints)) 199 for i, raw := range rawConstraints { 200 typeName, err := e.readElfString(raw.inputType) 201 if err != nil { 202 return []SyzConstraint{}, err 203 } 204 fieldName, err := e.readElfString(raw.fieldName) 205 if err != nil { 206 return []SyzConstraint{}, err 207 } 208 209 constraints[i] = SyzConstraint{ 210 InputType: typeName, 211 FieldName: fieldName, 212 Value1: raw.value1, 213 Value2: raw.value2, 214 ConstraintType: ConstraintType(raw.constraintType), 215 } 216 } 217 218 return constraints, nil 219 } 220 221 func (e *Extractor) extractAnnotations() ([]SyzAnnotation, error) { 222 var rawAnnotations []*kfuzztestAnnotation 223 var err error 224 225 rawAnnotations, err = parseKftfObjects[*kfuzztestAnnotation](e) 226 if err != nil { 227 return nil, err 228 } 229 230 annotations := make([]SyzAnnotation, len(rawAnnotations)) 231 for i, raw := range rawAnnotations { 232 typeName, err := e.readElfString(raw.inputType) 233 if err != nil { 234 return nil, err 235 } 236 fieldName, err := e.readElfString(raw.fieldName) 237 if err != nil { 238 return nil, err 239 } 240 linkedFieldName, err := e.readElfString(raw.linkedFieldName) 241 if err != nil { 242 return nil, err 243 } 244 245 annotations[i] = SyzAnnotation{ 246 InputType: typeName, 247 FieldName: fieldName, 248 LinkedFieldName: linkedFieldName, 249 Attribute: AnnotationAttribute(raw.annotationAttribute), 250 } 251 } 252 253 return annotations, nil 254 } 255 256 func (e *Extractor) dwarfGetType(entry *dwarf.Entry) (dwarf.Type, error) { 257 // Case 1: The entry is itself a type definition (e.g., TagStructType, TagBaseType). 258 // We use its own offset to get the dwarf.Type object. 259 switch entry.Tag { 260 case dwarf.TagStructType, dwarf.TagBaseType, dwarf.TagTypedef, dwarf.TagPointerType, dwarf.TagArrayType: 261 return e.dwarfData.Type(entry.Offset) 262 } 263 264 // Case 2: The entry refers to a type (e.g., TagMember, TagVariable). 265 // We use its AttrType field to find the offset of the type definition. 266 typeOffset, ok := entry.Val(dwarf.AttrType).(dwarf.Offset) 267 if !ok { 268 return nil, fmt.Errorf("entry (Tag: %s) has no AttrType field", entry.Tag) 269 } 270 271 return e.dwarfData.Type(typeOffset) 272 } 273 274 // extractStructs extracts input structure metadata from discovered KFuzzTest 275 // targets (funcs). 276 // Performs a tree-traversal as all struct types need to be defined in the 277 // resulting description that is emitted by the builder. 278 func (e *Extractor) extractStructs(funcs []SyzFunc) ([]SyzStruct, error) { 279 // Set of input map names so that we can skip over entries that aren't 280 // interesting. 281 inputStructs := make(map[string]bool) 282 for _, fn := range funcs { 283 inputStructs[fn.InputStructName] = true 284 } 285 // Unpacks nested types to find an underlying struct type, or return nil 286 // if nothing is found. For example, when called on `struct myStruct **` 287 // we return `struct myStruct`. 288 unpackNested := func(t dwarf.Type) *dwarf.StructType { 289 for { 290 switch concreteType := t.(type) { 291 case *dwarf.StructType: 292 return concreteType 293 case *dwarf.PtrType: 294 t = concreteType.Type 295 case *dwarf.QualType: 296 t = concreteType.Type 297 default: 298 return nil 299 } 300 } 301 } 302 303 structs := make([]SyzStruct, 0) 304 305 // Perform a DFS on discovered struct types in order to discover nested 306 // struct types that may be contained within them. 307 visited := make(map[string]bool) 308 var visitRecur func(*dwarf.StructType) 309 visitRecur = func(start *dwarf.StructType) { 310 newStruct := SyzStruct{dwarfType: start, Name: start.StructName, Fields: make([]SyzField, 0)} 311 for _, child := range start.Field { 312 newField := SyzField{Name: child.Name, dwarfType: child.Type} 313 newStruct.Fields = append(newStruct.Fields, newField) 314 switch childType := child.Type.(type) { 315 case *dwarf.StructType: 316 if _, contains := visited[childType.StructName]; !contains { 317 visited[childType.StructName] = true 318 visitRecur(childType) 319 } 320 case *dwarf.PtrType, *dwarf.QualType: 321 // If we hit a pointer or a qualifier, we unpack to see if we 322 // find a nested struct type so that we can visit it. 323 maybeStructType := unpackNested(childType) 324 if maybeStructType != nil { 325 if _, contains := visited[maybeStructType.StructName]; !contains { 326 visited[maybeStructType.StructName] = true 327 visitRecur(maybeStructType) 328 } 329 } 330 default: 331 continue 332 } 333 } 334 structs = append(structs, newStruct) 335 } 336 337 dwarfReader := e.dwarfData.Reader() 338 for { 339 entry, err := dwarfReader.Next() 340 if err != nil { 341 return nil, err 342 } 343 // EOF. 344 if entry == nil { 345 break 346 } 347 if entry.Tag != dwarf.TagStructType { 348 continue 349 } 350 // Skip over unnamed structures. 351 nameField := entry.AttrField(dwarf.AttrName) 352 if nameField == nil { 353 continue 354 } 355 name, ok := nameField.Val.(string) 356 if !ok { 357 fmt.Printf("unable to get name field\n") 358 continue 359 } 360 // Dwarf file prefixes structures with `struct` so we must prepend 361 // before lookup. 362 structName := "struct " + name 363 // Check whether or not this type is one that we parsed previously 364 // while traversing the .kftf section of the vmlinux binary, discarding 365 // if this is not the case. 366 if _, ok := inputStructs[structName]; !ok { 367 continue 368 } 369 370 t, err := e.dwarfGetType(entry) 371 if err != nil { 372 return nil, err 373 } 374 375 switch entryType := t.(type) { 376 case *dwarf.StructType: 377 visitRecur(entryType) 378 default: 379 // We shouldn't hit this branch if everything before this is 380 // correct. 381 panic("Error parsing dwarf - well-formed?") 382 } 383 } 384 385 return structs, nil 386 } 387 388 // Parses a slice of kftf objects contained within a dedicated section. This 389 // function assumes that all entries are tightly packed, and that each section 390 // contains only one type of statically-sized entry types. 391 func parseKftfObjects[T interface { 392 *P 393 parsableFromBytes 394 }, P any](e *Extractor) ([]T, error) { 395 var typeinfo T 396 397 startSymbol, err := e.getSymbol(typeinfo.startSymbol()) 398 if err != nil { 399 return nil, err 400 } else if startSymbol.Value == 0 { 401 return nil, fmt.Errorf("failed to resolve %s", typeinfo.startSymbol()) 402 } 403 404 endSymbol, err := e.getSymbol(typeinfo.endSymbol()) 405 if err != nil { 406 return nil, err 407 } else if endSymbol.Value == 0 { 408 return nil, fmt.Errorf("failed to resolve %s", typeinfo.endSymbol()) 409 } 410 411 out := make([]T, 0) 412 data := make([]byte, typeinfo.size()) 413 for addr := startSymbol.Value; addr < endSymbol.Value; addr += typeinfo.size() { 414 section := e.elfSection(addr) 415 if section == nil { 416 return nil, fmt.Errorf("failed to locate section for addr=0x%x", addr) 417 } 418 419 n, err := section.ReadAt(data, int64(addr-section.Addr)) 420 if err != nil || n < int(typeinfo.size()) { 421 // If n < sizeof(T), then err is non-nil as per the documentation 422 // of section.ReadAt. 423 return nil, err 424 } 425 426 obj := T(new(P)) 427 err = obj.fromBytes(e.elfFile, data) 428 if err != nil { 429 return nil, err 430 } 431 out = append(out, obj) 432 } 433 434 return out, nil 435 }