github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/declextract/declextract.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package declextract 5 6 import ( 7 "bytes" 8 "errors" 9 "fmt" 10 "io" 11 "os" 12 "slices" 13 "strings" 14 15 "github.com/google/syzkaller/pkg/clangtool" 16 "github.com/google/syzkaller/pkg/cover" 17 "github.com/google/syzkaller/pkg/ifaceprobe" 18 ) 19 20 type Result struct { 21 Descriptions []byte 22 Interfaces []*Interface 23 IncludeUse map[string]string 24 StructInfo map[string]*StructInfo 25 } 26 27 type StructInfo struct { 28 Size int 29 Align int 30 } 31 32 func Run(out *Output, probe *ifaceprobe.Info, coverage []*cover.FileCoverage, 33 syscallRename map[string][]string, trace io.Writer) (*Result, error) { 34 ctx := &context{ 35 Output: out, 36 probe: probe, 37 coverage: coverage, 38 syscallRename: syscallRename, 39 structs: make(map[string]*Struct), 40 funcs: make(map[string]*Function), 41 ioctls: make(map[string]*Type), 42 facts: make(map[string]*typingNode), 43 uniqualizer: make(map[string]int), 44 debugTrace: trace, 45 } 46 ctx.processFunctions() 47 ctx.processTypingFacts() 48 includeUse := ctx.processConsts() 49 ctx.processEnums() 50 structInfo := ctx.processStructs() 51 ctx.processSyscalls() 52 ctx.processIouring() 53 54 ctx.serialize() 55 ctx.finishInterfaces() 56 if len(ctx.errs) != 0 { 57 return nil, errors.Join(ctx.errs...) 58 } 59 return &Result{ 60 Descriptions: ctx.descriptions.Bytes(), 61 Interfaces: ctx.interfaces, 62 IncludeUse: includeUse, 63 StructInfo: structInfo, 64 }, nil 65 } 66 67 type context struct { 68 *Output 69 probe *ifaceprobe.Info 70 coverage []*cover.FileCoverage 71 syscallRename map[string][]string // syscall function -> syscall names 72 structs map[string]*Struct 73 funcs map[string]*Function 74 ioctls map[string]*Type 75 facts map[string]*typingNode 76 includes []string 77 defines []define 78 uniqualizer map[string]int 79 interfaces []*Interface 80 descriptions *bytes.Buffer 81 debugTrace io.Writer 82 errs []error 83 } 84 85 type define struct { 86 Name string 87 Value string 88 } 89 90 func (ctx *context) error(msg string, args ...any) { 91 ctx.errs = append(ctx.errs, fmt.Errorf(msg, args...)) 92 } 93 94 func (ctx *context) warn(msg string, args ...any) { 95 fmt.Fprintf(os.Stderr, msg+"\n", args...) 96 } 97 98 func (ctx *context) trace(msg string, args ...any) { 99 if ctx.debugTrace != nil { 100 fmt.Fprintf(ctx.debugTrace, msg+"\n", args...) 101 } 102 } 103 104 func (ctx *context) processConsts() map[string]string { 105 replaces := map[string]string{ 106 // Arches may use some includes from asm-generic and some from arch/arm. 107 // If the arch used for extract used asm-generic for a header, 108 // other arches may need arch/asm version of the header. So switch to 109 // a more generic file name that should resolve correctly for all arches. 110 "include/uapi/asm-generic/ioctls.h": "asm/ioctls.h", 111 "include/uapi/asm-generic/sockios.h": "asm/sockios.h", 112 } 113 defineDedup := make(map[string]bool) 114 includeUse := make(map[string]string) 115 for _, ci := range ctx.Consts { 116 if strings.Contains(ci.Filename, "/uapi/") && !strings.Contains(ci.Filename, "arch/x86/") && 117 strings.HasSuffix(ci.Filename, ".h") { 118 filename := ci.Filename 119 if replace := replaces[filename]; replace != "" { 120 filename = replace 121 } 122 ctx.includes = append(ctx.includes, filename) 123 includeUse[ci.Name] = filename 124 continue 125 } 126 // Remove duplicate defines (even with different values). Unfortunately we get few of these. 127 // There are some syscall numbers (presumably for 32/64 bits), and some macros that 128 // are defined in different files to different values (e.g. WMI_DATA_BE_SVC). 129 // Ideally we somehow rename defines (chosing one random value is never correct). 130 // But for now this helps to prevent compilation errors. 131 if defineDedup[ci.Name] { 132 continue 133 } 134 defineDedup[ci.Name] = true 135 ctx.defines = append(ctx.defines, define{ 136 Name: ci.Name, 137 Value: fmt.Sprint(ci.Value), 138 }) 139 } 140 ctx.includes = clangtool.SortAndDedupSlice(ctx.includes) 141 ctx.defines = clangtool.SortAndDedupSlice(ctx.defines) 142 // These additional includes must be at the top, because other kernel headers 143 // are broken and won't compile without these additional ones included first. 144 ctx.includes = append([]string{ 145 "vdso/bits.h", 146 "linux/types.h", 147 "linux/usbdevice_fs.h", // to fix broken include/uapi/linux/usbdevice_fs.h 148 "net/netlink.h", 149 }, ctx.includes...) 150 // Also pretend they are used. 151 includeUse["__NR_read"] = "vdso/bits.h" 152 includeUse["__NR_write"] = "linux/types.h" 153 includeUse["__NR_openat"] = "linux/usbdevice_fs.h" 154 includeUse["__NR_close"] = "net/netlink.h" 155 return includeUse 156 } 157 158 func (ctx *context) processEnums() { 159 for _, enum := range ctx.Enums { 160 enum.Name += autoSuffix 161 } 162 } 163 164 func (ctx *context) processSyscalls() { 165 var syscalls []*Syscall 166 for _, call := range ctx.Syscalls { 167 ctx.processFields(call.Args, "", false) 168 for varArg := range call.Args { 169 cmds := ctx.inferCommandVariants(call.Func, call.SourceFile, varArg) 170 for _, cmd := range cmds { 171 variant := *call 172 variant.Args = slices.Clone(call.Args) 173 for i, oldArg := range variant.Args { 174 arg := *oldArg 175 if i == varArg { 176 arg.syzType = fmt.Sprintf("const[%v]", cmd) 177 } else { 178 typ := ctx.inferArgType(call.Func, call.SourceFile, i, varArg, cmd) 179 refineFieldType(&arg, typ, false) 180 } 181 variant.Args[i] = &arg 182 } 183 variant.returnType = ctx.inferReturnType(call.Func, call.SourceFile, varArg, cmd) 184 suffix := cmd 185 if call.Func == "__do_sys_ioctl" { 186 suffix = ctx.uniqualize("ioctl cmd", cmd) 187 } 188 ctx.emitSyscall(&syscalls, &variant, "_"+suffix, cmd, varArg, cmd) 189 } 190 } 191 call.returnType = ctx.inferReturnType(call.Func, call.SourceFile, -1, "") 192 for i, arg := range call.Args { 193 typ := ctx.inferArgType(call.Func, call.SourceFile, i, -1, "") 194 refineFieldType(arg, typ, false) 195 } 196 ctx.emitSyscall(&syscalls, call, "", "", -1, "") 197 } 198 ctx.Syscalls = clangtool.SortAndDedupSlice(syscalls) 199 } 200 201 func (ctx *context) emitSyscall(syscalls *[]*Syscall, call *Syscall, 202 suffix, cmd string, scopeArg int, scopeVal string) { 203 fn := strings.TrimPrefix(call.Func, "__do_sys_") 204 for _, name := range ctx.syscallRename[fn] { 205 syscallName := name 206 identifyingConst := "__NR_" + name 207 if cmd != "" { 208 syscallName += "$" + cmd 209 identifyingConst = cmd 210 } 211 ctx.noteInterface(&Interface{ 212 Type: IfaceSyscall, 213 Name: syscallName, 214 IdentifyingConst: identifyingConst, 215 Files: []string{call.SourceFile}, 216 Func: call.Func, 217 AutoDescriptions: TristateYes, 218 scopeArg: scopeArg, 219 scopeVal: scopeVal, 220 }) 221 newCall := *call 222 newCall.Func = name + autoSuffix + suffix 223 *syscalls = append(*syscalls, &newCall) 224 } 225 } 226 227 func (ctx *context) processIouring() { 228 for _, op := range ctx.IouringOps { 229 ctx.noteInterface(&Interface{ 230 Type: IfaceIouring, 231 Name: op.Name, 232 IdentifyingConst: op.Name, 233 Files: []string{op.SourceFile}, 234 Func: op.Func, 235 Access: AccessUser, 236 AutoDescriptions: TristateNo, 237 }) 238 } 239 } 240 241 func (ctx *context) processStructs() map[string]*StructInfo { 242 structInfo := make(map[string]*StructInfo) 243 for _, str := range ctx.Structs { 244 str.Name += autoSuffix 245 ctx.structs[str.Name] = str 246 structInfo[str.Name] = &StructInfo{ 247 Size: str.ByteSize, 248 Align: str.Align, 249 } 250 } 251 for _, str := range ctx.Structs { 252 ctx.processFields(str.Fields, str.Name, true) 253 name := strings.TrimSuffix(str.Name, autoSuffix) 254 for _, f := range str.Fields { 255 typ := ctx.inferFieldType(name, f.Name) 256 refineFieldType(f, typ, true) 257 } 258 } 259 return structInfo 260 } 261 262 func (ctx *context) processFields(fields []*Field, parent string, needBase bool) { 263 counts := make([]*Field, len(fields)) 264 for _, f := range fields { 265 f.Name = fixIdentifier(f.Name) 266 if f.CountedBy != -1 { 267 counts[f.CountedBy] = f 268 } 269 } 270 for i, f := range fields { 271 f.syzType = ctx.fieldType(f, counts[i], parent, needBase) 272 } 273 } 274 275 func (ctx *context) fieldType(f, counts *Field, parent string, needBase bool) string { 276 if f.BitWidth != 0 && !needBase { 277 ctx.error("syscall arg %v is a bitfield", f.Name) 278 } 279 if f.BitWidth != 0 && f.Type.Int == nil { 280 ctx.error("non-int field %v is a bitfield", f.Name) 281 } 282 if counts != nil && f.Type.Int == nil && f.Type.Ptr == nil { 283 ctx.error("non-int/ptr field %v counts field %v", f.Name, counts.Name) 284 } 285 f.Name = strings.ToLower(f.Name) 286 switch { 287 case f.Type.Int != nil: 288 return ctx.fieldTypeInt(f, counts, needBase) 289 case f.Type.Ptr != nil: 290 return ctx.fieldTypePtr(f, counts, parent) 291 case f.Type.Array != nil: 292 return ctx.fieldTypeArray(f, parent) 293 case f.Type.Buffer != nil: 294 return ctx.fieldTypeBuffer(f) 295 case f.Type.Struct != "": 296 return ctx.fieldTypeStruct(f) 297 } 298 ctx.error("field %v does not have type", f.Name) 299 return "" 300 } 301 302 func (ctx *context) fieldTypeInt(f, counts *Field, needBase bool) string { 303 t := f.Type.Int 304 switch t.ByteSize { 305 case 1, 2, 4, 8: 306 default: 307 ctx.error("field %v has unsupported size %v", f.Name, t.ByteSize) 308 } 309 if t.Enum != "" && counts != nil { 310 ctx.error("field %v is both enum %v and counts field %v", f.Name, t.Enum, counts.Name) 311 } 312 baseType, isIntptr := ctx.baseIntType(f, needBase) 313 constType := fmt.Sprintf("const[%v %v]", t.MinValue, maybeBaseType(baseType, needBase)) 314 if f.IsAnonymous || t.IsConst { 315 return constType 316 } 317 if t.Enum != "" { 318 t.Enum += autoSuffix 319 return fmt.Sprintf("flags[%v %v]", t.Enum, maybeBaseType(baseType, needBase)) 320 } 321 if counts != nil { 322 return fmt.Sprintf("len[%v %v]", counts.Name, maybeBaseType(baseType, needBase)) 323 } 324 if t.Name == "TODO" { 325 return todoType 326 } 327 special := "" 328 switch t.ByteSize { 329 case 2: 330 special = ctx.specialInt2(f.Name, t.Name, needBase) 331 case 4: 332 special = ctx.specialInt4(f.Name, t.Name, needBase) 333 case 8: 334 if isIntptr { 335 special = ctx.specialIntptr(f.Name, t.Name, needBase) 336 } 337 } 338 if special != "" { 339 if f.BitWidth != 0 { 340 // We don't have syntax to express this. 341 ctx.error("field %v is both special %v and a bitfield", f.Name, special) 342 } 343 return special 344 } 345 if strings.HasSuffix(f.Name, "enabled") || strings.HasSuffix(f.Name, "enable") { 346 return "bool" + strings.TrimPrefix(baseType, "int") 347 } 348 if strings.Contains(f.Name, "pad") || strings.Contains(f.Name, "unused") || 349 strings.Contains(f.Name, "_reserved") { 350 return constType 351 } 352 if t.MinValue != 0 || t.MaxValue != 0 { 353 minVal, maxVal := uint64(t.MinValue), uint64(t.MaxValue) 354 if minVal > maxVal { 355 minVal, maxVal = maxVal, minVal 356 } 357 return baseType + fmt.Sprintf("[%v:%v]", minVal, maxVal) 358 } 359 return baseType 360 } 361 362 func (ctx *context) baseIntType(f *Field, needBase bool) (string, bool) { 363 t := f.Type.Int 364 baseType := fmt.Sprintf("int%v", t.ByteSize*8) 365 // Note: we make all 8-byte syscall arguments intptr b/c for 64-bit arches it does not matter, 366 // but for 32-bit arches int64 as syscall argument won't work. IIUC the ABI is that these 367 // are split into 2 32-bit arguments. 368 isIntptr := t.ByteSize == 8 && (!needBase || strings.Contains(t.Base, "long") && 369 !strings.Contains(t.Base, "long long")) 370 if isIntptr { 371 baseType = "intptr" 372 } 373 if t.isBigEndian && t.ByteSize != 1 { 374 baseType += "be" 375 } 376 if f.BitWidth == t.ByteSize*8 { 377 f.BitWidth = 0 378 } 379 if f.BitWidth != 0 { 380 baseType += fmt.Sprintf(":%v", f.BitWidth) 381 } 382 return baseType, isIntptr 383 } 384 385 func (ctx *context) specialInt2(field, typ string, needBase bool) string { 386 switch { 387 case strings.Contains(field, "port"): 388 return "sock_port" 389 } 390 return "" 391 } 392 393 // nolint: gocyclo 394 func (ctx *context) specialInt4(field, typ string, needBase bool) string { 395 switch { 396 case strings.Contains(field, "ipv4") || strings.Contains(field, "ip4") || 397 strings.HasSuffix(field, "address"): 398 return "ipv4_addr" 399 case strings.HasSuffix(field, "_pid") || strings.HasSuffix(field, "_tid") || 400 strings.HasSuffix(field, "_pgid") || strings.HasSuffix(field, "_tgid") || 401 field == "pid" || field == "tid" || field == "pgid" || field == "tgid": 402 return "pid" 403 case strings.HasSuffix(field, "dfd") && !strings.HasSuffix(field, "oldfd") && !strings.HasSuffix(field, "pidfd"): 404 return "fd_dir" 405 case strings.HasSuffix(field, "ns_fd"): 406 return "fd_namespace" 407 case strings.HasSuffix(field, "_uid") || field == "uid" || field == "user" || 408 field == "ruid" || field == "euid" || field == "suid": 409 return "uid" 410 case strings.HasSuffix(field, "_gid") || field == "gid" || field == "group" || 411 field == "rgid" || field == "egid" || field == "sgid": 412 return "gid" 413 case strings.HasSuffix(field, "fd") || strings.HasPrefix(field, "fd_") || 414 strings.Contains(field, "fildes") || field == "fdin" || field == "fdout": 415 return "fd" 416 case strings.Contains(field, "ifindex") || strings.Contains(field, "dev_index"): 417 return "ifindex" 418 } 419 return "" 420 } 421 422 func (ctx *context) specialIntptr(field, typ string, needBase bool) string { 423 switch field { 424 case "sigsetsize": 425 return fmt.Sprintf("const[8 %v]", maybeBaseType("intptr", needBase)) 426 } 427 return "" 428 } 429 430 func (ctx *context) fieldTypePtr(f, counts *Field, parent string) string { 431 t := f.Type.Ptr 432 dir := "inout" 433 if t.IsConst { 434 dir = "in" 435 } 436 opt := "" 437 // Use an opt pointer if the direct parent is the same as this node, or if the field name is next. 438 // Looking at the field name is a hack, but it's enough to avoid some recursion cases, 439 // e.g. for struct adf_user_cfg_section. 440 if f.Name == "next" || parent != "" && parent == t.Elem.Struct+autoSuffix { 441 opt = ", opt" 442 } 443 elem := &Field{ 444 Name: f.Name, 445 Type: t.Elem, 446 } 447 return fmt.Sprintf("ptr[%v, %v %v]", dir, ctx.fieldType(elem, counts, parent, true), opt) 448 } 449 450 func (ctx *context) fieldTypeArray(f *Field, parent string) string { 451 t := f.Type.Array 452 elem := &Field{ 453 Name: f.Name, 454 Type: t.Elem, 455 } 456 elemType := ctx.fieldType(elem, nil, parent, true) 457 if t.IsConstSize { 458 switch t.MaxSize { 459 case 0: 460 // Empty arrays may still affect parent struct layout, if the element type 461 // has alignment >1. We don't support arrays of size 0, so emit a special 462 // aligning type instead. 463 return fmt.Sprintf("auto_aligner[%v]", t.Align) 464 case 1: 465 // Array of size 1 is not really an array, just use the element type itself. 466 return elemType 467 } 468 } 469 bounds := ctx.bounds(f.Name, t.MinSize, t.MaxSize) 470 return fmt.Sprintf("array[%v%v]", elemType, bounds) 471 } 472 473 func (ctx *context) fieldTypeBuffer(f *Field) string { 474 t := f.Type.Buffer 475 bounds := ctx.bounds(f.Name, t.MinSize, t.MaxSize) 476 baseType := "string" 477 if t.IsNonTerminated { 478 baseType = "stringnoz" 479 } 480 switch { 481 case !t.IsString: 482 if t.MinSize == 6 && t.MaxSize == 6 { 483 // There are lots of different names for mac addresses (see grep ETH_ALEN in uapi/*.h). 484 // If this has too many false positives, theoretically we can make the clang tool 485 // look for arrays with [ETH_ALEN] size. See implementation of isExpandedFromMacro 486 // matcher for inspiration, that would need to be checked against 487 // ConstantArrayType::getSizeExpr. But for now let's just do the simple thing. 488 return "mac_addr" 489 } 490 if (t.MaxSize == 0 || t.MaxSize == 16) && 491 (strings.Contains(f.Name, "ipv6") || strings.Contains(f.Name, "ip6")) { 492 return "ipv6_addr" 493 } 494 return fmt.Sprintf("array[int8 %v]", bounds) 495 case strings.Contains(f.Name, "ifname") || strings.HasSuffix(f.Name, "dev_name") || 496 strings.Contains(f.Name, "_iface"): 497 return "devname" 498 case strings.Contains(f.Name, "filename") || strings.Contains(f.Name, "pathname") || 499 strings.Contains(f.Name, "dir_name") || f.Name == "oldname" || 500 f.Name == "newname" || f.Name == "path": 501 if !t.IsNonTerminated && bounds == "" { 502 return "filename" // alias that is easier to read 503 } 504 return fmt.Sprintf("%v[filename %v]", baseType, bounds) 505 } 506 return baseType 507 } 508 509 func (ctx *context) fieldTypeStruct(f *Field) string { 510 // Few important structs for which we have lots of heuristics, 511 // and the static analysis will have hard time generating something of similar 512 switch f.Type.Struct { 513 case "in_addr": 514 return "ipv4_addr" 515 case "in6_addr": 516 return "ipv6_addr" 517 case "sockaddr": 518 return "sockaddr" 519 case "__kernel_sockaddr_storage": 520 return "sockaddr_storage" 521 } 522 // We can get here several times for the same struct. 523 if !strings.HasSuffix(f.Type.Struct, autoSuffix) { 524 f.Type.Struct += autoSuffix 525 } 526 str := ctx.structs[f.Type.Struct] 527 if str == nil { 528 panic(fmt.Sprintf("can't find struct %v", f.Type.Struct)) 529 } 530 if str.ByteSize == 0 { 531 return fmt.Sprintf("auto_aligner[%v]", str.Align) 532 } 533 return f.Type.Struct 534 } 535 536 func (ctx *context) bounds(name string, min, max int) string { 537 if min < 0 || min > max { 538 ctx.error("field %v has bad bounds %v:%v", name, min, max) 539 } 540 if max > min { 541 return fmt.Sprintf(", %v:%v", min, max) 542 } 543 if max != 0 { 544 return fmt.Sprintf(", %v", max) 545 } 546 return "" 547 } 548 549 func (ctx *context) uniqualize(typ, name string) string { 550 id := fmt.Sprintf("%v-%v", typ, name) 551 ctx.uniqualizer[id]++ 552 if seq := ctx.uniqualizer[id]; seq != 1 { 553 return name + fmt.Sprint(seq) 554 } 555 return name 556 } 557 558 const ( 559 autoSuffix = "$auto" 560 todoType = "auto_todo" 561 voidType = "void" 562 ) 563 564 func fixIdentifier(name string) string { 565 switch name { 566 case "resource", "include", "define", "incdir", "syscall", "parent": 567 return "_" + name 568 } 569 return name 570 } 571 572 func stringIdentifier(name string) string { 573 for _, bad := range []string{" ", ".", "-"} { 574 name = strings.ReplaceAll(name, bad, "_") 575 } 576 return strings.ToLower(name) 577 } 578 579 func maybeBaseType(baseType string, needBase bool) string { 580 if needBase { 581 return ", " + baseType 582 } 583 return "" 584 } 585 586 func comma(i int) string { 587 if i == 0 { 588 return "" 589 } 590 return ", " 591 }