github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/csource/csource.go (about) 1 // Copyright 2015 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // Package csource generates [almost] equivalent C programs from syzkaller programs. 5 // 6 // Outline of the process: 7 // - inputs to the generation are the program and options 8 // - options control multiple aspects of the resulting C program, 9 // like if we want a multi-threaded program or a single-threaded, 10 // what type of sandbox we want to use, if we want to setup net devices or not, etc 11 // - we use actual executor sources as the base 12 // - gen.go takes all executor/common*.h headers and bundles them into generated.go 13 // - during generation we tear executor headers apart and take only the bits 14 // we need for the current program/options, this is done by running C preprocessor 15 // with particular set of defines so that the preprocessor removes unneeded 16 // #ifdef SYZ_FOO sections 17 // - then we generate actual syscall calls with the given arguments 18 // based on the binary "encodingexec" representation of the program 19 // (the same representation executor uses for interpretation) 20 // - then we glue it all together 21 // - as the last step we run some text post-processing on the resulting source code: 22 // remove debug calls, replace exitf/fail with exit, hoist/sort/dedup includes, 23 // remove duplicate empty lines, etc 24 package csource 25 26 import ( 27 "bytes" 28 "fmt" 29 "math/bits" 30 "regexp" 31 "sort" 32 "strconv" 33 "strings" 34 "time" 35 36 "github.com/google/syzkaller/prog" 37 "github.com/google/syzkaller/sys/targets" 38 ) 39 40 // Write generates C source for program p based on the provided options opt. 
func Write(p *prog.Prog, opts Options) ([]byte, error) {
	if err := opts.Check(p.Target.OS); err != nil {
		return nil, fmt.Errorf("csource: invalid opts: %w", err)
	}
	ctx := &context{
		p:         p,
		opts:      opts,
		target:    p.Target,
		sysTarget: targets.Get(p.Target.OS, p.Target.Arch),
		calls:     make(map[string]uint64),
	}
	return ctx.generateSource()
}

// context carries all state for generation of a single C program.
type context struct {
	p         *prog.Prog      // the program being converted (may be lazily cloned by filterCalls)
	opts      Options         // generation options controlling threading, sandbox, tracing, etc.
	target    *prog.Target    // syzkaller target description
	sysTarget *targets.Target // OS/arch target description (syscall numbers, timeouts, ...)
	calls     map[string]uint64 // CallName -> NR
}

// generateSandboxFunctionSignature returns the C statement that invokes the
// selected sandbox entry point (or loop() directly when no sandbox is used).
// Only the "android" sandbox takes an argument.
func generateSandboxFunctionSignature(sandboxName string, sandboxArg int) string {
	if sandboxName == "" {
		return "loop();"
	}

	arguments := "();"
	if sandboxName == "android" {
		arguments = "(" + strconv.Itoa(sandboxArg) + ");"
	}
	return "do_sandbox_" + sandboxName + arguments
}

// generateSource produces the final C program: it generates the per-call C code,
// fills in the template replacements for the common executor header, glues
// everything together and post-processes the result.
func (ctx *context) generateSource() ([]byte, error) {
	ctx.filterCalls()
	calls, vars, err := ctx.generateProgCalls(ctx.p, ctx.opts.Trace, ctx.opts.CallComments)
	if err != nil {
		return nil, err
	}

	mmapProg := ctx.p.Target.DataMmapProg()
	// Disable comments on the mmap calls as they are part of the initial setup
	// for a program and always very similar. Comments on these provide
	// little-to-no additional context that can't be inferred from looking at
	// the call arguments directly, and just make the source longer.
	mmapCalls, _, err := ctx.generateProgCalls(mmapProg, false, false)
	if err != nil {
		return nil, err
	}

	// Collect syscall numbers for every call in the program (and the setup mmap
	// program), including hidden dependencies of pseudo-syscalls.
	for _, c := range append(mmapProg.Calls, ctx.p.Calls...) {
		ctx.calls[c.Meta.CallName] = c.Meta.NR
		for _, dep := range ctx.sysTarget.PseudoSyscallDeps[c.Meta.CallName] {
			depCall := ctx.target.SyscallMap[dep]
			if depCall == nil {
				panic(dep + " is specified in PseudoSyscallDeps, but not present")
			}
			ctx.calls[depCall.CallName] = depCall.NR
		}
	}

	// Emit the result array "uint64 r[N] = {...};" only if the program uses results.
	varsBuf := new(bytes.Buffer)
	if len(vars) != 0 {
		fmt.Fprintf(varsBuf, "uint64 r[%v] = {", len(vars))
		for i, v := range vars {
			if i != 0 {
				fmt.Fprintf(varsBuf, ", ")
			}
			fmt.Fprintf(varsBuf, "0x%x", v)
		}
		fmt.Fprintf(varsBuf, "};\n")
	}

	sandboxFunc := generateSandboxFunctionSignature(ctx.opts.Sandbox, ctx.opts.SandboxArg)
	// Template placeholders substituted into the bundled executor headers.
	replacements := map[string]string{
		"PROCS":           fmt.Sprint(ctx.opts.Procs),
		"REPEAT_TIMES":    fmt.Sprint(ctx.opts.RepeatTimes),
		"NUM_CALLS":       fmt.Sprint(len(ctx.p.Calls)),
		"MMAP_DATA":       strings.Join(mmapCalls, ""),
		"SYSCALL_DEFINES": ctx.generateSyscallDefines(),
		"SANDBOX_FUNC":    sandboxFunc,
		"RESULTS":         varsBuf.String(),
		"SYSCALLS":        ctx.generateSyscalls(calls, len(vars) != 0),
	}
	if !ctx.opts.Threaded && !ctx.opts.Repeat && ctx.opts.Sandbox == "" {
		// This inlines syscalls right into main for the simplest case.
		replacements["SANDBOX_FUNC"] = replacements["SYSCALLS"]
		replacements["SYSCALLS"] = "unused"
	}
	timeouts := ctx.sysTarget.Timeouts(ctx.opts.Slowdown)
	replacements["PROGRAM_TIMEOUT_MS"] = fmt.Sprint(int(timeouts.Program / time.Millisecond))
	timeoutExpr := fmt.Sprint(int(timeouts.Syscall / time.Millisecond))
	replacements["BASE_CALL_TIMEOUT_MS"] = timeoutExpr
	// Per-call timeout is the base timeout plus a C conditional expression
	// adding extra time for calls that declare a timeout attribute.
	for i, call := range ctx.p.Calls {
		if timeout := call.Meta.Attrs.Timeout; timeout != 0 {
			timeoutExpr += fmt.Sprintf(" + (call == %v ? %v : 0)", i, timeout*uint64(timeouts.Scale))
		}
	}
	replacements["CALL_TIMEOUT_MS"] = timeoutExpr
	if ctx.p.RequiredFeatures().Async {
		// Build the C condition that selects which calls are issued asynchronously.
		conditions := []string{}
		for idx, call := range ctx.p.Calls {
			if !call.Props.Async {
				continue
			}
			conditions = append(conditions, fmt.Sprintf("call == %v", idx))
		}
		replacements["ASYNC_CONDITIONS"] = strings.Join(conditions, " || ")
	}

	result, err := createCommonHeader(ctx.p, mmapProg, replacements, ctx.opts)
	if err != nil {
		return nil, err
	}
	const header = "// autogenerated by syzkaller (https://github.com/google/syzkaller)\n\n"
	result = append([]byte(header), result...)
	result = ctx.postProcess(result)
	return result, nil
}

// This is a kludge, but we keep it here until a better approach is implemented.
// TODO: untie syz_emit_ethernet/syz_extract_tcp_res and NetInjection. And also
// untie VhciInjection and syz_emit_vhci. Then we could remove this method.
//
// filterCalls removes injection pseudo-syscalls that the selected options do
// not support. The program is cloned lazily (copy-on-write) so that the
// caller's prog is never mutated.
func (ctx *context) filterCalls() {
	p := ctx.p
	for i := 0; i < len(p.Calls); {
		call := p.Calls[i]
		callName := call.Meta.CallName
		emitCall := (ctx.opts.NetInjection ||
			callName != "syz_emit_ethernet" &&
				callName != "syz_extract_tcp_res") &&
			(ctx.opts.VhciInjection || callName != "syz_emit_vhci")
		if emitCall {
			i++
			continue
		}
		// Remove the call.
		if ctx.p == p {
			// We lazily clone the program to avoid unnecessary copying.
			p = ctx.p.Clone()
		}
		p.RemoveCall(i)
	}
	ctx.p = p
}

// generateSyscalls wraps the per-call C snippets into either straight-line code
// (simple single-threaded case) or a switch over the call index (threaded/collide
// mode, where the executor loop dispatches calls by number).
func (ctx *context) generateSyscalls(calls []string, hasVars bool) string {
	opts := ctx.opts
	buf := new(bytes.Buffer)
	if !opts.Threaded && !opts.Collide {
		if len(calls) > 0 && (hasVars || opts.Trace) {
			fmt.Fprintf(buf, "\tintptr_t res = 0;\n")
		}
		fmt.Fprintf(buf, "\tif (write(1, \"executing program\\n\", sizeof(\"executing program\\n\") - 1)) {}\n")
		if opts.Trace {
			fmt.Fprintf(buf, "\tfprintf(stderr, \"### start\\n\");\n")
		}
		for _, c := range calls {
			fmt.Fprintf(buf, "%s", c)
		}
	} else if len(calls) > 0 {
		if hasVars || opts.Trace {
			fmt.Fprintf(buf, "\tintptr_t res = 0;\n")
		}
		fmt.Fprintf(buf, "\tswitch (call) {\n")
		for i, c := range calls {
			fmt.Fprintf(buf, "\tcase %v:\n", i)
			// Re-indent the snippet one level deeper to sit inside the switch.
			fmt.Fprintf(buf, "%s", strings.ReplaceAll(c, "\t", "\t\t"))
			fmt.Fprintf(buf, "\t\tbreak;\n")
		}
		fmt.Fprintf(buf, "\t}\n")
	}
	return buf.String()
}

// generateSyscallDefines emits fallback "#define __NR_foo N" lines (sorted for
// determinism) for syscalls whose numbers may be missing from system headers.
func (ctx *context) generateSyscallDefines() string {
	var calls []string
	for name, nr := range ctx.calls {
		if !ctx.sysTarget.HasCallNumber(name) || !ctx.sysTarget.NeedSyscallDefine(nr) {
			continue
		}
		calls = append(calls, name)
	}
	sort.Strings(calls)
	buf := new(bytes.Buffer)
	prefix := ctx.sysTarget.SyscallPrefix
	for _, name := range calls {
		fmt.Fprintf(buf, "#ifndef %v%v\n", prefix, name)
		fmt.Fprintf(buf, "#define %v%v %v\n", prefix, name, ctx.calls[name])
		fmt.Fprintf(buf, "#endif\n")
	}
	if ctx.target.OS == targets.Linux && ctx.target.PtrSize == 4 {
		// This is a dirty hack.
		// On 32-bit linux mmap translated to old_mmap syscall which has a different signature.
		// mmap2 has the right signature. syz-extract translates mmap to mmap2, do the same here.
		fmt.Fprintf(buf, "#undef __NR_mmap\n")
		fmt.Fprintf(buf, "#define __NR_mmap __NR_mmap2\n")
	}
	return buf.String()
}

const indent string = "  " // Two spaces.
// clang-format produces nicer comments with '//' prefixing versus '/* ... */' style comments.
const commentPrefix string = "//"

// linesToCStyleComment joins the given lines into a multi-line C '//' comment
// (no trailing newline after the last line).
func linesToCStyleComment(lines []string) string {
	var commentBuilder strings.Builder
	for i, line := range lines {
		commentBuilder.WriteString(commentPrefix + indent + line)
		if i != len(lines)-1 {
			commentBuilder.WriteString("\n")
		}
	}
	return commentBuilder.String()
}

// generateComment renders a human-readable comment describing the call's
// arguments (and return type, if any) to be placed above the generated call.
func generateComment(call *prog.Call) string {
	lines := []string{fmt.Sprintf("%s arguments: [", call.Meta.Name)}
	for i, arg := range call.Args {
		argLines := prog.FormatArg(arg, call.Meta.Args[i].Name)
		// Indent the formatted argument.
		for i := range argLines {
			argLines[i] = indent + argLines[i]
		}
		lines = append(lines, argLines...)
	}
	lines = append(lines, "]")
	if call.Ret != nil {
		lines = append(lines, "returns "+call.Ret.Type().Name())
	}
	return linesToCStyleComment(lines)
}

// generateProgCalls serializes the program to the executor's binary
// representation, decodes it back, and emits one C snippet per call.
// It returns the snippets and the result variables used by the program.
func (ctx *context) generateProgCalls(p *prog.Prog, trace, addComments bool) ([]string, []uint64, error) {
	var comments []string
	if addComments {
		comments = make([]string, len(p.Calls))
		for i, call := range p.Calls {
			comments[i] = generateComment(call)
		}
	}

	exec, err := p.SerializeForExec()
	if err != nil {
		return nil, nil, fmt.Errorf("failed to serialize program: %w", err)
	}
	decoded, err := ctx.target.DeserializeExec(exec, nil)
	if err != nil {
		return nil, nil, err
	}
	calls, vars := ctx.generateCalls(decoded, trace, addComments, comments)
	return calls, vars, nil
}

// generateCalls emits the C code for each call of the decoded exec program:
// copyin of arguments, fault injection, the call itself (plus optional reruns),
// and copyout of results.
func (ctx *context) generateCalls(p prog.ExecProg, trace, addComments bool,
	callComments []string) ([]string, []uint64) {
	var calls []string
	csumSeq := 0
	for ci, call := range p.Calls {
		w := new(bytes.Buffer)
		if addComments {
			w.WriteString(callComments[ci] + "\n")
		}
		// Copyin.
		for _, copyin := range call.Copyin {
			ctx.copyin(w, &csumSeq, copyin)
		}

		if call.Props.FailNth > 0 {
			fmt.Fprintf(w, "\tinject_fault(%v);\n", call.Props.FailNth)
		}
		// Call itself.
		resCopyout := call.Index != prog.ExecNoCopyout
		argCopyout := len(call.Copyout) != 0

		ctx.emitCall(w, call, ci, resCopyout || argCopyout, trace)

		if call.Props.Rerun > 0 {
			fmt.Fprintf(w, "\tfor (int i = 0; i < %v; i++) {\n", call.Props.Rerun)
			// Rerun invocations should not affect the result value.
			ctx.emitCall(w, call, ci, false, false)
			fmt.Fprintf(w, "\t}\n")
		}
		// Copyout.
		if resCopyout || argCopyout {
			ctx.copyout(w, call, resCopyout)
		}
		calls = append(calls, w.String())
	}
	return calls, p.Vars
}

// isNative reports whether the call is invoked via the raw syscall() mechanism
// (it has a syscall number and no trampoline function).
func isNative(sysTarget *targets.Target, callName string) bool {
	_, trampoline := sysTarget.SyscallTrampolines[callName]
	return sysTarget.HasCallNumber(callName) && !trampoline
}

// emitCall writes the C statement for a single call, including the NONFAILING
// wrapper for non-native calls, result capture, an annotation comment, and an
// optional trace fprintf.
func (ctx *context) emitCall(w *bytes.Buffer, call prog.ExecCall, ci int, haveCopyout, trace bool) {
	native := isNative(ctx.sysTarget, call.Meta.CallName)
	fmt.Fprintf(w, "\t")
	if !native {
		// This mimics the same as executor does for execute_syscall,
		// but only for non-native syscalls to reduce clutter (native syscalls are assumed to not crash).
		// Arrange for res = -1 in case of syscall abort, we care about errno only if we are tracing for pkg/runtest.
		if haveCopyout || trace {
			fmt.Fprintf(w, "res = -1;\n\t")
		}
		if trace {
			fmt.Fprintf(w, "errno = EFAULT;\n\t")
		}
		fmt.Fprintf(w, "NONFAILING(")
	}
	if haveCopyout || trace {
		fmt.Fprintf(w, "res = ")
	}
	w.WriteString(ctx.fmtCallBody(call))
	if !native {
		fmt.Fprintf(w, ")") // close NONFAILING macro
	}
	fmt.Fprintf(w, ";")
	comment := ctx.target.AnnotateCall(call)
	if comment != "" {
		fmt.Fprintf(w, " /* %s */", comment)
	}
	fmt.Fprintf(w, "\n")
	if trace {
		cast := ""
		if !native && !strings.HasPrefix(call.Meta.CallName, "syz_") {
			// Potentially we casted a function returning int to a function returning intptr_t.
			// So instead of intptr_t -1 we can get 0x00000000ffffffff. Sign extend it to intptr_t.
			cast = "(intptr_t)(int)"
		}
		fmt.Fprintf(w, "\tfprintf(stderr, \"### call=%v errno=%%u\\n\", %vres == -1 ? errno : 0);\n", ci, cast)
	}
}

// fmtCallBody formats the call expression itself: either syscall(__NR_foo, ...),
// a syz_ pseudo-syscall function call, or a casted libc function invocation,
// with each argument rendered (and commented) appropriately.
func (ctx *context) fmtCallBody(call prog.ExecCall) string {
	native := isNative(ctx.sysTarget, call.Meta.CallName)
	callName, ok := ctx.sysTarget.SyscallTrampolines[call.Meta.CallName]
	if !ok {
		callName = call.Meta.CallName
	}
	argsStrs := []string{}
	funcName := ""
	if native {
		funcName = "syscall"
		argsStrs = append(argsStrs, ctx.sysTarget.SyscallPrefix+callName)
	} else if strings.HasPrefix(callName, "syz_") {
		funcName = callName
	} else {
		// Libc call: cast the symbol to a function taking/returning intptr_t.
		args := strings.Repeat(",intptr_t", len(call.Args)+call.Meta.MissingArgs)
		if args != "" {
			args = args[1:]
		}
		funcName = fmt.Sprintf("((intptr_t(*)(%v))CAST(%v))", args, callName)
	}
	for i, arg := range call.Args {
		switch arg := arg.(type) {
		case prog.ExecArgConst:
			if arg.Format != prog.FormatNative && arg.Format != prog.FormatBigEndian {
				panic("string format in syscall argument")
			}
			com := ctx.argComment(call.Meta.Args[i], arg)
			argsStrs = append(argsStrs, com+handleBigEndian(arg, ctx.constArgToStr(arg, native)))
		case prog.ExecArgResult:
			if arg.Format != prog.FormatNative && arg.Format != prog.FormatBigEndian {
				panic("string format in syscall argument")
			}
			com := ctx.argComment(call.Meta.Args[i], arg)
			val := ctx.resultArgToStr(arg)
			if native && ctx.target.PtrSize == 4 {
				// syscall accepts args as ellipsis, resources are uint64
				// and take 2 slots without the cast, which would be wrong.
				val = "(intptr_t)" + val
			}
			argsStrs = append(argsStrs, com+val)
		default:
			panic(fmt.Sprintf("unknown arg type: %+v", arg))
		}
	}
	for i := 0; i < call.Meta.MissingArgs; i++ {
		argsStrs = append(argsStrs, "0")
	}
	return fmt.Sprintf("%v(%v)", funcName, strings.Join(argsStrs, ", "))
}

// generateCsumInet emits C code that computes an inet checksum over the given
// chunks and stores the digest at addr. csumSeq uniquely names the local
// csum_inet state variable.
func (ctx *context) generateCsumInet(w *bytes.Buffer, addr uint64, arg prog.ExecArgCsum, csumSeq int) {
	fmt.Fprintf(w, "\tstruct csum_inet csum_%d;\n", csumSeq)
	fmt.Fprintf(w, "\tcsum_inet_init(&csum_%d);\n", csumSeq)
	for i, chunk := range arg.Chunks {
		switch chunk.Kind {
		case prog.ExecArgCsumChunkData:
			fmt.Fprintf(w, "\tNONFAILING(csum_inet_update(&csum_%d, (const uint8*)0x%x, %d));\n",
				csumSeq, chunk.Value, chunk.Size)
		case prog.ExecArgCsumChunkConst:
			fmt.Fprintf(w, "\tuint%d csum_%d_chunk_%d = 0x%x;\n",
				chunk.Size*8, csumSeq, i, chunk.Value)
			fmt.Fprintf(w, "\tcsum_inet_update(&csum_%d, (const uint8*)&csum_%d_chunk_%d, %d);\n",
				csumSeq, csumSeq, i, chunk.Size)
		default:
			panic(fmt.Sprintf("unknown checksum chunk kind %v", chunk.Kind))
		}
	}
	fmt.Fprintf(w, "\tNONFAILING(*(uint16*)0x%x = csum_inet_digest(&csum_%d));\n",
		addr, csumSeq)
}

// copyin emits the C code that writes one argument value into program memory:
// plain stores, bitfield stores, result references, data memcpy/memset, or
// checksum computation.
func (ctx *context) copyin(w *bytes.Buffer, csumSeq *int, copyin prog.ExecCopyin) {
	switch arg := copyin.Arg.(type) {
	case prog.ExecArgConst:
		if arg.BitfieldOffset == 0 && arg.BitfieldLength == 0 {
			ctx.copyinVal(w, copyin.Addr, arg.Size, handleBigEndian(arg, ctx.constArgToStr(arg, false)), arg.Format)
		} else {
			if arg.Format != prog.FormatNative && arg.Format != prog.FormatBigEndian {
				panic("bitfield+string format")
			}
			htobe := ""
			if !ctx.target.BigEndian && arg.Format == prog.FormatBigEndian {
				htobe = fmt.Sprintf("htobe%v", arg.Size*8)
			}
			bitfieldOffset := arg.BitfieldOffset
			if ctx.target.BigEndian {
				// On big-endian targets bitfields are laid out from the
				// opposite end, so mirror the offset within the field.
				bitfieldOffset = arg.Size*8 - arg.BitfieldOffset - arg.BitfieldLength
			}
			fmt.Fprintf(w, "\tNONFAILING(STORE_BY_BITMASK(uint%v, %v, 0x%x, %v, %v, %v));\n",
				arg.Size*8, htobe, copyin.Addr, ctx.constArgToStr(arg, false),
				bitfieldOffset, arg.BitfieldLength)
		}
	case prog.ExecArgResult:
		ctx.copyinVal(w, copyin.Addr, arg.Size, ctx.resultArgToStr(arg), arg.Format)
	case prog.ExecArgData:
		// If the data is a single repeated byte, memset is shorter than memcpy.
		if bytes.Equal(arg.Data, bytes.Repeat(arg.Data[:1], len(arg.Data))) {
			fmt.Fprintf(w, "\tNONFAILING(memset((void*)0x%x, %v, %v));\n",
				copyin.Addr, arg.Data[0], len(arg.Data))
		} else {
			fmt.Fprintf(w, "\tNONFAILING(memcpy((void*)0x%x, \"%s\", %v));\n",
				copyin.Addr, toCString(arg.Data, arg.Readable), len(arg.Data))
		}
	case prog.ExecArgCsum:
		switch arg.Kind {
		case prog.ExecArgCsumInet:
			*csumSeq++
			ctx.generateCsumInet(w, copyin.Addr, arg, *csumSeq)
		default:
			panic(fmt.Sprintf("unknown csum kind %v", arg.Kind))
		}
	default:
		panic(fmt.Sprintf("bad argument type: %+v", arg))
	}
}

// copyinVal emits a single store of val at addr, either as a raw integer store
// or as a sprintf for the string-encoded formats. The expected sizes are the
// fixed text widths of each format (20 decimal / 18 hex / 23 octal digits).
// NOTE(review): the hex/oct panic messages reuse the "bad strdec size" text.
func (ctx *context) copyinVal(w *bytes.Buffer, addr, size uint64, val string, bf prog.BinaryFormat) {
	switch bf {
	case prog.FormatNative, prog.FormatBigEndian:
		fmt.Fprintf(w, "\tNONFAILING(*(uint%v*)0x%x = %v);\n", size*8, addr, val)
	case prog.FormatStrDec:
		if size != 20 {
			panic("bad strdec size")
		}
		fmt.Fprintf(w, "\tNONFAILING(sprintf((char*)0x%x, \"%%020llu\", (long long)%v));\n", addr, val)
	case prog.FormatStrHex:
		if size != 18 {
			panic("bad strdec size")
		}
		fmt.Fprintf(w, "\tNONFAILING(sprintf((char*)0x%x, \"0x%%016llx\", (long long)%v));\n", addr, val)
	case prog.FormatStrOct:
		if size != 23 {
			panic("bad strdec size")
		}
		fmt.Fprintf(w, "\tNONFAILING(sprintf((char*)0x%x, \"%%023llo\", (long long)%v));\n", addr, val)
	default:
		panic("unknown binary format")
	}
}

// copyout emits the C code that saves the call result and/or copies out result
// values from memory, guarded by a success check of the call's return value.
func (ctx *context) copyout(w *bytes.Buffer, call prog.ExecCall, resCopyout bool) {
	if ctx.sysTarget.OS == targets.Fuchsia {
		// On fuchsia we have real system calls that return ZX_OK on success,
		// and libc calls that are casted to function returning intptr_t,
		// as the result int -1 is returned as 0x00000000ffffffff rather than full -1.
		if strings.HasPrefix(call.Meta.CallName, "zx_") {
			fmt.Fprintf(w, "\tif (res == ZX_OK)")
		} else {
			fmt.Fprintf(w, "\tif ((int)res != -1)")
		}
	} else {
		fmt.Fprintf(w, "\tif (res != -1)")
	}
	// Braces are only needed when the guarded body has more than one statement.
	copyoutMultiple := len(call.Copyout) > 1 || resCopyout && len(call.Copyout) > 0
	if copyoutMultiple {
		fmt.Fprintf(w, " {")
	}
	fmt.Fprintf(w, "\n")
	if resCopyout {
		fmt.Fprintf(w, "\t\tr[%v] = res;\n", call.Index)
	}
	for _, copyout := range call.Copyout {
		fmt.Fprintf(w, "\t\tNONFAILING(r[%v] = *(uint%v*)0x%x);\n",
			copyout.Index, copyout.Size*8, copyout.Addr)
	}
	if copyoutMultiple {
		fmt.Fprintf(w, "\t}\n")
	}
}

// factorizeAsFlags greedily tries to express value as a bitwise OR of the named
// flag constants, returning the chosen flag names and the leftover bits.
// attemptsLeft bounds the exponential search; when it hits zero the recursion
// stops and the remaining value is left unfactorized.
func (ctx *context) factorizeAsFlags(value uint64, flags []string, attemptsLeft *int) ([]string, uint64) {
	if len(flags) == 0 || value == 0 || *attemptsLeft == 0 {
		return nil, value
	}

	*attemptsLeft -= 1
	currentFlag := flags[0]
	subset, remainder := ctx.factorizeAsFlags(value, flags[1:], attemptsLeft)

	if flagMask, ok := ctx.p.Target.ConstMap[currentFlag]; ok && (value&flagMask == flagMask) {
		subsetIfTaken, remainderIfTaken := ctx.factorizeAsFlags(value & ^flagMask, flags[1:], attemptsLeft)
		subsetIfTaken = append(subsetIfTaken, currentFlag)

		// Prefer the option covering more bits; break ties by fewer flags.
		// Note: the locals shadow package "bits", but both RHS calls are
		// evaluated before the declaration takes effect, so this is safe.
		bits, bitsIfTaken := bits.OnesCount64(remainder), bits.OnesCount64(remainderIfTaken)
		if (bitsIfTaken < bits) || (bits == bitsIfTaken && len(subsetIfTaken) < len(subset)) {
			return subsetIfTaken, remainderIfTaken
		}
	}

	return subset, remainder
}

// prettyPrintValue renders a const argument as a "FLAG_A|FLAG_B|0x..." string
// using the flags declared for the field's type, or "" if no factorization is
// possible.
func (ctx *context) prettyPrintValue(field prog.Field, arg prog.ExecArgConst) string {
	mask := (uint64(1) << (arg.Size * 8)) - 1
	v := arg.Value & mask

	f := ctx.p.Target.FlagsMap[field.Type.Name()]
	if len(f) == 0 {
		return ""
	}

	maxFactorizationAttempts := 256
	flags, remainder := ctx.factorizeAsFlags(v, f, &maxFactorizationAttempts)
	if len(flags) == 0 {
		return ""
	}
	if remainder != 0 {
		flags = append(flags, fmt.Sprintf("0x%x", remainder))
	}

	return strings.Join(flags, "|")
}

// argComment produces the inline "/*name=value*/" comment placed before an
// argument; value is only filled in for const arguments with known flags.
func (ctx *context) argComment(field prog.Field, arg prog.ExecArg) string {
	val := ""
	constArg, isConstArg := arg.(prog.ExecArgConst)
	if isConstArg {
		val = ctx.prettyPrintValue(field, constArg)
	}

	return "/*" + field.Name + "=" + val + "*/"
}

// enforceBitSize is necessary e.g. in the variadic arguments context of the syscall() function.
func (ctx *context) constArgToStr(arg prog.ExecArgConst, enforceBitSize bool) string {
	suffix := ""
	if enforceBitSize {
		suffix = ctx.literalSuffix(arg)
	}
	mask := (uint64(1) << (arg.Size * 8)) - 1
	v := arg.Value & mask
	val := ""
	if v == ^uint64(0)&mask {
		// All-ones value: print as -1 for readability.
		if enforceBitSize {
			val = "(intptr_t)-1"
		} else {
			val = "-1"
		}
	} else if v >= 10 {
		val = fmt.Sprintf("0x%x%s", v, suffix)
	} else {
		val = fmt.Sprintf("%d%s", v, suffix)
	}
	if ctx.opts.Procs > 1 && arg.PidStride != 0 {
		val += fmt.Sprintf(" + procid*%v", arg.PidStride)
	}
	return val
}

// literalSuffix returns the C integer-literal suffix needed to promote a
// 64-bit constant argument to full width; "" for smaller sizes.
func (ctx *context) literalSuffix(arg prog.ExecArgConst) string {
	if arg.Size == 8 {
		// syscall() is variadic, so constant arguments must be explicitly
		// promoted. Otherwise the compiler is free to leave garbage in the
		// upper 32 bits of the argument value. In practice this can happen
		// on amd64 with arguments that are passed on the stack, i.e.,
		// arguments beyond the first six. For example, on freebsd/amd64,
		// syscall(SYS_mmap, ..., 0) causes clang to emit a 32-bit store of
		// 0 to the stack, but the kernel expects a 64-bit value.
		//
		// syzkaller's argument type representations do not always match
		// the OS ABI. For instance, "flags" is always 64 bits wide on 64-bit
		// platforms, but is a 32-bit value ("unsigned int" or so) in many
		// cases. Thus, we assume here that passing a 64-bit argument where
		// a 32-bit argument is expected won't break anything. On amd64
		// this should be fine: arguments are passed in 64-bit registers or
		// at 64 bit-aligned addresses on the stack.
		if ctx.target.PtrSize == 4 {
			return "ull"
		} else {
			return "ul"
		}
	}
	return ""
}

// handleBigEndian wraps val in an htobeN() conversion when the argument uses
// big-endian format; otherwise it returns val unchanged.
func handleBigEndian(arg prog.ExecArgConst, val string) string {
	if arg.Format == prog.FormatBigEndian {
		return fmt.Sprintf("htobe%v(%v)", arg.Size*8, val)
	}
	return val
}

// resultArgToStr renders a reference to a previous call's result, applying the
// recorded div/add transformations and optional big-endian conversion.
func (ctx *context) resultArgToStr(arg prog.ExecArgResult) string {
	res := fmt.Sprintf("r[%v]", arg.Index)
	if arg.DivOp != 0 {
		res = fmt.Sprintf("%v/%v", res, arg.DivOp)
	}
	if arg.AddOp != 0 {
		res = fmt.Sprintf("%v+%v", res, arg.AddOp)
	}
	if arg.Format == prog.FormatBigEndian {
		res = fmt.Sprintf("htobe%v(%v)", arg.Size*8, res)
	}
	return res
}

// postProcess performs text-level cleanup of the generated source: strips
// executor-only macros/calls (NONFAILING, debug, fail, ...), removes include
// guards, hoists includes and collapses empty lines.
func (ctx *context) postProcess(result []byte) []byte {
	// Remove NONFAILING, debug, fail, etc calls.
	if !ctx.opts.HandleSegv {
		result = regexp.MustCompile(`\t*NONFAILING\((.*)\);\n`).ReplaceAll(result, []byte("$1;\n"))
	}
	result = bytes.ReplaceAll(result, []byte("NORETURN"), nil)
	result = bytes.ReplaceAll(result, []byte("doexit("), []byte("exit("))
	// TODO: Figure out what would be the right replacement for doexit_thread().
	result = bytes.ReplaceAll(result, []byte("doexit_thread("), []byte("exit("))
	result = regexp.MustCompile(`PRINTF\(.*?\)`).ReplaceAll(result, nil)
	result = regexp.MustCompile(`\t*debug\((.*\n)*?.*\);\n`).ReplaceAll(result, nil)
	result = regexp.MustCompile(`\t*debug_dump_data\((.*\n)*?.*\);\n`).ReplaceAll(result, nil)
	result = regexp.MustCompile(`\t*exitf\((.*\n)*?.*\);\n`).ReplaceAll(result, []byte("\texit(1);\n"))
	result = regexp.MustCompile(`\t*fail(msg)?\((.*\n)*?.*\);\n`).ReplaceAll(result, []byte("\texit(1);\n"))

	// Remove executor include guards.
	result = regexp.MustCompile(`#define\s+[A-Z0-9_]*_H\s*\n`).ReplaceAll(result, nil)

	result = ctx.hoistIncludes(result)
	result = ctx.removeEmptyLines(result)
	return result
}

// hoistIncludes moves all includes to the top, removes dups and sorts.
func (ctx *context) hoistIncludes(result []byte) []byte {
	includesStart := bytes.Index(result, []byte("#include"))
	if includesStart == -1 {
		return result
	}
	includes := make(map[string]bool)
	includeRe := regexp.MustCompile("#include <.*>\n")
	for _, match := range includeRe.FindAll(result, -1) {
		includes[string(match)] = true
	}
	result = includeRe.ReplaceAll(result, nil)
	// Certain linux and bsd headers are broken and go to the bottom.
	var sorted, sortedBottom, sortedTop []string
	for include := range includes {
		if strings.Contains(include, "<linux/") {
			sortedBottom = append(sortedBottom, include)
		} else if strings.Contains(include, "<netinet/if_ether.h>") {
			sortedBottom = append(sortedBottom, include)
		} else if ctx.target.OS == targets.FreeBSD && strings.Contains(include, "<sys/types.h>") {
			sortedTop = append(sortedTop, include)
		} else {
			sorted = append(sorted, include)
		}
	}
	sort.Strings(sortedTop)
	sort.Strings(sorted)
	sort.Strings(sortedBottom)
	newResult := append([]byte{}, result[:includesStart]...)
	newResult = append(newResult, strings.Join(sortedTop, "")...)
	newResult = append(newResult, '\n')
	newResult = append(newResult, strings.Join(sorted, "")...)
	newResult = append(newResult, '\n')
	newResult = append(newResult, strings.Join(sortedBottom, "")...)
	newResult = append(newResult, result[includesStart:]...)
	return newResult
}

// removeEmptyLines removes duplicate new lines.
// It iterates to a fixed point since one pass can create new adjacencies.
func (ctx *context) removeEmptyLines(result []byte) []byte {
	for {
		newResult := bytes.ReplaceAll(result, []byte{'\n', '\n', '\n'}, []byte{'\n', '\n'})
		newResult = bytes.ReplaceAll(newResult, []byte{'\n', '\n', '\t'}, []byte{'\n', '\t'})
		newResult = bytes.ReplaceAll(newResult, []byte{'\n', '\n', ' '}, []byte{'\n', ' '})
		if len(newResult) == len(result) {
			return result
		}
		result = newResult
	}
}

// toCString encodes raw data as a C string literal body (escaped as needed);
// readable selects a more human-friendly encoding. Panics on empty data since
// callers always pass non-empty buffers (copyin indexes Data[0]).
func toCString(data []byte, readable bool) []byte {
	if len(data) == 0 {
		panic("empty data arg")
	}
	buf := new(bytes.Buffer)
	prog.EncodeData(buf, data, readable)
	return buf.Bytes()
}