github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/executor/executor.cc (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // +build
     5  
     6  #include <algorithm>
     7  #include <errno.h>
     8  #include <limits.h>
     9  #include <signal.h>
    10  #include <stdarg.h>
    11  #include <stddef.h>
    12  #include <stdint.h>
    13  #include <stdio.h>
    14  #include <stdlib.h>
    15  #include <string.h>
    16  #include <time.h>
    17  
    18  #include <atomic>
    19  #include <optional>
    20  
    21  #if !GOOS_windows
    22  #include <unistd.h>
    23  #endif
    24  
    25  #include "defs.h"
    26  
    27  #include "pkg/flatrpc/flatrpc.h"
    28  
    29  #if defined(__GNUC__)
    30  #define SYSCALLAPI
    31  #define NORETURN __attribute__((noreturn))
    32  #define PRINTF(fmt, args) __attribute__((format(printf, fmt, args)))
    33  #else
    34  // Assuming windows/cl.
    35  #define SYSCALLAPI WINAPI
    36  #define NORETURN __declspec(noreturn)
    37  #define PRINTF(fmt, args)
    38  #define __thread __declspec(thread)
    39  #endif
    40  
    41  #ifndef GIT_REVISION
    42  #define GIT_REVISION "unknown"
    43  #endif
    44  
    45  #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
    46  
    47  #ifndef __has_feature
    48  #define __has_feature(x) 0
    49  #endif
    50  
    51  #if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
    52  constexpr bool kAddressSanitizer = true;
    53  #else
    54  constexpr bool kAddressSanitizer = false;
    55  #endif
    56  
    57  // uint64 is impossible to printf without using the clumsy and verbose "%" PRId64.
    58  // So we define and use uint64. Note: pkg/csource does s/uint64/uint64/.
    59  // Also define uint32/16/8 for consistency.
    60  typedef unsigned long long uint64;
    61  typedef unsigned int uint32;
    62  typedef unsigned short uint16;
    63  typedef unsigned char uint8;
    64  
    65  // Note: zircon max fd is 256.
    66  // Some common_OS.h files know about this constant for RLIMIT_NOFILE.
    67  const int kMaxFd = 250;
    68  const int kFdLimit = 256;
    69  const int kMaxThreads = 32;
    70  const int kInPipeFd = kMaxFd - 1; // remapped from stdin
    71  const int kOutPipeFd = kMaxFd - 2; // remapped from stdout
    72  const int kCoverFd = kOutPipeFd - kMaxThreads;
    73  const int kExtraCoverFd = kCoverFd - 1;
    74  const int kMaxArgs = 9;
    75  const int kCoverSize = 512 << 10;
    76  const int kFailStatus = 67;
    77  
    78  // Two approaches of dealing with kcov memory.
    79  const int kCoverOptimizedCount = 8; // the max number of kcov instances
    80  const int kCoverOptimizedPreMmap = 3; // this many will be mmapped inside main(), others - when needed.
    81  const int kCoverDefaultCount = 6; // the max number of kcov instances when delayed kcov mmap is not available
    82  
    83  // Logical error (e.g. invalid input program), use as an assert() alternative.
    84  // If such error happens 10+ times in a row, it will be detected as a bug by the runner process.
    85  // The runner will fail and syz-manager will create a bug for this.
    86  // Note: err is used for bug deduplication, thus distinction between err (constant message)
    87  // and msg (varying part).
    88  static NORETURN void fail(const char* err);
    89  static NORETURN PRINTF(2, 3) void failmsg(const char* err, const char* msg, ...);
    90  // Just exit (e.g. due to temporal ENOMEM error).
    91  static NORETURN PRINTF(1, 2) void exitf(const char* msg, ...);
    92  static NORETURN void doexit(int status);
    93  #if !GOOS_fuchsia
    94  static NORETURN void doexit_thread(int status);
    95  #endif
    96  
    97  // Print debug output that is visible when running syz-manager/execprog with -debug flag.
    98  // Debug output is supposed to be relatively high-level (syscalls executed, return values, timing, etc)
    99  // and is intended mostly for end users. If you need to debug lower-level details, use debug_verbose
   100  // function and temporary enable it in your build by changing #if 0 below.
   101  // This function does not add \n at the end of msg as opposed to the previous functions.
   102  static PRINTF(1, 2) void debug(const char* msg, ...);
   103  void debug_dump_data(const char* data, int length);
   104  
   105  #if 0
   106  #define debug_verbose(...) debug(__VA_ARGS__)
   107  #else
   108  #define debug_verbose(...) (void)0
   109  #endif
   110  
   111  static void receive_execute();
   112  static void reply_execute(uint32 status);
   113  static void receive_handshake();
   114  
   115  #if SYZ_EXECUTOR_USES_FORK_SERVER
   116  static void SnapshotPrepareParent();
   117  
   118  // Allocating (and forking) virtual memory for each executed process is expensive, so we only mmap
   119  // the amount we might possibly need for the specific received prog.
const int kMaxOutputComparisons = 14 << 20; // executions with comparisons enabled are usually < 1% of all executions
   121  const int kMaxOutputCoverage = 6 << 20; // coverage is needed in ~ up to 1/3 of all executions (depending on corpus rotation)
   122  const int kMaxOutputSignal = 4 << 20;
   123  const int kMinOutput = 256 << 10; // if we don't need to send signal, the output is rather short.
   124  const int kInitialOutput = kMinOutput; // the minimal size to be allocated in the parent process
   125  const int kMaxOutput = kMaxOutputComparisons;
   126  #else
   127  // We don't fork and allocate the memory only once, so prepare for the worst case.
   128  const int kInitialOutput = 14 << 20;
   129  const int kMaxOutput = kInitialOutput;
   130  #endif
   131  
   132  // For use with flatrpc bit flags.
   133  template <typename T>
   134  bool IsSet(T flags, T f)
   135  {
   136  	return (flags & f) != T::NONE;
   137  }
   138  
   139  // TODO: allocate a smaller amount of memory in the parent once we merge the patches that enable
   140  // prog execution with neither signal nor coverage. Likely 64kb will be enough in that case.
   141  
   142  const uint32 kMaxCalls = 64;
   143  
// Header of the output shmem region; shared between the child executor and the
// runner process (see the serialization overview comment below this struct).
struct alignas(8) OutputData {
	// Size of the whole shmem region the child uses (differs depending on
	// whether coverage/comparisons were requested).
	std::atomic<uint32> size;
	// Bytes of the region already consumed by flatbuffers serialization;
	// allows the parent process to resume serialization where the child stopped.
	std::atomic<uint32> consumed;
	// Number of completed calls, i.e. valid entries in calls[] below.
	std::atomic<uint32> completed;
	std::atomic<uint32> num_calls;
	// Flatbuffers offset of the serialized result (if any).
	std::atomic<flatbuffers::Offset<flatbuffers::Vector<uint8_t>>> result_offset;
	struct {
		// Call index in the test program (they may be out-of-order if some syscalls block).
		int index;
		// Offset of the CallInfo object in the output region.
		flatbuffers::Offset<rpc::CallInfoRaw> offset;
	} calls[kMaxCalls];

	// Clears the header for a fresh execution; relaxed stores suffice since
	// publication/synchronization happens via other means (pipes).
	void Reset()
	{
		size.store(0, std::memory_order_relaxed);
		consumed.store(0, std::memory_order_relaxed);
		completed.store(0, std::memory_order_relaxed);
		num_calls.store(0, std::memory_order_relaxed);
		result_offset.store(0, std::memory_order_relaxed);
	}
};
   166  
   167  // ShmemAllocator/ShmemBuilder help to construct flatbuffers ExecResult reply message in shared memory.
   168  //
   169  // To avoid copying the reply (in particular coverage/signal/comparisons which may be large), the child
   170  // process starts forming CallInfo objects as it handles completion of syscalls, then the top-most runner
   171  // process uses these CallInfo to form an array of them, and adds ProgInfo object with a reference to the array.
   172  // In order to make this possible, OutputData object is placed at the beginning of the shared memory region,
   173  // and it records metadata required to start serialization in one process and continue later in another process.
   174  //
// OutputData::size is the size of the whole shmem region that the child uses (it's a different size when
// coverage/comparisons are requested). Note that flatbuffers serialization happens from the end of the buffer backwards.
// OutputData::consumed records the currently consumed amount of memory in the shmem region so that the parent process
// can continue from that point.
   179  // OutputData::completed records number of completed calls (entries in OutputData::calls arrays).
   180  // Flatbuffers identifies everything using offsets in the buffer, OutputData::calls::offset records this offset
   181  // for the call object so that we can use it in the parent process to construct the array of calls.
   182  //
   183  // FlatBufferBuilder generally grows the underlying buffer incrementally as necessary and copying data
   184  // (std::vector style). We cannot do this in the shared memory since we have only a single region.
   185  // To allow serialization into the shared memory region, ShmemBuilder passes initial buffer size which is equal
   186  // to the overall shmem region size (minus OutputData header size) to FlatBufferBuilder, and the custom
   187  // ShmemAllocator allocator. As the result, FlatBufferBuilder does exactly one allocation request
   188  // to ShmemAllocator and never reallocates (if we overflow the buffer and FlatBufferBuilder does another request,
   189  // ShmemAllocator will fail).
class ShmemAllocator : public flatbuffers::Allocator
{
public:
	// buf/size describe the single fixed region (the shmem area past the
	// OutputData header) that the one-and-only allocation must match exactly.
	ShmemAllocator(void* buf, size_t size)
	    : buf_(buf),
	      size_(size)
	{
	}

private:
	void* buf_;
	size_t size_;
	// Set while the region is handed out to FlatBufferBuilder.
	bool allocated_ = false;

	// Called by FlatBufferBuilder exactly once with the full region size
	// (see the comment above the class); any other request means the builder
	// overflowed the region and tried to grow.
	uint8_t* allocate(size_t size) override
	{
		if (allocated_ || size != size_)
			failmsg("bad allocate request", "allocated=%d size=%zu/%zu", allocated_, size_, size);
		allocated_ = true;
		return static_cast<uint8_t*>(buf_);
	}

	// Accepts only the exact pointer/size pair previously handed out by allocate().
	void deallocate(uint8_t* p, size_t size) override
	{
		if (!allocated_ || buf_ != p || size_ != size)
			failmsg("bad deallocate request", "allocated=%d buf=%p/%p size=%zu/%zu",
				allocated_, buf_, p, size_, size);
		allocated_ = false;
	}

	// Growing is impossible in a single fixed shmem region, so any reallocation
	// attempt is a fatal logical error.
	uint8_t* reallocate_downward(uint8_t* old_p, size_t old_size,
				     size_t new_size, size_t in_use_back,
				     size_t in_use_front) override
	{
		fail("can't reallocate");
	}
};
   227  
   228  class ShmemBuilder : ShmemAllocator, public flatbuffers::FlatBufferBuilder
   229  {
   230  public:
   231  	ShmemBuilder(OutputData* data, size_t size, bool store_size)
   232  	    : ShmemAllocator(data + 1, size - sizeof(*data)),
   233  	      flatbuffers::FlatBufferBuilder(size - sizeof(*data), this)
   234  	{
   235  		if (store_size)
   236  			data->size.store(size, std::memory_order_relaxed);
   237  		size_t consumed = data->consumed.load(std::memory_order_relaxed);
   238  		if (consumed >= size - sizeof(*data))
   239  			failmsg("ShmemBuilder: too large output offset", "size=%zd consumed=%zd", size, consumed);
   240  		if (consumed)
   241  			flatbuffers::FlatBufferBuilder::buf_.make_space(consumed);
   242  	}
   243  };
   244  
   245  const int kInFd = 3;
   246  const int kOutFd = 4;
   247  const int kMaxSignalFd = 5;
   248  const int kCoverFilterFd = 6;
   249  static OutputData* output_data;
   250  static std::optional<ShmemBuilder> output_builder;
   251  static uint32 output_size;
   252  static void mmap_output(uint32 size);
   253  static uint32 hash(uint32 a);
   254  static bool dedup(uint8 index, uint64 sig);
   255  
   256  static uint64 start_time_ms = 0;
   257  static bool flag_debug;
   258  static bool flag_snapshot;
   259  static bool flag_coverage;
   260  static bool flag_read_only_coverage;
   261  static bool flag_sandbox_none;
   262  static bool flag_sandbox_setuid;
   263  static bool flag_sandbox_namespace;
   264  static bool flag_sandbox_android;
   265  static bool flag_extra_coverage;
   266  static bool flag_net_injection;
   267  static bool flag_net_devices;
   268  static bool flag_net_reset;
   269  static bool flag_cgroups;
   270  static bool flag_close_fds;
   271  static bool flag_devlink_pci;
   272  static bool flag_nic_vf;
   273  static bool flag_vhci_injection;
   274  static bool flag_wifi;
   275  static bool flag_delay_kcov_mmap;
   276  
   277  static bool flag_collect_cover;
   278  static bool flag_collect_signal;
   279  static bool flag_dedup_cover;
   280  static bool flag_threaded;
   281  
   282  // If true, then executor should write the comparisons data to fuzzer.
   283  static bool flag_comparisons;
   284  
   285  static uint64 request_id;
   286  static rpc::RequestType request_type;
   287  static uint64 all_call_signal;
   288  static bool all_extra_signal;
   289  
   290  // Tunable timeouts, received with execute_req.
   291  static uint64 syscall_timeout_ms;
   292  static uint64 program_timeout_ms;
   293  static uint64 slowdown_scale;
   294  
// Can be used to distinguish whether we're at the initialization stage
// or we already execute programs.
   297  static bool in_execute_one = false;
   298  
   299  #define SYZ_EXECUTOR 1
   300  #include "common.h"
   301  
   302  const size_t kMaxInput = 4 << 20; // keep in sync with prog.ExecBufferSize
   303  const size_t kMaxCommands = 1000; // prog package knows about this constant (prog.execMaxCommands)
   304  
   305  const uint64 instr_eof = -1;
   306  const uint64 instr_copyin = -2;
   307  const uint64 instr_copyout = -3;
   308  const uint64 instr_setprops = -4;
   309  
   310  const uint64 arg_const = 0;
   311  const uint64 arg_addr32 = 1;
   312  const uint64 arg_addr64 = 2;
   313  const uint64 arg_result = 3;
   314  const uint64 arg_data = 4;
   315  const uint64 arg_csum = 5;
   316  
   317  const uint64 binary_format_native = 0;
   318  const uint64 binary_format_bigendian = 1;
   319  const uint64 binary_format_strdec = 2;
   320  const uint64 binary_format_strhex = 3;
   321  const uint64 binary_format_stroct = 4;
   322  
   323  const uint64 no_copyout = -1;
   324  
   325  static int running;
   326  static uint32 completed;
   327  static bool is_kernel_64_bit;
   328  static bool use_cover_edges;
   329  
   330  static uint8* input_data;
   331  
   332  // Checksum kinds.
   333  static const uint64 arg_csum_inet = 0;
   334  
   335  // Checksum chunk kinds.
   336  static const uint64 arg_csum_chunk_data = 0;
   337  static const uint64 arg_csum_chunk_const = 1;
   338  
   339  typedef intptr_t(SYSCALLAPI* syscall_t)(intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t);
   340  
// Descriptor of a single syscall supported by the executor (entries of the
// table included from syscalls.h).
struct call_t {
	const char* name; // syscall name as used in programs
	int sys_nr; // syscall number passed to the kernel
	call_attrs_t attrs; // static attributes from the syscall descriptions
	syscall_t call; // NOTE(review): presumably non-NULL for pseudo-syscalls implemented in the executor - confirm
};
   347  
// Per-kcov-instance coverage collection state (one per thread plus extra_cov).
struct cover_t {
	// kcov file descriptor (assigned from kCoverFd/kExtraCoverFd in main()).
	int fd;
	// NOTE(review): presumably the number of entries recorded in data during the
	// last collection - confirm against cover_collect().
	uint32 size;
	// mmap_alloc_ptr is the internal pointer to KCOV mapping, possibly with guard pages.
	// It is only used to allocate/deallocate the buffer of mmap_alloc_size.
	char* mmap_alloc_ptr;
	uint32 mmap_alloc_size;
	// data is the pointer to the kcov buffer containing the recorded PCs.
	// data may differ from mmap_alloc_ptr.
	char* data;
	// data_size is set by cover_open(). This is the requested kcov buffer size.
	uint32 data_size;
	// data_end is simply data + data_size.
	char* data_end;
	// Currently collecting comparisons.
	bool collect_comps;
	// Note: On everything but darwin the first value in data is the count of
	// recorded PCs, followed by the PCs. We therefore set data_offset to the
	// size of one PC.
	// On darwin data points to an instance of the ksancov_trace struct. Here we
	// set data_offset to the offset between data and the structs 'pcs' member,
	// which contains the PCs.
	intptr_t data_offset;
	// Note: On everything but darwin this is 0, as the PCs contained in data
	// are already correct. XNUs KSANCOV API, however, chose to always squeeze
	// PCs into 32 bit. To make the recorded PC fit, KSANCOV substracts a fixed
	// offset (VM_MIN_KERNEL_ADDRESS for AMD64) and then truncates the result to
	// uint32_t. We get this from the 'offset' member in ksancov_trace.
	intptr_t pc_offset;
	// The coverage buffer has overflowed and we have truncated coverage.
	bool overflow;
	// True if cover_enable() was called for this object.
	bool enabled;
};
   382  
// Per-worker-thread state; threads execute syscalls scheduled by the main loop.
struct thread_t {
	int id; // index of this entry in threads[]
	bool created; // set once the OS thread was spawned (see thread_create())
	event_t ready; // signaled when a call has been scheduled for this thread
	event_t done; // signaled by the thread when the call finished
	uint8* copyout_pos; // input-stream position for this call's copyout processing
	uint64 copyout_index; // results[] slot for the call result (no_copyout if unused)
	bool executing; // true between scheduling and completion handling
	int call_index; // call position in the test program
	int call_num; // syscall number (index into the call table)
	int num_args; // number of valid entries in args[]
	intptr_t args[kMaxArgs];
	call_props_t call_props;
	intptr_t res; // syscall return value
	uint32 reserrno; // errno observed after the call
	bool fault_injected; // whether fault injection fired for this call
	cover_t cov; // per-thread kcov coverage state
	bool soft_fail_state; // NOTE(review): presumably marks a degraded state after repeated failures - confirm usage
};
   402  
   403  static thread_t threads[kMaxThreads];
   404  static thread_t* last_scheduled;
   405  // Threads use this variable to access information about themselves.
   406  static __thread struct thread_t* current_thread;
   407  
   408  static cover_t extra_cov;
   409  
// Result of an executed call, stored in results[] so that later calls
// can substitute it into their arguments (arg_result).
struct res_t {
	bool executed; // false if the producing call was never executed
	uint64 val; // the copied-out result value
};
   414  
   415  static res_t results[kMaxCommands];
   416  
   417  const uint64 kInMagic = 0xbadc0ffeebadface;
   418  
// One-time configuration request sent by the runner over the control pipe
// before any programs are executed (see receive_handshake/parse_handshake).
struct handshake_req {
	uint64 magic; // must be kInMagic
	bool use_cover_edges;
	bool is_kernel_64_bit;
	rpc::ExecEnv flags; // execution environment flags (sandbox, coverage, debug, ...)
	uint64 pid;
	uint64 sandbox_arg;
	// Tunable timeouts/slowdown, copied into the globals of the same names.
	uint64 syscall_timeout_ms;
	uint64 program_timeout_ms;
	uint64 slowdown_scale;
};
   430  
// Per-program execution request (see receive_execute/parse_execute).
struct execute_req {
	uint64 magic; // must be kInMagic
	uint64 id; // request id, echoed back with the results
	rpc::RequestType type;
	uint64 exec_flags;
	uint64 all_call_signal; // NOTE(review): presumably a bitmask of calls for which signal is requested - confirm
	bool all_extra_signal;
};
   439  
// Reply written back on the control pipe after execution (see reply_execute()).
struct execute_reply {
	uint32 magic;
	uint32 done;
	uint32 status;
};
   445  
// Encoding of comparison types reported by KCOV comparison tracing, mirroring
// the kernel's KCOV_CMP_* constants: bit 0 set means one operand is a
// compile-time constant, bits 1-2 encode log2 of the operand size.
enum {
	KCOV_CMP_CONST = 1,
	KCOV_CMP_SIZE1 = 0,
	KCOV_CMP_SIZE2 = 2,
	KCOV_CMP_SIZE4 = 4,
	KCOV_CMP_SIZE8 = 6,
	KCOV_CMP_SIZE_MASK = 6,
};
   454  
// A single comparison record as read from the kcov buffer.
struct kcov_comparison_t {
	// Note: comparisons are always 64-bits regardless of kernel bitness.
	uint64 type; // KCOV_CMP_* flags (see enum above)
	uint64 arg1;
	uint64 arg2;
	uint64 pc; // PC of the comparison instruction
};

// Compile-time layout check: fails to build (negative array size) if the
// struct is not exactly 4 uint64's.
typedef char kcov_comparison_size[sizeof(kcov_comparison_t) == 4 * sizeof(uint64) ? 1 : -1];
   464  
// A checkable/configurable feature and its setup routine.
struct feature_t {
	rpc::Feature id;
	// NOTE(review): presumably returns NULL on success or an error reason string - confirm against setup implementations.
	const char* (*setup)();
};
   469  
   470  static thread_t* schedule_call(int call_index, int call_num, uint64 copyout_index, uint64 num_args, uint64* args, uint8* pos, call_props_t call_props);
   471  static void handle_completion(thread_t* th);
   472  static void copyout_call_results(thread_t* th);
   473  static void write_call_output(thread_t* th, bool finished);
   474  static void write_extra_output();
   475  static void execute_call(thread_t* th);
   476  static void thread_create(thread_t* th, int id, bool need_coverage);
   477  static void thread_mmap_cover(thread_t* th);
   478  static void* worker_thread(void* arg);
   479  static uint64 read_input(uint8** input_posp, bool peek = false);
   480  static uint64 read_arg(uint8** input_posp);
   481  static uint64 read_const_arg(uint8** input_posp, uint64* size_p, uint64* bf, uint64* bf_off_p, uint64* bf_len_p);
   482  static uint64 read_result(uint8** input_posp);
   483  static uint64 swap(uint64 v, uint64 size, uint64 bf);
   484  static void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len);
   485  static bool copyout(char* addr, uint64 size, uint64* res);
   486  static void setup_control_pipes();
   487  static bool coverage_filter(uint64 pc);
   488  static rpc::ComparisonRaw convert(const kcov_comparison_t& cmp);
   489  static flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 req_id, uint32 num_calls,
   490  						uint64 elapsed, uint64 freshness, uint32 status, bool hanged,
   491  						const std::vector<uint8_t>* process_output);
   492  static void parse_execute(const execute_req& req);
   493  static void parse_handshake(const handshake_req& req);
   494  
   495  static void mmap_input();
   496  
   497  #include "syscalls.h"
   498  
   499  #if GOOS_linux
   500  #ifndef MAP_FIXED_NOREPLACE
   501  #define MAP_FIXED_NOREPLACE 0x100000
   502  #endif
   503  #define MAP_FIXED_EXCLUSIVE MAP_FIXED_NOREPLACE
   504  #elif GOOS_freebsd
   505  #define MAP_FIXED_EXCLUSIVE (MAP_FIXED | MAP_EXCL)
   506  #else
   507  #define MAP_FIXED_EXCLUSIVE MAP_FIXED // The check is not supported.
   508  #endif
   509  
   510  #if GOOS_linux
   511  #include "executor_linux.h"
   512  #elif GOOS_fuchsia
   513  #include "executor_fuchsia.h"
   514  #elif GOOS_freebsd || GOOS_netbsd || GOOS_openbsd
   515  #include "executor_bsd.h"
   516  #elif GOOS_darwin
   517  #include "executor_darwin.h"
   518  #elif GOOS_windows
   519  #include "executor_windows.h"
   520  #elif GOOS_test
   521  #include "executor_test.h"
   522  #else
   523  #error "unknown OS"
   524  #endif
   525  
// RAII guard that makes a kcov buffer accessible for the duration of a scope:
// removes protection on construction and restores it on destruction
// (both are no-ops when coverage is disabled).
class CoverAccessScope final
{
public:
	CoverAccessScope(cover_t* cov)
	    : cov_(cov)
	{
		// CoverAccessScope must not be used recursively b/c on Linux pkeys protection is global,
		// so cover_protect for one cov overrides previous cover_unprotect for another cov.
		if (used_)
			fail("recursion in CoverAccessScope");
		used_ = true;
		if (flag_coverage)
			cover_unprotect(cov_);
	}
	~CoverAccessScope()
	{
		if (flag_coverage)
			cover_protect(cov_);
		used_ = false;
	}

private:
	cover_t* const cov_;
	// Guards against nested scopes. NOTE(review): a plain (non-atomic) static;
	// presumably scopes are never created concurrently from multiple threads - confirm.
	static bool used_;

	// Non-copyable: copying would double-protect/unprotect.
	CoverAccessScope(const CoverAccessScope&) = delete;
	CoverAccessScope& operator=(const CoverAccessScope&) = delete;
};

bool CoverAccessScope::used_;
   556  
   557  #if !SYZ_HAVE_FEATURES
   558  static feature_t features[] = {};
   559  #endif
   560  
   561  #include "shmem.h"
   562  
   563  #include "conn.h"
   564  #include "cover_filter.h"
   565  #include "files.h"
   566  #include "subprocess.h"
   567  
   568  #include "snapshot.h"
   569  
   570  #include "executor_runner.h"
   571  
   572  #include "test.h"
   573  
   574  static std::optional<CoverFilter> max_signal;
   575  static std::optional<CoverFilter> cover_filter;
   576  
   577  #if SYZ_HAVE_SANDBOX_ANDROID
   578  static uint64 sandbox_arg = 0;
   579  #endif
   580  
// Entry point. The binary is multi-command: "runner" (top-level supervisor),
// "leak" (leak checking), "test" (self tests) and "exec" (execute programs).
// The "exec" path below sets up shmem regions, performs the handshake and
// enters a sandbox, which runs the execution loop and returns its status.
int main(int argc, char** argv)
{
	if (argc == 1) {
		fprintf(stderr, "no command");
		return 1;
	}
	if (strcmp(argv[1], "runner") == 0) {
		runner(argv, argc);
		fail("runner returned");
	}
	if (strcmp(argv[1], "leak") == 0) {
#if SYZ_HAVE_LEAK_CHECK
		check_leaks(argv + 2, argc - 2);
#else
		fail("leak checking is not implemented");
#endif
		return 0;
	}
	if (strcmp(argv[1], "test") == 0)
		return run_tests(argc == 3 ? argv[2] : nullptr);

	if (strcmp(argv[1], "exec") != 0) {
		fprintf(stderr, "unknown command");
		return 1;
	}

	start_time_ms = current_time_ms();

	os_init(argc, argv, (char*)SYZ_DATA_OFFSET, SYZ_NUM_PAGES * SYZ_PAGE_SIZE);
	use_temporary_dir();
	install_segv_handler();
	current_thread = &threads[0];

	if (argc > 2 && strcmp(argv[2], "snapshot") == 0) {
		SnapshotSetup(argv, argc);
	} else {
		// Normal mode: map the input/output shmem regions and handshake with the runner.
		mmap_input();
		mmap_output(kInitialOutput);

		// Prevent test programs from messing with these fds.
		// Due to races in collider mode, a program can e.g. ftruncate one of these fds,
		// which will cause fuzzer to crash.
		close(kInFd);
#if !SYZ_EXECUTOR_USES_FORK_SERVER
		// For SYZ_EXECUTOR_USES_FORK_SERVER, close(kOutFd) is invoked in the forked child,
		// after the program has been received.
		close(kOutFd);
#endif

		// The filter fds are optional; detect their presence via fcntl.
		if (fcntl(kMaxSignalFd, F_GETFD) != -1) {
			// Use random addresses for coverage filters to not collide with output_data.
			max_signal.emplace(kMaxSignalFd, reinterpret_cast<void*>(0x110c230000ull));
			close(kMaxSignalFd);
		}
		if (fcntl(kCoverFilterFd, F_GETFD) != -1) {
			cover_filter.emplace(kCoverFilterFd, reinterpret_cast<void*>(0x110f230000ull));
			close(kCoverFilterFd);
		}

		setup_control_pipes();
		receive_handshake();
#if !SYZ_EXECUTOR_USES_FORK_SERVER
		// We receive/reply handshake when fork server is disabled just to simplify runner logic.
		// It's a bit suboptimal, but no fork server is much slower anyway.
		reply_execute(0);
		receive_execute();
#endif
	}

	if (flag_coverage) {
		// Open (and possibly pre-mmap) kcov instances for worker threads.
		int create_count = kCoverDefaultCount, mmap_count = create_count;
		if (flag_delay_kcov_mmap) {
			create_count = kCoverOptimizedCount;
			mmap_count = kCoverOptimizedPreMmap;
		}
		if (create_count > kMaxThreads)
			create_count = kMaxThreads;
		for (int i = 0; i < create_count; i++) {
			threads[i].cov.fd = kCoverFd + i;
			cover_open(&threads[i].cov, false);
			if (i < mmap_count) {
				// Pre-mmap coverage collection for some threads. This should be enough for almost
				// all programs, for the remaining few ones coverage will be set up when it's needed.
				thread_mmap_cover(&threads[i]);
			}
		}
		// A separate kcov instance for extra coverage (see flag_extra_coverage).
		extra_cov.fd = kExtraCoverFd;
		cover_open(&extra_cov, true);
		cover_mmap(&extra_cov);
		cover_protect(&extra_cov);
		if (flag_extra_coverage) {
			// Don't enable comps because we don't use them in the fuzzer yet.
			cover_enable(&extra_cov, false, true);
		}
	}

	// Enter the sandbox; do_sandbox_*() run the execution loop and return its final status.
	int status = 0;
	if (flag_sandbox_none)
		status = do_sandbox_none();
#if SYZ_HAVE_SANDBOX_SETUID
	else if (flag_sandbox_setuid)
		status = do_sandbox_setuid();
#endif
#if SYZ_HAVE_SANDBOX_NAMESPACE
	else if (flag_sandbox_namespace)
		status = do_sandbox_namespace();
#endif
#if SYZ_HAVE_SANDBOX_ANDROID
	else if (flag_sandbox_android)
		status = do_sandbox_android(sandbox_arg);
#endif
	else
		fail("unknown sandbox type");

#if SYZ_EXECUTOR_USES_FORK_SERVER
	fprintf(stderr, "loop exited with status %d\n", status);
	// If an external sandbox process wraps executor, the out pipe will be closed
	// before the sandbox process exits, and this will make ipc package kill the sandbox.
	// As the result sandbox process will exit with exit status 9 instead of the executor
	// exit status (notably kFailStatus). So we duplicate the exit status on the pipe.
	reply_execute(status);
	doexit(status);
	// Unreachable.
	return 1;
#else
	reply_execute(status);
	return status;
#endif
}
   710  
   711  static uint32* input_base_address()
   712  {
   713  	if (kAddressSanitizer) {
   714  		// ASan conflicts with -static, so we end up having a dynamically linked syz-executor binary.
   715  		// It's often the case that the libraries are mapped shortly after 0x7f0000000000, so we cannot
   716  		// blindly set some HighMemory address and hope it's free.
   717  		// Since we only run relatively safe (or fake) syscalls under tests, it should be fine to
   718  		// just use whatever address mmap() returns us.
   719  		return 0;
   720  	}
   721  	// It's the first time we map output region - generate its location.
   722  	// The output region is the only thing in executor process for which consistency matters.
   723  	// If it is corrupted ipc package will fail to parse its contents and panic.
   724  	// But fuzzer constantly invents new ways of how to corrupt the region,
   725  	// so we map the region at a (hopefully) hard to guess address with random offset,
   726  	// surrounded by unmapped pages.
   727  	// The address chosen must also work on 32-bit kernels with 1GB user address space.
   728  	const uint64 kOutputBase = 0x1b2bc20000ull;
   729  	return (uint32*)(kOutputBase + (1 << 20) * (getpid() % 128));
   730  }
   731  
   732  static void mmap_input()
   733  {
   734  	uint32* mmap_at = input_base_address();
   735  	int flags = MAP_SHARED;
   736  	if (mmap_at != 0)
   737  		// If we map at a specific address, ensure it's not overlapping with anything else.
   738  		flags = flags | MAP_FIXED_EXCLUSIVE;
   739  	void* result = mmap(mmap_at, kMaxInput, PROT_READ, flags, kInFd, 0);
   740  	if (result == MAP_FAILED)
   741  		fail("mmap of input file failed");
   742  	input_data = static_cast<uint8*>(result);
   743  }
   744  
   745  static uint32* output_base_address()
   746  {
   747  	if (kAddressSanitizer) {
   748  		// See the comment in input_base_address();
   749  		return 0;
   750  	}
   751  	if (output_data != NULL) {
   752  		// If output_data was already mapped, use the old base address
   753  		// since we could be extending the area from a different pid:
   754  		// realloc_output_data() may be called from a fork, which would cause
   755  		// input_base_address() to return a different address.
   756  		return (uint32*)output_data;
   757  	}
   758  	// Leave some unmmapped area after the input data.
   759  	return input_base_address() + kMaxInput + SYZ_PAGE_SIZE;
   760  }
   761  
// This method can be invoked as many times as one likes - MMAP_FIXED can overwrite the previous
// mapping without any problems. The only precondition - kOutFd must not be closed.
static void mmap_output(uint32 size)
{
	// Grow-only: requests not larger than the current mapping are no-ops.
	if (size <= output_size)
		return;
	if (size % SYZ_PAGE_SIZE != 0)
		failmsg("trying to mmap output area that is not divisible by page size", "page=%d,area=%d", SYZ_PAGE_SIZE, size);
	uint32* mmap_at = output_base_address();
	int flags = MAP_SHARED;
	if (mmap_at == NULL) {
		// We map at an address chosen by the kernel, so if there was any previous mapping, just unmap it.
		if (output_data != NULL) {
			int ret = munmap(output_data, output_size);
			if (ret != 0)
				fail("munmap failed");
			output_size = 0;
		}
	} else {
		// We are possibly expanding the mmapped region. Adjust the parameters to avoid mmapping already
		// mmapped area as much as possible.
		// There exists a mremap call that could have helped, but it's purely Linux-specific.
		mmap_at = (uint32*)((char*)(mmap_at) + output_size);
		// Ensure we don't overwrite anything.
		flags = flags | MAP_FIXED_EXCLUSIVE;
	}
	// Map only the not-yet-mapped tail of the file: [output_size, size).
	void* result = mmap(mmap_at, size - output_size, PROT_READ | PROT_WRITE, flags, kOutFd, output_size);
	if (result == MAP_FAILED || (mmap_at && result != mmap_at))
		failmsg("mmap of output file failed", "want %p, got %p", mmap_at, result);
	if (output_size == 0)
		output_data = static_cast<OutputData*>(result);
	output_size = size;
}
   795  
   796  void setup_control_pipes()
   797  {
   798  	if (dup2(0, kInPipeFd) < 0)
   799  		fail("dup2(0, kInPipeFd) failed");
   800  	if (dup2(1, kOutPipeFd) < 0)
   801  		fail("dup2(1, kOutPipeFd) failed");
   802  	if (dup2(2, 1) < 0)
   803  		fail("dup2(2, 1) failed");
   804  	// We used to close(0), but now we dup stderr to stdin to keep fd numbers
   805  	// stable across executor and C programs generated by pkg/csource.
   806  	if (dup2(2, 0) < 0)
   807  		fail("dup2(2, 0) failed");
   808  }
   809  
   810  void receive_handshake()
   811  {
   812  	handshake_req req = {};
   813  	ssize_t n = read(kInPipeFd, &req, sizeof(req));
   814  	if (n != sizeof(req))
   815  		failmsg("handshake read failed", "read=%zu", n);
   816  	parse_handshake(req);
   817  }
   818  
   819  void parse_handshake(const handshake_req& req)
   820  {
   821  	if (req.magic != kInMagic)
   822  		failmsg("bad handshake magic", "magic=0x%llx", req.magic);
   823  #if SYZ_HAVE_SANDBOX_ANDROID
   824  	sandbox_arg = req.sandbox_arg;
   825  #endif
   826  	is_kernel_64_bit = req.is_kernel_64_bit;
   827  	use_cover_edges = req.use_cover_edges;
   828  	procid = req.pid;
   829  	syscall_timeout_ms = req.syscall_timeout_ms;
   830  	program_timeout_ms = req.program_timeout_ms;
   831  	slowdown_scale = req.slowdown_scale;
   832  	flag_debug = (bool)(req.flags & rpc::ExecEnv::Debug);
   833  	flag_coverage = (bool)(req.flags & rpc::ExecEnv::Signal);
   834  	flag_read_only_coverage = (bool)(req.flags & rpc::ExecEnv::ReadOnlyCoverage);
   835  	flag_sandbox_none = (bool)(req.flags & rpc::ExecEnv::SandboxNone);
   836  	flag_sandbox_setuid = (bool)(req.flags & rpc::ExecEnv::SandboxSetuid);
   837  	flag_sandbox_namespace = (bool)(req.flags & rpc::ExecEnv::SandboxNamespace);
   838  	flag_sandbox_android = (bool)(req.flags & rpc::ExecEnv::SandboxAndroid);
   839  	flag_extra_coverage = (bool)(req.flags & rpc::ExecEnv::ExtraCover);
   840  	flag_net_injection = (bool)(req.flags & rpc::ExecEnv::EnableTun);
   841  	flag_net_devices = (bool)(req.flags & rpc::ExecEnv::EnableNetDev);
   842  	flag_net_reset = (bool)(req.flags & rpc::ExecEnv::EnableNetReset);
   843  	flag_cgroups = (bool)(req.flags & rpc::ExecEnv::EnableCgroups);
   844  	flag_close_fds = (bool)(req.flags & rpc::ExecEnv::EnableCloseFds);
   845  	flag_devlink_pci = (bool)(req.flags & rpc::ExecEnv::EnableDevlinkPCI);
   846  	flag_vhci_injection = (bool)(req.flags & rpc::ExecEnv::EnableVhciInjection);
   847  	flag_wifi = (bool)(req.flags & rpc::ExecEnv::EnableWifi);
   848  	flag_delay_kcov_mmap = (bool)(req.flags & rpc::ExecEnv::DelayKcovMmap);
   849  	flag_nic_vf = (bool)(req.flags & rpc::ExecEnv::EnableNicVF);
   850  }
   851  
   852  void receive_execute()
   853  {
   854  	execute_req req = {};
   855  	ssize_t n = 0;
   856  	while ((n = read(kInPipeFd, &req, sizeof(req))) == -1 && errno == EINTR)
   857  		;
   858  	if (n != (ssize_t)sizeof(req))
   859  		failmsg("control pipe read failed", "read=%zd want=%zd", n, sizeof(req));
   860  	parse_execute(req);
   861  }
   862  
   863  void parse_execute(const execute_req& req)
   864  {
   865  	request_id = req.id;
   866  	request_type = req.type;
   867  	flag_collect_signal = req.exec_flags & (uint64)rpc::ExecFlag::CollectSignal;
   868  	flag_collect_cover = req.exec_flags & (uint64)rpc::ExecFlag::CollectCover;
   869  	flag_dedup_cover = req.exec_flags & (uint64)rpc::ExecFlag::DedupCover;
   870  	flag_comparisons = req.exec_flags & (uint64)rpc::ExecFlag::CollectComps;
   871  	flag_threaded = req.exec_flags & (uint64)rpc::ExecFlag::Threaded;
   872  	all_call_signal = req.all_call_signal;
   873  	all_extra_signal = req.all_extra_signal;
   874  
   875  	debug("[%llums] exec opts: reqid=%llu type=%llu procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d "
   876  	      " sandbox=%d/%d/%d/%d timeouts=%llu/%llu/%llu kernel_64_bit=%d\n",
   877  	      current_time_ms() - start_time_ms, request_id, (uint64)request_type, procid, flag_threaded, flag_collect_cover,
   878  	      flag_comparisons, flag_dedup_cover, flag_collect_signal, flag_sandbox_none, flag_sandbox_setuid,
   879  	      flag_sandbox_namespace, flag_sandbox_android, syscall_timeout_ms, program_timeout_ms, slowdown_scale,
   880  	      is_kernel_64_bit);
   881  	if (syscall_timeout_ms == 0 || program_timeout_ms <= syscall_timeout_ms || slowdown_scale == 0)
   882  		failmsg("bad timeouts", "syscall=%llu, program=%llu, scale=%llu",
   883  			syscall_timeout_ms, program_timeout_ms, slowdown_scale);
   884  }
   885  
   886  bool cover_collection_required()
   887  {
   888  	return flag_coverage && (flag_collect_signal || flag_collect_cover || flag_comparisons);
   889  }
   890  
   891  void reply_execute(uint32 status)
   892  {
   893  	if (flag_snapshot)
   894  		SnapshotDone(status == kFailStatus);
   895  	if (write(kOutPipeFd, &status, sizeof(status)) != sizeof(status))
   896  		fail("control pipe write failed");
   897  }
   898  
   899  void realloc_output_data()
   900  {
   901  #if SYZ_EXECUTOR_USES_FORK_SERVER
   902  	if (flag_comparisons)
   903  		mmap_output(kMaxOutputComparisons);
   904  	else if (flag_collect_cover)
   905  		mmap_output(kMaxOutputCoverage);
   906  	else if (flag_collect_signal)
   907  		mmap_output(kMaxOutputSignal);
   908  	if (close(kOutFd) < 0)
   909  		fail("failed to close kOutFd");
   910  #endif
   911  }
   912  
   913  void execute_glob()
   914  {
   915  	const char* pattern = (const char*)input_data;
   916  	const auto& files = Glob(pattern);
   917  	size_t size = 0;
   918  	for (const auto& file : files)
   919  		size += file.size() + 1;
   920  	mmap_output(kMaxOutput);
   921  	ShmemBuilder fbb(output_data, kMaxOutput, true);
   922  	uint8_t* pos = nullptr;
   923  	auto off = fbb.CreateUninitializedVector(size, &pos);
   924  	for (const auto& file : files) {
   925  		memcpy(pos, file.c_str(), file.size() + 1);
   926  		pos += file.size() + 1;
   927  	}
   928  	output_data->consumed.store(fbb.GetSize(), std::memory_order_release);
   929  	output_data->result_offset.store(off, std::memory_order_release);
   930  }
   931  
   932  // execute_one executes program stored in input_data.
   933  void execute_one()
   934  {
   935  	if (request_type == rpc::RequestType::Glob) {
   936  		execute_glob();
   937  		return;
   938  	}
   939  	if (request_type != rpc::RequestType::Program)
   940  		failmsg("bad request type", "type=%llu", (uint64)request_type);
   941  
   942  	in_execute_one = true;
   943  #if GOOS_linux
   944  	char buf[64];
   945  	// Linux TASK_COMM_LEN is only 16, so the name needs to be compact.
   946  	snprintf(buf, sizeof(buf), "syz.%llu.%llu", procid, request_id);
   947  	prctl(PR_SET_NAME, buf);
   948  #endif
   949  	if (flag_snapshot)
   950  		SnapshotStart();
   951  	else
   952  		realloc_output_data();
   953  	// Output buffer may be pkey-protected in snapshot mode, so don't write the output size
   954  	// (it's fixed and known anyway).
   955  	output_builder.emplace(output_data, output_size, !flag_snapshot);
   956  	uint64 start = current_time_ms();
   957  	uint8* input_pos = input_data;
   958  
   959  	if (cover_collection_required()) {
   960  		if (!flag_threaded)
   961  			cover_enable(&threads[0].cov, flag_comparisons, false);
   962  		if (flag_extra_coverage)
   963  			cover_reset(&extra_cov);
   964  	}
   965  
   966  	int call_index = 0;
   967  	uint64 prog_extra_timeout = 0;
   968  	uint64 prog_extra_cover_timeout = 0;
   969  	call_props_t call_props;
   970  	memset(&call_props, 0, sizeof(call_props));
   971  
   972  	read_input(&input_pos); // total number of calls
   973  	for (;;) {
   974  		uint64 call_num = read_input(&input_pos);
   975  		if (call_num == instr_eof)
   976  			break;
   977  		if (call_num == instr_copyin) {
   978  			char* addr = (char*)(read_input(&input_pos) + SYZ_DATA_OFFSET);
   979  			uint64 typ = read_input(&input_pos);
   980  			switch (typ) {
   981  			case arg_const: {
   982  				uint64 size, bf, bf_off, bf_len;
   983  				uint64 arg = read_const_arg(&input_pos, &size, &bf, &bf_off, &bf_len);
   984  				copyin(addr, arg, size, bf, bf_off, bf_len);
   985  				break;
   986  			}
   987  			case arg_addr32:
   988  			case arg_addr64: {
   989  				uint64 val = read_input(&input_pos) + SYZ_DATA_OFFSET;
   990  				if (typ == arg_addr32)
   991  					NONFAILING(*(uint32*)addr = val);
   992  				else
   993  					NONFAILING(*(uint64*)addr = val);
   994  				break;
   995  			}
   996  			case arg_result: {
   997  				uint64 meta = read_input(&input_pos);
   998  				uint64 size = meta & 0xff;
   999  				uint64 bf = meta >> 8;
  1000  				uint64 val = read_result(&input_pos);
  1001  				copyin(addr, val, size, bf, 0, 0);
  1002  				break;
  1003  			}
  1004  			case arg_data: {
  1005  				uint64 size = read_input(&input_pos);
  1006  				size &= ~(1ull << 63); // readable flag
  1007  				if (input_pos + size > input_data + kMaxInput)
  1008  					fail("data arg overflow");
  1009  				NONFAILING(memcpy(addr, input_pos, size));
  1010  				input_pos += size;
  1011  				break;
  1012  			}
  1013  			case arg_csum: {
  1014  				debug_verbose("checksum found at %p\n", addr);
  1015  				uint64 size = read_input(&input_pos);
  1016  				char* csum_addr = addr;
  1017  				uint64 csum_kind = read_input(&input_pos);
  1018  				switch (csum_kind) {
  1019  				case arg_csum_inet: {
  1020  					if (size != 2)
  1021  						failmsg("bag inet checksum size", "size=%llu", size);
  1022  					debug_verbose("calculating checksum for %p\n", csum_addr);
  1023  					struct csum_inet csum;
  1024  					csum_inet_init(&csum);
  1025  					uint64 chunks_num = read_input(&input_pos);
  1026  					uint64 chunk;
  1027  					for (chunk = 0; chunk < chunks_num; chunk++) {
  1028  						uint64 chunk_kind = read_input(&input_pos);
  1029  						uint64 chunk_value = read_input(&input_pos);
  1030  						uint64 chunk_size = read_input(&input_pos);
  1031  						switch (chunk_kind) {
  1032  						case arg_csum_chunk_data:
  1033  							chunk_value += SYZ_DATA_OFFSET;
  1034  							debug_verbose("#%lld: data chunk, addr: %llx, size: %llu\n",
  1035  								      chunk, chunk_value, chunk_size);
  1036  							NONFAILING(csum_inet_update(&csum, (const uint8*)chunk_value, chunk_size));
  1037  							break;
  1038  						case arg_csum_chunk_const:
  1039  							if (chunk_size != 2 && chunk_size != 4 && chunk_size != 8)
  1040  								failmsg("bad checksum const chunk size", "size=%lld", chunk_size);
  1041  							// Here we assume that const values come to us big endian.
  1042  							debug_verbose("#%lld: const chunk, value: %llx, size: %llu\n",
  1043  								      chunk, chunk_value, chunk_size);
  1044  							csum_inet_update(&csum, (const uint8*)&chunk_value, chunk_size);
  1045  							break;
  1046  						default:
  1047  							failmsg("bad checksum chunk kind", "kind=%llu", chunk_kind);
  1048  						}
  1049  					}
  1050  					uint16 csum_value = csum_inet_digest(&csum);
  1051  					debug_verbose("writing inet checksum %hx to %p\n", csum_value, csum_addr);
  1052  					copyin(csum_addr, csum_value, 2, binary_format_native, 0, 0);
  1053  					break;
  1054  				}
  1055  				default:
  1056  					failmsg("bad checksum kind", "kind=%llu", csum_kind);
  1057  				}
  1058  				break;
  1059  			}
  1060  			default:
  1061  				failmsg("bad argument type", "type=%llu", typ);
  1062  			}
  1063  			continue;
  1064  		}
  1065  		if (call_num == instr_copyout) {
  1066  			read_input(&input_pos); // index
  1067  			read_input(&input_pos); // addr
  1068  			read_input(&input_pos); // size
  1069  			// The copyout will happen when/if the call completes.
  1070  			continue;
  1071  		}
  1072  		if (call_num == instr_setprops) {
  1073  			read_call_props_t(call_props, read_input(&input_pos, false));
  1074  			continue;
  1075  		}
  1076  
  1077  		// Normal syscall.
  1078  		if (call_num >= ARRAY_SIZE(syscalls))
  1079  			failmsg("invalid syscall number", "call_num=%llu", call_num);
  1080  		const call_t* call = &syscalls[call_num];
  1081  		if (prog_extra_timeout < call->attrs.prog_timeout)
  1082  			prog_extra_timeout = call->attrs.prog_timeout * slowdown_scale;
  1083  		if (call->attrs.remote_cover)
  1084  			prog_extra_cover_timeout = 500 * slowdown_scale; // 500 ms
  1085  		uint64 copyout_index = read_input(&input_pos);
  1086  		uint64 num_args = read_input(&input_pos);
  1087  		if (num_args > kMaxArgs)
  1088  			failmsg("command has bad number of arguments", "args=%llu", num_args);
  1089  		uint64 args[kMaxArgs] = {};
  1090  		for (uint64 i = 0; i < num_args; i++)
  1091  			args[i] = read_arg(&input_pos);
  1092  		for (uint64 i = num_args; i < kMaxArgs; i++)
  1093  			args[i] = 0;
  1094  		thread_t* th = schedule_call(call_index++, call_num, copyout_index,
  1095  					     num_args, args, input_pos, call_props);
  1096  
  1097  		if (call_props.async && flag_threaded) {
  1098  			// Don't wait for an async call to finish. We'll wait at the end.
  1099  			// If we're not in the threaded mode, just ignore the async flag - during repro simplification syzkaller
  1100  			// will anyway try to make it non-threaded.
  1101  		} else if (flag_threaded) {
  1102  			// Wait for call completion.
  1103  			uint64 timeout_ms = syscall_timeout_ms + call->attrs.timeout * slowdown_scale;
  1104  			// This is because of printing pre/post call. Ideally we print everything in the main thread
  1105  			// and then remove this (would also avoid intermixed output).
  1106  			if (flag_debug && timeout_ms < 1000)
  1107  				timeout_ms = 1000;
  1108  			if (event_timedwait(&th->done, timeout_ms))
  1109  				handle_completion(th);
  1110  
  1111  			// Check if any of previous calls have completed.
  1112  			for (int i = 0; i < kMaxThreads; i++) {
  1113  				th = &threads[i];
  1114  				if (th->executing && event_isset(&th->done))
  1115  					handle_completion(th);
  1116  			}
  1117  		} else {
  1118  			// Execute directly.
  1119  			if (th != &threads[0])
  1120  				fail("using non-main thread in non-thread mode");
  1121  			event_reset(&th->ready);
  1122  			execute_call(th);
  1123  			event_set(&th->done);
  1124  			handle_completion(th);
  1125  		}
  1126  		memset(&call_props, 0, sizeof(call_props));
  1127  	}
  1128  
  1129  	if (running > 0) {
  1130  		// Give unfinished syscalls some additional time.
  1131  		last_scheduled = 0;
  1132  		uint64 wait_start = current_time_ms();
  1133  		uint64 wait_end = wait_start + 2 * syscall_timeout_ms;
  1134  		wait_end = std::max(wait_end, start + program_timeout_ms / 6);
  1135  		wait_end = std::max(wait_end, wait_start + prog_extra_timeout);
  1136  		while (running > 0 && current_time_ms() <= wait_end) {
  1137  			sleep_ms(1 * slowdown_scale);
  1138  			for (int i = 0; i < kMaxThreads; i++) {
  1139  				thread_t* th = &threads[i];
  1140  				if (th->executing && event_isset(&th->done))
  1141  					handle_completion(th);
  1142  			}
  1143  		}
  1144  		// Write output coverage for unfinished calls.
  1145  		if (running > 0) {
  1146  			for (int i = 0; i < kMaxThreads; i++) {
  1147  				thread_t* th = &threads[i];
  1148  				if (th->executing) {
  1149  					if (cover_collection_required())
  1150  						cover_collect(&th->cov);
  1151  					write_call_output(th, false);
  1152  				}
  1153  			}
  1154  		}
  1155  	}
  1156  
  1157  #if SYZ_HAVE_CLOSE_FDS
  1158  	close_fds();
  1159  #endif
  1160  
  1161  	write_extra_output();
  1162  	if (flag_extra_coverage) {
  1163  		// Check for new extra coverage in small intervals to avoid situation
  1164  		// that we were killed on timeout before we write any.
  1165  		// Check for extra coverage is very cheap, effectively a memory load.
  1166  		const uint64 kSleepMs = 100;
  1167  		for (uint64 i = 0; i < prog_extra_cover_timeout / kSleepMs &&
  1168  				   output_data->completed.load(std::memory_order_relaxed) < kMaxCalls;
  1169  		     i++) {
  1170  			sleep_ms(kSleepMs);
  1171  			write_extra_output();
  1172  		}
  1173  	}
  1174  }
  1175  
// schedule_call hands one syscall to a free worker thread (creating threads
// lazily as needed) and returns the thread that will execute the call.
thread_t* schedule_call(int call_index, int call_num, uint64 copyout_index, uint64 num_args, uint64* args, uint8* pos, call_props_t call_props)
{
	// Find a spare thread to execute the call.
	int i = 0;
	for (; i < kMaxThreads; i++) {
		thread_t* th = &threads[i];
		// Threads are created lazily on first use.
		if (!th->created)
			thread_create(th, i, cover_collection_required());
		if (event_isset(&th->done)) {
			// Reap the thread's previous call before reusing it.
			if (th->executing)
				handle_completion(th);
			break;
		}
	}
	if (i == kMaxThreads)
		exitf("out of threads");
	thread_t* th = &threads[i];
	// Invariant for a claimable thread: not ready, done, not executing.
	if (event_isset(&th->ready) || !event_isset(&th->done) || th->executing)
		exitf("bad thread state in schedule: ready=%d done=%d executing=%d",
		      event_isset(&th->ready), event_isset(&th->done), th->executing);
	last_scheduled = th;
	th->copyout_pos = pos;
	th->copyout_index = copyout_index;
	event_reset(&th->done);
	// We do this both right before execute_syscall in the thread and here because:
	// the former is useful to reset all unrelated coverage from our syscalls (e.g. futex in event_wait),
	// while the reset here is useful to avoid the following scenario that the fuzzer was able to trigger.
	// If the test program contains seccomp syscall that kills the worker thread on the next syscall,
	// then it won't receive this next syscall and won't do cover_reset. If we are collecting comparions
	// then we've already transformed comparison data from the previous syscall into rpc::ComparisonRaw
	// in write_comparisons. That data is still in the buffer. The first word of rpc::ComparisonRaw is PC
	// which overlaps with comparison type in kernel exposed records. As the result write_comparisons
	// that will try to write out data from unfinished syscalls will see these rpc::ComparisonRaw records,
	// mis-interpret PC as type, and fail as: SYZFAIL: invalid kcov comp type (type=ffffffff8100b4e0).
	if (flag_coverage)
		cover_reset(&th->cov);
	th->executing = true;
	th->call_index = call_index;
	th->call_num = call_num;
	th->num_args = num_args;
	th->call_props = call_props;
	for (int i = 0; i < kMaxArgs; i++)
		th->args[i] = args[i];
	// Wake the worker (in non-threaded mode the caller runs the call itself).
	event_set(&th->ready);
	running++;
	return th;
}
  1223  
// write_signal serializes feedback signal for one call into fbb and returns
// the offset of the created uint64 vector. index identifies the call for
// dedup purposes; all=true disables filtering against max_signal.
template <typename cover_data_t>
uint32 write_signal(flatbuffers::FlatBufferBuilder& fbb, int index, cover_t* cov, bool all)
{
	// Write out feedback signals.
	// Currently it is code edges computed as xor of two subsequent basic block PCs.
	fbb.StartVector<uint64_t>(0);
	cover_data_t* cover_data = (cover_data_t*)(cov->data + cov->data_offset);
	if ((char*)(cover_data + cov->size) > cov->data_end)
		failmsg("too much cover", "cov=%u", cov->size);
	uint32 nsig = 0;
	cover_data_t prev_pc = 0;
	bool prev_filter = true;
	for (uint32 i = 0; i < cov->size; i++) {
		// Rebase the raw PC by the configured offset.
		cover_data_t pc = cover_data[i] + cov->pc_offset;
		uint64 sig = pc;
		if (use_cover_edges) {
			// Only hash the lower 12 bits so the hash is independent of any module offsets.
			const uint64 mask = (1 << 12) - 1;
			sig ^= hash(prev_pc & mask) & mask;
		}
		bool filter = coverage_filter(pc);
		// Ignore the edge only if both current and previous PCs are filtered out
		// to capture all incoming and outcoming edges into the interesting code.
		bool ignore = !filter && !prev_filter;
		prev_pc = pc;
		prev_filter = filter;
		if (ignore || dedup(index, sig))
			continue;
		// Skip signal the manager already knows about, unless all signal was requested.
		if (!all && max_signal && max_signal->Contains(sig))
			continue;
		fbb.PushElement(uint64(sig));
		nsig++;
	}
	return fbb.EndVector(nsig);
}
  1259  
  1260  template <typename cover_data_t>
  1261  uint32 write_cover(flatbuffers::FlatBufferBuilder& fbb, cover_t* cov)
  1262  {
  1263  	uint32 cover_size = cov->size;
  1264  	cover_data_t* cover_data = (cover_data_t*)(cov->data + cov->data_offset);
  1265  	if (flag_dedup_cover) {
  1266  		cover_data_t* end = cover_data + cover_size;
  1267  		std::sort(cover_data, end);
  1268  		cover_size = std::unique(cover_data, end) - cover_data;
  1269  	}
  1270  	fbb.StartVector<uint64_t>(cover_size);
  1271  	// Flatbuffer arrays are written backwards, so reverse the order on our side as well.
  1272  	for (uint32 i = 0; i < cover_size; i++)
  1273  		fbb.PushElement(uint64(cover_data[cover_size - i - 1] + cov->pc_offset));
  1274  	return fbb.EndVector(cover_size);
  1275  }
  1276  
// write_comparisons converts the raw kcov comparison records for one call
// into rpc::ComparisonRaw (in place), dedups them and serializes them into
// fbb, returning the offset of the created vector of structs.
uint32 write_comparisons(flatbuffers::FlatBufferBuilder& fbb, cover_t* cov)
{
	// Collect only the comparisons
	// The kcov buffer starts with the number of records.
	uint64 ncomps = *(uint64_t*)cov->data;
	kcov_comparison_t* cov_start = (kcov_comparison_t*)(cov->data + sizeof(uint64));
	if ((char*)(cov_start + ncomps) > cov->data_end)
		failmsg("too many comparisons", "ncomps=%llu", ncomps);
	// No room left for even one more record means the buffer likely overflowed.
	cov->overflow = ((char*)(cov_start + ncomps + 1) > cov->data_end);
	rpc::ComparisonRaw* start = (rpc::ComparisonRaw*)cov_start;
	rpc::ComparisonRaw* end = start;
	// We will convert kcov_comparison_t to ComparisonRaw inplace.
	static_assert(sizeof(kcov_comparison_t) >= sizeof(rpc::ComparisonRaw));
	for (uint32 i = 0; i < ncomps; i++) {
		auto raw = convert(cov_start[i]);
		// Skip records with zero PC.
		if (!raw.pc())
			continue;
		*end++ = raw;
	}
	// Sort and dedup records by the (pc, op1, op2) triple.
	std::sort(start, end, [](rpc::ComparisonRaw a, rpc::ComparisonRaw b) -> bool {
		if (a.pc() != b.pc())
			return a.pc() < b.pc();
		if (a.op1() != b.op1())
			return a.op1() < b.op1();
		return a.op2() < b.op2();
	});
	ncomps = std::unique(start, end, [](rpc::ComparisonRaw a, rpc::ComparisonRaw b) -> bool {
			 return a.pc() == b.pc() && a.op1() == b.op1() && a.op2() == b.op2();
		 }) -
		 start;
	return fbb.CreateVectorOfStructs(start, ncomps).o;
}
  1308  
  1309  bool coverage_filter(uint64 pc)
  1310  {
  1311  	if (!cover_filter)
  1312  		return true;
  1313  	return cover_filter->Contains(pc);
  1314  }
  1315  
  1316  void handle_completion(thread_t* th)
  1317  {
  1318  	if (event_isset(&th->ready) || !event_isset(&th->done) || !th->executing)
  1319  		exitf("bad thread state in completion: ready=%d done=%d executing=%d",
  1320  		      event_isset(&th->ready), event_isset(&th->done), th->executing);
  1321  	if (th->res != (intptr_t)-1)
  1322  		copyout_call_results(th);
  1323  
  1324  	write_call_output(th, true);
  1325  	write_extra_output();
  1326  	th->executing = false;
  1327  	running--;
  1328  	if (running < 0) {
  1329  		// This fires periodically for the past 2 years (see issue #502).
  1330  		fprintf(stderr, "running=%d completed=%d flag_threaded=%d current=%d\n",
  1331  			running, completed, flag_threaded, th->id);
  1332  		for (int i = 0; i < kMaxThreads; i++) {
  1333  			thread_t* th1 = &threads[i];
  1334  			fprintf(stderr, "th #%2d: created=%d executing=%d"
  1335  					" ready=%d done=%d call_index=%d res=%lld reserrno=%d\n",
  1336  				i, th1->created, th1->executing,
  1337  				event_isset(&th1->ready), event_isset(&th1->done),
  1338  				th1->call_index, (uint64)th1->res, th1->reserrno);
  1339  		}
  1340  		exitf("negative running");
  1341  	}
  1342  }
  1343  
// copyout_call_results records the call's return value and any requested
// memory copyouts into the results table, so that later calls in the program
// can reference them via arg_result.
void copyout_call_results(thread_t* th)
{
	// Record the syscall return value itself, if the program asked for it.
	if (th->copyout_index != no_copyout) {
		if (th->copyout_index >= kMaxCommands)
			failmsg("result overflows kMaxCommands", "index=%lld", th->copyout_index);
		results[th->copyout_index].executed = true;
		results[th->copyout_index].val = th->res;
	}
	// Process the copyout instructions that immediately follow this call in
	// the input program (th->copyout_pos points just past the call).
	for (bool done = false; !done;) {
		uint64 instr = read_input(&th->copyout_pos);
		switch (instr) {
		case instr_copyout: {
			uint64 index = read_input(&th->copyout_pos);
			if (index >= kMaxCommands)
				failmsg("result overflows kMaxCommands", "index=%lld", index);
			char* addr = (char*)(read_input(&th->copyout_pos) + SYZ_DATA_OFFSET);
			uint64 size = read_input(&th->copyout_pos);
			uint64 val = 0;
			// Only record the value if copyout succeeded.
			if (copyout(addr, size, &val)) {
				results[index].executed = true;
				results[index].val = val;
			}
			debug_verbose("copyout 0x%llx from %p\n", val, addr);
			break;
		}
		default:
			// The first non-copyout instruction terminates the sequence.
			done = true;
			break;
		}
	}
}
  1375  
// write_output serializes one call's collected data (comparisons, or
// signal/cover depending on the exec flags) into the shared output region
// and publishes the record for the parent to read.
void write_output(int index, cover_t* cov, rpc::CallFlag flags, uint32 error, bool all_signal)
{
	// NOTE(review): CoverAccessScope presumably makes the coverage buffer
	// accessible for the duration of this function - confirm in its definition.
	CoverAccessScope scope(cov);
	auto& fbb = *output_builder;
	const uint32 start_size = output_builder->GetSize();
	// start_size is only read by debug_verbose below, which may compile to nothing.
	(void)start_size;
	uint32 signal_off = 0;
	uint32 cover_off = 0;
	uint32 comps_off = 0;
	if (flag_comparisons) {
		comps_off = write_comparisons(fbb, cov);
	} else {
		// Coverage element width depends on the target kernel bitness.
		if (flag_collect_signal) {
			if (is_kernel_64_bit)
				signal_off = write_signal<uint64>(fbb, index, cov, all_signal);
			else
				signal_off = write_signal<uint32>(fbb, index, cov, all_signal);
		}
		if (flag_collect_cover) {
			if (is_kernel_64_bit)
				cover_off = write_cover<uint64>(fbb, cov);
			else
				cover_off = write_cover<uint32>(fbb, cov);
		}
	}

	rpc::CallInfoRawBuilder builder(*output_builder);
	if (cov->overflow)
		flags |= rpc::CallFlag::CoverageOverflow;
	builder.add_flags(flags);
	builder.add_error(error);
	if (signal_off)
		builder.add_signal(signal_off);
	if (cover_off)
		builder.add_cover(cover_off);
	if (comps_off)
		builder.add_comps(comps_off);
	auto off = builder.Finish();
	uint32 slot = output_data->completed.load(std::memory_order_relaxed);
	if (slot >= kMaxCalls)
		failmsg("too many calls in output", "slot=%d", slot);
	auto& call = output_data->calls[slot];
	call.index = index;
	call.offset = off;
	// Publish the consumed size before bumping completed (release ordering)
	// so a reader never observes a completed record with a stale size.
	output_data->consumed.store(output_builder->GetSize(), std::memory_order_release);
	output_data->completed.store(slot + 1, std::memory_order_release);
	debug_verbose("out #%u: index=%u errno=%d flags=0x%x total_size=%u\n",
		      slot + 1, index, error, static_cast<unsigned>(flags), call.data_size - start_size);
}
  1425  
  1426  void write_call_output(thread_t* th, bool finished)
  1427  {
  1428  	uint32 reserrno = ENOSYS;
  1429  	rpc::CallFlag flags = rpc::CallFlag::Executed;
  1430  	if (finished && th != last_scheduled)
  1431  		flags |= rpc::CallFlag::Blocked;
  1432  	if (finished) {
  1433  		reserrno = th->res != -1 ? 0 : th->reserrno;
  1434  		flags |= rpc::CallFlag::Finished;
  1435  		if (th->fault_injected)
  1436  			flags |= rpc::CallFlag::FaultInjected;
  1437  	}
  1438  	bool all_signal = th->call_index < 64 ? (all_call_signal & (1ull << th->call_index)) : false;
  1439  	write_output(th->call_index, &th->cov, flags, reserrno, all_signal);
  1440  }
  1441  
  1442  void write_extra_output()
  1443  {
  1444  	if (!cover_collection_required() || !flag_extra_coverage || flag_comparisons)
  1445  		return;
  1446  	cover_collect(&extra_cov);
  1447  	if (!extra_cov.size)
  1448  		return;
  1449  	write_output(-1, &extra_cov, rpc::CallFlag::NONE, 997, all_extra_signal);
  1450  	cover_reset(&extra_cov);
  1451  }
  1452  
// finish_output builds the final ExecResult message from the per-call records
// accumulated in output and returns a span over the serialized message.
// Tolerates a misbehaving/crashed child: out-of-range counters and records
// are clamped or skipped rather than trusted.
flatbuffers::span<uint8_t> finish_output(OutputData* output, int proc_id, uint64 req_id, uint32 num_calls, uint64 elapsed,
					 uint64 freshness, uint32 status, bool hanged, const std::vector<uint8_t>* process_output)
{
	// In snapshot mode the output size is fixed and output_size is always initialized, so use it.
	// (This is the GNU "x ? : y" form: a non-zero stored size is used as-is,
	// otherwise fall back to kMaxOutput.)
	int out_size = flag_snapshot ? output_size : output->size.load(std::memory_order_relaxed) ?
											       : kMaxOutput;
	uint32 completed = output->completed.load(std::memory_order_relaxed);
	// Defensive clamp in case the child corrupted the counter.
	completed = std::min(completed, kMaxCalls);
	debug("handle completion: completed=%u output_size=%u\n", completed, out_size);
	ShmemBuilder fbb(output, out_size, false);
	// Calls that produced no output record are reported with error value 998.
	auto empty_call = rpc::CreateCallInfoRawDirect(fbb, rpc::CallFlag::NONE, 998);
	std::vector<flatbuffers::Offset<rpc::CallInfoRaw>> calls(num_calls, empty_call);
	std::vector<flatbuffers::Offset<rpc::CallInfoRaw>> extra;
	for (uint32_t i = 0; i < completed; i++) {
		const auto& call = output->calls[i];
		// Index -1 denotes extra (background) coverage records (see write_extra_output).
		if (call.index == -1) {
			extra.push_back(call.offset);
			continue;
		}
		// Skip records written out of range by a potentially misbehaving child.
		if (call.index < 0 || call.index >= static_cast<int>(num_calls) || call.offset.o > kMaxOutput) {
			debug("bad call index/offset: proc=%d req=%llu call=%d/%d completed=%d offset=%u",
			      proc_id, req_id, call.index, num_calls,
			      completed, call.offset.o);
			continue;
		}
		calls[call.index] = call.offset;
	}
	auto prog_info_off = rpc::CreateProgInfoRawDirect(fbb, &calls, &extra, 0, elapsed, freshness);
	flatbuffers::Offset<flatbuffers::String> error_off = 0;
	if (status == kFailStatus)
		error_off = fbb.CreateString("process failed");
	// If the request wrote binary result (currently glob requests do this), use it instead of the output.
	auto output_off = output->result_offset.load(std::memory_order_relaxed);
	if (output_off.IsNull() && process_output)
		output_off = fbb.CreateVector(*process_output);
	auto exec_off = rpc::CreateExecResultRaw(fbb, req_id, proc_id, output_off, hanged, error_off, prog_info_off);
	auto msg_off = rpc::CreateExecutorMessageRaw(fbb, rpc::ExecutorMessagesRaw::ExecResult,
						     flatbuffers::Offset<void>(exec_off.o));
	fbb.FinishSizePrefixed(msg_off);
	return fbb.GetBufferSpan();
}
  1494  
  1495  void thread_create(thread_t* th, int id, bool need_coverage)
  1496  {
  1497  	th->created = true;
  1498  	th->id = id;
  1499  	th->executing = false;
  1500  	// Lazily set up coverage collection.
  1501  	// It is assumed that actually it's already initialized - with a few rare exceptions.
  1502  	if (need_coverage) {
  1503  		if (!th->cov.fd)
  1504  			exitf("out of opened kcov threads");
  1505  		thread_mmap_cover(th);
  1506  	}
  1507  	event_init(&th->ready);
  1508  	event_init(&th->done);
  1509  	event_set(&th->done);
  1510  	if (flag_threaded)
  1511  		thread_start(worker_thread, th);
  1512  }
  1513  
  1514  void thread_mmap_cover(thread_t* th)
  1515  {
  1516  	if (th->cov.data != NULL)
  1517  		return;
  1518  	cover_mmap(&th->cov);
  1519  	cover_protect(&th->cov);
  1520  }
  1521  
// Body of each worker thread: repeatedly waits for a "ready" event,
// executes the syscall currently staged in its thread_t, then signals
// "done". Loops forever; worker threads are never joined.
void* worker_thread(void* arg)
{
	thread_t* th = (thread_t*)arg;
	current_thread = th;
	for (bool first = true;; first = false) {
		event_wait(&th->ready);
		event_reset(&th->ready);
		// Setup coverage only after receiving the first ready event
		// because in snapshot mode we don't know coverage mode for precreated threads.
		if (first && cover_collection_required())
			cover_enable(&th->cov, flag_comparisons, false);
		execute_call(th);
		event_set(&th->done);
	}
	return 0;
}
  1538  
// Executes the syscall staged in th (call_num/args/call_props):
// optionally arms fault injection, resets/collects coverage around the
// call, records res/errno/fault status back into th, and logs the call
// when debugging is enabled.
void execute_call(thread_t* th)
{
	const call_t* call = &syscalls[th->call_num];
	debug("#%d [%llums] -> %s(",
	      th->id, current_time_ms() - start_time_ms, call->name);
	for (int i = 0; i < th->num_args; i++) {
		if (i != 0)
			debug(", ");
		debug("0x%llx", (uint64)th->args[i]);
	}
	debug(")\n");

	int fail_fd = -1;
	// While soft_fail_state is set, failmsg() exits with code 0 instead of
	// kFailStatus, so that injected faults don't look like executor crashes.
	th->soft_fail_state = false;
	if (th->call_props.fail_nth > 0) {
		if (th->call_props.rerun > 0)
			fail("both fault injection and rerun are enabled for the same call");
		fail_fd = inject_fault(th->call_props.fail_nth);
		th->soft_fail_state = true;
	}

	if (flag_coverage)
		cover_reset(&th->cov);
	// For pseudo-syscalls and user-space functions NONFAILING can abort before assigning to th->res.
	// Arrange for res = -1 and errno = EFAULT result for such case.
	th->res = -1;
	errno = EFAULT;
	NONFAILING(th->res = execute_syscall(call, th->args));
	th->reserrno = errno;
	// Our pseudo-syscalls may misbehave.
	if ((th->res == -1 && th->reserrno == 0) || call->attrs.ignore_return)
		th->reserrno = EINVAL;
	// Reset the flag before the first possible fail().
	th->soft_fail_state = false;

	if (flag_coverage)
		cover_collect(&th->cov);
	th->fault_injected = false;

	if (th->call_props.fail_nth > 0)
		th->fault_injected = fault_injected(fail_fd);

	// If required, run the syscall some more times.
	// But let's still return res, errno and coverage from the first execution.
	for (int i = 0; i < th->call_props.rerun; i++)
		NONFAILING(execute_syscall(call, th->args));

	debug("#%d [%llums] <- %s=0x%llx",
	      th->id, current_time_ms() - start_time_ms, call->name, (uint64)th->res);
	if (th->res == (intptr_t)-1)
		debug(" errno=%d", th->reserrno);
	if (flag_coverage)
		debug(" cover=%u", th->cov.size);
	if (th->call_props.fail_nth > 0)
		debug(" fault=%d", th->fault_injected);
	if (th->call_props.rerun > 0)
		debug(" rerun=%d", th->call_props.rerun);
	debug("\n");
}
  1598  
  1599  static uint32 hash(uint32 a)
  1600  {
  1601  	// For test OS we disable hashing for determinism and testability.
  1602  #if !GOOS_test
  1603  	a = (a ^ 61) ^ (a >> 16);
  1604  	a = a + (a << 3);
  1605  	a = a ^ (a >> 4);
  1606  	a = a * 0x27d4eb2d;
  1607  	a = a ^ (a >> 15);
  1608  #endif
  1609  	return a;
  1610  }
  1611  
  1612  const uint32 dedup_table_size = 8 << 10;
  1613  uint64 dedup_table_sig[dedup_table_size];
  1614  uint8 dedup_table_index[dedup_table_size];
  1615  
  1616  // Poorman's best-effort hashmap-based deduplication.
  1617  static bool dedup(uint8 index, uint64 sig)
  1618  {
  1619  	for (uint32 i = 0; i < 4; i++) {
  1620  		uint32 pos = (sig + i) % dedup_table_size;
  1621  		if (dedup_table_sig[pos] == sig && dedup_table_index[pos] == index)
  1622  			return true;
  1623  		if (dedup_table_sig[pos] == 0 || dedup_table_index[pos] != index) {
  1624  			dedup_table_index[pos] = index;
  1625  			dedup_table_sig[pos] = sig;
  1626  			return false;
  1627  		}
  1628  	}
  1629  	uint32 pos = sig % dedup_table_size;
  1630  	dedup_table_sig[pos] = sig;
  1631  	dedup_table_index[pos] = index;
  1632  	return false;
  1633  }
  1634  
// Stores val into *addr as a T in the byte order selected by bf.
// When bf_len != 0, only the [bf_off, bf_off+bf_len) bitfield within the
// existing value is updated; the remaining bits are preserved.
template <typename T>
void copyin_int(char* addr, uint64 val, uint64 bf, uint64 bf_off, uint64 bf_len)
{
	if (bf_off == 0 && bf_len == 0) {
		// Whole-value store: just byte-swap as needed.
		*(T*)addr = swap(val, sizeof(T), bf);
		return;
	}
	// Bitfield store: read the current value back into host order first.
	T x = swap(*(T*)addr, sizeof(T), bf);
	debug_verbose("copyin_int<%zu>: old x=0x%llx\n", sizeof(T), (uint64)x);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	// On big-endian hosts bitfield offsets count from the most significant end.
	const uint64 shift = sizeof(T) * CHAR_BIT - bf_off - bf_len;
#else
	const uint64 shift = bf_off;
#endif
	// Clear the target bit range, then splice in the (shifted, masked) value.
	x = (x & ~BITMASK(shift, bf_len)) | ((val << shift) & BITMASK(shift, bf_len));
	debug_verbose("copyin_int<%zu>: x=0x%llx\n", sizeof(T), (uint64)x);
	*(T*)addr = swap(x, sizeof(T), bf);
}
  1653  
// Writes a program-generated value into the test data region at addr.
// bf selects the encoding: native/big-endian integer (with optional
// bitfield off/len) or a fixed-width decimal/hex/octal ASCII string.
// All stores go through NONFAILING because addr may be unmapped.
void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len)
{
	debug_verbose("copyin: addr=%p val=0x%llx size=%llu bf=%llu bf_off=%llu bf_len=%llu\n",
		      addr, val, size, bf, bf_off, bf_len);
	// Bitfields only make sense for integer formats.
	if (bf != binary_format_native && bf != binary_format_bigendian && (bf_off != 0 || bf_len != 0))
		failmsg("bitmask for string format", "off=%llu, len=%llu", bf_off, bf_len);
	switch (bf) {
	case binary_format_native:
	case binary_format_bigendian:
		NONFAILING(switch (size) {
			case 1:
				copyin_int<uint8>(addr, val, bf, bf_off, bf_len);
				break;
			case 2:
				copyin_int<uint16>(addr, val, bf, bf_off, bf_len);
				break;
			case 4:
				copyin_int<uint32>(addr, val, bf, bf_off, bf_len);
				break;
			case 8:
				copyin_int<uint64>(addr, val, bf, bf_off, bf_len);
				break;
			default:
				failmsg("copyin: bad argument size", "size=%llu", size);
		});
		break;
	// The string sizes below are fixed: 20/18/23 bytes exactly match the
	// zero-padded formats used in the sprintf calls (incl. terminator space).
	case binary_format_strdec:
		if (size != 20)
			failmsg("bad strdec size", "size=%llu", size);
		NONFAILING(sprintf((char*)addr, "%020llu", val));
		break;
	case binary_format_strhex:
		if (size != 18)
			failmsg("bad strhex size", "size=%llu", size);
		NONFAILING(sprintf((char*)addr, "0x%016llx", val));
		break;
	case binary_format_stroct:
		if (size != 23)
			failmsg("bad stroct size", "size=%llu", size);
		NONFAILING(sprintf((char*)addr, "%023llo", val));
		break;
	default:
		failmsg("unknown binary format", "format=%llu", bf);
	}
}
  1699  
// Reads a size-byte integer at addr into *res. Returns the NONFAILING
// result, i.e. false if the memory access faulted (addr unmapped),
// in which case *res is left unchanged.
bool copyout(char* addr, uint64 size, uint64* res)
{
	return NONFAILING(
	    switch (size) {
		    case 1:
			    *res = *(uint8*)addr;
			    break;
		    case 2:
			    *res = *(uint16*)addr;
			    break;
		    case 4:
			    *res = *(uint32*)addr;
			    break;
		    case 8:
			    *res = *(uint64*)addr;
			    break;
		    default:
			    failmsg("copyout: bad argument size", "size=%llu", size);
	    });
}
  1720  
// Decodes one scalar syscall argument from the serialized input stream
// and returns its runtime value. Fails hard on malformed input.
uint64 read_arg(uint8** input_posp)
{
	uint64 typ = read_input(input_posp);
	switch (typ) {
	case arg_const: {
		uint64 size, bf, bf_off, bf_len;
		uint64 val = read_const_arg(input_posp, &size, &bf, &bf_off, &bf_len);
		// Scalar args can't be string formats or bitfields;
		// those are only valid for in-memory data (see copyin).
		if (bf != binary_format_native && bf != binary_format_bigendian)
			failmsg("bad argument binary format", "format=%llu", bf);
		if (bf_off != 0 || bf_len != 0)
			failmsg("bad argument bitfield", "off=%llu, len=%llu", bf_off, bf_len);
		return swap(val, size, bf);
	}
	case arg_addr32:
	case arg_addr64: {
		// Addresses are serialized relative to the start of the data mapping.
		return read_input(input_posp) + SYZ_DATA_OFFSET;
	}
	case arg_result: {
		// Value copied from a previous call's result (see read_result).
		uint64 meta = read_input(input_posp);
		uint64 bf = meta >> 8;
		if (bf != binary_format_native)
			failmsg("bad result argument format", "format=%llu", bf);
		return read_result(input_posp);
	}
	default:
		failmsg("bad argument type", "type=%llu", typ);
	}
}
  1749  
  1750  uint64 swap(uint64 v, uint64 size, uint64 bf)
  1751  {
  1752  	if (bf == binary_format_native)
  1753  		return v;
  1754  	if (bf != binary_format_bigendian)
  1755  		failmsg("bad binary format in swap", "format=%llu", bf);
  1756  	switch (size) {
  1757  	case 2:
  1758  		return htobe16(v);
  1759  	case 4:
  1760  		return htobe32(v);
  1761  	case 8:
  1762  		return htobe64(v);
  1763  	default:
  1764  		failmsg("bad big-endian int size", "size=%llu", size);
  1765  	}
  1766  }
  1767  
  1768  uint64 read_const_arg(uint8** input_posp, uint64* size_p, uint64* bf_p, uint64* bf_off_p, uint64* bf_len_p)
  1769  {
  1770  	uint64 meta = read_input(input_posp);
  1771  	uint64 val = read_input(input_posp);
  1772  	*size_p = meta & 0xff;
  1773  	uint64 bf = (meta >> 8) & 0xff;
  1774  	*bf_off_p = (meta >> 16) & 0xff;
  1775  	*bf_len_p = (meta >> 24) & 0xff;
  1776  	uint64 pid_stride = meta >> 32;
  1777  	val += pid_stride * procid;
  1778  	*bf_p = bf;
  1779  	return val;
  1780  }
  1781  
  1782  uint64 read_result(uint8** input_posp)
  1783  {
  1784  	uint64 idx = read_input(input_posp);
  1785  	uint64 op_div = read_input(input_posp);
  1786  	uint64 op_add = read_input(input_posp);
  1787  	uint64 arg = read_input(input_posp);
  1788  	if (idx >= kMaxCommands)
  1789  		failmsg("command refers to bad result", "result=%lld", idx);
  1790  	if (results[idx].executed) {
  1791  		arg = results[idx].val;
  1792  		if (op_div != 0)
  1793  			arg = arg / op_div;
  1794  		arg += op_add;
  1795  	}
  1796  	return arg;
  1797  }
  1798  
// Decodes the next value from the input stream as a zigzag-encoded varint
// (7 bits per byte, least significant group first; the low bit of the
// decoded number selects sign via bit complement). Fails hard on varint
// overflow or when the read would run past the kMaxInput input region.
// When peek is true, the stream position is left unchanged.
uint64 read_input(uint8** input_posp, bool peek)
{
	uint64 v = 0;
	unsigned shift = 0;
	uint8* input_pos = *input_posp;
	for (int i = 0;; i++, shift += 7) {
		// 10 groups of 7 bits are enough for any 64-bit value.
		const int maxLen = 10;
		if (i == maxLen)
			failmsg("varint overflow", "pos=%zu", (size_t)(*input_posp - input_data));
		if (input_pos >= input_data + kMaxInput)
			failmsg("input command overflows input", "pos=%p: [%p:%p)",
				input_pos, input_data, input_data + kMaxInput);
		uint8 b = *input_pos++;
		v |= uint64(b & 0x7f) << shift;
		if (b < 0x80) {
			// The 10th byte may only contribute the final (64th) bit.
			if (i == maxLen - 1 && b > 1)
				failmsg("varint overflow", "pos=%zu", (size_t)(*input_posp - input_data));
			break;
		}
	}
	// Zigzag decode: even values map to v/2, odd to ~(v/2) (negatives).
	if (v & 1)
		v = ~(v >> 1);
	else
		v = v >> 1;
	if (!peek)
		*input_posp = input_pos;
	return v;
}
  1827  
// Converts a raw KCOV comparison record into the RPC representation,
// filtering out uninteresting comparisons. Returns a zero-value
// ComparisonRaw (pc == 0) for records that should be dropped.
rpc::ComparisonRaw convert(const kcov_comparison_t& cmp)
{
	if (cmp.type > (KCOV_CMP_CONST | KCOV_CMP_SIZE_MASK))
		failmsg("invalid kcov comp type", "type=%llx", cmp.type);
	uint64 arg1 = cmp.arg1;
	uint64 arg2 = cmp.arg2;
	// Comparisons with 0 are not interesting, fuzzer should be able to guess 0's without help.
	if (arg1 == 0 && (arg2 == 0 || (cmp.type & KCOV_CMP_CONST)))
		return {};
	// Successful comparison is not interesting.
	if (arg1 == arg2)
		return {};

	// This can be a pointer (assuming 64-bit kernel).
	// First of all, we want avert fuzzer from our output region.
	// Without this fuzzer manages to discover and corrupt it.
	uint64 out_start = (uint64)output_data;
	uint64 out_end = out_start + output_size;
	if (arg1 >= out_start && arg1 <= out_end)
		return {};
	if (arg2 >= out_start && arg2 <= out_end)
		return {};
	// Drop comparisons whose PC falls outside the coverage filter.
	if (!coverage_filter(cmp.pc))
		return {};

	// KCOV converts all arguments of size x first to uintx_t and then to uint64.
	// We want to properly extend signed values, e.g we want int8 c = 0xfe to be represented
	// as 0xfffffffffffffffe. Note that uint8 c = 0xfe will be represented the same way.
	// This is ok because during hints processing we will anyways try the value 0x00000000000000fe.
	switch (cmp.type & KCOV_CMP_SIZE_MASK) {
	case KCOV_CMP_SIZE1:
		arg1 = (uint64)(long long)(signed char)arg1;
		arg2 = (uint64)(long long)(signed char)arg2;
		break;
	case KCOV_CMP_SIZE2:
		arg1 = (uint64)(long long)(short)arg1;
		arg2 = (uint64)(long long)(short)arg2;
		break;
	case KCOV_CMP_SIZE4:
		arg1 = (uint64)(long long)(int)arg1;
		arg2 = (uint64)(long long)(int)arg2;
		break;
	}

	// Prog package expects operands in the opposite order (first operand may come from the input,
	// the second operand was computed in the kernel), so swap operands.
	return {cmp.pc, arg2, arg1, !!(cmp.type & KCOV_CMP_CONST)};
}
  1876  
// Reports a fatal executor error: prints "SYZFAIL: <err>", an optional
// printf-formatted detail message (msg may be NULL), and errno, then
// terminates the process. Never returns.
void failmsg(const char* err, const char* msg, ...)
{
	// Capture errno before fprintf can clobber it.
	int e = errno;
	fprintf(stderr, "SYZFAIL: %s\n", err);
	if (msg) {
		va_list args;
		va_start(args, msg);
		vfprintf(stderr, msg, args);
		va_end(args);
	}
	fprintf(stderr, " (errno %d: %s)\n", e, strerror(e));

	// fail()'s are often used during the validation of kernel reactions to queries
	// that were issued by pseudo syscalls implementations. As fault injection may
	// cause the kernel not to succeed in handling these queries (e.g. socket writes
	// or reads may fail), this could ultimately lead to unwanted "lost connection to
	// test machine" crashes.
	// In order to avoid this and, on the other hand, to still have the ability to
	// signal a disastrous situation, the exit code of this function depends on the
	// current context.
	// All fail() invocations during system call execution with enabled fault injection
	// lead to termination with zero exit code. In all other cases, the exit code is
	// kFailStatus.
	if (current_thread && current_thread->soft_fail_state)
		doexit(0);
	doexit(kFailStatus);
}
  1904  
  1905  void fail(const char* err)
  1906  {
  1907  	failmsg(err, 0);
  1908  }
  1909  
// Prints a printf-formatted message plus errno to stderr and terminates
// with exit code 1 (infrastructure error, distinct from kFailStatus).
void exitf(const char* msg, ...)
{
	// Capture errno before any stdio call can clobber it.
	int e = errno;
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fprintf(stderr, " (errno %d)\n", e);
	doexit(1);
}
  1920  
// Printf-style logging to stderr, enabled only by the debug flag.
// Preserves errno so it can be sprinkled between syscalls without
// perturbing their observed results.
void debug(const char* msg, ...)
{
	if (!flag_debug)
		return;
	int err = errno;
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fflush(stderr);
	errno = err;
}
  1933  
  1934  void debug_dump_data(const char* data, int length)
  1935  {
  1936  	if (!flag_debug)
  1937  		return;
  1938  	int i = 0;
  1939  	for (; i < length; i++) {
  1940  		debug("%02x ", data[i] & 0xff);
  1941  		if (i % 16 == 15)
  1942  			debug("\n");
  1943  	}
  1944  	if (i % 16 != 0)
  1945  		debug("\n");
  1946  }