github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/executor/cover_filter.h (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // CoverFilter is PC hash set that can be placed in shared memory.
     5  //
     6  // The set can cover up to 4 distinct 1GB regions of PCs.
     7  // This restriction allows for efficient, simple and shared memory compatible representation,
     8  // but should be enough to cover any reasonable combination of kernel/modules mapping.
     9  //
    10  // Low 3 bits of PCs are discarded. This reduces memory consumption 8x, but allows for some false positives.
    11  // However, in practice false positives should be very rare. A typical coverage call instruction is 4/5 bytes,
    12  // and there must be at least 1 other instruction in between them to make them different basic blocks,
    13  // so it's practically impossible to place 2 of them in the same 8-byte region.
    14  // For signal with hashed low 12 bits the probability is also low b/c overall density of coverage callbacks
    15  // is relatively low, a KASAN Linux kernel contains 1 callback per 88 bytes of code on average.
    16  // So even if we discard low 3 bits, average density is still 1/11.
    17  // For gVisor with dense coverage IDs special care must be taken to avoid collisions.
    18  //
    19  // The set is organized as a 3 level table.
    20  // The top "region" level is linear lookup, but contains at most 4 entries, each covering 1GB.
    21  // Most likely the first entry is the right one. This level allows to cover unconnected regions of PCs.
    22  // The next "L1" level splits 1GB chunks into 1MB chunks, and allows to allocate memory only
    23  // for a subset of these 1MB chunks.
    24  // The last "L2" level covers 1MB chunks with 16KB bitmaps (1MB divided by 8 for 3 discarded PC bits,
    25  // and divided by 8 again for 8 bits in a byte).
    26  class CoverFilter
    27  {
    28  public:
    29  	CoverFilter()
    30  	    : shmem_(kMemSize),
    31  	      tab_(static_cast<Table*>(shmem_.Mem()))
    32  	{
    33  	}
    34  
    35  	CoverFilter(int fd, void* preferred = nullptr)
    36  	    : shmem_(fd, preferred, kMemSize, false),
    37  	      tab_(static_cast<Table*>(shmem_.Mem()))
    38  	{
    39  	}
    40  
    41  	void Insert(uint64 pc)
    42  	{
    43  		auto [byte, bit] = FindByte(pc, true);
    44  		byte |= bit;
    45  	}
    46  
    47  	bool Contains(uint64 pc)
    48  	{
    49  		auto [byte, bit] = FindByte(pc, false);
    50  		return byte & bit;
    51  	}
    52  
    53  	// Prevents any future modifications to the filter.
    54  	void Seal()
    55  	{
    56  		shmem_.Seal();
    57  	}
    58  
    59  	int FD() const
    60  	{
    61  		return shmem_.FD();
    62  	}
    63  
    64  private:
    65  	static constexpr size_t kNumRegions = 4;
    66  	static constexpr size_t kL1Size = 1 << 30;
    67  	static constexpr size_t kL2Size = 1 << 20;
    68  	static constexpr size_t kPCDivider = 8;
    69  	static constexpr size_t kByteBits = 8;
    70  	// Approximately how much .text we can cover (2GB of PCs require 32MB shmem region).
    71  	static constexpr size_t kMaxCovered = 2ull << 30;
    72  	static constexpr size_t kCompression = kPCDivider * kByteBits;
    73  	static constexpr size_t kMemSize = kMaxCovered / kCompression;
    74  	static constexpr size_t kNoRegion = static_cast<size_t>(-1);
    75  
    76  	struct Table {
    77  		uint64 regions[kNumRegions];
    78  		uint16 l1[kNumRegions][kL1Size / kL2Size];
    79  		uint8 l2[][kL2Size / kCompression];
    80  	};
    81  
    82  	ShmemFile shmem_;
    83  	Table* tab_ = nullptr;
    84  	uint16 alloc_ = 0;
    85  
    86  	std::pair<uint8&, uint8> FindByte(uint64 pc, bool add = false)
    87  	{
    88  		static const uint8 empty = 0;
    89  		size_t reg = FindRegion(pc, add);
    90  		if (reg == kNoRegion)
    91  			return {const_cast<uint8&>(empty), 0};
    92  		size_t l1 = (pc % kL1Size) / kL2Size;
    93  		size_t l2 = tab_->l1[reg][l1];
    94  		if (l2 == 0) {
    95  			if (!add)
    96  				return {const_cast<uint8&>(empty), 0};
    97  			l2 = ++alloc_;
    98  			tab_->l1[reg][l1] = l2;
    99  			if ((tab_->l2[l2 - 1] + 1) > reinterpret_cast<uint8*>(tab_) + kMemSize)
   100  				Overflow(pc);
   101  		}
   102  		size_t off = (pc % kL2Size) / kCompression;
   103  		size_t shift = (pc / kPCDivider) % kByteBits;
   104  		return {tab_->l2[l2 - 1][off], 1 << shift};
   105  	}
   106  
   107  	size_t FindRegion(uint64 pc, bool add = false)
   108  	{
   109  		const uint64 reg = pc | (kL1Size - 1);
   110  		for (size_t r = 0; r < kNumRegions; r++) {
   111  			if (tab_->regions[r] == reg)
   112  				return r;
   113  		}
   114  		if (!add)
   115  			return kNoRegion;
   116  		for (size_t r = 0; r < kNumRegions; r++) {
   117  			if (tab_->regions[r] == 0) {
   118  				tab_->regions[r] = reg;
   119  				return r;
   120  			}
   121  		}
   122  		Overflow(pc);
   123  	}
   124  
   125  	NORETURN void Overflow(uint64 pc)
   126  	{
   127  		failmsg("coverage filter is full", "pc=0x%llx regions=[0x%llx 0x%llx 0x%llx 0x%llx] alloc=%u",
   128  			pc, tab_->regions[0], tab_->regions[1], tab_->regions[2], tab_->regions[3], alloc_);
   129  	}
   130  
   131  	CoverFilter(const CoverFilter&) = delete;
   132  	CoverFilter& operator=(const CoverFilter&) = delete;
   133  };