github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/seccomp/patchbpf/enosys_linux_test.go (about)

     1  //go:build cgo && seccomp
     2  // +build cgo,seccomp
     3  
     4  package patchbpf
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/binary"
     9  	"fmt"
    10  	"testing"
    11  
    12  	"github.com/opencontainers/runc/libcontainer/configs"
    13  
    14  	libseccomp "github.com/seccomp/libseccomp-golang"
    15  	"golang.org/x/net/bpf"
    16  )
    17  
// seccompData is the Go-side stand-in for the seccomp_data payload that the
// mock filter is run against. mockSyscallPayload serialises it with
// encoding/binary, which writes the fields in declaration order, so the order
// and widths of the fields below must not be changed.
type seccompData struct {
	Syscall uint32 // NOTE: We assume sizeof(int) == 4.
	Arch    uint32
	IP      uint64
	Args    [6]uint64
}
    24  
    25  // mockSyscallPayload creates a fake seccomp_data struct with the given data.
    26  func mockSyscallPayload(t *testing.T, sysno libseccomp.ScmpSyscall, arch linuxAuditArch, args ...uint64) []byte {
    27  	var buf bytes.Buffer
    28  
    29  	data := seccompData{
    30  		Syscall: uint32(sysno),
    31  		Arch:    uint32(arch),
    32  		IP:      0xDEADBEEFCAFE,
    33  	}
    34  
    35  	copy(data.Args[:], args)
    36  	if len(args) > 6 {
    37  		t.Fatalf("bad syscall payload: linux only supports 6-argument syscalls")
    38  	}
    39  
    40  	// NOTE: We use BigEndian here because golang.org/x/net/bpf assumes that
    41  	//       all payloads are big-endian while seccomp uses host endianness.
    42  	if err := binary.Write(&buf, binary.BigEndian, data); err != nil {
    43  		t.Fatalf("bad syscall payload: cannot write data: %v", err)
    44  	}
    45  	return buf.Bytes()
    46  }
    47  
// retFallthrough is the sentinel value returned by the mock filter. If the
// mock filter returns this value, it indicates "fallthrough to
// libseccomp-generated filter".
const retFallthrough uint32 = 0xDEADBEEF
    51  
    52  // mockFilter returns a BPF VM that contains a mock filter with an -ENOSYS
    53  // stub. If the filter returns retFallthrough, the stub filter has permitted
    54  // the syscall to pass.
    55  func mockFilter(t *testing.T, config *configs.Seccomp) (*bpf.VM, []bpf.Instruction) {
    56  	patch, err := generatePatch(config)
    57  	if err != nil {
    58  		t.Fatalf("mock filter: generate enosys patch: %v", err)
    59  	}
    60  
    61  	program := append(patch, bpf.RetConstant{Val: retFallthrough})
    62  
    63  	vm, err := bpf.NewVM(program)
    64  	if err != nil {
    65  		t.Fatalf("mock filter: compile BPF VM: %v", err)
    66  	}
    67  	return vm, program
    68  }
    69  
    70  // fakeConfig generates a fake libcontainer seccomp configuration. The syscalls
    71  // are added with an action distinct from the default action.
    72  func fakeConfig(defaultAction configs.Action, explicitSyscalls []string, arches []string) *configs.Seccomp {
    73  	config := configs.Seccomp{
    74  		DefaultAction: defaultAction,
    75  		Architectures: arches,
    76  	}
    77  	syscallAction := configs.Allow
    78  	if syscallAction == defaultAction {
    79  		syscallAction = configs.Kill
    80  	}
    81  	for _, syscall := range explicitSyscalls {
    82  		config.Syscalls = append(config.Syscalls, &configs.Syscall{
    83  			Name:   syscall,
    84  			Action: syscallAction,
    85  		})
    86  	}
    87  	return &config
    88  }
    89  
// testArches is the set of architecture names the tests iterate over.
// List copied from <libcontainer/seccomp/config.go>.
//
// The final "native" entry is not a real architecture name: the tests in
// this file replace it with the host architecture when running the filter,
// and leave it out of the seccomp configuration's architecture list.
var testArches = []string{
	"x86",
	"amd64",
	"x32",
	"arm",
	"arm64",
	"mips",
	"mips64",
	"mips64n32",
	"mipsel",
	"mipsel64",
	"mipsel64n32",
	"ppc",
	"ppc64",
	"ppc64le",
	"s390",
	"s390x",
	// Dummy value to indicate a configuration with no architecture specified.
	"native",
}
   111  
// Used for the "native" architecture: scmpNativeArch is the architecture
// reported by libseccomp for the host, and nativeArch is its string form.
//
// NOTE(review): the error returned by libseccomp.GetNativeArch is discarded
// here, so a detection failure is not reported at initialisation time.
var (
	scmpNativeArch, _ = libseccomp.GetNativeArch()
	nativeArch        = scmpNativeArch.String()
)
   117  
// testEnosysStub checks the -ENOSYS stub generated for a seccomp
// configuration with the given default action and architecture list.
//
// For several contiguous subsets of explicitSyscalls it builds a mock filter,
// then, for every entry of testArches, runs explicit, implicit and made-up
// "future" syscalls through that filter and verifies that each one yields
// either retErrnoEnosys or retFallthrough as expected. Mismatches are
// reported with t.Errorf together with a dump of the filter program.
func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) {
	// Syscalls that may be given an explicit rule in the configuration.
	explicitSyscalls := []string{
		"setns",
		"kcmp",
		"renameat2",
		"copy_file_range",
	}

	// Syscalls that never get a rule; they are always expected to fall
	// through to the rest of the filter.
	implicitSyscalls := []string{
		"clone",
		"openat",
		"read",
		"write",
	}

	// Offsets added to the copy_file_range syscall number to fabricate
	// syscall numbers that are expected to produce -ENOSYS.
	futureSyscalls := []libseccomp.ScmpSyscall{1000, 7331}

	// Quick lookups for which arches are enabled.
	archSet := map[string]bool{}
	for _, arch := range arches {
		archSet[arch] = true
	}

	// Each entry selects explicitSyscalls[start:end] as the syscalls that
	// get an explicit rule.
	for _, test := range []struct {
		start, end int
	}{
		{0, 1}, // [setns]
		{0, 2}, // [setns, kcmp]
		{1, 2}, // [kcmp]
		{1, 3}, // [kcmp, renameat2]
		{1, 4}, // [kcmp, renameat2, copy_file_range]
		{3, 4}, // [copy_file_range]
	} {
		allowedSyscalls := explicitSyscalls[test.start:test.end]
		config := fakeConfig(defaultAction, allowedSyscalls, arches)
		filter, program := mockFilter(t, config)

		// The syscalls are in increasing order of newness, so all syscalls
		// after the last allowed syscall will give -ENOSYS.
		enosysStart := test.end

		for _, arch := range testArches {
			type syscallTest struct {
				sysno    libseccomp.ScmpSyscall
				syscall  string
				expected uint32
			}

			// Resolve the "native" placeholder to the host architecture.
			if arch == "native" {
				arch = nativeArch
			}
			scmpArch, err := libseccomp.GetArchFromString(arch)
			if err != nil {
				t.Fatalf("unknown libseccomp architecture %q: %v", arch, err)
			}

			auditArch, err := scmpArchToAuditArch(scmpArch)
			if err != nil {
				t.Fatalf("unknown audit architecture %q: %v", arch, err)
			}

			var syscallTests []syscallTest

			// Add explicit syscalls (whether they will return -ENOSYS
			// depends on the filter rules).
			for idx, syscall := range explicitSyscalls {
				expected := retFallthrough
				if idx >= enosysStart {
					expected = retErrnoEnosys
				}
				sysno, err := libseccomp.GetSyscallFromNameByArch(syscall, scmpArch)
				if err != nil {
					t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err)
				}
				syscallTests = append(syscallTests, syscallTest{
					sysno:    sysno,
					syscall:  syscall,
					expected: expected,
				})
			}

			// Add implicit syscalls.
			for _, syscall := range implicitSyscalls {
				sysno, err := libseccomp.GetSyscallFromNameByArch(syscall, scmpArch)
				if err != nil {
					t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err)
				}
				syscallTests = append(syscallTests, syscallTest{
					sysno:    sysno,
					syscall:  syscall,
					expected: retFallthrough,
				})
			}

			// Add future syscalls.
			for _, sysno := range futureSyscalls {
				baseSysno, err := libseccomp.GetSyscallFromNameByArch("copy_file_range", scmpArch)
				if err != nil {
					t.Fatalf("unknown syscall 'copy_file_range' on arch %q: %v", arch, err)
				}
				// sysno is the loop's own copy, so this does not modify
				// futureSyscalls.
				sysno += baseSysno

				syscallTests = append(syscallTests, syscallTest{
					sysno:    sysno,
					syscall:  fmt.Sprintf("syscall_%#x", sysno),
					expected: retErrnoEnosys,
				})
			}

			// If we're on s390(x) make sure you get -ENOSYS for the "setup"
			// syscall (this is done to work around an issue with s390x's
			// syscall multiplexing which results in unknown syscalls being a
			// setup(2) invocation).
			switch scmpArch {
			case libseccomp.ArchS390, libseccomp.ArchS390X:
				syscallTests = append(syscallTests, syscallTest{
					sysno:    s390xMultiplexSyscall,
					syscall:  "setup",
					expected: retErrnoEnosys,
				})
			}

			// Run every collected syscall test through the filter.
			for _, test := range syscallTests {
				// Override the expected value in the two special cases:
				//  1. If the default action is allow, the filter won't have
				//     the stub prepended so we expect a fallthrough.
				//  2. If the executing architecture is not in the architecture
				//     set, then the architecture is not handled by the stub --
				//     *except* in the case of the native architecture (which
				//     is always included in the stub).
				if isAllowAction(defaultAction) ||
					(!archSet[arch] && arch != nativeArch) {
					test.expected = retFallthrough
				}

				payload := mockSyscallPayload(t, test.sysno, auditArch, 0x1337, 0xF00BA5)
				// NOTE: golang.org/x/net/bpf returns int here rather
				// than uint32.
				rawRet, err := filter.Run(payload)
				if err != nil {
					t.Fatalf("error running filter: %v", err)
				}
				ret := uint32(rawRet)
				if ret != test.expected {
					// Dump the whole program and payload to make the
					// mismatch debuggable from the test log alone.
					t.Logf("mock filter for %v %v:", arches, allowedSyscalls)
					for idx, insn := range program {
						t.Logf("  [%4.1d] %s", idx, insn)
					}
					t.Logf("payload: %#v", payload)
					t.Errorf("filter %s(%d) %q(%d): got %#x, want %#x", arch, auditArch, test.syscall, test.sysno, ret, test.expected)
				}
			}
		}
	}
}
   274  
// testActions maps a human-readable label to each default action that the
// TestEnosysStub_* tests pass to testEnosysStub.
var testActions = map[string]configs.Action{
	"allow": configs.Allow,
	"log":   configs.Log,
	"errno": configs.Errno,
	"kill":  configs.Kill,
}
   281  
   282  func TestEnosysStub_SingleArch(t *testing.T) {
   283  	for _, arch := range testArches {
   284  		var arches []string
   285  		// "native" indicates a blank architecture field for seccomp, to test
   286  		// the case where the running architecture was not included in the
   287  		// architecture. Docker doesn't always set the architecture for some
   288  		// reason (namely for ppc64le).
   289  		if arch != "native" {
   290  			arches = append(arches, arch)
   291  		}
   292  		t.Run("arch="+arch, func(t *testing.T) {
   293  			for name, action := range testActions {
   294  				t.Run("action="+name, func(t *testing.T) {
   295  					testEnosysStub(t, action, arches)
   296  				})
   297  			}
   298  		})
   299  	}
   300  }
   301  
   302  func TestEnosysStub_MultiArch(t *testing.T) {
   303  	for end := 0; end < len(testArches); end++ {
   304  		for start := 0; start < end; start++ {
   305  			var arches []string
   306  			for _, arch := range testArches[start:end] {
   307  				// "native" indicates a blank architecture field for seccomp, to test
   308  				// the case where the running architecture was not included in the
   309  				// architecture. Docker doesn't always set the architecture for some
   310  				// reason (namely for ppc64le).
   311  				if arch != "native" {
   312  					arches = append(arches, arch)
   313  				}
   314  			}
   315  			if len(arches) <= 1 {
   316  				continue
   317  			}
   318  			for _, action := range testActions {
   319  				testEnosysStub(t, action, arches)
   320  			}
   321  		}
   322  	}
   323  }
   324  
   325  func TestDisassembleHugeFilterDoesNotHang(t *testing.T) {
   326  	hugeFilter, err := libseccomp.NewFilter(libseccomp.ActAllow)
   327  	if err != nil {
   328  		t.Fatalf("failed to create seccomp filter: %v", err)
   329  	}
   330  
   331  	for i := 1; i < 10000; i++ {
   332  		if err := hugeFilter.AddRule(libseccomp.ScmpSyscall(i), libseccomp.ActKillThread); err != nil {
   333  			t.Fatalf("failed to add rule to filter %d: %v", i, err)
   334  		}
   335  	}
   336  
   337  	_, err = disassembleFilter(hugeFilter)
   338  	if err != nil {
   339  		t.Fatalf("failed to disassembleFilter: %v", err)
   340  	}
   341  
   342  	// if we exit, we did not hang
   343  }