github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/seccomp/patchbpf/enosys_linux_test.go (about) 1 //go:build cgo && seccomp 2 // +build cgo,seccomp 3 4 package patchbpf 5 6 import ( 7 "bytes" 8 "encoding/binary" 9 "fmt" 10 "testing" 11 12 "github.com/opencontainers/runc/libcontainer/configs" 13 14 libseccomp "github.com/seccomp/libseccomp-golang" 15 "golang.org/x/net/bpf" 16 ) 17 18 type seccompData struct { 19 Syscall uint32 // NOTE: We assume sizeof(int) == 4. 20 Arch uint32 21 IP uint64 22 Args [6]uint64 23 } 24 25 // mockSyscallPayload creates a fake seccomp_data struct with the given data. 26 func mockSyscallPayload(t *testing.T, sysno libseccomp.ScmpSyscall, arch linuxAuditArch, args ...uint64) []byte { 27 var buf bytes.Buffer 28 29 data := seccompData{ 30 Syscall: uint32(sysno), 31 Arch: uint32(arch), 32 IP: 0xDEADBEEFCAFE, 33 } 34 35 copy(data.Args[:], args) 36 if len(args) > 6 { 37 t.Fatalf("bad syscall payload: linux only supports 6-argument syscalls") 38 } 39 40 // NOTE: We use BigEndian here because golang.org/x/net/bpf assumes that 41 // all payloads are big-endian while seccomp uses host endianness. 42 if err := binary.Write(&buf, binary.BigEndian, data); err != nil { 43 t.Fatalf("bad syscall payload: cannot write data: %v", err) 44 } 45 return buf.Bytes() 46 } 47 48 // retFallthrough is returned by the mockFilter. If a the mock filter returns 49 // this value, it indicates "fallthrough to libseccomp-generated filter". 50 const retFallthrough uint32 = 0xDEADBEEF 51 52 // mockFilter returns a BPF VM that contains a mock filter with an -ENOSYS 53 // stub. If the filter returns retFallthrough, the stub filter has permitted 54 // the syscall to pass. 55 func mockFilter(t *testing.T, config *configs.Seccomp) (*bpf.VM, []bpf.Instruction) { 56 patch, err := generatePatch(config) 57 if err != nil { 58 t.Fatalf("mock filter: generate enosys patch: %v", err) 59 } 60 61 program := append(patch, bpf.RetConstant{Val: retFallthrough}) 62 63 vm, err := bpf.NewVM(program) 64 if err != nil { 65 t.Fatalf("mock filter: compile BPF VM: %v", err) 66 } 67 return vm, program 68 } 69 70 // fakeConfig generates a fake libcontainer seccomp configuration. The syscalls 71 // are added with an action distinct from the default action. 72 func fakeConfig(defaultAction configs.Action, explicitSyscalls []string, arches []string) *configs.Seccomp { 73 config := configs.Seccomp{ 74 DefaultAction: defaultAction, 75 Architectures: arches, 76 } 77 syscallAction := configs.Allow 78 if syscallAction == defaultAction { 79 syscallAction = configs.Kill 80 } 81 for _, syscall := range explicitSyscalls { 82 config.Syscalls = append(config.Syscalls, &configs.Syscall{ 83 Name: syscall, 84 Action: syscallAction, 85 }) 86 } 87 return &config 88 } 89 90 // List copied from <libcontainer/seccomp/config.go>. 91 var testArches = []string{ 92 "x86", 93 "amd64", 94 "x32", 95 "arm", 96 "arm64", 97 "mips", 98 "mips64", 99 "mips64n32", 100 "mipsel", 101 "mipsel64", 102 "mipsel64n32", 103 "ppc", 104 "ppc64", 105 "ppc64le", 106 "s390", 107 "s390x", 108 // Dummy value to indicate a configuration with no architecture specified. 109 "native", 110 } 111 112 // Used for the "native" architecture. 113 var ( 114 scmpNativeArch, _ = libseccomp.GetNativeArch() 115 nativeArch = scmpNativeArch.String() 116 ) 117 118 func testEnosysStub(t *testing.T, defaultAction configs.Action, arches []string) { 119 explicitSyscalls := []string{ 120 "setns", 121 "kcmp", 122 "renameat2", 123 "copy_file_range", 124 } 125 126 implicitSyscalls := []string{ 127 "clone", 128 "openat", 129 "read", 130 "write", 131 } 132 133 futureSyscalls := []libseccomp.ScmpSyscall{1000, 7331} 134 135 // Quick lookups for which arches are enabled. 136 archSet := map[string]bool{} 137 for _, arch := range arches { 138 archSet[arch] = true 139 } 140 141 for _, test := range []struct { 142 start, end int 143 }{ 144 {0, 1}, // [setns] 145 {0, 2}, // [setns, process_vm_readv] 146 {1, 2}, // [process_vm_readv] 147 {1, 3}, // [process_vm_readv, renameat2, copy_file_range] 148 {1, 4}, // [process_vm_readv, renameat2, copy_file_range] 149 {3, 4}, // [copy_file_range] 150 } { 151 allowedSyscalls := explicitSyscalls[test.start:test.end] 152 config := fakeConfig(defaultAction, allowedSyscalls, arches) 153 filter, program := mockFilter(t, config) 154 155 // The syscalls are in increasing order of newness, so all syscalls 156 // after the last allowed syscall will give -ENOSYS. 157 enosysStart := test.end 158 159 for _, arch := range testArches { 160 type syscallTest struct { 161 sysno libseccomp.ScmpSyscall 162 syscall string 163 expected uint32 164 } 165 166 if arch == "native" { 167 arch = nativeArch 168 } 169 scmpArch, err := libseccomp.GetArchFromString(arch) 170 if err != nil { 171 t.Fatalf("unknown libseccomp architecture %q: %v", arch, err) 172 } 173 174 auditArch, err := scmpArchToAuditArch(scmpArch) 175 if err != nil { 176 t.Fatalf("unknown audit architecture %q: %v", arch, err) 177 } 178 179 var syscallTests []syscallTest 180 181 // Add explicit syscalls (whether they will return -ENOSYS 182 // depends on the filter rules). 183 for idx, syscall := range explicitSyscalls { 184 expected := retFallthrough 185 if idx >= enosysStart { 186 expected = retErrnoEnosys 187 } 188 sysno, err := libseccomp.GetSyscallFromNameByArch(syscall, scmpArch) 189 if err != nil { 190 t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err) 191 } 192 syscallTests = append(syscallTests, syscallTest{ 193 sysno: sysno, 194 syscall: syscall, 195 expected: expected, 196 }) 197 } 198 199 // Add implicit syscalls. 200 for _, syscall := range implicitSyscalls { 201 sysno, err := libseccomp.GetSyscallFromNameByArch(syscall, scmpArch) 202 if err != nil { 203 t.Fatalf("unknown syscall %q on arch %q: %v", syscall, arch, err) 204 } 205 syscallTests = append(syscallTests, syscallTest{ 206 sysno: sysno, 207 syscall: syscall, 208 expected: retFallthrough, 209 }) 210 } 211 212 // Add future syscalls. 213 for _, sysno := range futureSyscalls { 214 baseSysno, err := libseccomp.GetSyscallFromNameByArch("copy_file_range", scmpArch) 215 if err != nil { 216 t.Fatalf("unknown syscall 'copy_file_range' on arch %q: %v", arch, err) 217 } 218 sysno += baseSysno 219 220 syscallTests = append(syscallTests, syscallTest{ 221 sysno: sysno, 222 syscall: fmt.Sprintf("syscall_%#x", sysno), 223 expected: retErrnoEnosys, 224 }) 225 } 226 227 // If we're on s390(x) make sure you get -ENOSYS for the "setup" 228 // syscall (this is done to work around an issue with s390x's 229 // syscall multiplexing which results in unknown syscalls being a 230 // setup(2) invocation). 231 switch scmpArch { 232 case libseccomp.ArchS390, libseccomp.ArchS390X: 233 syscallTests = append(syscallTests, syscallTest{ 234 sysno: s390xMultiplexSyscall, 235 syscall: "setup", 236 expected: retErrnoEnosys, 237 }) 238 } 239 240 // Test syscalls in the explicit list. 241 for _, test := range syscallTests { 242 // Override the expected value in the two special cases: 243 // 1. If the default action is allow, the filter won't have 244 // the stub prepended so we expect a fallthrough. 245 // 2. If the executing architecture is not in the architecture 246 // set, then the architecture is not handled by the stub -- 247 // *except* in the case of the native architecture (which 248 // is always included in the stub). 249 if isAllowAction(defaultAction) || 250 (!archSet[arch] && arch != nativeArch) { 251 test.expected = retFallthrough 252 } 253 254 payload := mockSyscallPayload(t, test.sysno, auditArch, 0x1337, 0xF00BA5) 255 // NOTE: golang.org/x/net/bpf returns int here rather 256 // than uint32. 257 rawRet, err := filter.Run(payload) 258 if err != nil { 259 t.Fatalf("error running filter: %v", err) 260 } 261 ret := uint32(rawRet) 262 if ret != test.expected { 263 t.Logf("mock filter for %v %v:", arches, allowedSyscalls) 264 for idx, insn := range program { 265 t.Logf(" [%4.1d] %s", idx, insn) 266 } 267 t.Logf("payload: %#v", payload) 268 t.Errorf("filter %s(%d) %q(%d): got %#x, want %#x", arch, auditArch, test.syscall, test.sysno, ret, test.expected) 269 } 270 } 271 } 272 } 273 } 274 275 var testActions = map[string]configs.Action{ 276 "allow": configs.Allow, 277 "log": configs.Log, 278 "errno": configs.Errno, 279 "kill": configs.Kill, 280 } 281 282 func TestEnosysStub_SingleArch(t *testing.T) { 283 for _, arch := range testArches { 284 var arches []string 285 // "native" indicates a blank architecture field for seccomp, to test 286 // the case where the running architecture was not included in the 287 // architecture. Docker doesn't always set the architecture for some 288 // reason (namely for ppc64le). 289 if arch != "native" { 290 arches = append(arches, arch) 291 } 292 t.Run("arch="+arch, func(t *testing.T) { 293 for name, action := range testActions { 294 t.Run("action="+name, func(t *testing.T) { 295 testEnosysStub(t, action, arches) 296 }) 297 } 298 }) 299 } 300 } 301 302 func TestEnosysStub_MultiArch(t *testing.T) { 303 for end := 0; end < len(testArches); end++ { 304 for start := 0; start < end; start++ { 305 var arches []string 306 for _, arch := range testArches[start:end] { 307 // "native" indicates a blank architecture field for seccomp, to test 308 // the case where the running architecture was not included in the 309 // architecture. Docker doesn't always set the architecture for some 310 // reason (namely for ppc64le). 311 if arch != "native" { 312 arches = append(arches, arch) 313 } 314 } 315 if len(arches) <= 1 { 316 continue 317 } 318 for _, action := range testActions { 319 testEnosysStub(t, action, arches) 320 } 321 } 322 } 323 } 324 325 func TestDisassembleHugeFilterDoesNotHang(t *testing.T) { 326 hugeFilter, err := libseccomp.NewFilter(libseccomp.ActAllow) 327 if err != nil { 328 t.Fatalf("failed to create seccomp filter: %v", err) 329 } 330 331 for i := 1; i < 10000; i++ { 332 if err := hugeFilter.AddRule(libseccomp.ScmpSyscall(i), libseccomp.ActKillThread); err != nil { 333 t.Fatalf("failed to add rule to filter %d: %v", i, err) 334 } 335 } 336 337 _, err = disassembleFilter(hugeFilter) 338 if err != nil { 339 t.Fatalf("failed to disassembleFilter: %v", err) 340 } 341 342 // if we exit, we did not hang 343 }