github.com/opencontainers/runc@v1.2.0-rc.1.0.20240520010911-492dc558cdd6/libcontainer/seccomp/seccomp_linux.go (about) 1 //go:build cgo && seccomp 2 // +build cgo,seccomp 3 4 package seccomp 5 6 import ( 7 "errors" 8 "fmt" 9 "os" 10 11 libseccomp "github.com/seccomp/libseccomp-golang" 12 "github.com/sirupsen/logrus" 13 "golang.org/x/sys/unix" 14 15 "github.com/opencontainers/runc/libcontainer/configs" 16 "github.com/opencontainers/runc/libcontainer/seccomp/patchbpf" 17 "github.com/opencontainers/runtime-spec/specs-go" 18 ) 19 20 var ( 21 actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM)) 22 actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) 23 ) 24 25 const ( 26 // Linux system calls can have at most 6 arguments 27 syscallMaxArguments int = 6 28 ) 29 30 // InitSeccomp installs the seccomp filters to be used in the container as 31 // specified in config. Returns the seccomp file descriptor if any of the 32 // filters include a SCMP_ACT_NOTIFY action. 33 func InitSeccomp(config *configs.Seccomp) (*os.File, error) { 34 if config == nil { 35 return nil, errors.New("cannot initialize Seccomp - nil config passed") 36 } 37 38 defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet) 39 if err != nil { 40 return nil, errors.New("error initializing seccomp - invalid default action") 41 } 42 43 // Ignore the error since pre-2.4 libseccomp is treated as API level 0. 44 apiLevel, _ := libseccomp.GetAPI() 45 for _, call := range config.Syscalls { 46 if call.Action == configs.Notify { 47 if apiLevel < 6 { 48 return nil, fmt.Errorf("seccomp notify unsupported: API level: got %d, want at least 6. Please try with libseccomp >= 2.5.0 and Linux >= 5.7", apiLevel) 49 } 50 51 // We can't allow the write syscall to notify to the seccomp agent. 52 // After InitSeccomp() is called, we need to syncParentSeccomp() to write the seccomp fd plain 53 // number, so the parent sends it to the seccomp agent. If we use SCMP_ACT_NOTIFY on write, we 54 // never can write the seccomp fd to the parent and therefore the seccomp agent never receives 55 // the seccomp fd and runc is hang during initialization. 56 // 57 // Note that read()/close(), that are also used in syncParentSeccomp(), _can_ use SCMP_ACT_NOTIFY. 58 // Because we write the seccomp fd on the pipe to the parent, the parent is able to proceed and 59 // send the seccomp fd to the agent (it is another process and not subject to the seccomp 60 // filter). We will be blocked on read()/close() inside syncParentSeccomp() but if the seccomp 61 // agent allows those syscalls to proceed, initialization works just fine and the agent can 62 // handle future read()/close() syscalls as it wanted. 63 if call.Name == "write" { 64 return nil, errors.New("SCMP_ACT_NOTIFY cannot be used for the write syscall") 65 } 66 } 67 } 68 69 // See comment on why write is not allowed. The same reason applies, as this can mean handling write too. 70 if defaultAction == libseccomp.ActNotify { 71 return nil, errors.New("SCMP_ACT_NOTIFY cannot be used as default action") 72 } 73 74 filter, err := libseccomp.NewFilter(defaultAction) 75 if err != nil { 76 return nil, fmt.Errorf("error creating filter: %w", err) 77 } 78 79 // Add extra architectures 80 for _, arch := range config.Architectures { 81 scmpArch, err := libseccomp.GetArchFromString(arch) 82 if err != nil { 83 return nil, fmt.Errorf("error validating Seccomp architecture: %w", err) 84 } 85 if err := filter.AddArch(scmpArch); err != nil { 86 return nil, fmt.Errorf("error adding architecture to seccomp filter: %w", err) 87 } 88 } 89 90 // Add extra flags. 91 for _, flag := range config.Flags { 92 if err := setFlag(filter, flag); err != nil { 93 return nil, err 94 } 95 } 96 97 // Enable libseccomp binary tree optimization for longer rulesets. 98 // 99 // The number below chosen semi-arbitrarily, considering the following: 100 // 1. libseccomp <= 2.5.4 misbehaves when binary tree optimization 101 // is enabled and there are 0 rules. 102 // 2. All known libseccomp versions (2.5.0 to 2.5.4) generate a binary 103 // tree with 4 syscalls per node. 104 if len(config.Syscalls) > 32 { 105 if err := filter.SetOptimize(2); err != nil { 106 // The error is not fatal and is probably means we have older libseccomp. 107 logrus.Debugf("seccomp binary tree optimization not available: %v", err) 108 } 109 } 110 111 // Unset no new privs bit 112 if err := filter.SetNoNewPrivsBit(false); err != nil { 113 return nil, fmt.Errorf("error setting no new privileges: %w", err) 114 } 115 116 // Add a rule for each syscall 117 for _, call := range config.Syscalls { 118 if call == nil { 119 return nil, errors.New("encountered nil syscall while initializing Seccomp") 120 } 121 122 if err := matchCall(filter, call, defaultAction); err != nil { 123 return nil, err 124 } 125 } 126 127 seccompFd, err := patchbpf.PatchAndLoad(config, filter) 128 if err != nil { 129 return nil, fmt.Errorf("error loading seccomp filter into kernel: %w", err) 130 } 131 return seccompFd, nil 132 } 133 134 type unknownFlagError struct { 135 flag specs.LinuxSeccompFlag 136 } 137 138 func (e *unknownFlagError) Error() string { 139 return "seccomp flag " + string(e.flag) + " is not known to runc" 140 } 141 142 func setFlag(filter *libseccomp.ScmpFilter, flag specs.LinuxSeccompFlag) error { 143 switch flag { 144 case flagTsync: 145 // libseccomp-golang always use filterAttrTsync when 146 // possible so all goroutines will receive the same 147 // rules, so there is nothing to do. It does not make 148 // sense to apply the seccomp filter on only one 149 // thread; other threads will be terminated after exec 150 // anyway. 151 return nil 152 case specs.LinuxSeccompFlagLog: 153 if err := filter.SetLogBit(true); err != nil { 154 return fmt.Errorf("error adding log flag to seccomp filter: %w", err) 155 } 156 return nil 157 case specs.LinuxSeccompFlagSpecAllow: 158 if err := filter.SetSSB(true); err != nil { 159 return fmt.Errorf("error adding SSB flag to seccomp filter: %w", err) 160 } 161 return nil 162 } 163 // NOTE when adding more flags above, do not forget to also: 164 // - add new flags to `flags` slice in config.go; 165 // - add new flag values to flags_value() in tests/integration/seccomp.bats; 166 // - modify func filterFlags in patchbpf/ accordingly. 167 168 return &unknownFlagError{flag: flag} 169 } 170 171 // FlagSupported checks if the flag is known to runc and supported by 172 // currently used libseccomp and kernel (i.e. it can be set). 173 func FlagSupported(flag specs.LinuxSeccompFlag) error { 174 filter := &libseccomp.ScmpFilter{} 175 err := setFlag(filter, flag) 176 177 // For flags we don't know, setFlag returns unknownFlagError. 178 var uf *unknownFlagError 179 if errors.As(err, &uf) { 180 return err 181 } 182 // For flags that are known to runc and libseccomp-golang but can not 183 // be applied because either libseccomp or the kernel is too old, 184 // seccomp.VersionError is returned. 185 var verErr *libseccomp.VersionError 186 if errors.As(err, &verErr) { 187 // Not supported by libseccomp or the kernel. 188 return err 189 } 190 191 // All other flags are known and supported. 192 return nil 193 } 194 195 // Convert Libcontainer Action to Libseccomp ScmpAction 196 func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) { 197 switch act { 198 case configs.Kill, configs.KillThread: 199 return libseccomp.ActKillThread, nil 200 case configs.Errno: 201 if errnoRet != nil { 202 return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil 203 } 204 return actErrno, nil 205 case configs.Trap: 206 return libseccomp.ActTrap, nil 207 case configs.Allow: 208 return libseccomp.ActAllow, nil 209 case configs.Trace: 210 if errnoRet != nil { 211 return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil 212 } 213 return actTrace, nil 214 case configs.Log: 215 return libseccomp.ActLog, nil 216 case configs.Notify: 217 return libseccomp.ActNotify, nil 218 case configs.KillProcess: 219 return libseccomp.ActKillProcess, nil 220 default: 221 return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule") 222 } 223 } 224 225 // Convert Libcontainer Operator to Libseccomp ScmpCompareOp 226 func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { 227 switch op { 228 case configs.EqualTo: 229 return libseccomp.CompareEqual, nil 230 case configs.NotEqualTo: 231 return libseccomp.CompareNotEqual, nil 232 case configs.GreaterThan: 233 return libseccomp.CompareGreater, nil 234 case configs.GreaterThanOrEqualTo: 235 return libseccomp.CompareGreaterEqual, nil 236 case configs.LessThan: 237 return libseccomp.CompareLess, nil 238 case configs.LessThanOrEqualTo: 239 return libseccomp.CompareLessOrEqual, nil 240 case configs.MaskEqualTo: 241 return libseccomp.CompareMaskedEqual, nil 242 default: 243 return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule") 244 } 245 } 246 247 // Convert Libcontainer Arg to Libseccomp ScmpCondition 248 func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { 249 cond := libseccomp.ScmpCondition{} 250 251 if arg == nil { 252 return cond, errors.New("cannot convert nil to syscall condition") 253 } 254 255 op, err := getOperator(arg.Op) 256 if err != nil { 257 return cond, err 258 } 259 260 return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) 261 } 262 263 // Add a rule to match a single syscall 264 func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall, defAct libseccomp.ScmpAction) error { 265 if call == nil || filter == nil { 266 return errors.New("cannot use nil as syscall to block") 267 } 268 269 if len(call.Name) == 0 { 270 return errors.New("empty string is not a valid syscall") 271 } 272 273 // Convert the call's action to the libseccomp equivalent 274 callAct, err := getAction(call.Action, call.ErrnoRet) 275 if err != nil { 276 return fmt.Errorf("action in seccomp profile is invalid: %w", err) 277 } 278 if callAct == defAct { 279 // This rule is redundant, silently skip it 280 // to avoid error from AddRule. 281 return nil 282 } 283 284 // If we can't resolve the syscall, assume it is not supported 285 // by this kernel. Warn about it, don't error out. 286 callNum, err := libseccomp.GetSyscallFromName(call.Name) 287 if err != nil { 288 logrus.Debugf("unknown seccomp syscall %q ignored", call.Name) 289 return nil 290 } 291 292 // Unconditional match - just add the rule 293 if len(call.Args) == 0 { 294 if err := filter.AddRule(callNum, callAct); err != nil { 295 return fmt.Errorf("error adding seccomp filter rule for syscall %s: %w", call.Name, err) 296 } 297 } else { 298 // If two or more arguments have the same condition, 299 // Revert to old behavior, adding each condition as a separate rule 300 argCounts := make([]uint, syscallMaxArguments) 301 conditions := []libseccomp.ScmpCondition{} 302 303 for _, cond := range call.Args { 304 newCond, err := getCondition(cond) 305 if err != nil { 306 return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %w", call.Name, err) 307 } 308 309 argCounts[cond.Index] += 1 310 311 conditions = append(conditions, newCond) 312 } 313 314 hasMultipleArgs := false 315 for _, count := range argCounts { 316 if count > 1 { 317 hasMultipleArgs = true 318 break 319 } 320 } 321 322 if hasMultipleArgs { 323 // Revert to old behavior 324 // Add each condition attached to a separate rule 325 for _, cond := range conditions { 326 condArr := []libseccomp.ScmpCondition{cond} 327 328 if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil { 329 return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err) 330 } 331 } 332 } else { 333 // No conditions share same argument 334 // Use new, proper behavior 335 if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil { 336 return fmt.Errorf("error adding seccomp rule for syscall %s: %w", call.Name, err) 337 } 338 } 339 } 340 341 return nil 342 } 343 344 // Version returns major, minor, and micro. 345 func Version() (uint, uint, uint) { 346 return libseccomp.GetLibraryVersion() 347 } 348 349 // Enabled is true if seccomp support is compiled in. 350 const Enabled = true