github.com/inspektor-gadget/inspektor-gadget@v0.28.1/pkg/gadget-collection/gadgets/advise/seccomp/gadget.go (about) 1 // Copyright 2019-2021 The Inspektor Gadget authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package seccomp 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "sort" 22 "strconv" 23 "strings" 24 "sync" 25 26 log "github.com/sirupsen/logrus" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 apimachineryruntime "k8s.io/apimachinery/pkg/runtime" 29 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 30 "sigs.k8s.io/controller-runtime/pkg/client" 31 seccompprofile "sigs.k8s.io/security-profiles-operator/api/seccompprofile/v1beta1" 32 k8syaml "sigs.k8s.io/yaml" 33 34 gadgetv1alpha1 "github.com/inspektor-gadget/inspektor-gadget/pkg/apis/gadget/v1alpha1" 35 containercollection "github.com/inspektor-gadget/inspektor-gadget/pkg/container-collection" 36 "github.com/inspektor-gadget/inspektor-gadget/pkg/gadget-collection/gadgets" 37 seccomptracer "github.com/inspektor-gadget/inspektor-gadget/pkg/gadgets/advise/seccomp/tracer" 38 ) 39 40 type Trace struct { 41 helpers gadgets.GadgetHelpers 42 client client.Client 43 44 started bool 45 46 // policyGenerated is used to know if there was a policy generated 47 // at pod termination so that the Generate() operation does not have 48 // to notify that it did not find a pod that matches the filter. 49 policyGenerated bool 50 } 51 52 type TraceFactory struct { 53 gadgets.BaseFactory 54 } 55 56 type TraceSingleton struct { 57 mu sync.Mutex 58 tracer *seccomptracer.Tracer 59 users int 60 } 61 62 var traceSingleton TraceSingleton 63 64 func NewFactory() gadgets.TraceFactory { 65 return &TraceFactory{ 66 BaseFactory: gadgets.BaseFactory{DeleteTrace: deleteTrace}, 67 } 68 } 69 70 func (f *TraceFactory) Description() string { 71 return `The seccomp gadget traces system calls for each container in order to generate 72 seccomp policies. 73 74 The seccomp policies can be generated in two ways: 75 1. on demand with the gadget.kinvolk.io/operation=generate annotation. In this 76 case, the Trace.Spec.Filter should specify the namespace and pod name to the 77 exclusion of other fields because there can be only one SeccompProfile 78 written in the Trace.Status.Output or in the SeccompProfile resource named 79 by Trace.Spec.Output. The on-demand generation supports the outputMode 80 Status and ExternalResource. 81 2. automatically when containers matching the Trace.Spec.Filter terminate. In 82 this case, all filters are supported. The at-termination generation supports 83 the outputMode ExternalResource and Stream. 84 85 The seccomp policies can be written in the Status field of the Trace custom 86 resource, or in SeccompProfiles custom resources managed by the [Kubernetes 87 Security Profiles 88 Operator](https://github.com/kubernetes-sigs/security-profiles-operator). 89 90 SeccompProfiles will have the following annotations: 91 92 * seccomp.gadget.kinvolk.io/trace: the namespaced name of the Trace custom 93 resource that generated this SeccompProfile 94 * seccomp.gadget.kinvolk.io/node: the node where this SeccompProfile was 95 generated 96 * seccomp.gadget.kinvolk.io/pod: the pod namespaced name of the pod that was 97 traced 98 * seccomp.gadget.kinvolk.io/container: the container name in the pod that was 99 traced 100 * seccomp.gadget.kinvolk.io/ownerReference-APIVersion: the ownerReference's 101 APIVersion of the pod that was traced 102 * seccomp.gadget.kinvolk.io/ownerReference-Kind: the ownerReference's Kind of the 103 pod that was traced 104 * seccomp.gadget.kinvolk.io/ownerReference-Name: the ownerReference's Name of the 105 pod that was traced 106 * seccomp.gadget.kinvolk.io/ownerReference-UID: the ownerReference's UID of the 107 pod that was traced 108 109 SeccompProfiles will have the same labels as the Trace custom resource that 110 generated them. They don't have meaning for the seccomp gadget. They are 111 merely copied for convenience. 112 ` 113 } 114 115 func (f *TraceFactory) OutputModesSupported() map[gadgetv1alpha1.TraceOutputMode]struct{} { 116 return map[gadgetv1alpha1.TraceOutputMode]struct{}{ 117 gadgetv1alpha1.TraceOutputModeStatus: {}, 118 gadgetv1alpha1.TraceOutputModeStream: {}, 119 gadgetv1alpha1.TraceOutputModeExternalResource: {}, 120 } 121 } 122 123 func (f *TraceFactory) AddToScheme(scheme *apimachineryruntime.Scheme) { 124 utilruntime.Must(seccompprofile.AddToScheme(scheme)) 125 } 126 127 func deleteTrace(name string, t interface{}) { 128 trace := t.(*Trace) 129 if trace.started { 130 traceSingleton.mu.Lock() 131 defer traceSingleton.mu.Unlock() 132 traceSingleton.users-- 133 if traceSingleton.users == 0 { 134 trace.helpers.Unsubscribe(genPubSubKey(name)) 135 traceSingleton.tracer.Close() 136 traceSingleton.tracer = nil 137 } 138 } 139 } 140 141 func (f *TraceFactory) Operations() map[gadgetv1alpha1.Operation]gadgets.TraceOperation { 142 n := func() interface{} { 143 return &Trace{ 144 client: f.Client, 145 helpers: f.Helpers, 146 } 147 } 148 return map[gadgetv1alpha1.Operation]gadgets.TraceOperation{ 149 gadgetv1alpha1.OperationStart: { 150 Doc: "Start recording syscalls", 151 Operation: func(name string, trace *gadgetv1alpha1.Trace) { 152 f.LookupOrCreate(name, n).(*Trace).Start(trace) 153 }, 154 Order: 1, 155 }, 156 gadgetv1alpha1.OperationGenerate: { 157 Doc: `Generate a seccomp profile for the pod specified in Trace.Spec.Filter. The 158 namespace and pod name should be specified at the exclusion of other fields.`, 159 Operation: func(name string, trace *gadgetv1alpha1.Trace) { 160 f.LookupOrCreate(name, n).(*Trace).Generate(trace) 161 }, 162 Order: 2, 163 }, 164 gadgetv1alpha1.OperationStop: { 165 Doc: "Stop recording syscalls", 166 Operation: func(name string, trace *gadgetv1alpha1.Trace) { 167 f.LookupOrCreate(name, n).(*Trace).Stop(trace) 168 }, 169 Order: 3, 170 }, 171 } 172 } 173 174 type pubSubKey string 175 176 func genPubSubKey(name string) pubSubKey { 177 return pubSubKey(fmt.Sprintf("gadget/seccomp/%s", name)) 178 } 179 180 func seccompProfileAddLabelsAndAnnotations( 181 r *seccompprofile.SeccompProfile, 182 trace *gadgetv1alpha1.Trace, 183 podName string, 184 containerName string, 185 ownerReference *metav1.OwnerReference, 186 ) { 187 traceName := fmt.Sprintf("%s/%s", trace.ObjectMeta.Namespace, trace.ObjectMeta.Name) 188 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/trace"] = traceName 189 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/node"] = trace.Spec.Node 190 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/pod"] = podName 191 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/container"] = containerName 192 if ownerReference != nil { 193 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/ownerReference-APIVersion"] = ownerReference.APIVersion 194 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/ownerReference-Kind"] = ownerReference.Kind 195 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/ownerReference-Name"] = ownerReference.Name 196 r.ObjectMeta.Annotations["seccomp.gadget.kinvolk.io/ownerReference-UID"] = string(ownerReference.UID) 197 } 198 199 // Copy labels from the trace into the SeccompProfile. This will allow 200 // the CLI to add a label on the trace and gather its output 201 if trace.ObjectMeta.Labels != nil { 202 for key, value := range trace.ObjectMeta.Labels { 203 r.ObjectMeta.Labels[key] = value 204 } 205 } 206 } 207 208 type SeccompProfileNsName struct { 209 namespace string 210 name string 211 212 // generateName indicates whether the name field has to be used as 213 // resource's Name or GeneratedName 214 generateName bool 215 } 216 217 // getSeccompProfileNextName computes the next profile name that has to be used 218 // for a specific podname given a SeccompProfile list. This function returns: 219 // podName: If there do not exist profiles with podname or podname-X as name. 220 // podName-2: If there exist a profile with the podname but no one with podname-X. 221 // podName-<X+1>: If there exist at least one profile with podname-X. 222 func getSeccompProfileNextName(profileList []seccompprofile.SeccompProfile, podName string) string { 223 currentCounter := 0 224 for _, profile := range profileList { 225 if !strings.HasPrefix(profile.Name, podName) { 226 continue 227 } 228 229 if profile.Name == podName && currentCounter == 0 { 230 currentCounter++ 231 continue 232 } 233 234 c, err := strconv.Atoi(strings.TrimLeft(profile.Name, podName+"-")) 235 if err != nil { 236 // Ignore profiles with "podname" prefix but no "podname-X" syntax. 237 continue 238 } 239 240 if c > currentCounter { 241 currentCounter = c 242 } 243 } 244 245 // It is the first profile for this pod, use the podname as resource's name. 246 if currentCounter == 0 { 247 return podName 248 } 249 250 return fmt.Sprintf("%s-%d", podName, currentCounter+1) 251 } 252 253 // getSeccompProfileNsName computes the seccomp profile namespace and name 254 // based on the traceOutputName parameter. If it was not specified or does not 255 // contains the namespace, fallback to the trace's namespace and podname. 256 func getSeccompProfileNsName( 257 cli client.Client, 258 traceNs, traceOutputName, podname string, 259 ) (*SeccompProfileNsName, error) { 260 if traceOutputName != "" { 261 parts := strings.SplitN(traceOutputName, "/", 2) 262 if len(parts) == 2 { 263 // Use namespace and prefix-name provided by the user. 264 return &SeccompProfileNsName{ 265 namespace: parts[0], 266 name: parts[1], 267 generateName: true, 268 }, nil 269 } 270 271 // Fallback to the trace's namespace and use prefix-name provided by the user. 272 return &SeccompProfileNsName{ 273 namespace: traceNs, 274 name: traceOutputName, 275 generateName: true, 276 }, nil 277 } 278 279 // Fallback to the trace's namespace and podname but adding a counter 280 // suffix in case there is already a profile with the podname name. 281 profileList := &seccompprofile.SeccompProfileList{} 282 err := cli.List( 283 context.TODO(), 284 profileList, 285 client.InNamespace(traceNs), 286 ) 287 if err != nil { 288 return nil, fmt.Errorf("retrieving SeccompProfiles in %q: %w", traceNs, err) 289 } 290 profileName := getSeccompProfileNextName(profileList.Items, podname) 291 292 return &SeccompProfileNsName{ 293 namespace: traceNs, 294 name: profileName, 295 generateName: false, 296 }, nil 297 } 298 299 // generateSeccompPolicy generates a seccomp policy which is ready to be 300 // created. 301 func generateSeccompPolicy(client client.Client, trace *gadgetv1alpha1.Trace, syscallNames []string, podname, containername, fullPodName string, ownerReference *metav1.OwnerReference) (*seccompprofile.SeccompProfile, error) { 302 profileName, err := getSeccompProfileNsName( 303 client, 304 trace.ObjectMeta.Namespace, 305 trace.Spec.Output, 306 podname, 307 ) 308 if err != nil { 309 return nil, fmt.Errorf("getting the profile name: %w", err) 310 } 311 312 r := syscallNamesToSeccompPolicy(profileName, syscallNames) 313 seccompProfileAddLabelsAndAnnotations(r, trace, fullPodName, containername, ownerReference) 314 315 return r, nil 316 } 317 318 // containerTerminated is a callback called every time a container is 319 // terminated on the node. It is used to generate a SeccompProfile when a 320 // container terminates. 321 func (t *Trace) containerTerminated(trace *gadgetv1alpha1.Trace, event containercollection.PubSubEvent) { 322 if traceSingleton.tracer == nil { 323 log.Errorf("Seccomp tracer is nil") 324 return 325 } 326 327 if event.Container.Mntns == 0 { 328 log.Errorf("Container has unknown mntns") 329 return 330 } 331 332 traceName := fmt.Sprintf("%s/%s", trace.ObjectMeta.Namespace, trace.ObjectMeta.Name) 333 334 // Get the list of syscallNames from the BPF hash map 335 syscallNames, err := traceSingleton.tracer.Peek(event.Container.Mntns) 336 if err != nil { 337 log.Errorf("peeking syscalls for mntns %d: %s", event.Container.Mntns, err) 338 return 339 } 340 341 // The container has terminated. Cleanup the BPF hash map 342 traceSingleton.tracer.Delete(event.Container.Mntns) 343 344 namespacedName := fmt.Sprintf("%s/%s", event.Container.K8s.Namespace, event.Container.K8s.PodName) 345 346 // This field was fetched when the container was created 347 ownerReference := getContainerOwnerReference(event.Container) 348 349 r, err := generateSeccompPolicy(t.client, trace, syscallNames, event.Container.K8s.PodName, 350 event.Container.K8s.ContainerName, namespacedName, ownerReference) 351 if err != nil { 352 log.Errorf("Trace %s: %v", traceName, err) 353 return 354 } 355 356 switch trace.Spec.OutputMode { 357 case gadgetv1alpha1.TraceOutputModeExternalResource: 358 log.Infof("Trace %s: creating SeccompProfile for pod %s", traceName, namespacedName) 359 err := t.client.Create(context.TODO(), r) 360 if err != nil { 361 log.Errorf("Failed to create Seccomp Profile for pod %s: %s", namespacedName, err) 362 return 363 } 364 t.policyGenerated = true 365 case gadgetv1alpha1.TraceOutputModeStream: 366 log.Infof("Trace %s: adding SeccompProfile for pod %s in stream", traceName, namespacedName) 367 yamlOutput, err := k8syaml.Marshal(r) 368 if err != nil { 369 log.Errorf("Failed to convert Seccomp Profile to yaml: %s", err) 370 return 371 } 372 t.helpers.PublishEvent( 373 gadgets.TraceName(trace.ObjectMeta.Namespace, trace.ObjectMeta.Name), 374 fmt.Sprintf("%s\n---\n", string(yamlOutput)), 375 ) 376 t.policyGenerated = true 377 } 378 } 379 380 func getContainerOwnerReference(c *containercollection.Container) *metav1.OwnerReference { 381 ownerRef, err := c.GetOwnerReference() 382 if err != nil { 383 log.Warnf("Failed to get owner reference of %s/%s/%s: %s", 384 c.K8s.Namespace, c.K8s.PodName, c.K8s.ContainerName, err) 385 } 386 return ownerRef 387 } 388 389 func (t *Trace) Start(trace *gadgetv1alpha1.Trace) { 390 trace.Status.Output = "" 391 if t.started { 392 trace.Status.State = gadgetv1alpha1.TraceStateStarted 393 t.policyGenerated = false 394 return 395 } 396 397 traceSingleton.mu.Lock() 398 defer traceSingleton.mu.Unlock() 399 if traceSingleton.tracer == nil { 400 var err error 401 traceSingleton.tracer, err = seccomptracer.NewTracer() 402 if err != nil { 403 trace.Status.OperationError = fmt.Sprintf("Failed to start seccomp tracer: %s", err) 404 return 405 } 406 } 407 408 // 'trace' is owned by the controller and could be modified 409 // outside of the gadget control. Make a copy for the callback. 410 traceCopy := trace.DeepCopy() 411 412 // Subscribe to container creation and termination 413 // events. Termination is used to generate a 414 // SeccompProfile when a container terminates. Creation 415 // is used to fetch the owner reference of the 416 // containers to be sure this field is set when the 417 // container terminates. 418 containers := t.helpers.Subscribe( 419 genPubSubKey(trace.ObjectMeta.Namespace+"/"+trace.ObjectMeta.Name), 420 *gadgets.ContainerSelectorFromContainerFilter(trace.Spec.Filter), 421 func(event containercollection.PubSubEvent) { 422 switch event.Type { 423 case containercollection.EventTypeAddContainer: 424 getContainerOwnerReference(event.Container) 425 case containercollection.EventTypeRemoveContainer: 426 t.containerTerminated(traceCopy, event) 427 } 428 }, 429 ) 430 431 for _, container := range containers { 432 getContainerOwnerReference(container) 433 } 434 435 traceSingleton.users++ 436 t.started = true 437 t.policyGenerated = false 438 439 trace.Status.State = gadgetv1alpha1.TraceStateStarted 440 } 441 442 func (t *Trace) Generate(trace *gadgetv1alpha1.Trace) { 443 if traceSingleton.tracer == nil { 444 log.Errorf("Seccomp tracer is nil") 445 return 446 } 447 448 if !t.started { 449 trace.Status.OperationError = "Not started" 450 return 451 } 452 if trace.Spec.Filter == nil || trace.Spec.Filter.Namespace == "" || trace.Spec.Filter.Podname == "" { 453 trace.Status.OperationError = "Missing pod" 454 return 455 } 456 if len(trace.Spec.Filter.Labels) != 0 { 457 trace.Status.OperationError = "Seccomp gadget does not support filtering by labels" 458 return 459 } 460 461 var mntns uint64 462 var containerName string 463 if trace.Spec.Filter.ContainerName != "" { 464 mntns = t.helpers.LookupMntnsByContainer( 465 trace.Spec.Filter.Namespace, 466 trace.Spec.Filter.Podname, 467 trace.Spec.Filter.ContainerName, 468 ) 469 if mntns == 0 { 470 // Notify this only if the policy was not already generated at pod termination 471 if !t.policyGenerated { 472 trace.Status.OperationWarning = fmt.Sprintf("Container %s/%s/%s not found", 473 trace.Spec.Filter.Namespace, 474 trace.Spec.Filter.Podname, 475 trace.Spec.Filter.ContainerName, 476 ) 477 } 478 return 479 } 480 containerName = trace.Spec.Filter.ContainerName 481 } else { 482 mntnsMap := t.helpers.LookupMntnsByPod( 483 trace.Spec.Filter.Namespace, 484 trace.Spec.Filter.Podname, 485 ) 486 if len(mntnsMap) == 0 { 487 // Notify this only if the policy was not already generated at pod termination 488 if !t.policyGenerated { 489 trace.Status.OperationWarning = fmt.Sprintf("Pod %s/%s not found", 490 trace.Spec.Filter.Namespace, 491 trace.Spec.Filter.Podname, 492 ) 493 } 494 return 495 } 496 497 containerList := []string{} 498 for k, v := range mntnsMap { 499 containerName = k 500 mntns = v 501 containerList = append(containerList, k) 502 } 503 sort.Strings(containerList) 504 505 if len(mntnsMap) > 1 { 506 trace.Status.OperationError = fmt.Sprintf("Pod %s/%s has several containers: %v", 507 trace.Spec.Filter.Namespace, 508 trace.Spec.Filter.Podname, 509 containerList, 510 ) 511 return 512 } 513 if mntns == 0 { 514 trace.Status.OperationError = fmt.Sprintf("Pod %s/%s has unknown mntns", 515 trace.Spec.Filter.Namespace, 516 trace.Spec.Filter.Podname, 517 ) 518 return 519 } 520 } 521 522 // Get the list of syscallNames from the BPF hash map 523 syscallNames, err := traceSingleton.tracer.Peek(mntns) 524 if err != nil { 525 trace.Status.OperationError = fmt.Sprintf("peeking syscalls for mntns %d: %s", mntns, err) 526 return 527 } 528 529 switch trace.Spec.OutputMode { 530 case gadgetv1alpha1.TraceOutputModeStatus: 531 policy := seccomptracer.SyscallNamesToLinuxSeccomp(syscallNames) 532 output, err := json.MarshalIndent(policy, "", " ") 533 if err != nil { 534 trace.Status.OperationError = fmt.Sprintf("Failed to marshal seccomp policy: %s", err) 535 return 536 } 537 538 trace.Status.Output = string(output) 539 case gadgetv1alpha1.TraceOutputModeExternalResource: 540 podName := fmt.Sprintf("%s/%s", trace.Spec.Filter.Namespace, trace.Spec.Filter.Podname) 541 542 ownerReference := t.helpers.LookupOwnerReferenceByMntns(mntns) 543 544 r, err := generateSeccompPolicy(t.client, trace, syscallNames, trace.Spec.Filter.Podname, containerName, podName, ownerReference) 545 if err != nil { 546 trace.Status.OperationError = err.Error() 547 return 548 } 549 550 err = t.client.Create(context.TODO(), r) 551 if err != nil { 552 trace.Status.OperationError = fmt.Sprintf("Failed to update resource: %s", err) 553 return 554 } 555 case gadgetv1alpha1.TraceOutputModeFile: 556 fallthrough 557 default: 558 trace.Status.OperationError = fmt.Sprintf("OutputMode not supported: %s", trace.Spec.OutputMode) 559 } 560 } 561 562 func (t *Trace) Stop(trace *gadgetv1alpha1.Trace) { 563 if !t.started { 564 trace.Status.OperationError = "Not started" 565 return 566 } 567 568 traceSingleton.mu.Lock() 569 defer traceSingleton.mu.Unlock() 570 571 t.helpers.Unsubscribe(genPubSubKey(trace.ObjectMeta.Namespace + "/" + trace.ObjectMeta.Name)) 572 573 traceSingleton.users-- 574 if traceSingleton.users == 0 { 575 traceSingleton.tracer.Close() 576 traceSingleton.tracer = nil 577 } 578 579 t.started = false 580 581 trace.Status.State = gadgetv1alpha1.TraceStateStopped 582 }