github.com/aporeto-inc/trireme-lib@v10.358.0+incompatible/monitor/internal/pod/controller_test.go (about) 1 // +build linux 2 3 package podmonitor 4 5 import ( 6 "context" 7 "fmt" 8 "testing" 9 "time" 10 11 "go.aporeto.io/trireme-lib/monitor/extractors" 12 13 "go.aporeto.io/trireme-lib/common" 14 15 "github.com/golang/mock/gomock" 16 . "github.com/smartystreets/goconvey/convey" 17 "go.aporeto.io/trireme-lib/monitor/config" 18 "go.aporeto.io/trireme-lib/policy" 19 "go.aporeto.io/trireme-lib/policy/mockpolicy" 20 21 corev1 "k8s.io/api/core/v1" 22 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 "k8s.io/apimachinery/pkg/runtime" 24 "k8s.io/apimachinery/pkg/types" 25 26 fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" 27 "sigs.k8s.io/controller-runtime/pkg/reconcile" 28 ) 29 30 // TODO: should be a mock, but how to create it? we don't even vendor in tireme-lib 31 type fakeRecorder struct{} 32 33 func (r *fakeRecorder) Event(object runtime.Object, eventtype, reason, message string) { 34 } 35 func (r *fakeRecorder) Eventf(object runtime.Object, eventtype, reason, messageFmt string, args ...interface{}) { 36 } 37 func (r *fakeRecorder) PastEventf(object runtime.Object, timestamp metav1.Time, eventtype, reason, messageFmt string, args ...interface{}) { 38 } 39 func (r *fakeRecorder) AnnotatedEventf(object runtime.Object, annotations map[string]string, eventtype, reason, messageFmt string, args ...interface{}) { 40 } 41 42 func TestController(t *testing.T) { 43 Convey("Given a reconciler", t, func() { 44 ctrl := gomock.NewController(t) 45 defer ctrl.Finish() 46 // ctx := context.TODO() 47 48 failure := fmt.Errorf("fail hard") 49 50 pod1 := &corev1.Pod{ 51 ObjectMeta: metav1.ObjectMeta{ 52 Name: "pod1", 53 Namespace: "default", 54 UID: types.UID("default/pod1"), 55 }, 56 } 57 pod2 := &corev1.Pod{ 58 ObjectMeta: metav1.ObjectMeta{ 59 Name: "pod2", 60 Namespace: "default", 61 UID: types.UID("default/pod2"), 62 }, 63 Spec: corev1.PodSpec{ 64 HostNetwork: true, 65 }, 66 } 67 pod3 := &corev1.Pod{ 68 ObjectMeta: metav1.ObjectMeta{ 69 Name: "pod3", 70 Namespace: "default", 71 UID: types.UID("default/pod3"), 72 DeletionTimestamp: &metav1.Time{Time: time.Now()}, 73 }, 74 Status: corev1.PodStatus{ 75 Phase: corev1.PodRunning, 76 }, 77 } 78 podUnknown := &corev1.Pod{ 79 ObjectMeta: metav1.ObjectMeta{ 80 Name: "unknown", 81 Namespace: "default", 82 UID: types.UID("default/unknown"), 83 }, 84 Status: corev1.PodStatus{ 85 Phase: corev1.PodUnknown, 86 }, 87 } 88 podUnrecognized := &corev1.Pod{ 89 ObjectMeta: metav1.ObjectMeta{ 90 Name: "unrecognized", 91 Namespace: "default", 92 UID: types.UID("default/unrecognized"), 93 }, 94 Status: corev1.PodStatus{ 95 Phase: corev1.PodPhase("not-really-a-pod-phase"), 96 }, 97 } 98 podFailed := &corev1.Pod{ 99 ObjectMeta: metav1.ObjectMeta{ 100 Name: "failed", 101 Namespace: "default", 102 UID: types.UID("default/failed"), 103 }, 104 Status: corev1.PodStatus{ 105 Phase: corev1.PodFailed, 106 }, 107 } 108 podSucceeded := &corev1.Pod{ 109 ObjectMeta: metav1.ObjectMeta{ 110 Name: "succeeded", 111 Namespace: "default", 112 UID: types.UID("default/succeeded"), 113 }, 114 Status: corev1.PodStatus{ 115 Phase: corev1.PodSucceeded, 116 }, 117 } 118 podPending := &corev1.Pod{ 119 ObjectMeta: metav1.ObjectMeta{ 120 Name: "pending", 121 Namespace: "default", 122 UID: types.UID("default/pending"), 123 }, 124 Status: corev1.PodStatus{ 125 Phase: corev1.PodPending, 126 }, 127 } 128 podPendingAndStarted := &corev1.Pod{ 129 ObjectMeta: metav1.ObjectMeta{ 130 Name: "pendingAndStarted", 131 Namespace: "default", 132 UID: types.UID("default/pendingAndStarted"), 133 }, 134 Status: corev1.PodStatus{ 135 Phase: corev1.PodPending, 136 InitContainerStatuses: []corev1.ContainerStatus{ 137 { 138 State: corev1.ContainerState{ 139 Running: &corev1.ContainerStateRunning{ 140 StartedAt: metav1.Time{Time: time.Now()}, 141 }, 142 }, 143 }, 144 }, 145 }, 146 } 147 podRunningNotStarted := &corev1.Pod{ 148 ObjectMeta: metav1.ObjectMeta{ 149 Name: "runningNotStarted", 150 Namespace: "default", 151 UID: types.UID("default/runningNotStarted"), 152 }, 153 Status: corev1.PodStatus{ 154 Phase: corev1.PodRunning, 155 }, 156 } 157 podRunning := &corev1.Pod{ 158 ObjectMeta: metav1.ObjectMeta{ 159 Name: "running", 160 Namespace: "default", 161 UID: types.UID("default/running"), 162 }, 163 Status: corev1.PodStatus{ 164 Phase: corev1.PodRunning, 165 InitContainerStatuses: []corev1.ContainerStatus{ 166 { 167 State: corev1.ContainerState{ 168 Terminated: &corev1.ContainerStateTerminated{ 169 ExitCode: 0, 170 }, 171 }, 172 }, 173 }, 174 ContainerStatuses: []corev1.ContainerStatus{ 175 { 176 State: corev1.ContainerState{ 177 Running: &corev1.ContainerStateRunning{ 178 StartedAt: metav1.Time{Time: time.Now()}, 179 }, 180 }, 181 }, 182 }, 183 }, 184 } 185 podRunningHostNetwork := &corev1.Pod{ 186 ObjectMeta: metav1.ObjectMeta{ 187 Name: "runningHostNetwork", 188 Namespace: "default", 189 UID: types.UID("default/runningHostNetwork"), 190 }, 191 Spec: corev1.PodSpec{ 192 HostNetwork: true, 193 }, 194 Status: corev1.PodStatus{ 195 Phase: corev1.PodRunning, 196 ContainerStatuses: []corev1.ContainerStatus{ 197 { 198 State: corev1.ContainerState{ 199 Running: &corev1.ContainerStateRunning{ 200 StartedAt: metav1.Time{Time: time.Now()}, 201 }, 202 }, 203 }, 204 }, 205 }, 206 } 207 c := fakeclient.NewFakeClient(pod1, pod2, pod3, podUnknown, podUnrecognized, podSucceeded, podFailed, podPending, podPendingAndStarted, podRunningNotStarted, podRunning, podRunningHostNetwork) 208 209 handler := mockpolicy.NewMockResolver(ctrl) 210 211 metadataExtractor := func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 212 return nil, nil 213 } 214 netclsProgrammer := func(context.Context, *corev1.Pod, policy.RuntimeReader) error { 215 return nil 216 } 217 218 sandboxExtractor := func(context.Context, *corev1.Pod) (string, error) { 219 return "", nil 220 } 221 222 sandboxID := "test" 223 // we will only send all delete events in this test, we are not going to handle them 224 deleteCh := make(chan DeleteEvent, 1000) 225 deleteReconcileCh := make(chan struct{}, 1000) 226 227 pc := &config.ProcessorConfig{ 228 Policy: handler, 229 } 230 231 r := &ReconcilePod{ 232 client: c, 233 recorder: &fakeRecorder{}, 234 handler: pc, 235 metadataExtractor: metadataExtractor, 236 netclsProgrammer: netclsProgrammer, 237 sandboxExtractor: sandboxExtractor, 238 nodeName: "testing-node", 239 enableHostPods: true, 240 deleteCh: deleteCh, 241 deleteReconcileCh: deleteReconcileCh, 242 resyncInfo: NewResyncInfoChan(), 243 244 // taken from original file 245 handlePUEventTimeout: 5 * time.Second, 246 metadataExtractTimeout: 3 * time.Second, 247 netclsProgramTimeout: 2 * time.Second, 248 } 249 250 Convey("a not existing pod should trigger a destroy event without any error", func() { 251 //handler.EXPECT().HandlePUEvent(gomock.Any(), "b/a", common.EventDestroy, gomock.Any()).Return(nil).Times(1) 252 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "a", Namespace: "b"}}) 253 So(err, ShouldBeNil) 254 }) 255 256 Convey("a not existing pod should trigger a destroy event, and *not* fail if it cannot handle the destroy", func() { 257 //handler.EXPECT().HandlePUEvent(gomock.Any(), "b/a", common.EventDestroy, gomock.Any()).Return(fmt.Errorf("stopping failed")).Times(1) 258 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "a", Namespace: "b"}}) 259 So(err, ShouldBeNil) 260 }) 261 262 Convey("an existing pod with HostNetwork=true, but host pod activation disabled, should silently return", func() { 263 r.enableHostPods = false 264 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "pod2", Namespace: "default"}}) 265 So(err, ShouldBeNil) 266 }) 267 268 Convey("a pod which is terminating, should update metadata and silently return", func() { 269 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/pod3", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 270 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "pod3", Namespace: "default"}}) 271 So(err, ShouldBeNil) 272 }) 273 274 Convey("a pod which is in PodUnknown state should silently return", func() { 275 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "unknown", Namespace: "default"}}) 276 So(err, ShouldBeNil) 277 }) 278 279 Convey("a pod which has an unrecognized pod phase should silently return", func() { 280 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "unrecognized", Namespace: "default"}}) 281 So(err, ShouldBeNil) 282 }) 283 284 Convey("a pod which is in podsucceeded or podfailed state should try to stop the PU", func() { 285 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/failed", common.EventUpdate, gomock.Any()).Return(fmt.Errorf("update failed")).Times(1) 286 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/failed", common.EventStop, gomock.Any()).Return(fmt.Errorf("stop failed")).Times(1) 287 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "failed", Namespace: "default"}}) 288 So(err, ShouldBeNil) 289 290 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/succeeded", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 291 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/succeeded", common.EventStop, gomock.Any()).Return(nil).Times(1) 292 _, err = r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "succeeded", Namespace: "default"}}) 293 So(err, ShouldBeNil) 294 295 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/succeeded", common.EventUpdate, gomock.Any()).Return(policy.ErrPUNotFound("default/succeeded", nil)).Times(1) 296 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/succeeded", common.EventStop, gomock.Any()).Return(policy.ErrPUNotFound("default/succeeded", nil)).Times(1) 297 _, err = r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "succeeded", Namespace: "default"}}) 298 So(err, ShouldBeNil) 299 300 Convey("and retry if metadata extraction fails", func() { 301 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 302 return nil, failure 303 } 304 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "succeeded", Namespace: "default"}}) 305 So(err, ShouldNotBeNil) 306 So(err, ShouldEqual, failure) 307 }) 308 309 Reset(func() { 310 r.metadataExtractor = metadataExtractor 311 }) 312 }) 313 314 Convey("a pod in pending state should update or create a PU if it does already exist", func() { 315 // metadata extractor needs to change tags in order to provoke an update call 316 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 317 ru := policy.NewPURuntimeWithDefaults() 318 ru.SetTags(policy.NewTagStoreFromMap(map[string]string{"exists": "exists", "a": "b"})) 319 return ru, nil 320 } 321 322 // update works 323 existingRuntime := policy.NewPURuntimeWithDefaults() 324 existingRuntime.SetTags(policy.NewTagStoreFromMap(map[string]string{"exists": "exists"})) 325 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/pending", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 326 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "pending", Namespace: "default"}}) 327 So(err, ShouldBeNil) 328 329 // update fails hard 330 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/pending", common.EventUpdate, gomock.Any()).Return(failure).Times(1) 331 _, err = r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "pending", Namespace: "default"}}) 332 So(err, ShouldBeNil) 333 334 // PU does not exist, but create fails hard 335 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/pending", common.EventUpdate, gomock.Any()).Return(policy.ErrPUNotFound("default/pending", nil)).Times(1) 336 _, err = r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "pending", Namespace: "default"}}) 337 So(err, ShouldBeNil) 338 339 // PU does not exist, but create succeeds 340 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/pending", common.EventUpdate, gomock.Any()).Return(policy.ErrPUNotFound("default/pending", nil)).Times(1) 341 _, err = r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "pending", Namespace: "default"}}) 342 So(err, ShouldBeNil) 343 }) 344 345 Convey("a pod in pending state which has an init container started, should silently return if everything could be started", func() { 346 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 347 return policy.NewPURuntime("default/pendingAndStarted", 42, "", nil, nil, common.ContainerPU, nil), nil 348 } 349 r.sandboxExtractor = func(context.Context, *corev1.Pod) (string, error) { 350 return sandboxID, nil 351 } 352 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/pendingAndStarted", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 353 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/pendingAndStarted", common.EventStart, gomock.Any()).Return(nil).Times(1) 354 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "pendingAndStarted", Namespace: "default"}}) 355 So(err, ShouldBeNil) 356 }) 357 358 Convey("a pod in running state should silently return if no containers have been started yet", func() { 359 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/runningNotStarted", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 360 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "runningNotStarted", Namespace: "default"}}) 361 So(err, ShouldBeNil) 362 }) 363 364 Convey("a pod in running state", func() { 365 Convey("should retry if metadata extraction fails", func() { 366 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 367 return nil, failure 368 } 369 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "running", Namespace: "default"}}) 370 So(err, ShouldNotBeNil) 371 So(err, ShouldEqual, failure) 372 }) 373 Convey("should retry if metadata extraction succeeded, but no PID nor netns path were found and this is not a hostnetwork pod", func() { 374 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/running", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 375 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 376 return policy.NewPURuntimeWithDefaults(), nil 377 } 378 r.sandboxExtractor = func(context.Context, *corev1.Pod) (string, error) { 379 return sandboxID, nil 380 } 381 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "running", Namespace: "default"}}) 382 So(err, ShouldNotBeNil) 383 So(err, ShouldEqual, ErrNetnsExtractionMissing) 384 }) 385 Convey("should *not* fail if metadata and PID/netnspath extraction succeeded, but the Start PU event fails", func() { 386 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 387 return policy.NewPURuntime("default/running", 42, "", nil, nil, common.ContainerPU, nil), nil 388 } 389 r.sandboxExtractor = func(context.Context, *corev1.Pod) (string, error) { 390 return sandboxID, nil 391 } 392 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/running", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 393 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/running", common.EventStart, gomock.Any()).Return(failure).Times(1) 394 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "running", Namespace: "default"}}) 395 So(err, ShouldBeNil) 396 }) 397 Convey("should return silently if metadata and PID/netnspath extraction succeeded, but the PU has already been activated", func() { 398 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 399 return policy.NewPURuntime("default/running", 42, "", nil, nil, common.ContainerPU, nil), nil 400 } 401 r.sandboxExtractor = func(context.Context, *corev1.Pod) (string, error) { 402 return sandboxID, nil 403 } 404 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/running", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 405 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/running", common.EventStart, gomock.Any()).Return(policy.ErrPUAlreadyActivated("default/running", nil)).Times(1) 406 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "running", Namespace: "default"}}) 407 So(err, ShouldBeNil) 408 }) 409 Convey("should return silently if metadata and PID/netnspath extraction succeeded, and the PU could be successfully activated", func() { 410 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 411 return policy.NewPURuntime("default/running", 42, "", nil, nil, common.ContainerPU, nil), nil 412 } 413 r.sandboxExtractor = func(context.Context, *corev1.Pod) (string, error) { 414 return sandboxID, nil 415 } 416 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/running", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 417 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/running", common.EventStart, gomock.Any()).Return(nil).Times(1) 418 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "running", Namespace: "default"}}) 419 So(err, ShouldBeNil) 420 }) 421 }) 422 423 Convey("a HostNetwork=true pod should try to start the PU and try to program the netcls cgroup", func() { 424 r.metadataExtractor = func(ctx context.Context, p *corev1.Pod, extractNetns bool) (*policy.PURuntime, error) { 425 return policy.NewPURuntime("default/runningHostNetwork", 0, "", nil, nil, common.LinuxProcessPU, nil), nil 426 } 427 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/runningHostNetwork", common.EventUpdate, gomock.Any()).Return(nil).Times(1) 428 handler.EXPECT().HandlePUEvent(gomock.Any(), "default/runningHostNetwork", common.EventStart, gomock.Any()).Return(nil).AnyTimes() 429 Convey("and succeed if metadata extraction succeeded, and netcls cgroup programming succeeded", func() { 430 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "runningHostNetwork", Namespace: "default"}}) 431 So(err, ShouldBeNil) 432 }) 433 Convey("and succeed if metadata extraction succeeded, and netcls cgroup programming failed with netcls already programmed", func() { 434 r.netclsProgrammer = func(context.Context, *corev1.Pod, policy.RuntimeReader) error { 435 return extractors.ErrNetclsAlreadyProgrammed("mark") 436 } 437 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "runningHostNetwork", Namespace: "default"}}) 438 So(err, ShouldBeNil) 439 }) 440 Convey("and return silently if metadata extraction succeeded, but netcls cgroup programming discovered that this pod is not a host network pod (cannot recover)", func() { 441 r.netclsProgrammer = func(context.Context, *corev1.Pod, policy.RuntimeReader) error { 442 return extractors.ErrNoHostNetworkPod 443 } 444 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "runningHostNetwork", Namespace: "default"}}) 445 So(err, ShouldBeNil) 446 }) 447 Convey("should fail if metadata extraction succeeded, but netcls cgroup programming fails", func() { 448 r.netclsProgrammer = func(context.Context, *corev1.Pod, policy.RuntimeReader) error { 449 return failure 450 } 451 r.sandboxExtractor = func(context.Context, *corev1.Pod) (string, error) { 452 return sandboxID, nil 453 } 454 _, err := r.Reconcile(reconcile.Request{NamespacedName: types.NamespacedName{Name: "runningHostNetwork", Namespace: "default"}}) 455 So(err, ShouldNotBeNil) 456 So(err, ShouldEqual, failure) 457 }) 458 }) 459 }) 460 }