istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/autoregistration/controller_test.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package autoregistration 16 17 import ( 18 "fmt" 19 "math" 20 "reflect" 21 "strings" 22 "sync" 23 "testing" 24 "time" 25 26 core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 27 "github.com/google/go-cmp/cmp" 28 "github.com/hashicorp/go-multierror" 29 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 kubetypes "k8s.io/apimachinery/pkg/types" 31 32 "istio.io/api/annotation" 33 "istio.io/api/meta/v1alpha1" 34 "istio.io/api/networking/v1alpha3" 35 "istio.io/istio/pilot/pkg/config/memory" 36 "istio.io/istio/pilot/pkg/features" 37 "istio.io/istio/pilot/pkg/model" 38 "istio.io/istio/pilot/pkg/model/status" 39 "istio.io/istio/pilot/pkg/networking/util" 40 "istio.io/istio/pkg/config" 41 "istio.io/istio/pkg/config/schema/collections" 42 "istio.io/istio/pkg/config/schema/gvk" 43 "istio.io/istio/pkg/keepalive" 44 "istio.io/istio/pkg/network" 45 "istio.io/istio/pkg/spiffe" 46 "istio.io/istio/pkg/test" 47 "istio.io/istio/pkg/test/util/assert" 48 "istio.io/istio/pkg/test/util/retry" 49 ) 50 51 func init() { 52 features.WorkloadEntryCleanupGracePeriod = 50 * time.Millisecond 53 } 54 55 var _ connection = &fakeConn{} 56 57 type fakeConn struct { 58 sync.RWMutex 59 proxy *model.Proxy 60 connTime time.Time 61 stopped bool 62 } 63 64 func makeConn(proxy *model.Proxy, connTime time.Time) *fakeConn { 65 return &fakeConn{proxy: proxy, connTime: connTime} 66 } 67 68 func (f *fakeConn) ID() string { 69 return fmt.Sprintf("%s-%v", f.proxy.IPAddresses[0], f.connTime) 70 } 71 72 func (f *fakeConn) Proxy() *model.Proxy { 73 return f.proxy 74 } 75 76 func (f *fakeConn) ConnectedAt() time.Time { 77 return f.connTime 78 } 79 80 func (f *fakeConn) Stop() { 81 f.Lock() 82 defer f.Unlock() 83 f.stopped = true 84 } 85 86 func (f *fakeConn) Stopped() bool { 87 f.RLock() 88 defer f.RUnlock() 89 return f.stopped 90 } 91 92 var ( 93 tmplA = &v1alpha3.WorkloadGroup{ 94 Template: &v1alpha3.WorkloadEntry{ 95 Ports: map[string]uint32{"http": 80}, 96 Labels: map[string]string{"app": "a"}, 97 Network: "nw0", 98 Locality: "reg0/zone0/subzone0", 99 Weight: 1, 100 ServiceAccount: "sa-a", 101 }, 102 } 103 wgA = config.Config{ 104 Meta: config.Meta{ 105 GroupVersionKind: gvk.WorkloadGroup, 106 Namespace: "a", 107 Name: "wg-a", 108 Labels: map[string]string{ 109 "grouplabel": "notonentry", 110 }, 111 }, 112 Spec: tmplA, 113 Status: nil, 114 } 115 wgAWrongNs = config.Config{ 116 Meta: config.Meta{ 117 GroupVersionKind: gvk.WorkloadGroup, 118 Namespace: "wrong", 119 Name: "wg-a", 120 Labels: map[string]string{ 121 "grouplabel": "notonentry", 122 }, 123 }, 124 Spec: tmplA, 125 Status: nil, 126 } 127 wgWithoutSA = config.Config{ 128 Meta: config.Meta{ 129 GroupVersionKind: gvk.WorkloadGroup, 130 Namespace: "a", 131 Name: "wg-b", 132 Labels: map[string]string{ 133 "grouplabel": "notonentry", 134 }, 135 }, 136 Spec: &v1alpha3.WorkloadGroup{ 137 Template: &v1alpha3.WorkloadEntry{ 138 Ports: map[string]uint32{"http": 80}, 139 Labels: map[string]string{"app": "a"}, 140 Network: "nw0", 141 Locality: "reg0/zone0/subzone0", 142 Weight: 1, 143 ServiceAccount: "", 144 }, 145 }, 146 Status: nil, 147 } 148 weB = config.Config{ 149 Meta: config.Meta{ 150 GroupVersionKind: gvk.WorkloadEntry, 151 Namespace: "b", 152 Name: "we-without-auto-registration", 153 Annotations: map[string]string{ 154 "proxy.istio.io/health-checks-enabled": "true", 155 }, 156 }, 157 Spec: &v1alpha3.WorkloadEntry{ 158 Address: "10.0.0.1", 159 Network: "nw0", 160 }, 161 Status: nil, 162 } 163 ) 164 165 func TestNonAutoregisteredWorkloads(t *testing.T) { 166 store := memory.NewController(memory.Make(collections.All)) 167 c := NewController(store, "", time.Duration(math.MaxInt64)) 168 createOrFail(t, store, wgA) 169 stop := test.NewStop(t) 170 go c.Run(stop) 171 172 cases := map[string]*model.Proxy{ 173 "missing group": {IPAddresses: []string{"1.2.3.4"}, Metadata: &model.NodeMetadata{Namespace: wgA.Namespace}}, 174 "missing ip": {Metadata: &model.NodeMetadata{Namespace: wgA.Namespace, AutoRegisterGroup: wgA.Name}}, 175 "missing namespace": {IPAddresses: []string{"1.2.3.4"}, Metadata: &model.NodeMetadata{AutoRegisterGroup: wgA.Name}}, 176 "non-existent group": {IPAddresses: []string{"1.2.3.4"}, Metadata: &model.NodeMetadata{Namespace: wgA.Namespace, AutoRegisterGroup: "dne"}}, 177 } 178 179 for name, tc := range cases { 180 tc := tc 181 t.Run(name, func(t *testing.T) { 182 c.OnConnect(makeConn(tc, time.Now())) 183 items := store.List(gvk.WorkloadEntry, model.NamespaceAll) 184 if len(items) != 0 { 185 t.Fatalf("expected 0 WorkloadEntry") 186 } 187 }) 188 } 189 } 190 191 func TestAutoregistrationLifecycle(t *testing.T) { 192 maxConnAge := time.Hour 193 c1, c2, store := setup(t) 194 c2.maxConnectionAge = maxConnAge 195 stopped1 := false 196 stop1, stop2 := make(chan struct{}), make(chan struct{}) 197 defer func() { 198 // stop1 should be killed early, as part of test 199 if !stopped1 { 200 close(stop1) 201 } 202 }() 203 defer close(stop2) 204 go c1.Run(stop1) 205 go c2.Run(stop2) 206 go store.Run(stop2) 207 208 n := fakeNode("reg1", "zone1", "subzone1") 209 210 var p1conn1, p1conn2 *fakeConn 211 p := fakeProxy("1.2.3.4", wgA, "nw1", "sa-a") 212 p.Locality = n.Locality 213 214 var p2conn1 *fakeConn 215 p2 := fakeProxy("1.2.3.4", wgA, "nw2", "sa-a") 216 p2.Locality = n.Locality 217 218 var p3conn1 *fakeConn 219 p3 := fakeProxy("1.2.3.5", wgA, "nw1", "sa-a") 220 p3.Locality = n.Locality 221 222 t.Run("initial registration", func(t *testing.T) { 223 // simply make sure the entry exists after connecting 224 p1conn1 = makeConn(p, time.Now()) 225 c1.OnConnect(p1conn1) 226 checkEntryOrFail(t, store, wgA, p, n, c1.instanceID) 227 }) 228 t.Run("multinetwork same ip", func(t *testing.T) { 229 // make sure we don't overrwrite a similar entry for a different network 230 p2conn1 = makeConn(p2, time.Now()) 231 c2.OnConnect(p2conn1) 232 checkEntryOrFail(t, store, wgA, p, n, c1.instanceID) 233 checkEntryOrFail(t, store, wgA, p2, n, c2.instanceID) 234 c2.OnDisconnect(p2conn1) // cleanup for future tests 235 }) 236 t.Run("fast reconnect", func(t *testing.T) { 237 t.Run("same instance", func(t *testing.T) { 238 // disconnect, make sure entry is still there with disconnect meta 239 c1.OnDisconnect(p1conn1) 240 checkEntryOrFailAfter(t, store, wgA, p, n, "", features.WorkloadEntryCleanupGracePeriod/2) 241 // reconnect, ensure entry is there with the same instance id 242 p1conn1 = makeConn(p, time.Now()) 243 c1.OnConnect(p1conn1) 244 checkEntryOrFail(t, store, wgA, p, n, c1.instanceID) 245 }) 246 t.Run("same instance: connect before disconnect ", func(t *testing.T) { 247 // reconnect, ensure entry is there with the same instance id 248 p1conn2 = makeConn(p, p1conn1.ConnectedAt().Add(10*time.Millisecond)) 249 c1.OnConnect(p1conn2) 250 // disconnect (associated with original connect, not the reconnect) 251 // make sure entry is still there with disconnect meta 252 c1.OnDisconnect(p1conn1) 253 checkEntryOrFailAfter(t, store, wgA, p, n, c1.instanceID, features.WorkloadEntryCleanupGracePeriod/2) 254 }) 255 t.Run("different instance", func(t *testing.T) { 256 // disconnect, make sure entry is still there with disconnect metadata 257 c1.OnDisconnect(p1conn2) 258 checkEntryOrFailAfter(t, store, wgA, p, n, "", features.WorkloadEntryCleanupGracePeriod/2) 259 // reconnect, ensure entry is there with the new instance id 260 p1conn1 = makeConn(p, time.Now()) 261 c2.OnConnect(p1conn1) 262 checkEntryOrFail(t, store, wgA, p, n, c2.instanceID) 263 }) 264 }) 265 t.Run("slow reconnect", func(t *testing.T) { 266 // disconnect, wait and make sure entry is gone 267 c2.OnDisconnect(p1conn1) 268 retry.UntilSuccessOrFail(t, func() error { 269 return checkNoEntry(store, wgA, p) 270 }) 271 // reconnect 272 p1conn1 = makeConn(p, time.Now()) 273 c1.OnConnect(p1conn1) 274 checkEntryOrFail(t, store, wgA, p, n, c1.instanceID) 275 }) 276 t.Run("garbage collected if pilot stops after disconnect", func(t *testing.T) { 277 // disconnect, kill the cleanup queue from the first controller 278 c1.OnDisconnect(p1conn1) 279 // stop processing the delayed close queue in c1, forces using periodic cleanup 280 close(stop1) 281 stopped1 = true 282 // unfortunately, this retry at worst could be twice as long as the sweep interval 283 retry.UntilSuccessOrFail(t, func() error { 284 return checkNoEntry(store, wgA, p) 285 }, retry.Timeout(time.Until(time.Now().Add(21*features.WorkloadEntryCleanupGracePeriod)))) 286 }) 287 288 t.Run("garbage collected if pilot and workload stops simultaneously before pilot can do anything", func(t *testing.T) { 289 // simulate p3 has been registered long before 290 p3conn1 = makeConn(p3, time.Now().Add(-2*maxConnAge)) 291 c2.OnConnect(p3conn1) 292 293 // keep silent to simulate the scenario (don't OnDisconnect to simulate pilot being down) 294 295 // unfortunately, this retry at worst could be twice as long as the sweep interval 296 retry.UntilSuccessOrFail(t, func() error { 297 return checkNoEntry(store, wgA, p3) 298 }, retry.Timeout(time.Until(time.Now().Add(21*features.WorkloadEntryCleanupGracePeriod)))) 299 300 c2.OnDisconnect(p3conn1) // cleanup the state for future tests 301 }) 302 t.Run("workload group recreate", func(t *testing.T) { 303 p1conn1 = makeConn(p, time.Now()) 304 c2.OnConnect(p1conn1) 305 checkEntryOrFail(t, store, wgA, p, n, c2.instanceID) 306 307 name := strings.Join([]string{wgA.Name, p.IPAddresses[0], string(p.Metadata.Network)}, "-") 308 if err := store.Delete(gvk.WorkloadGroup, wgA.Name, wgA.Namespace, nil); err != nil { 309 t.Fatal(err) 310 } 311 if err := store.Delete(gvk.WorkloadEntry, name, wgA.Namespace, nil); err != nil { 312 t.Fatal(err) 313 } 314 createOrFail(t, store, wgA) 315 316 retry.UntilSuccessOrFail(t, func() error { 317 return checkEntry(store, wgA, p, n, c2.instanceID) 318 }) 319 }) 320 c2.OnDisconnect(p1conn1) // cleanup the state for future tests 321 t.Run("unverified client", func(t *testing.T) { 322 p := fakeProxy("1.2.3.6", wgA, "nw1", "") 323 324 // Should fail 325 assert.Error(t, c1.OnConnect(makeConn(p, time.Now()))) 326 checkNoEntryOrFail(t, store, wgA, p) 327 }) 328 t.Run("wrong SA client", func(t *testing.T) { 329 p := fakeProxy("1.2.3.6", wgA, "nw1", "wrong") 330 331 // Should fail 332 assert.Error(t, c1.OnConnect(makeConn(p, time.Now()))) 333 checkNoEntryOrFail(t, store, wgA, p) 334 }) 335 t.Run("wrong NS client", func(t *testing.T) { 336 p := fakeProxy("1.2.3.6", wgA, "nw1", "sa-a") 337 p.Metadata.Namespace = "wrong" 338 339 // Should fail 340 assert.Error(t, c1.OnConnect(makeConn(p, time.Now()))) 341 checkNoEntryOrFail(t, store, wgA, p) 342 }) 343 t.Run("no SA WG", func(t *testing.T) { 344 p := fakeProxy("1.2.3.6", wgWithoutSA, "nw1", "sa-a") 345 n := fakeNode("reg0", "zone0", "subzone0") 346 p.Locality = n.Locality 347 348 // Should not fail 349 assert.NoError(t, c1.OnConnect(makeConn(p, time.Now()))) 350 checkEntryOrFail(t, store, wgWithoutSA, p, n, c1.instanceID) 351 }) 352 // TODO test garbage collection if pilot stops before disconnect meta is set (relies on heartbeat) 353 } 354 355 func TestAutoregistrationDisabled(t *testing.T) { 356 test.SetForTest(t, &features.WorkloadEntryAutoRegistration, false) 357 store := memory.NewController(memory.Make(collections.All)) 358 createOrFail(t, store, weB) 359 360 stop := test.NewStop(t) 361 362 c := NewController(store, "pilot-x", keepalive.Infinity) 363 go c.Run(stop) 364 365 t.Run("health check still works", func(t *testing.T) { 366 proxy := fakeProxySuitableForHealthChecks(weB) 367 368 err := c.OnConnect(makeConn(proxy, time.Now())) 369 assert.NoError(t, err) 370 // report workload is healthy 371 c.QueueWorkloadEntryHealth(proxy, HealthEvent{ 372 Healthy: true, 373 }) 374 // ensure health condition has been updated 375 checkHealthOrFail(t, store, proxy, true) 376 }) 377 t.Run("registration does nothing", func(t *testing.T) { 378 p := fakeProxy("1.2.3.4", wgA, "litNw", "sa-a") 379 assert.NoError(t, c.OnConnect(makeConn(p, time.Now()))) 380 checkNoEntryOrFail(t, store, wgA, p) 381 }) 382 } 383 384 func TestUpdateHealthCondition(t *testing.T) { 385 stop := test.NewStop(t) 386 ig, ig2, store := setup(t) 387 go ig.Run(stop) 388 go ig2.Run(stop) 389 p := fakeProxy("1.2.3.4", wgA, "litNw", "sa-a") 390 p.XdsNode = fakeNode("reg1", "zone1", "subzone1") 391 ig.OnConnect(makeConn(p, time.Now())) 392 t.Run("auto registered healthy health", func(t *testing.T) { 393 ig.QueueWorkloadEntryHealth(p, HealthEvent{ 394 Healthy: true, 395 }) 396 checkHealthOrFail(t, store, p, true) 397 }) 398 t.Run("auto registered unhealthy health", func(t *testing.T) { 399 ig.QueueWorkloadEntryHealth(p, HealthEvent{ 400 Healthy: false, 401 Message: "lol health bad", 402 }) 403 checkHealthOrFail(t, store, p, false) 404 }) 405 } 406 407 func TestWorkloadEntryFromGroup(t *testing.T) { 408 group := config.Config{ 409 Meta: config.Meta{ 410 GroupVersionKind: gvk.WorkloadGroup, 411 Namespace: "a", 412 Name: "wg-a", 413 Labels: map[string]string{ 414 "grouplabel": "notonentry", 415 }, 416 }, 417 Spec: &v1alpha3.WorkloadGroup{ 418 Metadata: &v1alpha3.WorkloadGroup_ObjectMeta{ 419 Labels: map[string]string{"foo": "bar"}, 420 Annotations: map[string]string{"foo": "bar"}, 421 }, 422 Template: &v1alpha3.WorkloadEntry{ 423 Ports: map[string]uint32{"http": 80}, 424 Labels: map[string]string{"app": "a"}, 425 Weight: 1, 426 Network: "nw0", 427 Locality: "rgn1/zone1/subzone1", 428 ServiceAccount: "sa-a", 429 }, 430 }, 431 } 432 proxy := fakeProxy("10.0.0.1", group, "nw1", "sa") 433 proxy.Labels[model.LocalityLabel] = "rgn2/zone2/subzone2" 434 proxy.XdsNode = fakeNode("rgn2", "zone2", "subzone2") 435 proxy.Locality = proxy.XdsNode.Locality 436 437 wantLabels := map[string]string{ 438 "app": "a", // from WorkloadEntry template 439 "foo": "bar", // from WorkloadGroup.Metadata 440 "merge": "me", // from Node metadata 441 } 442 443 want := config.Config{ 444 Meta: config.Meta{ 445 GroupVersionKind: gvk.WorkloadEntry, 446 Name: "test-we", 447 Namespace: proxy.Metadata.Namespace, 448 Labels: wantLabels, 449 Annotations: map[string]string{ 450 annotation.IoIstioAutoRegistrationGroup.Name: group.Name, 451 "foo": "bar", 452 }, 453 OwnerReferences: []metav1.OwnerReference{{ 454 APIVersion: group.GroupVersionKind.GroupVersion(), 455 Kind: group.GroupVersionKind.Kind, 456 Name: group.Name, 457 UID: kubetypes.UID(group.UID), 458 Controller: &workloadGroupIsController, 459 }}, 460 }, 461 Spec: &v1alpha3.WorkloadEntry{ 462 Address: "10.0.0.1", 463 Ports: map[string]uint32{ 464 "http": 80, 465 }, 466 Labels: wantLabels, 467 Network: "nw1", 468 Locality: "rgn2/zone2/subzone2", 469 Weight: 1, 470 ServiceAccount: "sa-a", 471 }, 472 } 473 474 got := workloadEntryFromGroup("test-we", proxy, &group) 475 assert.Equal(t, got, &want) 476 } 477 478 func TestNonAutoregisteredWorkloads_UnsuitableForHealthChecks_WorkloadEntryNotFound(t *testing.T) { 479 store := memory.NewController(memory.Make(collections.All)) 480 createOrFail(t, store, weB) 481 482 stop := test.NewStop(t) 483 484 c := NewController(store, "pilot-x", keepalive.Infinity) 485 go c.Run(stop) 486 487 proxy := fakeProxySuitableForHealthChecks(weB) 488 // change proxy metadata to make it unsuitable for health checks 489 proxy.Metadata.WorkloadEntry = "non-exisiting-workload-entry" 490 491 err := c.OnConnect(makeConn(proxy, time.Now())) 492 assert.Error(t, err) 493 } 494 495 func TestNonAutoregisteredWorkloads_UnsuitableForHealthChecks_ShouldNotBeTreatedAsConnected(t *testing.T) { 496 cases := []struct { 497 name string 498 we func() config.Config 499 proxy func(we config.Config) *model.Proxy 500 }{ 501 { 502 name: "when proxy.Metadata.WorkloadEntry is not set", 503 we: weB.DeepCopy, 504 proxy: func(we config.Config) *model.Proxy { 505 proxy := fakeProxySuitableForHealthChecks(we) 506 // change proxy metadata to make it unsuitable for health checks 507 proxy.Metadata.WorkloadEntry = "" 508 return proxy 509 }, 510 }, 511 { 512 name: "when 'proxy.istio.io/health-checks-enabled' annotation is missing", 513 we: func() config.Config { 514 we := weB.DeepCopy() 515 delete(we.Annotations, "proxy.istio.io/health-checks-enabled") 516 return we 517 }, 518 proxy: fakeProxySuitableForHealthChecks, 519 }, 520 } 521 for _, tc := range cases { 522 t.Run(tc.name, func(t *testing.T) { 523 we := tc.we() 524 525 store := memory.NewController(memory.Make(collections.All)) 526 createOrFail(t, store, we) 527 528 stop := test.NewStop(t) 529 530 c := NewController(store, "pilot-x", keepalive.Infinity) 531 go c.Run(stop) 532 533 proxy := tc.proxy(we) 534 535 err := c.OnConnect(makeConn(proxy, time.Now())) 536 assert.NoError(t, err) 537 538 wle := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace) 539 if wle == nil { 540 t.Fatalf("WorkloadEntry %s/%s must exist", we.Namespace, we.Name) 541 } 542 if diff := cmp.Diff(we.Annotations, wle.Annotations); diff != "" { 543 t.Fatalf("WorkloadEntry should not have been changed: %v", diff) 544 } 545 }) 546 } 547 } 548 549 func TestNonAutoregisteredWorkloads_SuitableForHealthChecks_ShouldBeTreatedAsConnected(t *testing.T) { 550 for _, value := range []string{"", "false", "true"} { 551 name := fmt.Sprintf("when 'proxy.istio.io/health-checks-enabled' annotation has value %q", value) 552 t.Run(name, func(t *testing.T) { 553 we := weB.DeepCopy() 554 we.Annotations["proxy.istio.io/health-checks-enabled"] = value 555 556 store := memory.NewController(memory.Make(collections.All)) 557 createOrFail(t, store, we) 558 559 stop := test.NewStop(t) 560 561 c := NewController(store, "pilot-x", keepalive.Infinity) 562 go c.Run(stop) 563 564 proxy := fakeProxySuitableForHealthChecks(we) 565 566 now := time.Now() 567 568 err := c.OnConnect(makeConn(proxy, now)) 569 assert.NoError(t, err) 570 571 wle := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace) 572 if wle == nil { 573 t.Fatalf("WorkloadEntry %s/%s must exist", we.Namespace, we.Name) 574 } 575 if diff := cmp.Diff("pilot-x", wle.Annotations[annotation.IoIstioWorkloadController.Name]); diff != "" { 576 t.Fatalf("WorkloadEntry should have been annotated with %q: %v", annotation.IoIstioWorkloadController.Name, diff) 577 } 578 if diff := cmp.Diff(now.Format(time.RFC3339Nano), wle.Annotations[annotation.IoIstioConnectedAt.Name]); diff != "" { 579 t.Fatalf("WorkloadEntry should have been annotated with %q: %v", annotation.IoIstioConnectedAt.Name, diff) 580 } 581 }) 582 } 583 } 584 585 func TestNonAutoregisteredWorkloads_SuitableForHealthChecks_ShouldSupportLifecycle(t *testing.T) { 586 c1, c2, store := setup(t) 587 createOrFail(t, store, weB) 588 589 stop1, stop2 := test.NewStop(t), test.NewStop(t) 590 591 go c1.Run(stop1) 592 go c2.Run(stop2) 593 594 p := fakeProxySuitableForHealthChecks(weB) 595 596 // allows associating a Register call with Unregister 597 var origConnTime time.Time 598 599 t.Run("initial connect", func(t *testing.T) { 600 // connect 601 origConnTime = time.Now() 602 c1.OnConnect(makeConn(p, origConnTime)) 603 // ensure the entry is connected 604 checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID) 605 }) 606 t.Run("reconnect", func(t *testing.T) { 607 t.Run("same instance: disconnect then connect", func(t *testing.T) { 608 // disconnect 609 c1.OnDisconnect(makeConn(p, origConnTime)) 610 // wait until WE get updated asynchronously 611 retry.UntilSuccessOrFail(t, func() error { 612 return checkEntryDisconnected(store, weB) 613 }) 614 // ensure the entry is disconnected 615 checkNonAutoRegisteredEntryOrFail(t, store, weB, "") 616 // reconnect 617 origConnTime = time.Now() 618 c1.OnConnect(makeConn(p, origConnTime)) 619 // ensure the entry is connected 620 checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID) 621 }) 622 t.Run("same instance: connect before disconnect ", func(t *testing.T) { 623 nextConnTime := origConnTime.Add(10 * time.Millisecond) 624 defer func() { 625 time.Sleep(time.Until(nextConnTime)) 626 origConnTime = nextConnTime 627 }() 628 // reconnect 629 c1.OnConnect(makeConn(p, nextConnTime)) 630 // ensure the entry is connected 631 checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID) 632 // disconnect (associated with original connect, not the reconnect) 633 c1.OnDisconnect(makeConn(p, origConnTime)) 634 // ensure the entry is connected 635 checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID) 636 }) 637 t.Run("different instance: disconnect then connect", func(t *testing.T) { 638 // disconnect 639 c1.OnDisconnect(makeConn(p, origConnTime)) 640 // wait until WE get updated asynchronously 641 retry.UntilSuccessOrFail(t, func() error { 642 return checkEntryDisconnected(store, weB) 643 }) 644 // ensure the entry is disconnected 645 checkNonAutoRegisteredEntryOrFail(t, store, weB, "") 646 // reconnect 647 origConnTime = time.Now() 648 c2.OnConnect(makeConn(p, origConnTime)) 649 // ensure the entry is connected to the new instance 650 checkNonAutoRegisteredEntryOrFail(t, store, weB, c2.instanceID) 651 }) 652 t.Run("different instance: connect before disconnect ", func(t *testing.T) { 653 nextConnTime := origConnTime.Add(10 * time.Millisecond) 654 defer func() { 655 time.Sleep(time.Until(nextConnTime)) 656 origConnTime = nextConnTime 657 }() 658 // reconnect to the new instance 659 c2.OnConnect(makeConn(p, nextConnTime)) 660 // ensure the entry is connected to the new instance 661 checkNonAutoRegisteredEntryOrFail(t, store, weB, c2.instanceID) 662 // disconnect (associated with original connect, not the reconnect) 663 c2.OnDisconnect(makeConn(p, origConnTime)) 664 // ensure the entry is connected to the new instance 665 checkNonAutoRegisteredEntryOrFail(t, store, weB, c2.instanceID) 666 }) 667 }) 668 t.Run("disconnect for longer than grace period", func(t *testing.T) { 669 // report proxy is healthy 670 c2.QueueWorkloadEntryHealth(p, HealthEvent{ 671 Healthy: true, 672 }) 673 // ensure health condition has been updated 674 checkHealthOrFail(t, store, p, true) 675 // disconnect 676 c2.OnDisconnect(makeConn(p, origConnTime)) 677 // wait until WE get updated asynchronously 678 retry.UntilSuccessOrFail(t, func() error { 679 return checkEntryDisconnected(store, weB) 680 }) 681 // ensure the entry is disconnected 682 checkNonAutoRegisteredEntryOrFail(t, store, weB, "") 683 // ensure health condition is removed after the grace period is over 684 retry.UntilSuccessOrFail(t, func() error { 685 return checkNoEntryHealth(store, p) 686 }, retry.Timeout(time.Until(time.Now().Add(21*features.WorkloadEntryCleanupGracePeriod)))) 687 }) 688 } 689 690 func TestNonAutoregisteredWorkloads_SuitableForHealthChecks_ShouldUpdateHealthCondition(t *testing.T) { 691 c1, c2, store := setup(t) 692 createOrFail(t, store, weB) 693 694 stop := test.NewStop(t) 695 696 go c1.Run(stop) 697 go c2.Run(stop) 698 699 p := fakeProxySuitableForHealthChecks(weB) 700 701 c1.OnConnect(makeConn(p, time.Now())) 702 703 t.Run("healthy", func(t *testing.T) { 704 // report workload is healthy 705 c1.QueueWorkloadEntryHealth(p, HealthEvent{ 706 Healthy: true, 707 }) 708 // ensure health condition has been updated 709 checkHealthOrFail(t, store, p, true) 710 }) 711 t.Run("unhealthy", func(t *testing.T) { 712 // report workload is unhealthy 713 c1.QueueWorkloadEntryHealth(p, HealthEvent{ 714 Healthy: false, 715 Message: "lol health bad", 716 }) 717 // ensure health condition has been updated 718 checkHealthOrFail(t, store, p, false) 719 }) 720 } 721 722 func setup(t *testing.T) (*Controller, *Controller, model.ConfigStoreController) { 723 store := memory.NewController(memory.Make(collections.All)) 724 c1 := NewController(store, "pilot-1", time.Duration(math.MaxInt64)) 725 c2 := NewController(store, "pilot-2", time.Duration(math.MaxInt64)) 726 createOrFail(t, store, wgA) 727 createOrFail(t, store, wgAWrongNs) 728 createOrFail(t, store, wgWithoutSA) 729 return c1, c2, store 730 } 731 732 func checkNoEntry(store model.ConfigStoreController, wg config.Config, proxy *model.Proxy) error { 733 name := wg.Name + "-" + proxy.IPAddresses[0] 734 if proxy.Metadata.Network != "" { 735 name += "-" + string(proxy.Metadata.Network) 736 } 737 738 cfg := store.Get(gvk.WorkloadEntry, name, wg.Namespace) 739 if cfg != nil { 740 return fmt.Errorf("did not expect WorkloadEntry %s/%s to exist", wg.Namespace, name) 741 } 742 return nil 743 } 744 745 func checkEntry( 746 store model.ConfigStore, 747 wg config.Config, 748 proxy *model.Proxy, 749 node *core.Node, 750 connectedTo string, 751 ) (err error) { 752 name := wg.Name + "-" + proxy.IPAddresses[0] 753 if proxy.Metadata.Network != "" { 754 name += "-" + string(proxy.Metadata.Network) 755 } 756 757 cfg := store.Get(gvk.WorkloadEntry, name, wg.Namespace) 758 if cfg == nil { 759 err = multierror.Append(fmt.Errorf("expected WorkloadEntry %s/%s to exist", wg.Namespace, name)) 760 return 761 } 762 tmpl := wg.Spec.(*v1alpha3.WorkloadGroup) 763 we := cfg.Spec.(*v1alpha3.WorkloadEntry) 764 765 // check workload entry specific fields 766 if !reflect.DeepEqual(we.Ports, tmpl.Template.Ports) { 767 err = multierror.Append(err, fmt.Errorf("expected ports from WorkloadGroup")) 768 } 769 if we.Address != proxy.IPAddresses[0] { 770 err = multierror.Append(fmt.Errorf("entry has address %s; expected %s", we.Address, proxy.IPAddresses[0])) 771 } 772 773 if proxy.Metadata.Network != "" { 774 if we.Network != string(proxy.Metadata.Network) { 775 err = multierror.Append(fmt.Errorf("entry has network %s; expected to match meta network %s", we.Network, proxy.Metadata.Network)) 776 } 777 } else { 778 if we.Network != tmpl.Template.Network { 779 err = multierror.Append(fmt.Errorf("entry has network %s; expected to match group template network %s", we.Network, tmpl.Template.Network)) 780 } 781 } 782 783 loc := tmpl.Template.Locality 784 if node.Locality != nil { 785 loc = util.LocalityToString(node.Locality) 786 } 787 if we.Locality != loc { 788 err = multierror.Append(fmt.Errorf("entry has locality %s; expected %s", we.Locality, loc)) 789 } 790 791 // check controller annotations 792 if connectedTo != "" { 793 if v := cfg.Annotations[annotation.IoIstioWorkloadController.Name]; v != connectedTo { 794 err = multierror.Append(err, fmt.Errorf("expected WorkloadEntry to be updated by %s; got %s", connectedTo, v)) 795 } 796 if _, ok := cfg.Annotations[annotation.IoIstioConnectedAt.Name]; !ok { 797 err = multierror.Append(err, fmt.Errorf("expected connection timestamp to be set")) 798 } 799 } else if _, ok := cfg.Annotations[annotation.IoIstioDisconnectedAt.Name]; !ok { 800 err = multierror.Append(err, fmt.Errorf("expected disconnection timestamp to be set")) 801 } 802 803 // check all labels are copied to the WorkloadEntry 804 if !reflect.DeepEqual(cfg.Labels, we.Labels) { 805 err = multierror.Append(err, fmt.Errorf("spec labels on WorkloadEntry should match meta labels")) 806 } 807 for k, v := range tmpl.Template.Labels { 808 if _, ok := proxy.Labels[k]; ok { 809 // would be overwritten 810 continue 811 } 812 if we.Labels[k] != v { 813 err = multierror.Append(err, fmt.Errorf("labels missing on WorkloadEntry: %s=%s from template", k, v)) 814 } 815 } 816 for k, v := range proxy.Labels { 817 if we.Labels[k] != v { 818 err = multierror.Append(err, fmt.Errorf("labels missing on WorkloadEntry: %s=%s from proxy meta", k, v)) 819 } 820 } 821 return 822 } 823 824 func checkEntryOrFail( 825 t test.Failer, 826 store model.ConfigStoreController, 827 wg config.Config, 828 proxy *model.Proxy, 829 node *core.Node, 830 connectedTo string, 831 ) { 832 if err := checkEntry(store, wg, proxy, node, connectedTo); err != nil { 833 t.Fatal(err) 834 } 835 } 836 837 func checkEntryOrFailAfter( 838 t test.Failer, 839 store model.ConfigStoreController, 840 wg config.Config, 841 proxy *model.Proxy, 842 node *core.Node, 843 connectedTo string, 844 after time.Duration, 845 ) { 846 time.Sleep(after) 847 checkEntryOrFail(t, store, wg, proxy, node, connectedTo) 848 } 849 850 func checkNoEntryOrFail( 851 t test.Failer, 852 store model.ConfigStoreController, 853 wg config.Config, 854 proxy *model.Proxy, 855 ) { 856 name := wg.Name + "-" + proxy.IPAddresses[0] 857 if proxy.Metadata.Network != "" { 858 name += "-" + string(proxy.Metadata.Network) 859 } 860 861 cfg := store.Get(gvk.WorkloadEntry, name, wg.Namespace) 862 if cfg != nil { 863 t.Fatalf("workload entry found when it was not expected") 864 } 865 } 866 867 func checkNoEntryHealth(store model.ConfigStoreController, proxy *model.Proxy) error { 868 name, _ := proxy.WorkloadEntry() 869 cfg := store.Get(gvk.WorkloadEntry, name, proxy.Metadata.Namespace) 870 if cfg == nil { 871 return fmt.Errorf("expected WorkloadEntry %s/%s to exist", proxy.Metadata.Namespace, name) 872 } 873 if cfg.Status == nil { 874 return nil 875 } 876 s := cfg.Status.(*v1alpha1.IstioStatus) 877 if status.GetCondition(s.Conditions, "Healthy") != nil { 878 return fmt.Errorf("expected WorkloadEntry %s/%s not to have %q condition", 879 proxy.Metadata.Namespace, name, "Healthy") 880 } 881 return nil 882 } 883 884 func checkEntryHealth(store model.ConfigStoreController, proxy *model.Proxy, healthy bool) (err error) { 885 name, _ := proxy.WorkloadEntry() 886 cfg := store.Get(gvk.WorkloadEntry, name, proxy.Metadata.Namespace) 887 if cfg == nil || cfg.Status == nil { 888 err = multierror.Append(fmt.Errorf("expected workloadEntry %s/%s to exist", name, proxy.Metadata.Namespace)) 889 return 890 } 891 stat := cfg.Status.(*v1alpha1.IstioStatus) 892 found := false 893 idx := 0 894 for i, cond := range stat.Conditions { 895 if cond.Type == "Healthy" { 896 idx = i 897 found = true 898 } 899 } 900 if !found { 901 err = multierror.Append(err, fmt.Errorf("expected condition of type Health on WorkloadEntry %s/%s", 902 name, proxy.Metadata.Namespace)) 903 } else { 904 statStr := stat.Conditions[idx].Status 905 if healthy && statStr != "True" { 906 err = multierror.Append(err, fmt.Errorf("expected healthy condition on WorkloadEntry %s/%s", 907 name, proxy.Metadata.Namespace)) 908 } 909 if !healthy && statStr != "False" { 910 err = multierror.Append(err, fmt.Errorf("expected unhealthy condition on WorkloadEntry %s/%s", 911 name, proxy.Metadata.Namespace)) 912 } 913 } 914 return 915 } 916 917 func checkHealthOrFail(t test.Failer, store model.ConfigStoreController, proxy *model.Proxy, healthy bool) { 918 retry.UntilSuccessOrFail(t, func() error { 919 return checkEntryHealth(store, proxy, healthy) 920 }) 921 } 922 923 func checkEntryDisconnected(store model.ConfigStoreController, we config.Config) error { 924 cfg := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace) 925 if cfg == nil { 926 return fmt.Errorf("expected WorkloadEntry %s/%s to exist", we.Namespace, we.Name) 927 } 928 if _, ok := cfg.Annotations[annotation.IoIstioDisconnectedAt.Name]; !ok { 929 return fmt.Errorf("expected disconnection timestamp to be set on WorkloadEntry %s/%s: %#v", we.Namespace, we.Name, cfg) 930 } 931 return nil 932 } 933 934 func checkNonAutoRegisteredEntryOrFail(t test.Failer, store model.ConfigStoreController, we config.Config, connectedTo string) { 935 t.Helper() 936 937 cfg := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace) 938 if cfg == nil { 939 t.Fatalf("expected WorkloadEntry %s/%s to exist", we.Namespace, we.Name) 940 } 941 942 // check controller annotations 943 if connectedTo != "" { 944 if v := cfg.Annotations[annotation.IoIstioWorkloadController.Name]; v != connectedTo { 945 t.Fatalf("expected WorkloadEntry to be updated by %s; got %s", connectedTo, v) 946 } 947 if _, ok := cfg.Annotations[annotation.IoIstioConnectedAt.Name]; !ok { 948 t.Fatalf("expected connection timestamp to be set") 949 } 950 } else if _, ok := cfg.Annotations[annotation.IoIstioDisconnectedAt.Name]; !ok { 951 t.Fatalf("expected disconnection timestamp to be set") 952 } 953 } 954 955 func fakeProxy(ip string, wg config.Config, nw network.ID, sa string) *model.Proxy { 956 var id *spiffe.Identity 957 if wg.Namespace != "" && sa != "" { 958 id = &spiffe.Identity{Namespace: wg.Namespace, ServiceAccount: sa} 959 } 960 return &model.Proxy{ 961 IPAddresses: []string{ip}, 962 Labels: map[string]string{"merge": "me"}, 963 VerifiedIdentity: id, 964 Metadata: &model.NodeMetadata{ 965 AutoRegisterGroup: wg.Name, 966 Namespace: wg.Namespace, 967 Network: nw, 968 Labels: map[string]string{"merge": "me"}, 969 }, 970 } 971 } 972 973 func fakeProxySuitableForHealthChecks(wle config.Config) *model.Proxy { 974 wleSpec := wle.Spec.(*v1alpha3.WorkloadEntry) 975 return &model.Proxy{ 976 ID: wle.Name + "." + wle.Namespace, 977 IPAddresses: []string{wleSpec.Address}, 978 VerifiedIdentity: &spiffe.Identity{Namespace: wle.Namespace, ServiceAccount: "my-sa"}, 979 Metadata: &model.NodeMetadata{ 980 Namespace: wle.Namespace, 981 Network: network.ID(wleSpec.Network), 982 ProxyConfig: &model.NodeMetaProxyConfig{ 983 ReadinessProbe: &v1alpha3.ReadinessProbe{}, 984 }, 985 WorkloadEntry: wle.Name, // indicate a name of the WorkloadEntry this proxy corresponds to 986 }, 987 } 988 } 989 990 func fakeNode(r, z, sz string) *core.Node { 991 return &core.Node{ 992 Locality: &core.Locality{ 993 Region: r, 994 Zone: z, 995 SubZone: sz, 996 }, 997 } 998 } 999 1000 // createOrFail wraps config creation with convenience for failing tests 1001 func createOrFail(t test.Failer, store model.ConfigStoreController, cfg config.Config) { 1002 if _, err := store.Create(cfg); err != nil { 1003 t.Fatalf("failed creating %s/%s: %v", cfg.Namespace, cfg.Name, err) 1004 } 1005 }