istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/autoregistration/controller_test.go

// Copyright Istio Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package autoregistration

import (
	"fmt"
	"math"
	"reflect"
	"strings"
	"sync"
	"testing"
	"time"

	core "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
	"github.com/google/go-cmp/cmp"
	"github.com/hashicorp/go-multierror"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	kubetypes "k8s.io/apimachinery/pkg/types"

	"istio.io/api/annotation"
	"istio.io/api/meta/v1alpha1"
	"istio.io/api/networking/v1alpha3"
	"istio.io/istio/pilot/pkg/config/memory"
	"istio.io/istio/pilot/pkg/features"
	"istio.io/istio/pilot/pkg/model"
	"istio.io/istio/pilot/pkg/model/status"
	"istio.io/istio/pilot/pkg/networking/util"
	"istio.io/istio/pkg/config"
	"istio.io/istio/pkg/config/schema/collections"
	"istio.io/istio/pkg/config/schema/gvk"
	"istio.io/istio/pkg/keepalive"
	"istio.io/istio/pkg/network"
	"istio.io/istio/pkg/spiffe"
	"istio.io/istio/pkg/test"
	"istio.io/istio/pkg/test/util/assert"
	"istio.io/istio/pkg/test/util/retry"
)

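// init shortens the cleanup grace period so the garbage-collection tests below finish quickly.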
func init() {
	features.WorkloadEntryCleanupGracePeriod = 50 * time.Millisecond
}

var _ connection = &fakeConn{}

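// fakeConn is a test double for the controller's connection interface. It
// records the proxy, the connection time, and whether Stop has been called.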
type fakeConn struct {
	sync.RWMutex
	proxy    *model.Proxy
	connTime time.Time
	stopped  bool
}

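// makeConn builds a fakeConn for the given proxy with the given connection time.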
func makeConn(proxy *model.Proxy, connTime time.Time) *fakeConn {
	return &fakeConn{proxy: proxy, connTime: connTime}
}

func (f *fakeConn) ID() string {
	return fmt.Sprintf("%s-%v", f.proxy.IPAddresses[0], f.connTime)
}

func (f *fakeConn) Proxy() *model.Proxy {
	return f.proxy
}

func (f *fakeConn) ConnectedAt() time.Time {
	return f.connTime
}

func (f *fakeConn) Stop() {
	f.Lock()
	defer f.Unlock()
	f.stopped = true
}

func (f *fakeConn) Stopped() bool {
	f.RLock()
	defer f.RUnlock()
	return f.stopped
}

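// Test fixtures: wgA is a WorkloadGroup with a service account, wgAWrongNs is
// the same group in the wrong namespace, wgWithoutSA omits the service
// account, and weB is a manually created WorkloadEntry with health checks
// enabled via the "proxy.istio.io/health-checks-enabled" annotation.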
var (
	tmplA = &v1alpha3.WorkloadGroup{
		Template: &v1alpha3.WorkloadEntry{
			Ports:          map[string]uint32{"http": 80},
			Labels:         map[string]string{"app": "a"},
			Network:        "nw0",
			Locality:       "reg0/zone0/subzone0",
			Weight:         1,
			ServiceAccount: "sa-a",
		},
	}
	wgA = config.Config{
		Meta: config.Meta{
			GroupVersionKind: gvk.WorkloadGroup,
			Namespace:        "a",
			Name:             "wg-a",
			Labels: map[string]string{
				"grouplabel": "notonentry",
			},
		},
		Spec:   tmplA,
		Status: nil,
	}
	wgAWrongNs = config.Config{
		Meta: config.Meta{
			GroupVersionKind: gvk.WorkloadGroup,
			Namespace:        "wrong",
			Name:             "wg-a",
			Labels: map[string]string{
				"grouplabel": "notonentry",
			},
		},
		Spec:   tmplA,
		Status: nil,
	}
	wgWithoutSA = config.Config{
		Meta: config.Meta{
			GroupVersionKind: gvk.WorkloadGroup,
			Namespace:        "a",
			Name:             "wg-b",
			Labels: map[string]string{
				"grouplabel": "notonentry",
			},
		},
		Spec: &v1alpha3.WorkloadGroup{
			Template: &v1alpha3.WorkloadEntry{
				Ports:          map[string]uint32{"http": 80},
				Labels:         map[string]string{"app": "a"},
				Network:        "nw0",
				Locality:       "reg0/zone0/subzone0",
				Weight:         1,
				ServiceAccount: "",
			},
		},
		Status: nil,
	}
	weB = config.Config{
		Meta: config.Meta{
			GroupVersionKind: gvk.WorkloadEntry,
			Namespace:        "b",
			Name:             "we-without-auto-registration",
			Annotations: map[string]string{
				"proxy.istio.io/health-checks-enabled": "true",
			},
		},
		Spec: &v1alpha3.WorkloadEntry{
			Address: "10.0.0.1",
			Network: "nw0",
		},
		Status: nil,
	}
)

func TestNonAutoregisteredWorkloads(t *testing.T) {
	store := memory.NewController(memory.Make(collections.All))
	c := NewController(store, "", time.Duration(math.MaxInt64))
	createOrFail(t, store, wgA)
	stop := test.NewStop(t)
	go c.Run(stop)

	cases := map[string]*model.Proxy{
		"missing group":      {IPAddresses: []string{"1.2.3.4"}, Metadata: &model.NodeMetadata{Namespace: wgA.Namespace}},
		"missing ip":         {Metadata: &model.NodeMetadata{Namespace: wgA.Namespace, AutoRegisterGroup: wgA.Name}},
		"missing namespace":  {IPAddresses: []string{"1.2.3.4"}, Metadata: &model.NodeMetadata{AutoRegisterGroup: wgA.Name}},
		"non-existent group": {IPAddresses: []string{"1.2.3.4"}, Metadata: &model.NodeMetadata{Namespace: wgA.Namespace, AutoRegisterGroup: "dne"}},
	}

	for name, tc := range cases {
		tc := tc
		t.Run(name, func(t *testing.T) {
			c.OnConnect(makeConn(tc, time.Now()))
			items := store.List(gvk.WorkloadEntry, model.NamespaceAll)
			if len(items) != 0 {
				t.Fatalf("expected 0 WorkloadEntries, got %d", len(items))
			}
		})
	}
}

func TestAutoregistrationLifecycle(t *testing.T) {
	maxConnAge := time.Hour
	c1, c2, store := setup(t)
	c2.maxConnectionAge = maxConnAge
	stopped1 := false
	stop1, stop2 := make(chan struct{}), make(chan struct{})
	defer func() {
		// stop1 is closed early as part of the test
		if !stopped1 {
			close(stop1)
		}
	}()
	defer close(stop2)
	go c1.Run(stop1)
	go c2.Run(stop2)
	go store.Run(stop2)

	n := fakeNode("reg1", "zone1", "subzone1")

	var p1conn1, p1conn2 *fakeConn
	p := fakeProxy("1.2.3.4", wgA, "nw1", "sa-a")
	p.Locality = n.Locality

	var p2conn1 *fakeConn
	p2 := fakeProxy("1.2.3.4", wgA, "nw2", "sa-a")
	p2.Locality = n.Locality

	var p3conn1 *fakeConn
	p3 := fakeProxy("1.2.3.5", wgA, "nw1", "sa-a")
	p3.Locality = n.Locality

	t.Run("initial registration", func(t *testing.T) {
		// simply make sure the entry exists after connecting
		p1conn1 = makeConn(p, time.Now())
		c1.OnConnect(p1conn1)
		checkEntryOrFail(t, store, wgA, p, n, c1.instanceID)
	})
	t.Run("multinetwork same ip", func(t *testing.T) {
		// make sure we don't overwrite a similar entry for a different network
		p2conn1 = makeConn(p2, time.Now())
		c2.OnConnect(p2conn1)
		checkEntryOrFail(t, store, wgA, p, n, c1.instanceID)
		checkEntryOrFail(t, store, wgA, p2, n, c2.instanceID)
		c2.OnDisconnect(p2conn1) // cleanup for future tests
	})
	t.Run("fast reconnect", func(t *testing.T) {
		t.Run("same instance", func(t *testing.T) {
			// disconnect, make sure entry is still there with disconnect meta
			c1.OnDisconnect(p1conn1)
			checkEntryOrFailAfter(t, store, wgA, p, n, "", features.WorkloadEntryCleanupGracePeriod/2)
			// reconnect, ensure entry is there with the same instance id
			p1conn1 = makeConn(p, time.Now())
			c1.OnConnect(p1conn1)
			checkEntryOrFail(t, store, wgA, p, n, c1.instanceID)
		})
		t.Run("same instance: connect before disconnect", func(t *testing.T) {
			// reconnect, ensure entry is there with the same instance id
			p1conn2 = makeConn(p, p1conn1.ConnectedAt().Add(10*time.Millisecond))
			c1.OnConnect(p1conn2)
			// disconnect (associated with original connect, not the reconnect)
			// make sure entry is still there with disconnect meta
			c1.OnDisconnect(p1conn1)
			checkEntryOrFailAfter(t, store, wgA, p, n, c1.instanceID, features.WorkloadEntryCleanupGracePeriod/2)
		})
		t.Run("different instance", func(t *testing.T) {
			// disconnect, make sure entry is still there with disconnect metadata
			c1.OnDisconnect(p1conn2)
			checkEntryOrFailAfter(t, store, wgA, p, n, "", features.WorkloadEntryCleanupGracePeriod/2)
			// reconnect, ensure entry is there with the new instance id
			p1conn1 = makeConn(p, time.Now())
			c2.OnConnect(p1conn1)
			checkEntryOrFail(t, store, wgA, p, n, c2.instanceID)
		})
	})
	t.Run("slow reconnect", func(t *testing.T) {
		// disconnect, wait and make sure entry is gone
		c2.OnDisconnect(p1conn1)
		retry.UntilSuccessOrFail(t, func() error {
			return checkNoEntry(store, wgA, p)
		})
		// reconnect
		p1conn1 = makeConn(p, time.Now())
		c1.OnConnect(p1conn1)
		checkEntryOrFail(t, store, wgA, p, n, c1.instanceID)
	})
	t.Run("garbage collected if pilot stops after disconnect", func(t *testing.T) {
		// disconnect, kill the cleanup queue from the first controller
		c1.OnDisconnect(p1conn1)
		// stop processing the delayed close queue in c1, forcing the periodic cleanup to take over
		close(stop1)
		stopped1 = true
		// unfortunately, this retry at worst could be twice as long as the sweep interval
		retry.UntilSuccessOrFail(t, func() error {
			return checkNoEntry(store, wgA, p)
		}, retry.Timeout(time.Until(time.Now().Add(21*features.WorkloadEntryCleanupGracePeriod))))
	})

	t.Run("garbage collected if pilot and workload stop simultaneously before pilot can do anything", func(t *testing.T) {
		// simulate p3 having been registered long ago
		p3conn1 = makeConn(p3, time.Now().Add(-2*maxConnAge))
		c2.OnConnect(p3conn1)

		// stay silent (no OnDisconnect) to simulate the pilot being down

		// unfortunately, this retry at worst could be twice as long as the sweep interval
		retry.UntilSuccessOrFail(t, func() error {
			return checkNoEntry(store, wgA, p3)
		}, retry.Timeout(time.Until(time.Now().Add(21*features.WorkloadEntryCleanupGracePeriod))))

		c2.OnDisconnect(p3conn1) // clean up the state for future tests
	})
	t.Run("workload group recreate", func(t *testing.T) {
		p1conn1 = makeConn(p, time.Now())
		c2.OnConnect(p1conn1)
		checkEntryOrFail(t, store, wgA, p, n, c2.instanceID)

		name := strings.Join([]string{wgA.Name, p.IPAddresses[0], string(p.Metadata.Network)}, "-")
		if err := store.Delete(gvk.WorkloadGroup, wgA.Name, wgA.Namespace, nil); err != nil {
			t.Fatal(err)
		}
		if err := store.Delete(gvk.WorkloadEntry, name, wgA.Namespace, nil); err != nil {
			t.Fatal(err)
		}
		createOrFail(t, store, wgA)

		retry.UntilSuccessOrFail(t, func() error {
			return checkEntry(store, wgA, p, n, c2.instanceID)
		})
	})
	c2.OnDisconnect(p1conn1) // clean up the state for future tests
	t.Run("unverified client", func(t *testing.T) {
		p := fakeProxy("1.2.3.6", wgA, "nw1", "")

		// Should fail
		assert.Error(t, c1.OnConnect(makeConn(p, time.Now())))
		checkNoEntryOrFail(t, store, wgA, p)
	})
	t.Run("wrong SA client", func(t *testing.T) {
		p := fakeProxy("1.2.3.6", wgA, "nw1", "wrong")

		// Should fail
		assert.Error(t, c1.OnConnect(makeConn(p, time.Now())))
		checkNoEntryOrFail(t, store, wgA, p)
	})
	t.Run("wrong NS client", func(t *testing.T) {
		p := fakeProxy("1.2.3.6", wgA, "nw1", "sa-a")
		p.Metadata.Namespace = "wrong"

		// Should fail
		assert.Error(t, c1.OnConnect(makeConn(p, time.Now())))
		checkNoEntryOrFail(t, store, wgA, p)
	})
	t.Run("no SA WG", func(t *testing.T) {
		p := fakeProxy("1.2.3.6", wgWithoutSA, "nw1", "sa-a")
		n := fakeNode("reg0", "zone0", "subzone0")
		p.Locality = n.Locality

		// Should not fail
		assert.NoError(t, c1.OnConnect(makeConn(p, time.Now())))
		checkEntryOrFail(t, store, wgWithoutSA, p, n, c1.instanceID)
	})
	// TODO test garbage collection if pilot stops before disconnect meta is set (relies on heartbeat)
}

func TestAutoregistrationDisabled(t *testing.T) {
	test.SetForTest(t, &features.WorkloadEntryAutoRegistration, false)
	store := memory.NewController(memory.Make(collections.All))
	createOrFail(t, store, weB)

	stop := test.NewStop(t)

	c := NewController(store, "pilot-x", keepalive.Infinity)
	go c.Run(stop)

	t.Run("health check still works", func(t *testing.T) {
		proxy := fakeProxySuitableForHealthChecks(weB)

		err := c.OnConnect(makeConn(proxy, time.Now()))
		assert.NoError(t, err)
		// report workload is healthy
		c.QueueWorkloadEntryHealth(proxy, HealthEvent{
			Healthy: true,
		})
		// ensure health condition has been updated
		checkHealthOrFail(t, store, proxy, true)
	})
	t.Run("registration does nothing", func(t *testing.T) {
		p := fakeProxy("1.2.3.4", wgA, "litNw", "sa-a")
		assert.NoError(t, c.OnConnect(makeConn(p, time.Now())))
		checkNoEntryOrFail(t, store, wgA, p)
	})
}

func TestUpdateHealthCondition(t *testing.T) {
	stop := test.NewStop(t)
	ig, ig2, store := setup(t)
	go ig.Run(stop)
	go ig2.Run(stop)
	p := fakeProxy("1.2.3.4", wgA, "litNw", "sa-a")
	p.XdsNode = fakeNode("reg1", "zone1", "subzone1")
	ig.OnConnect(makeConn(p, time.Now()))
	t.Run("auto registered healthy health", func(t *testing.T) {
		ig.QueueWorkloadEntryHealth(p, HealthEvent{
			Healthy: true,
		})
		checkHealthOrFail(t, store, p, true)
	})
	t.Run("auto registered unhealthy health", func(t *testing.T) {
		ig.QueueWorkloadEntryHealth(p, HealthEvent{
			Healthy: false,
			Message: "lol health bad",
		})
		checkHealthOrFail(t, store, p, false)
	})
}

func TestWorkloadEntryFromGroup(t *testing.T) {
	group := config.Config{
		Meta: config.Meta{
			GroupVersionKind: gvk.WorkloadGroup,
			Namespace:        "a",
			Name:             "wg-a",
			Labels: map[string]string{
				"grouplabel": "notonentry",
			},
		},
		Spec: &v1alpha3.WorkloadGroup{
			Metadata: &v1alpha3.WorkloadGroup_ObjectMeta{
				Labels:      map[string]string{"foo": "bar"},
				Annotations: map[string]string{"foo": "bar"},
			},
			Template: &v1alpha3.WorkloadEntry{
				Ports:          map[string]uint32{"http": 80},
				Labels:         map[string]string{"app": "a"},
				Weight:         1,
				Network:        "nw0",
				Locality:       "rgn1/zone1/subzone1",
				ServiceAccount: "sa-a",
			},
		},
	}
	proxy := fakeProxy("10.0.0.1", group, "nw1", "sa")
	proxy.Labels[model.LocalityLabel] = "rgn2/zone2/subzone2"
	proxy.XdsNode = fakeNode("rgn2", "zone2", "subzone2")
	proxy.Locality = proxy.XdsNode.Locality

	wantLabels := map[string]string{
		"app":   "a",   // from WorkloadEntry template
		"foo":   "bar", // from WorkloadGroup.Metadata
		"merge": "me",  // from Node metadata
	}

	want := config.Config{
		Meta: config.Meta{
			GroupVersionKind: gvk.WorkloadEntry,
			Name:             "test-we",
			Namespace:        proxy.Metadata.Namespace,
			Labels:           wantLabels,
			Annotations: map[string]string{
				annotation.IoIstioAutoRegistrationGroup.Name: group.Name,
				"foo": "bar",
			},
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: group.GroupVersionKind.GroupVersion(),
				Kind:       group.GroupVersionKind.Kind,
				Name:       group.Name,
				UID:        kubetypes.UID(group.UID),
				Controller: &workloadGroupIsController,
			}},
		},
		Spec: &v1alpha3.WorkloadEntry{
			Address: "10.0.0.1",
			Ports: map[string]uint32{
				"http": 80,
			},
			Labels:         wantLabels,
			Network:        "nw1",
			Locality:       "rgn2/zone2/subzone2",
			Weight:         1,
			ServiceAccount: "sa-a",
		},
	}

	got := workloadEntryFromGroup("test-we", proxy, &group)
	assert.Equal(t, got, &want)
}

func TestNonAutoregisteredWorkloads_UnsuitableForHealthChecks_WorkloadEntryNotFound(t *testing.T) {
	store := memory.NewController(memory.Make(collections.All))
	createOrFail(t, store, weB)

	stop := test.NewStop(t)

	c := NewController(store, "pilot-x", keepalive.Infinity)
	go c.Run(stop)

	proxy := fakeProxySuitableForHealthChecks(weB)
	// point the proxy at a WorkloadEntry that does not exist
	proxy.Metadata.WorkloadEntry = "non-existing-workload-entry"

	err := c.OnConnect(makeConn(proxy, time.Now()))
	assert.Error(t, err)
}

func TestNonAutoregisteredWorkloads_UnsuitableForHealthChecks_ShouldNotBeTreatedAsConnected(t *testing.T) {
	cases := []struct {
		name  string
		we    func() config.Config
		proxy func(we config.Config) *model.Proxy
	}{
		{
			name: "when proxy.Metadata.WorkloadEntry is not set",
			we:   weB.DeepCopy,
			proxy: func(we config.Config) *model.Proxy {
				proxy := fakeProxySuitableForHealthChecks(we)
				// change proxy metadata to make it unsuitable for health checks
				proxy.Metadata.WorkloadEntry = ""
				return proxy
			},
		},
		{
			name: "when 'proxy.istio.io/health-checks-enabled' annotation is missing",
			we: func() config.Config {
				we := weB.DeepCopy()
				delete(we.Annotations, "proxy.istio.io/health-checks-enabled")
				return we
			},
			proxy: fakeProxySuitableForHealthChecks,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			we := tc.we()

			store := memory.NewController(memory.Make(collections.All))
			createOrFail(t, store, we)

			stop := test.NewStop(t)

			c := NewController(store, "pilot-x", keepalive.Infinity)
			go c.Run(stop)

			proxy := tc.proxy(we)

			err := c.OnConnect(makeConn(proxy, time.Now()))
			assert.NoError(t, err)

			wle := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace)
			if wle == nil {
				t.Fatalf("WorkloadEntry %s/%s must exist", we.Namespace, we.Name)
			}
			if diff := cmp.Diff(we.Annotations, wle.Annotations); diff != "" {
				t.Fatalf("WorkloadEntry should not have been changed: %v", diff)
			}
		})
	}
}

func TestNonAutoregisteredWorkloads_SuitableForHealthChecks_ShouldBeTreatedAsConnected(t *testing.T) {
	for _, value := range []string{"", "false", "true"} {
		name := fmt.Sprintf("when 'proxy.istio.io/health-checks-enabled' annotation has value %q", value)
		t.Run(name, func(t *testing.T) {
			we := weB.DeepCopy()
			we.Annotations["proxy.istio.io/health-checks-enabled"] = value

			store := memory.NewController(memory.Make(collections.All))
			createOrFail(t, store, we)

			stop := test.NewStop(t)

			c := NewController(store, "pilot-x", keepalive.Infinity)
			go c.Run(stop)

			proxy := fakeProxySuitableForHealthChecks(we)

			now := time.Now()

			err := c.OnConnect(makeConn(proxy, now))
			assert.NoError(t, err)

			wle := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace)
			if wle == nil {
				t.Fatalf("WorkloadEntry %s/%s must exist", we.Namespace, we.Name)
			}
			if diff := cmp.Diff("pilot-x", wle.Annotations[annotation.IoIstioWorkloadController.Name]); diff != "" {
				t.Fatalf("WorkloadEntry should have been annotated with %q: %v", annotation.IoIstioWorkloadController.Name, diff)
			}
			if diff := cmp.Diff(now.Format(time.RFC3339Nano), wle.Annotations[annotation.IoIstioConnectedAt.Name]); diff != "" {
				t.Fatalf("WorkloadEntry should have been annotated with %q: %v", annotation.IoIstioConnectedAt.Name, diff)
			}
		})
	}
}

func TestNonAutoregisteredWorkloads_SuitableForHealthChecks_ShouldSupportLifecycle(t *testing.T) {
	c1, c2, store := setup(t)
	createOrFail(t, store, weB)

	stop1, stop2 := test.NewStop(t), test.NewStop(t)

	go c1.Run(stop1)
	go c2.Run(stop2)

	p := fakeProxySuitableForHealthChecks(weB)

	// origConnTime allows associating an OnConnect call with its matching OnDisconnect
	var origConnTime time.Time

	t.Run("initial connect", func(t *testing.T) {
		// connect
		origConnTime = time.Now()
		c1.OnConnect(makeConn(p, origConnTime))
		// ensure the entry is connected
		checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID)
	})
	t.Run("reconnect", func(t *testing.T) {
		t.Run("same instance: disconnect then connect", func(t *testing.T) {
			// disconnect
			c1.OnDisconnect(makeConn(p, origConnTime))
			// wait until the WorkloadEntry is updated asynchronously
			retry.UntilSuccessOrFail(t, func() error {
				return checkEntryDisconnected(store, weB)
			})
			// ensure the entry is disconnected
			checkNonAutoRegisteredEntryOrFail(t, store, weB, "")
			// reconnect
			origConnTime = time.Now()
			c1.OnConnect(makeConn(p, origConnTime))
			// ensure the entry is connected
			checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID)
		})
		t.Run("same instance: connect before disconnect", func(t *testing.T) {
			nextConnTime := origConnTime.Add(10 * time.Millisecond)
			defer func() {
				time.Sleep(time.Until(nextConnTime))
				origConnTime = nextConnTime
			}()
			// reconnect
			c1.OnConnect(makeConn(p, nextConnTime))
			// ensure the entry is connected
			checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID)
			// disconnect (associated with original connect, not the reconnect)
			c1.OnDisconnect(makeConn(p, origConnTime))
			// ensure the entry is still connected
			checkNonAutoRegisteredEntryOrFail(t, store, weB, c1.instanceID)
		})
		t.Run("different instance: disconnect then connect", func(t *testing.T) {
			// disconnect
			c1.OnDisconnect(makeConn(p, origConnTime))
			// wait until the WorkloadEntry is updated asynchronously
			retry.UntilSuccessOrFail(t, func() error {
				return checkEntryDisconnected(store, weB)
			})
			// ensure the entry is disconnected
			checkNonAutoRegisteredEntryOrFail(t, store, weB, "")
			// reconnect
			origConnTime = time.Now()
			c2.OnConnect(makeConn(p, origConnTime))
			// ensure the entry is connected to the new instance
			checkNonAutoRegisteredEntryOrFail(t, store, weB, c2.instanceID)
		})
		t.Run("different instance: connect before disconnect", func(t *testing.T) {
			nextConnTime := origConnTime.Add(10 * time.Millisecond)
			defer func() {
				time.Sleep(time.Until(nextConnTime))
				origConnTime = nextConnTime
			}()
			// reconnect to the new instance
			c2.OnConnect(makeConn(p, nextConnTime))
			// ensure the entry is connected to the new instance
			checkNonAutoRegisteredEntryOrFail(t, store, weB, c2.instanceID)
			// disconnect (associated with original connect, not the reconnect)
			c2.OnDisconnect(makeConn(p, origConnTime))
			// ensure the entry is still connected to the new instance
			checkNonAutoRegisteredEntryOrFail(t, store, weB, c2.instanceID)
		})
	})
	t.Run("disconnect for longer than grace period", func(t *testing.T) {
		// report proxy is healthy
		c2.QueueWorkloadEntryHealth(p, HealthEvent{
			Healthy: true,
		})
		// ensure health condition has been updated
		checkHealthOrFail(t, store, p, true)
		// disconnect
		c2.OnDisconnect(makeConn(p, origConnTime))
		// wait until the WorkloadEntry is updated asynchronously
		retry.UntilSuccessOrFail(t, func() error {
			return checkEntryDisconnected(store, weB)
		})
		// ensure the entry is disconnected
		checkNonAutoRegisteredEntryOrFail(t, store, weB, "")
		// ensure health condition is removed after the grace period is over
		retry.UntilSuccessOrFail(t, func() error {
			return checkNoEntryHealth(store, p)
		}, retry.Timeout(time.Until(time.Now().Add(21*features.WorkloadEntryCleanupGracePeriod))))
	})
}

func TestNonAutoregisteredWorkloads_SuitableForHealthChecks_ShouldUpdateHealthCondition(t *testing.T) {
	c1, c2, store := setup(t)
	createOrFail(t, store, weB)

	stop := test.NewStop(t)

	go c1.Run(stop)
	go c2.Run(stop)

	p := fakeProxySuitableForHealthChecks(weB)

	c1.OnConnect(makeConn(p, time.Now()))

	t.Run("healthy", func(t *testing.T) {
		// report workload is healthy
		c1.QueueWorkloadEntryHealth(p, HealthEvent{
			Healthy: true,
		})
		// ensure health condition has been updated
		checkHealthOrFail(t, store, p, true)
	})
	t.Run("unhealthy", func(t *testing.T) {
		// report workload is unhealthy
		c1.QueueWorkloadEntryHealth(p, HealthEvent{
			Healthy: false,
			Message: "lol health bad",
		})
		// ensure health condition has been updated
		checkHealthOrFail(t, store, p, false)
	})
}

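// setup creates two controllers (simulating two pilot instances) that share a
// single in-memory config store pre-populated with the test WorkloadGroups.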
func setup(t *testing.T) (*Controller, *Controller, model.ConfigStoreController) {
	store := memory.NewController(memory.Make(collections.All))
	c1 := NewController(store, "pilot-1", time.Duration(math.MaxInt64))
	c2 := NewController(store, "pilot-2", time.Duration(math.MaxInt64))
	createOrFail(t, store, wgA)
	createOrFail(t, store, wgAWrongNs)
	createOrFail(t, store, wgWithoutSA)
	return c1, c2, store
}

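// checkNoEntry returns an error if an auto-registered WorkloadEntry still
// exists for the given proxy and WorkloadGroup.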
func checkNoEntry(store model.ConfigStoreController, wg config.Config, proxy *model.Proxy) error {
	name := wg.Name + "-" + proxy.IPAddresses[0]
	if proxy.Metadata.Network != "" {
		name += "-" + string(proxy.Metadata.Network)
	}

	cfg := store.Get(gvk.WorkloadEntry, name, wg.Namespace)
	if cfg != nil {
		return fmt.Errorf("did not expect WorkloadEntry %s/%s to exist", wg.Namespace, name)
	}
	return nil
}

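// checkEntry verifies that the auto-registered WorkloadEntry for the proxy
// exists and that its spec, labels, and controller annotations match the
// WorkloadGroup template and the proxy metadata. connectedTo is the instance
// ID of the controller expected to own the entry; pass "" for a disconnected
// entry.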
func checkEntry(
	store model.ConfigStore,
	wg config.Config,
	proxy *model.Proxy,
	node *core.Node,
	connectedTo string,
) (err error) {
	name := wg.Name + "-" + proxy.IPAddresses[0]
	if proxy.Metadata.Network != "" {
		name += "-" + string(proxy.Metadata.Network)
	}

	cfg := store.Get(gvk.WorkloadEntry, name, wg.Namespace)
	if cfg == nil {
		err = multierror.Append(err, fmt.Errorf("expected WorkloadEntry %s/%s to exist", wg.Namespace, name))
		return
	}
	tmpl := wg.Spec.(*v1alpha3.WorkloadGroup)
	we := cfg.Spec.(*v1alpha3.WorkloadEntry)

	// check workload entry specific fields
	if !reflect.DeepEqual(we.Ports, tmpl.Template.Ports) {
		err = multierror.Append(err, fmt.Errorf("expected ports from WorkloadGroup"))
	}
	if we.Address != proxy.IPAddresses[0] {
		err = multierror.Append(err, fmt.Errorf("entry has address %s; expected %s", we.Address, proxy.IPAddresses[0]))
	}

	if proxy.Metadata.Network != "" {
		if we.Network != string(proxy.Metadata.Network) {
			err = multierror.Append(err, fmt.Errorf("entry has network %s; expected to match meta network %s", we.Network, proxy.Metadata.Network))
		}
	} else {
		if we.Network != tmpl.Template.Network {
			err = multierror.Append(err, fmt.Errorf("entry has network %s; expected to match group template network %s", we.Network, tmpl.Template.Network))
		}
	}

	loc := tmpl.Template.Locality
	if node.Locality != nil {
		loc = util.LocalityToString(node.Locality)
	}
	if we.Locality != loc {
		err = multierror.Append(err, fmt.Errorf("entry has locality %s; expected %s", we.Locality, loc))
	}

	// check controller annotations
	if connectedTo != "" {
		if v := cfg.Annotations[annotation.IoIstioWorkloadController.Name]; v != connectedTo {
			err = multierror.Append(err, fmt.Errorf("expected WorkloadEntry to be updated by %s; got %s", connectedTo, v))
		}
		if _, ok := cfg.Annotations[annotation.IoIstioConnectedAt.Name]; !ok {
			err = multierror.Append(err, fmt.Errorf("expected connection timestamp to be set"))
		}
	} else if _, ok := cfg.Annotations[annotation.IoIstioDisconnectedAt.Name]; !ok {
		err = multierror.Append(err, fmt.Errorf("expected disconnection timestamp to be set"))
	}

	// check all labels are copied to the WorkloadEntry
	if !reflect.DeepEqual(cfg.Labels, we.Labels) {
		err = multierror.Append(err, fmt.Errorf("spec labels on WorkloadEntry should match meta labels"))
	}
	for k, v := range tmpl.Template.Labels {
		if _, ok := proxy.Labels[k]; ok {
			// would be overwritten by the proxy-provided label
			continue
		}
		if we.Labels[k] != v {
			err = multierror.Append(err, fmt.Errorf("labels missing on WorkloadEntry: %s=%s from template", k, v))
		}
	}
	for k, v := range proxy.Labels {
		if we.Labels[k] != v {
			err = multierror.Append(err, fmt.Errorf("labels missing on WorkloadEntry: %s=%s from proxy meta", k, v))
		}
	}
	return
}

func checkEntryOrFail(
	t test.Failer,
	store model.ConfigStoreController,
	wg config.Config,
	proxy *model.Proxy,
	node *core.Node,
	connectedTo string,
) {
	if err := checkEntry(store, wg, proxy, node, connectedTo); err != nil {
		t.Fatal(err)
	}
}

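// checkEntryOrFailAfter waits for the given duration before running the entry
// check; useful for asserting state within the cleanup grace period.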
func checkEntryOrFailAfter(
	t test.Failer,
	store model.ConfigStoreController,
	wg config.Config,
	proxy *model.Proxy,
	node *core.Node,
	connectedTo string,
	after time.Duration,
) {
	time.Sleep(after)
	checkEntryOrFail(t, store, wg, proxy, node, connectedTo)
}

func checkNoEntryOrFail(
	t test.Failer,
	store model.ConfigStoreController,
	wg config.Config,
	proxy *model.Proxy,
) {
	name := wg.Name + "-" + proxy.IPAddresses[0]
	if proxy.Metadata.Network != "" {
		name += "-" + string(proxy.Metadata.Network)
	}

	cfg := store.Get(gvk.WorkloadEntry, name, wg.Namespace)
	if cfg != nil {
		t.Fatalf("workload entry found when it was not expected")
	}
}

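// checkNoEntryHealth verifies that the WorkloadEntry exists but carries no
// "Healthy" status condition.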
func checkNoEntryHealth(store model.ConfigStoreController, proxy *model.Proxy) error {
	name, _ := proxy.WorkloadEntry()
	cfg := store.Get(gvk.WorkloadEntry, name, proxy.Metadata.Namespace)
	if cfg == nil {
		return fmt.Errorf("expected WorkloadEntry %s/%s to exist", proxy.Metadata.Namespace, name)
	}
	if cfg.Status == nil {
		return nil
	}
	s := cfg.Status.(*v1alpha1.IstioStatus)
	if status.GetCondition(s.Conditions, "Healthy") != nil {
		return fmt.Errorf("expected WorkloadEntry %s/%s not to have %q condition",
			proxy.Metadata.Namespace, name, "Healthy")
	}
	return nil
}

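// checkEntryHealth verifies that the WorkloadEntry has a "Healthy" status
// condition whose value matches the expected health state.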
func checkEntryHealth(store model.ConfigStoreController, proxy *model.Proxy, healthy bool) (err error) {
	name, _ := proxy.WorkloadEntry()
	cfg := store.Get(gvk.WorkloadEntry, name, proxy.Metadata.Namespace)
	if cfg == nil || cfg.Status == nil {
		err = multierror.Append(err, fmt.Errorf("expected WorkloadEntry %s/%s to exist", name, proxy.Metadata.Namespace))
		return
	}
	stat := cfg.Status.(*v1alpha1.IstioStatus)
	found := false
	idx := 0
	for i, cond := range stat.Conditions {
		if cond.Type == "Healthy" {
			idx = i
			found = true
		}
	}
	if !found {
		err = multierror.Append(err, fmt.Errorf("expected condition of type Healthy on WorkloadEntry %s/%s",
			name, proxy.Metadata.Namespace))
	} else {
		statStr := stat.Conditions[idx].Status
		if healthy && statStr != "True" {
			err = multierror.Append(err, fmt.Errorf("expected healthy condition on WorkloadEntry %s/%s",
				name, proxy.Metadata.Namespace))
		}
		if !healthy && statStr != "False" {
			err = multierror.Append(err, fmt.Errorf("expected unhealthy condition on WorkloadEntry %s/%s",
				name, proxy.Metadata.Namespace))
		}
	}
	return
}

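// checkHealthOrFail retries the health-condition check until it succeeds or
// the test times out.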
func checkHealthOrFail(t test.Failer, store model.ConfigStoreController, proxy *model.Proxy, healthy bool) {
	retry.UntilSuccessOrFail(t, func() error {
		return checkEntryHealth(store, proxy, healthy)
	})
}

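// checkEntryDisconnected verifies that the WorkloadEntry exists and has been
// marked with a disconnection timestamp.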
func checkEntryDisconnected(store model.ConfigStoreController, we config.Config) error {
	cfg := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace)
	if cfg == nil {
		return fmt.Errorf("expected WorkloadEntry %s/%s to exist", we.Namespace, we.Name)
	}
	if _, ok := cfg.Annotations[annotation.IoIstioDisconnectedAt.Name]; !ok {
		return fmt.Errorf("expected disconnection timestamp to be set on WorkloadEntry %s/%s: %#v", we.Namespace, we.Name, cfg)
	}
	return nil
}

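// checkNonAutoRegisteredEntryOrFail verifies the controller annotations on a
// manually created (non auto-registered) WorkloadEntry; connectedTo is the
// expected controller instance ID, or "" for a disconnected entry.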
func checkNonAutoRegisteredEntryOrFail(t test.Failer, store model.ConfigStoreController, we config.Config, connectedTo string) {
	t.Helper()

	cfg := store.Get(gvk.WorkloadEntry, we.Name, we.Namespace)
	if cfg == nil {
		t.Fatalf("expected WorkloadEntry %s/%s to exist", we.Namespace, we.Name)
	}

	// check controller annotations
	if connectedTo != "" {
		if v := cfg.Annotations[annotation.IoIstioWorkloadController.Name]; v != connectedTo {
			t.Fatalf("expected WorkloadEntry to be updated by %s; got %s", connectedTo, v)
		}
		if _, ok := cfg.Annotations[annotation.IoIstioConnectedAt.Name]; !ok {
			t.Fatalf("expected connection timestamp to be set")
		}
	} else if _, ok := cfg.Annotations[annotation.IoIstioDisconnectedAt.Name]; !ok {
		t.Fatalf("expected disconnection timestamp to be set")
	}
}

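// fakeProxy builds a proxy that requests auto-registration into the given
// WorkloadGroup; a verified identity is attached only when both the group
// namespace and the service account are non-empty.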
func fakeProxy(ip string, wg config.Config, nw network.ID, sa string) *model.Proxy {
	var id *spiffe.Identity
	if wg.Namespace != "" && sa != "" {
		id = &spiffe.Identity{Namespace: wg.Namespace, ServiceAccount: sa}
	}
	return &model.Proxy{
		IPAddresses:      []string{ip},
		Labels:           map[string]string{"merge": "me"},
		VerifiedIdentity: id,
		Metadata: &model.NodeMetadata{
			AutoRegisterGroup: wg.Name,
			Namespace:         wg.Namespace,
			Network:           nw,
			Labels:            map[string]string{"merge": "me"},
		},
	}
}

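// fakeProxySuitableForHealthChecks builds a proxy that satisfies all
// requirements for health checking an existing WorkloadEntry: a verified
// identity, a readiness probe, and a reference to the WorkloadEntry by name.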
func fakeProxySuitableForHealthChecks(wle config.Config) *model.Proxy {
	wleSpec := wle.Spec.(*v1alpha3.WorkloadEntry)
	return &model.Proxy{
		ID:               wle.Name + "." + wle.Namespace,
		IPAddresses:      []string{wleSpec.Address},
		VerifiedIdentity: &spiffe.Identity{Namespace: wle.Namespace, ServiceAccount: "my-sa"},
		Metadata: &model.NodeMetadata{
			Namespace: wle.Namespace,
			Network:   network.ID(wleSpec.Network),
			ProxyConfig: &model.NodeMetaProxyConfig{
				ReadinessProbe: &v1alpha3.ReadinessProbe{},
			},
			WorkloadEntry: wle.Name, // indicate the name of the WorkloadEntry this proxy corresponds to
		},
	}
}

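// fakeNode builds an xDS node with the given region/zone/subzone locality.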
func fakeNode(r, z, sz string) *core.Node {
	return &core.Node{
		Locality: &core.Locality{
			Region:  r,
			Zone:    z,
			SubZone: sz,
		},
	}
}

// createOrFail creates the config in the store, failing the test on error
func createOrFail(t test.Failer, store model.ConfigStoreController, cfg config.Config) {
	if _, err := store.Create(cfg); err != nil {
		t.Fatalf("failed creating %s/%s: %v", cfg.Namespace, cfg.Name, err)
	}
}