istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tests/integration/pilot/mcs/discoverability/discoverability_test.go (about)

     1  //go:build integ
     2  // +build integ
     3  
     4  // Copyright Istio Authors
     5  //
     6  // Licensed under the Apache License, Version 2.0 (the "License");
     7  // you may not use this file except in compliance with the License.
     8  // You may obtain a copy of the License at
     9  //
    10  //     http://www.apache.org/licenses/LICENSE-2.0
    11  //
    12  // Unless required by applicable law or agreed to in writing, software
    13  // distributed under the License is distributed on an "AS IS" BASIS,
    14  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15  // See the License for the specific language governing permissions and
    16  // limitations under the License.
    17  
    18  package discoverability
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  	"sort"
    24  	"strings"
    25  	"sync"
    26  	"testing"
    27  	"time"
    28  
    29  	envoy_admin_v3 "github.com/envoyproxy/go-control-plane/envoy/admin/v3"
    30  	"golang.org/x/sync/errgroup"
    31  	corev1 "k8s.io/api/core/v1"
    32  	kerrors "k8s.io/apimachinery/pkg/api/errors"
    33  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    34  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    35  	"k8s.io/apimachinery/pkg/runtime"
    36  	"k8s.io/apimachinery/pkg/runtime/schema"
    37  	mcsapi "sigs.k8s.io/mcs-api/pkg/apis/v1alpha1"
    38  	"sigs.k8s.io/yaml"
    39  
    40  	"istio.io/api/annotation"
    41  	kube "istio.io/istio/pilot/pkg/serviceregistry/kube/controller"
    42  	"istio.io/istio/pkg/kube/mcs"
    43  	"istio.io/istio/pkg/test/framework"
    44  	"istio.io/istio/pkg/test/framework/components/cluster"
    45  	"istio.io/istio/pkg/test/framework/components/echo"
    46  	"istio.io/istio/pkg/test/framework/components/echo/check"
    47  	"istio.io/istio/pkg/test/framework/components/echo/echotest"
    48  	"istio.io/istio/pkg/test/framework/components/echo/match"
    49  	"istio.io/istio/pkg/test/framework/components/istio"
    50  	"istio.io/istio/pkg/test/framework/label"
    51  	"istio.io/istio/pkg/test/framework/resource"
    52  	"istio.io/istio/pkg/test/scopes"
    53  	"istio.io/istio/pkg/test/util/retry"
    54  	"istio.io/istio/tests/integration/pilot/mcs/common"
    55  )
    56  
    57  type hostType string
    58  
    59  func (ht hostType) String() string {
    60  	return string(ht)
    61  }
    62  
    63  const (
    64  	hostTypeClusterLocal    hostType = "cluster.local"
    65  	hostTypeClusterSetLocal hostType = "clusterset.local"
    66  )
    67  
    68  var (
    69  	i     istio.Instance
    70  	echos common.EchoDeployment
    71  
    72  	retryTimeout = retry.Timeout(1 * time.Minute)
    73  	retryDelay   = retry.Delay(500 * time.Millisecond)
    74  
    75  	hostTypes = []hostType{hostTypeClusterSetLocal, hostTypeClusterLocal}
    76  
    77  	serviceA match.Matcher
    78  	serviceB match.Matcher
    79  )
    80  
    81  func TestMain(m *testing.M) {
    82  	// nolint: staticcheck
    83  	framework.
    84  		NewSuite(m).
    85  		Label(label.CustomSetup).
    86  		RequireMinVersion(17).
    87  		RequireMinClusters(2).
    88  		Setup(common.InstallMCSCRDs).
    89  		Setup(istio.Setup(&i, enableMCSServiceDiscovery)).
    90  		Setup(common.DeployEchosFunc("mcs", &echos)).
    91  		Run()
    92  }
    93  
    94  func TestClusterLocal(t *testing.T) {
    95  	framework.NewTest(t).
    96  		RequireIstioVersion("1.11").
    97  		Run(func(t framework.TestContext) {
    98  			serviceA = match.ServiceName(echo.NamespacedName{Name: common.ServiceA, Namespace: echos.Namespace})
    99  			serviceB = match.ServiceName(echo.NamespacedName{Name: common.ServiceB, Namespace: echos.Namespace})
   100  			// Don't export service B in any cluster. All requests should stay in-cluster.
   101  			for _, ht := range hostTypes {
   102  				t.NewSubTest(ht.String()).Run(func(t framework.TestContext) {
   103  					runForAllClusterCombinations(t, func(t framework.TestContext, from echo.Instance, to echo.Target) {
   104  						var checker echo.Checker
   105  						if ht == hostTypeClusterLocal {
   106  							// For calls to cluster.local, ensure that all requests stay in the same cluster
   107  							expectedClusters := cluster.Clusters{from.Config().Cluster}
   108  							checker = checkClustersReached(t.AllClusters(), expectedClusters)
   109  						} else {
   110  							// For calls to clusterset.local, we should fail DNS lookup. The clusterset.local host
   111  							// is only available for a service when it is exported in at least one cluster.
   112  							checker = checkDNSLookupFailed()
   113  						}
   114  						callAndValidate(t, ht, from, to, checker)
   115  					})
   116  				})
   117  			}
   118  		})
   119  }
   120  
   121  func TestMeshWide(t *testing.T) {
   122  	framework.NewTest(t).
   123  		Run(func(t framework.TestContext) {
   124  			// Export service B in all clusters.
   125  			createAndCleanupServiceExport(t, common.ServiceB, t.Clusters())
   126  			serviceA = match.ServiceName(echo.NamespacedName{Name: common.ServiceA, Namespace: echos.Namespace})
   127  			serviceB = match.ServiceName(echo.NamespacedName{Name: common.ServiceB, Namespace: echos.Namespace})
   128  
   129  			for _, ht := range hostTypes {
   130  				t.NewSubTest(ht.String()).Run(func(t framework.TestContext) {
   131  					runForAllClusterCombinations(t, func(t framework.TestContext, from echo.Instance, to echo.Target) {
   132  						var expectedClusters cluster.Clusters
   133  						if ht == hostTypeClusterLocal {
   134  							// Ensure that all requests to cluster.local stay in the same cluster
   135  							expectedClusters = cluster.Clusters{from.Config().Cluster}
   136  						} else {
   137  							// Ensure that requests to clusterset.local reach all destination clusters.
   138  							expectedClusters = to.Clusters()
   139  						}
   140  						callAndValidate(t, ht, from, to, checkClustersReached(t.AllClusters(), expectedClusters))
   141  					})
   142  				})
   143  			}
   144  		})
   145  }
   146  
   147  func TestServiceExportedInOneCluster(t *testing.T) {
   148  	framework.NewTest(t).
   149  		Run(func(t framework.TestContext) {
   150  			t.Skip("https://github.com/istio/istio/issues/34051")
   151  			// Get all the clusters where service B resides.
   152  			bClusters := serviceB.GetMatches(echos.Instances).Clusters()
   153  
   154  			// Test exporting service B exclusively in each cluster.
   155  			for _, exportCluster := range bClusters {
   156  				exportCluster := exportCluster
   157  				t.NewSubTestf("b exported in %s", exportCluster.StableName()).
   158  					Run(func(t framework.TestContext) {
   159  						// Export service B in the export cluster.
   160  						createAndCleanupServiceExport(t, common.ServiceB, cluster.Clusters{exportCluster})
   161  
   162  						for _, ht := range hostTypes {
   163  							t.NewSubTest(ht.String()).Run(func(t framework.TestContext) {
   164  								runForAllClusterCombinations(t, func(t framework.TestContext, from echo.Instance, to echo.Target) {
   165  									var expectedClusters cluster.Clusters
   166  									if ht == hostTypeClusterLocal {
   167  										// Ensure that all requests to cluster.local stay in the same cluster
   168  										expectedClusters = cluster.Clusters{from.Config().Cluster}
   169  									} else {
   170  										// Since we're exporting only the endpoints in the exportCluster, depending
   171  										// on where we call service B from, we'll reach a different set of endpoints.
   172  										// If we're calling from exportCluster, it will be the same as cluster-local
   173  										// (i.e. we'll only reach endpoints in exportCluster). From all other clusters,
   174  										// we should reach endpoints in that cluster AND exportCluster.
   175  										expectedClusters = cluster.Clusters{exportCluster}
   176  										if from.Config().Cluster.Name() != exportCluster.Name() {
   177  											expectedClusters = append(expectedClusters, from.Config().Cluster)
   178  										}
   179  									}
   180  									callAndValidate(t, ht, from, to, checkClustersReached(t.AllClusters(), expectedClusters))
   181  								})
   182  							})
   183  						}
   184  					})
   185  			}
   186  		})
   187  }
   188  
   189  func enableMCSServiceDiscovery(t resource.Context, cfg *istio.Config) {
   190  	cfg.ControlPlaneValues = fmt.Sprintf(`
   191  values:
   192    pilot:
   193      env:
   194        ENABLE_MCS_SERVICE_DISCOVERY: "true"
   195        ENABLE_MCS_HOST: "true"
   196        ENABLE_MCS_CLUSTER_LOCAL: "true"
   197        MCS_API_GROUP: %s
   198        MCS_API_VERSION: %s`,
   199  		common.KubeSettings(t).MCSAPIGroup,
   200  		common.KubeSettings(t).MCSAPIVersion)
   201  }
   202  
   203  func runForAllClusterCombinations(
   204  	t framework.TestContext,
   205  	fn func(t framework.TestContext, from echo.Instance, to echo.Target),
   206  ) {
   207  	t.Helper()
   208  	echotest.New(t, echos.Instances).
   209  		WithDefaultFilters(1, 1).
   210  		FromMatch(serviceA).
   211  		ToMatch(serviceB).
   212  		Run(fn)
   213  }
   214  
   215  func newServiceExport(service string, serviceExportGVR schema.GroupVersionResource) *mcsapi.ServiceExport {
   216  	return &mcsapi.ServiceExport{
   217  		TypeMeta: metav1.TypeMeta{
   218  			Kind:       "ServiceExport",
   219  			APIVersion: serviceExportGVR.GroupVersion().String(),
   220  		},
   221  		ObjectMeta: metav1.ObjectMeta{
   222  			Name:      service,
   223  			Namespace: echos.Namespace.Name(),
   224  		},
   225  	}
   226  }
   227  
   228  func checkClustersReached(allClusters cluster.Clusters, clusters cluster.Clusters) echo.Checker {
   229  	return check.And(
   230  		check.OK(),
   231  		check.ReachedClusters(allClusters, clusters))
   232  }
   233  
   234  func checkDNSLookupFailed() echo.Checker {
   235  	return check.And(
   236  		check.Error(),
   237  		func(_ echo.CallResult, err error) error {
   238  			if strings.Contains(err.Error(), "no such host") || strings.Contains(err.Error(), "server misbehaving") {
   239  				return nil
   240  			}
   241  			return err
   242  		})
   243  }
   244  
   245  func callAndValidate(t framework.TestContext, ht hostType, from echo.Instance, to echo.Target, checker echo.Checker) {
   246  	t.Helper()
   247  
   248  	var address string
   249  	if ht == hostTypeClusterSetLocal {
   250  		// Call the service using the MCS ClusterSet host.
   251  		address = to.Config().ClusterSetLocalFQDN()
   252  	} else {
   253  		address = to.Config().ClusterLocalFQDN()
   254  	}
   255  
   256  	_, err := from.Call(echo.CallOptions{
   257  		Address: address,
   258  		To:      to,
   259  		Port: echo.Port{
   260  			Name: "http",
   261  		},
   262  		Check: checker,
   263  		Retry: echo.Retry{
   264  			Options: []retry.Option{retryDelay, retryTimeout},
   265  		},
   266  	})
   267  	if err != nil {
   268  		t.Fatalf("failed calling host %s: %v\nCluster Details:\n%s", address, err,
   269  			getClusterDetailsYAML(t, address, from, to))
   270  	}
   271  }
   272  
   273  func getClusterDetailsYAML(t framework.TestContext, address string, from echo.Instance, to echo.Target) string {
   274  	// Add details about the configuration to the error message.
   275  	type IPs struct {
   276  		Cluster   string   `json:"cluster"`
   277  		TargetPod []string `json:"targetPod"`
   278  		Gateway   []string `json:"gateway"`
   279  	}
   280  
   281  	type Outbound struct {
   282  		ClusterName string                         `json:"clusterName"`
   283  		IP          string                         `json:"ip"`
   284  		Stats       []*envoy_admin_v3.SimpleMetric `json:"stats"`
   285  	}
   286  
   287  	type Details struct {
   288  		From     string     `json:"from"`
   289  		To       string     `json:"to"`
   290  		Outbound []Outbound `json:"outbound"`
   291  		IPs      []IPs      `json:"ips"`
   292  	}
   293  	details := Details{
   294  		From: from.Config().Cluster.Name(),
   295  		To:   address,
   296  	}
   297  
   298  	destName := to.Config().Service
   299  	destNS := to.Config().Namespace.Name()
   300  	istioNS := istio.GetOrFail(t, t).Settings().SystemNamespace
   301  
   302  	for _, c := range t.Clusters() {
   303  		info := IPs{
   304  			Cluster: c.StableName(),
   305  		}
   306  
   307  		// Get pod IPs for service B.
   308  		pods, err := c.PodsForSelector(context.TODO(), destNS, "app="+destName)
   309  		if err == nil {
   310  			for _, destPod := range pods.Items {
   311  				info.TargetPod = append(info.TargetPod, destPod.Status.PodIP)
   312  			}
   313  			sort.Strings(info.TargetPod)
   314  		}
   315  
   316  		// Get the East-West Gateway IP
   317  		svc, err := c.Kube().CoreV1().Services(istioNS).Get(context.TODO(), "istio-eastwestgateway", metav1.GetOptions{})
   318  		if err == nil {
   319  			var ips []string
   320  			for _, ingress := range svc.Status.LoadBalancer.Ingress {
   321  				ips = append(ips, ingress.IP)
   322  			}
   323  			info.Gateway = append(info.Gateway, ips...)
   324  		}
   325  
   326  		details.IPs = append(details.IPs, info)
   327  	}
   328  
   329  	// Populate the source Envoy's outbound clusters to the dest service.
   330  	srcWorkload := from.WorkloadsOrFail(t)[0]
   331  	envoyClusters, err := srcWorkload.Sidecar().Clusters()
   332  	if err == nil {
   333  		for _, hostName := range []string{to.Config().ClusterLocalFQDN(), to.Config().ClusterSetLocalFQDN()} {
   334  			clusterName := fmt.Sprintf("outbound|80||%s", hostName)
   335  
   336  			for _, status := range envoyClusters.GetClusterStatuses() {
   337  				if status.Name == clusterName {
   338  					for _, hostStatus := range status.GetHostStatuses() {
   339  						details.Outbound = append(details.Outbound, Outbound{
   340  							ClusterName: clusterName,
   341  							IP:          hostStatus.Address.GetSocketAddress().GetAddress(),
   342  							Stats:       hostStatus.Stats,
   343  						})
   344  					}
   345  				}
   346  			}
   347  		}
   348  	}
   349  
   350  	detailsYAML, err := yaml.Marshal(&details)
   351  	if err != nil {
   352  		return fmt.Sprintf("failed writing cluster details: %v", err)
   353  	}
   354  
   355  	return string(detailsYAML)
   356  }
   357  
   358  func createAndCleanupServiceExport(t framework.TestContext, service string, exportClusters cluster.Clusters) {
   359  	t.Helper()
   360  
   361  	start := time.Now()
   362  	scopes.Framework.Infof("=== BEGIN: Create ServiceExport%v ===", exportClusters.Names())
   363  
   364  	serviceExportGVR := common.KubeSettings(t).ServiceExportGVR()
   365  	serviceImportGVR := common.KubeSettings(t).ServiceImportGVR()
   366  
   367  	serviceExport := newServiceExport(service, serviceExportGVR)
   368  
   369  	u, err := runtime.DefaultUnstructuredConverter.ToUnstructured(serviceExport)
   370  	if err != nil {
   371  		t.Fatal(err)
   372  	}
   373  
   374  	// Create the ServiceExports in each cluster concurrently.
   375  	g := errgroup.Group{}
   376  	for _, c := range exportClusters {
   377  		c := c
   378  		g.Go(func() error {
   379  			_, err := c.Dynamic().Resource(serviceExportGVR).Namespace(echos.Namespace.Name()).Create(context.TODO(),
   380  				&unstructured.Unstructured{Object: u}, metav1.CreateOptions{})
   381  			if err != nil {
   382  				return fmt.Errorf("failed creating %s with name %s/%s in cluster %s: %v",
   383  					serviceExportGVR.String(), echos.Namespace, common.ServiceB, c.Name(), err)
   384  			}
   385  
   386  			return nil
   387  		})
   388  	}
   389  
   390  	// Now wait for ServiceImport to be created
   391  	serviceA = match.ServiceName(echo.NamespacedName{Name: common.ServiceA, Namespace: echos.Namespace})
   392  	importClusters := serviceA.GetMatches(echos.Instances).Clusters()
   393  	if common.IsMCSControllerEnabled(t) {
   394  		scopes.Framework.Infof("Waiting for the MCS Controller to create ServiceImport in each cluster")
   395  		for _, c := range importClusters {
   396  			c := c
   397  			serviceImports := c.Dynamic().Resource(serviceImportGVR).Namespace(echos.Namespace.Name())
   398  
   399  			g.Go(func() error {
   400  				return retry.UntilSuccess(func() error {
   401  					si, err := serviceImports.Get(context.TODO(), common.ServiceB, metav1.GetOptions{})
   402  					if err != nil {
   403  						return fmt.Errorf("failed waiting for ServiceImport %s/%s in cluster %s: %v",
   404  							echos.Namespace, common.ServiceB, c.Name(), err)
   405  					}
   406  
   407  					ips := kube.GetServiceImportIPs(si)
   408  					if len(ips) == 0 {
   409  						return fmt.Errorf("no ClusterSet IP for ServiceImport %s/%s in cluster %s",
   410  							echos.Namespace, common.ServiceB, c.Name())
   411  					}
   412  					return nil
   413  				}, retry.Timeout(5*time.Minute)) // GKE has a significant delay in creating ServiceImport.
   414  			})
   415  		}
   416  	} else {
   417  		scopes.Framework.Infof("No MCS Controller running. Manually creating ServiceImport in each cluster")
   418  		for _, c := range importClusters {
   419  			c := c
   420  			g.Go(func() error {
   421  				// Generate a dummy service in the cluster to reserve the ClusterSet VIP.
   422  				clusterSetIPSvc, err := genClusterSetIPService(c)
   423  				if err != nil {
   424  					return err
   425  				}
   426  
   427  				// Create a ServiceImport in the cluster with the ClusterSet VIP.
   428  				return createServiceImport(c, clusterSetIPSvc.Spec.ClusterIP, serviceImportGVR)
   429  			})
   430  		}
   431  	}
   432  
   433  	err = g.Wait()
   434  	status := "success"
   435  	if err != nil {
   436  		status = "failed"
   437  	}
   438  
   439  	end := time.Now()
   440  	scopes.Framework.Infof("=== DONE (%s): Create ServiceExport%v (%v) ===", status, exportClusters.Names(), end.Sub(start))
   441  	if err != nil {
   442  		t.Fatal(err)
   443  	}
   444  
   445  	// Add a cleanup that will delete the ServiceExports in each cluster concurrently.
   446  	t.Cleanup(func() {
   447  		wg := sync.WaitGroup{}
   448  		for _, c := range exportClusters {
   449  			c := c
   450  			wg.Add(1)
   451  			go func() {
   452  				defer wg.Done()
   453  
   454  				err := c.Dynamic().Resource(mcs.ServiceExportGVR).Namespace(echos.Namespace.Name()).Delete(context.TODO(),
   455  					serviceExport.Name, metav1.DeleteOptions{})
   456  				if err != nil && !kerrors.IsAlreadyExists(err) {
   457  					scopes.Framework.Warnf("failed deleting ServiceExport %s/%s in cluster %s: %v",
   458  						echos.Namespace, common.ServiceB, c.Name(), err)
   459  					return
   460  				}
   461  			}()
   462  		}
   463  
   464  		wg.Wait()
   465  	})
   466  }
   467  
   468  // genClusterSetIPService Generates a dummy service in order to allocate ClusterSet VIPs for
   469  // service B in the given cluster.
   470  func genClusterSetIPService(c cluster.Cluster) (*corev1.Service, error) {
   471  	// Get the definition for service B, so we can get the ports.
   472  	svc, err := c.Kube().CoreV1().Services(echos.Namespace.Name()).Get(context.TODO(), common.ServiceB, metav1.GetOptions{})
   473  	if err != nil {
   474  		return nil, err
   475  	}
   476  
   477  	dummySvcName := "clusterset-vip-" + common.ServiceB
   478  	dummySvc := &corev1.Service{
   479  		ObjectMeta: metav1.ObjectMeta{
   480  			Name:      dummySvcName,
   481  			Namespace: echos.Namespace.Name(),
   482  			Annotations: map[string]string{
   483  				// Export the service nowhere, so that no proxy will receive it or its VIP.
   484  				annotation.NetworkingExportTo.Name: "~",
   485  			},
   486  		},
   487  		Spec: corev1.ServiceSpec{
   488  			Type:  corev1.ServiceTypeClusterIP,
   489  			Ports: svc.Spec.Ports,
   490  		},
   491  	}
   492  
   493  	ns := echos.Namespace.Name()
   494  	if _, err := c.Kube().CoreV1().Services(ns).Create(context.TODO(), dummySvc, metav1.CreateOptions{}); err != nil && !kerrors.IsAlreadyExists(err) {
   495  		return nil, err
   496  	}
   497  
   498  	// Wait until a ClusterIP has been assigned.
   499  	dummySvc = nil
   500  	err = retry.UntilSuccess(func() error {
   501  		var err error
   502  		dummySvc, err = c.Kube().CoreV1().Services(echos.Namespace.Name()).Get(context.TODO(), dummySvcName, metav1.GetOptions{})
   503  		if err != nil {
   504  			return err
   505  		}
   506  		if len(svc.Spec.ClusterIP) == 0 {
   507  			return fmt.Errorf("clusterSet VIP not set for service %s/%s in cluster %s",
   508  				echos.Namespace, dummySvcName, c.Name())
   509  		}
   510  		return nil
   511  	}, retry.Timeout(10*time.Second))
   512  
   513  	return dummySvc, err
   514  }
   515  
   516  func createServiceImport(c cluster.Cluster, vip string, serviceImportGVR schema.GroupVersionResource) error {
   517  	// Get the definition for service B, so we can get the ports.
   518  	svc, err := c.Kube().CoreV1().Services(echos.Namespace.Name()).Get(context.TODO(), common.ServiceB, metav1.GetOptions{})
   519  	if err != nil {
   520  		return err
   521  	}
   522  
   523  	// Convert the ports for the ServiceImport.
   524  	ports := make([]mcsapi.ServicePort, len(svc.Spec.Ports))
   525  	for i, p := range svc.Spec.Ports {
   526  		ports[i] = mcsapi.ServicePort{
   527  			Name:        p.Name,
   528  			Protocol:    p.Protocol,
   529  			Port:        p.Port,
   530  			AppProtocol: p.AppProtocol,
   531  		}
   532  	}
   533  
   534  	serviceImport := &mcsapi.ServiceImport{
   535  		TypeMeta: metav1.TypeMeta{
   536  			Kind:       "ServiceImport",
   537  			APIVersion: serviceImportGVR.GroupVersion().String(),
   538  		},
   539  		ObjectMeta: metav1.ObjectMeta{
   540  			Namespace: echos.Namespace.Name(),
   541  			Name:      common.ServiceB,
   542  		},
   543  		Spec: mcsapi.ServiceImportSpec{
   544  			IPs:   []string{vip},
   545  			Type:  mcsapi.ClusterSetIP,
   546  			Ports: ports,
   547  		},
   548  	}
   549  
   550  	u, err := runtime.DefaultUnstructuredConverter.ToUnstructured(serviceImport)
   551  	if err != nil {
   552  		panic(err)
   553  	}
   554  
   555  	// Create the ServiceImport.
   556  	_, err = c.Dynamic().Resource(serviceImportGVR).Namespace(echos.Namespace.Name()).Create(
   557  		context.TODO(), &unstructured.Unstructured{Object: u}, metav1.CreateOptions{})
   558  	if err != nil && !kerrors.IsAlreadyExists(err) {
   559  		return err
   560  	}
   561  	return nil
   562  }