istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tests/integration/telemetry/api/accesslogs_test.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  //go:build integ
    16  // +build integ
    17  
    18  // Copyright Istio Authors
    19  //
    20  // Licensed under the Apache License, Version 2.0 (the "License");
    21  // you may not use this file except in compliance with the License.
    22  // You may obtain a copy of the License at
    23  //
    24  //     http://www.apache.org/licenses/LICENSE-2.0
    25  //
    26  // Unless required by applicable law or agreed to in writing, software
    27  // distributed under the License is distributed on an "AS IS" BASIS,
    28  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    29  // See the License for the specific language governing permissions and
    30  // limitations under the License.
    31  
    32  package api
    33  
    34  import (
    35  	"fmt"
    36  	"strings"
    37  	"testing"
    38  	"time"
    39  
    40  	"k8s.io/apimachinery/pkg/util/rand"
    41  
    42  	"istio.io/istio/pkg/config/protocol"
    43  	"istio.io/istio/pkg/http/headers"
    44  	"istio.io/istio/pkg/test"
    45  	"istio.io/istio/pkg/test/framework"
    46  	"istio.io/istio/pkg/test/framework/components/crd"
    47  	"istio.io/istio/pkg/test/framework/components/echo"
    48  	"istio.io/istio/pkg/test/framework/components/echo/check"
    49  	"istio.io/istio/pkg/test/util/retry"
    50  )
    51  
    52  func TestAccessLogs(t *testing.T) {
    53  	framework.NewTest(t).
    54  		Run(func(t framework.TestContext) {
    55  			t.NewSubTest("enabled").Run(func(t framework.TestContext) {
    56  				applyTelemetryResource(t, true)
    57  				runAccessLogsTests(t, true, false)
    58  				deleteTelemetryResource(t, true)
    59  			})
    60  			t.NewSubTest("enabled-with-targetref").Run(func(t framework.TestContext) {
    61  				args := map[string]any{
    62  					"To":                GetTarget().(echo.Instances),
    63  					"TargetGatewayName": GetTarget().ServiceName() + "-gateway",
    64  					"Address":           fmt.Sprintf("%s-gateway-istio.%s.svc.cluster.local", GetTarget().ServiceName(), apps.Namespace.Name()),
    65  				}
    66  				crd.DeployGatewayAPIOrSkip(t)
    67  				t.ConfigIstio().EvalFile(apps.Namespace.Name(), args, "./testdata/gateway-api.yaml").ApplyOrFail(t)
    68  				applyTelemetryResourceWithTargetRef(t, true)
    69  				runAccessLogsTests(t, true, true)
    70  				deleteTelemetryResource(t, true)
    71  			})
    72  			t.NewSubTest("disabled").Run(func(t framework.TestContext) {
    73  				applyTelemetryResource(t, false)
    74  				runAccessLogsTests(t, false, false)
    75  				deleteTelemetryResource(t, false)
    76  			})
    77  		})
    78  }
    79  
    80  func TestAccessLogsFilter(t *testing.T) {
    81  	framework.NewTest(t).
    82  		Run(func(t framework.TestContext) {
    83  			runAccessLogFilterTests(t, false)
    84  			t.ConfigIstio().File(apps.Namespace.Name(), "./testdata/accesslog/filter.yaml").ApplyOrFail(t)
    85  			runAccessLogFilterTests(t, true)
    86  		})
    87  }
    88  
    89  func TestAccessLogsMode(t *testing.T) {
    90  	framework.NewTest(t).
    91  		Run(func(t framework.TestContext) {
    92  			t.NewSubTest("client").Run(func(t framework.TestContext) {
    93  				t.ConfigIstio().File(apps.Namespace.Name(), "./testdata/accesslog/mode-client.yaml").ApplyOrFail(t)
    94  				runAccessLogModeTests(t, true, false)
    95  			})
    96  			t.NewSubTest("server").Run(func(t framework.TestContext) {
    97  				t.ConfigIstio().File(apps.Namespace.Name(), "./testdata/accesslog/mode-server.yaml").ApplyOrFail(t)
    98  				runAccessLogModeTests(t, false, true)
    99  			})
   100  			t.NewSubTest("client-and-server").Run(func(t framework.TestContext) {
   101  				t.ConfigIstio().File(apps.Namespace.Name(), "./testdata/accesslog/mode-clientserver.yaml").ApplyOrFail(t)
   102  				runAccessLogModeTests(t, true, true)
   103  			})
   104  		})
   105  }
   106  
   107  func TestAccessLogsDefaultProvider(t *testing.T) {
   108  	framework.NewTest(t).
   109  		Run(func(t framework.TestContext) {
   110  			t.NewSubTest("disabled").Run(func(t framework.TestContext) {
   111  				runAccessLogsTests(t, false, false)
   112  			})
   113  			t.NewSubTest("enabled").Run(func(t framework.TestContext) {
   114  				cfg := `
   115  defaultProviders:
   116    accessLogging:
   117    - envoy
   118  `
   119  				ist.PatchMeshConfigOrFail(t, t, cfg)
   120  				runAccessLogsTests(t, true, false)
   121  			})
   122  		})
   123  }
   124  
   125  func applyTelemetryResourceWithTargetRef(t framework.TestContext, enableLogs bool) {
   126  	args := map[string]any{
   127  		"TargetGatewayName": GetTarget().ServiceName() + "-gateway",
   128  		"DisableLogs":       !enableLogs,
   129  	}
   130  	t.ConfigIstio().EvalFile(apps.Namespace.Name(), args, "./testdata/accesslog/targetref.yaml").ApplyOrFail(t)
   131  }
   132  
   133  func applyTelemetryResource(t framework.TestContext, enableLogs bool) {
   134  	config := fmt.Sprintf(`apiVersion: telemetry.istio.io/v1alpha1
   135  kind: Telemetry
   136  metadata:
   137    name: logs
   138  spec:
   139    accessLogging:
   140    - providers:
   141      - name: envoy
   142      disabled: %v
   143  `, !enableLogs)
   144  	t.ConfigIstio().YAML(apps.Namespace.Name(), config).ApplyOrFail(t)
   145  }
   146  
   147  func deleteTelemetryResource(t framework.TestContext, enableLogs bool) {
   148  	config := fmt.Sprintf(`apiVersion: telemetry.istio.io/v1alpha1
   149  kind: Telemetry
   150  metadata:
   151    name: logs
   152  spec:
   153    accessLogging:
   154    - disabled: %v
   155  `, enableLogs)
   156  	t.ConfigIstio().YAML(apps.Namespace.Name(), config).DeleteOrFail(t)
   157  }
   158  
   159  func runAccessLogsTests(t framework.TestContext, expectLogs bool, hasTargetRef bool) {
   160  	testID := rand.String(16)
   161  	to := GetTarget()
   162  	if len(GetClientInstances()) == 0 {
   163  		t.Fatal("there is no client")
   164  	}
   165  	cltInstance := GetClientInstances()[0]
   166  	from := GetClientInstances()[0]
   167  	var count float64
   168  	if expectLogs {
   169  		var http echo.HTTP
   170  		if hasTargetRef {
   171  			http = echo.HTTP{
   172  				Path:    "/" + testID,
   173  				Method:  "GET",
   174  				Headers: headers.New().WithHost(fmt.Sprintf("%s.com", GetTarget().ServiceName())).Build(),
   175  			}
   176  		} else {
   177  			http = echo.HTTP{
   178  				Path: "/" + testID,
   179  			}
   180  		}
   181  		// For positive test, we use the same ID and repeatedly send requests and check the count
   182  		// Retry a bit to get the logs. There is some delay before they are output(MeshConfig will not take effect immediately),
   183  		// so they may not be immediately ready. If not ready, we retry sending a call again.
   184  		err := retry.UntilSuccess(func() error {
   185  			if hasTargetRef {
   186  				hostname := fmt.Sprintf("%s-gateway-istio.%s.svc.cluster.local", GetTarget().ServiceName(), apps.Namespace.Name())
   187  				defaultOptions := []retry.Option{retry.Delay(100 * time.Millisecond), retry.Timeout(200 * time.Second)}
   188  				httpOpts := echo.CallOptions{
   189  					Address: hostname,
   190  					Port: echo.Port{
   191  						Name:        "http",
   192  						ServicePort: 80,
   193  						Protocol:    protocol.HTTP,
   194  					},
   195  					HTTP:  http,
   196  					Count: 1,
   197  					Retry: echo.Retry{
   198  						Options: append(defaultOptions, retry.Timeout(framework.TelemetryRetryTimeout)),
   199  					},
   200  					Check: check.ResponseHeader(injectedHeader, ""),
   201  				}
   202  				count = logCount(t, from, testID)
   203  				_ = cltInstance.CallOrFail(t, httpOpts)
   204  			} else {
   205  				GetClientInstances()[0].CallOrFail(t, echo.CallOptions{
   206  					To: to,
   207  					Port: echo.Port{
   208  						Name: "http",
   209  					},
   210  					HTTP: http,
   211  				})
   212  				count = logCount(t, to, testID)
   213  			}
   214  			if count > 0 != expectLogs {
   215  				return fmt.Errorf("expected logs '%v', got %v", expectLogs, count)
   216  			}
   217  			return nil
   218  		}, retry.Timeout(framework.TelemetryRetryTimeout))
   219  		if err != nil {
   220  			t.Fatalf("expected logs but got nil, err: %v", err)
   221  		}
   222  	} else {
   223  		// For negative case, we retry with a new ID each time. This ensures that a previous failure
   224  		// (due to hitting old code path with logs still enabled) doesn't stop us from succeeding later
   225  		// once we stop logging.
   226  		retry.UntilSuccessOrFail(t, func() error {
   227  			testID := rand.String(16)
   228  			GetClientInstances()[0].CallOrFail(t, echo.CallOptions{
   229  				To: to,
   230  				Port: echo.Port{
   231  					Name: "http",
   232  				},
   233  				HTTP: echo.HTTP{
   234  					Path: "/" + testID,
   235  				},
   236  			})
   237  			// This is a negative test; there isn't much we can do other than wait a few seconds and ensure we didn't emit logs
   238  			// Logs should flush every 1s, so 2s should be plenty of time for logs to be emitted
   239  			time.Sleep(time.Second * 2)
   240  			count = logCount(t, to, testID)
   241  			if count > 0 != expectLogs {
   242  				return fmt.Errorf("expected logs '%v', got %v", expectLogs, count)
   243  			}
   244  			return nil
   245  		})
   246  	}
   247  }
   248  
   249  func logCount(t test.Failer, to echo.Target, testID string) float64 {
   250  	counts := map[string]float64{}
   251  	for _, w := range to.WorkloadsOrFail(t) {
   252  		var logs string
   253  		l, err := w.Sidecar().Logs()
   254  		if err != nil {
   255  			t.Fatalf("failed getting logs: %v", err)
   256  		}
   257  		logs += l
   258  		if c := float64(strings.Count(logs, testID)); c > 0 {
   259  			counts[w.Cluster().Name()] = c
   260  		}
   261  	}
   262  	var total float64
   263  	for _, c := range counts {
   264  		total += c
   265  	}
   266  	return total
   267  }
   268  
   269  func runAccessLogFilterTests(t framework.TestContext, expectLogs bool) {
   270  	to := GetTarget()
   271  	from := GetClientInstances()[0]
   272  	if expectLogs {
   273  		// For positive test, we use the same path in Telemetry API and repeatedly send requests and check the count
   274  		// Retry a bit to get the logs. There is some delay before they are output(MeshConfig will not take effect immediately),
   275  		// so they may not be immediately ready. If not ready, we retry sending a call again.
   276  		err := retry.UntilSuccess(func() error {
   277  			from.CallOrFail(t, echo.CallOptions{
   278  				To: to,
   279  				Port: echo.Port{
   280  					Name: "http",
   281  				},
   282  				HTTP: echo.HTTP{
   283  					Path: "/filter-test",
   284  				},
   285  			})
   286  			count := logCount(t, to, "filter-test")
   287  			if count > 0 != expectLogs {
   288  				return fmt.Errorf("expected logs '%v', got %v", expectLogs, count)
   289  			}
   290  			return nil
   291  		}, retry.Timeout(framework.TelemetryRetryTimeout))
   292  		if err != nil {
   293  			t.Fatalf("expected logs but got nil, err: %v", err)
   294  		}
   295  	} else {
   296  		// For negative case, we retry with a new ID each time. This ensures that a previous failure
   297  		// (due to hitting old code path with logs still enabled) doesn't stop us from succeeding later
   298  		// once we stop logging.
   299  		retry.UntilSuccessOrFail(t, func() error {
   300  			testID := rand.String(16)
   301  			from.CallOrFail(t, echo.CallOptions{
   302  				To: to,
   303  				Port: echo.Port{
   304  					Name: "http",
   305  				},
   306  				HTTP: echo.HTTP{
   307  					Path: "/" + testID,
   308  				},
   309  			})
   310  			// This is a negative test; there isn't much we can do other than wait a few seconds and ensure we didn't emit logs
   311  			// Logs should flush every 1s, so 2s should be plenty of time for logs to be emitted
   312  			time.Sleep(time.Second * 2)
   313  			count := logCount(t, to, testID)
   314  			if count > 0 != expectLogs {
   315  				return fmt.Errorf("expected logs '%v', got %v", expectLogs, count)
   316  			}
   317  			return nil
   318  		})
   319  	}
   320  }
   321  
   322  func runAccessLogModeTests(t framework.TestContext, exceptClientLog, exceptServerLog bool) {
   323  	testID := rand.String(16)
   324  	to := GetTarget()
   325  	from := GetClientInstances()[0]
   326  	// For positive test, we use the same path in Telemetry API and repeatedly send requests and check the count
   327  	// Retry a bit to get the logs. There is some delay before they are output(MeshConfig will not take effect immediately),
   328  	// so they may not be immediately ready. If not ready, we retry sending a call again.
   329  	err := retry.UntilSuccess(func() error {
   330  		clientCount := logCount(t, from, testID)
   331  		serverCount := logCount(t, to, testID)
   332  
   333  		from.CallOrFail(t, echo.CallOptions{
   334  			To: to,
   335  			Port: echo.Port{
   336  				Name: "http",
   337  			},
   338  			HTTP: echo.HTTP{
   339  				Path: "/" + testID,
   340  			},
   341  		})
   342  
   343  		return retry.UntilSuccess(func() error {
   344  			clientDeltaCount := logCount(t, from, testID) - clientCount
   345  			if clientDeltaCount > 0 != exceptClientLog {
   346  				return fmt.Errorf("expected client logs %v but got %v", exceptClientLog, clientDeltaCount)
   347  			}
   348  
   349  			serverDeltaCount := logCount(t, to, testID) - serverCount
   350  			if serverDeltaCount > 0 != exceptServerLog {
   351  				return fmt.Errorf("expected server logs %v but got %v", exceptServerLog, serverDeltaCount)
   352  			}
   353  
   354  			return nil
   355  		}, retry.Timeout(10*time.Second), retry.Delay(time.Second))
   356  	}, retry.Timeout(framework.TelemetryRetryTimeout))
   357  	if err != nil {
   358  		t.Fatalf("expected logs but got err: %v", err)
   359  	}
   360  }