agones.dev/agones@v1.53.0/pkg/metrics/exporter_test.go (about)

     1  // Copyright 2025 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package metrics
    16  
    17  import (
    18  	"bufio"
    19  	"context"
    20  	"io"
    21  	"net/http"
    22  	"net/http/httptest"
    23  	"os"
    24  	"strings"
    25  	"testing"
    26  	"time"
    27  
    28  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    29  	agtesting "agones.dev/agones/pkg/testing"
    30  	"agones.dev/agones/pkg/util/httpserver"
    31  	"agones.dev/agones/test/e2e/framework"
    32  
    33  	"agones.dev/agones/pkg/util/runtime"
    34  	"github.com/prometheus/client_golang/prometheus"
    35  	"github.com/stretchr/testify/assert"
    36  	"github.com/stretchr/testify/require"
    37  	"go.opencensus.io/stats/view"
    38  	corev1 "k8s.io/api/core/v1"
    39  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    40  	k8sruntime "k8s.io/apimachinery/pkg/runtime"
    41  	k8stesting "k8s.io/client-go/testing"
    42  )
    43  
    44  func TestRegisterPrometheusExporter(t *testing.T) {
    45  	resetMetrics()
    46  	registry := prometheus.NewRegistry()
    47  
    48  	handler, err := RegisterPrometheusExporter(registry)
    49  	assert.NoError(t, err, "RegisterPrometheusExporter should not return an error")
    50  	assert.NotNil(t, handler, "Handler should not be nil")
    51  
    52  	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, "/metrics", nil)
    53  	require.NoError(t, err, "Creating request to /metrics should not fail")
    54  
    55  	rr := httptest.NewRecorder()
    56  	handler.ServeHTTP(rr, req)
    57  
    58  	resp := rr.Result()
    59  	defer func() {
    60  		assert.NoError(t, resp.Body.Close())
    61  	}()
    62  
    63  	assert.Equal(t, http.StatusOK, resp.StatusCode, "Expected status code 200")
    64  
    65  	bodyBytes, err := io.ReadAll(resp.Body)
    66  	require.NoError(t, err)
    67  	body := string(bodyBytes)
    68  
    69  	assert.Contains(t, body, "go_gc_duration_seconds", "Should contain default Go metrics")
    70  	assert.Contains(t, resp.Header.Get("Content-Type"), "text/plain", "Expected text/plain content type")
    71  }
    72  
    73  func TestMetrics_Endpoint_ExposesAllMetrics(t *testing.T) {
    74  	resetMetrics()
    75  
    76  	runtime.FeatureTestMutex.Lock()
    77  	defer runtime.FeatureTestMutex.Unlock()
    78  	runtime.EnableAllFeatures()
    79  
    80  	conf := Config{
    81  		PrometheusMetrics: true,
    82  	}
    83  	server := &httpserver.Server{
    84  		Port:   "3001",
    85  		Logger: framework.TestLogger(t),
    86  	}
    87  
    88  	m := newMockWithReactorNodesAndGameServers()
    89  	ctrl := newFakeControllerWithMock(m)
    90  	defer ctrl.close()
    91  
    92  	ctrl.run(t)
    93  	require.True(t, ctrl.sync(), "Controller failed to sync")
    94  
    95  	// ---- Setup steps ----
    96  	setupSteps := []func(t *testing.T, c *fakeController){
    97  		setupGameServer,
    98  		setupFleet,
    99  		setupFleetAutoScalers,
   100  		setupFleetWithCountersAndLists,
   101  		setupGameServerPlayerConnect,
   102  		setupGameServerStateDuration,
   103  	}
   104  
   105  	for _, stepFn := range setupSteps {
   106  		stepFn(t, ctrl)
   107  	}
   108  
   109  	ctrl.collect()
   110  
   111  	health, closer := SetupMetrics(conf, server)
   112  	defer t.Cleanup(closer)
   113  
   114  	assert.NotNil(t, health, "Health check handler should not be nil")
   115  	server.Handle("/", health)
   116  
   117  	ctx, cancel := context.WithCancel(context.Background())
   118  	defer cancel()
   119  
   120  	// Start the HTTP server
   121  	go func() {
   122  		_ = server.Run(ctx, 0)
   123  	}()
   124  	time.Sleep(300 * time.Millisecond)
   125  
   126  	resp, err := http.Get("http://localhost:3001/metrics")
   127  	require.NoError(t, err, "Failed to GET metrics endpoint")
   128  	defer func() {
   129  		assert.NoError(t, resp.Body.Close())
   130  	}()
   131  
   132  	assert.Equal(t, http.StatusOK, resp.StatusCode, "Expected status code 200")
   133  
   134  	metricsSet := collectMetricNames(resp)
   135  	expectedMetrics := getMetricNames()
   136  
   137  	for _, metric := range expectedMetrics {
   138  		assert.Contains(t, metricsSet, metric, "Missing expected metric: %s", metric)
   139  	}
   140  
   141  }
   142  
   143  func TestSetupMetrics_StackdriverOnly_NoPanic(t *testing.T) {
   144  	// Set required env vars
   145  	require.NoError(t, os.Setenv("POD_NAMESPACE", "default"))
   146  	require.NoError(t, os.Setenv("POD_NAME", "test-pod"))
   147  	require.NoError(t, os.Setenv("CONTAINER_NAME", "test-container"))
   148  
   149  	// Fake metadata server
   150  	handler := http.NewServeMux()
   151  	handler.HandleFunc("/computeMetadata/v1/instance/zone", func(w http.ResponseWriter, _ *http.Request) {
   152  		w.Header().Set("Metadata-Flavor", "Google")
   153  		_, _ = w.Write([]byte("projects/123456789/zones/fake-zone"))
   154  	})
   155  	handler.HandleFunc("/computeMetadata/v1/instance/attributes/cluster-name", func(w http.ResponseWriter, _ *http.Request) {
   156  		w.Header().Set("Metadata-Flavor", "Google")
   157  		_, _ = w.Write([]byte("fake-cluster"))
   158  	})
   159  	fakeMetadataServer := httptest.NewServer(handler)
   160  	t.Cleanup(fakeMetadataServer.Close)
   161  
   162  	// Set env var to point to the fake metadata server
   163  	host := strings.TrimPrefix(fakeMetadataServer.URL, "http://")
   164  	require.NoError(t, os.Setenv("GCE_METADATA_HOST", host))
   165  
   166  	// Config for Stackdriver metrics
   167  	conf := Config{
   168  		Stackdriver:       true,
   169  		GCPProjectID:      "fake-project",
   170  		StackdriverLabels: "env=dev",
   171  	}
   172  	server := &httpserver.Server{
   173  		Port:   "3001",
   174  		Logger: framework.TestLogger(t),
   175  	}
   176  
   177  	health, closer := SetupMetrics(conf, server)
   178  	defer t.Cleanup(closer)
   179  	assert.NotNil(t, health, "Health check handler should not be nil")
   180  }
   181  
   182  func newMockWithReactorNodesAndGameServers() agtesting.Mocks {
   183  	m := agtesting.NewMocks()
   184  
   185  	m.KubeClient.AddReactor("list", "nodes", func(_ k8stesting.Action) (bool, k8sruntime.Object, error) {
   186  		n1 := nodeWithName("node1")
   187  		n2 := nodeWithName("node2")
   188  		n3 := nodeWithName("node3")
   189  		return true, &corev1.NodeList{Items: []corev1.Node{*n1, *n2, *n3}}, nil
   190  	})
   191  
   192  	m.AgonesClient.AddReactor("list", "gameservers", func(_ k8stesting.Action) (bool, k8sruntime.Object, error) {
   193  		gs1 := gameServerWithNode("node1")
   194  		gs2 := gameServerWithNode("node2")
   195  		gs3 := gameServerWithNode("node2")
   196  		return true, &agonesv1.GameServerList{Items: []agonesv1.GameServer{*gs1, *gs2, *gs3}}, nil
   197  	})
   198  
   199  	return m
   200  }
   201  
   202  func setupGameServer(t *testing.T, ctrl *fakeController) {
   203  	gs := gameServerWithFleetAndState("test-fleet", agonesv1.GameServerStateCreating)
   204  	ctrl.gsWatch.Add(gs)
   205  
   206  	require.Eventually(t, func() bool {
   207  		gs, err := ctrl.gameServerLister.GameServers(gs.ObjectMeta.Namespace).Get(gs.ObjectMeta.Name)
   208  		assert.NoError(t, err)
   209  		return gs.Status.State == agonesv1.GameServerStateCreating
   210  	}, 5*time.Second, time.Second)
   211  	ctrl.collect()
   212  }
   213  
   214  func setupFleet(_ *testing.T, ctrl *fakeController) {
   215  	flt := fleet("fleet-test", 8, 2, 5, 1, 1)
   216  	ctrl.fleetWatch.Add(flt)
   217  
   218  	flt = flt.DeepCopy()
   219  	flt.Status.Replicas = 15
   220  	ctrl.fleetWatch.Modify(flt)
   221  	ctrl.collect()
   222  }
   223  
   224  func setupFleetAutoScalers(_ *testing.T, ctrl *fakeController) {
   225  	ctrl.fasWatch.Add(fleetAutoScaler("fleet-test", "fas-test"))
   226  	ctrl.collect()
   227  }
   228  
   229  func setupFleetWithCountersAndLists(_ *testing.T, ctrl *fakeController) {
   230  	flt := fleet("cl-fleet-test", 8, 3, 5, 8, 0)
   231  	ctrl.fleetWatch.Add(flt)
   232  	flt = flt.DeepCopy()
   233  	flt.Status.Counters = map[string]agonesv1.AggregatedCounterStatus{
   234  		"players": {
   235  			AllocatedCount:    24,
   236  			AllocatedCapacity: 30,
   237  			Count:             28,
   238  			Capacity:          50,
   239  		},
   240  	}
   241  	flt.Status.Lists = map[string]agonesv1.AggregatedListStatus{
   242  		"rooms": {
   243  			AllocatedCount:    4,
   244  			AllocatedCapacity: 6,
   245  			Count:             1,
   246  			Capacity:          100,
   247  		},
   248  	}
   249  	ctrl.fleetWatch.Modify(flt)
   250  	ctrl.collect()
   251  }
   252  
   253  func setupGameServerPlayerConnect(t *testing.T, ctrl *fakeController) {
   254  	gs := gameServerWithFleetAndState("test-fleet", agonesv1.GameServerStateReady)
   255  	gs.Status.Players = &agonesv1.PlayerStatus{
   256  		Count: 0,
   257  	}
   258  	ctrl.gsWatch.Add(gs)
   259  	gs = gs.DeepCopy()
   260  	gs.Status.Players.Count = 1
   261  	ctrl.gsWatch.Modify(gs)
   262  
   263  	require.Eventually(t, func() bool {
   264  		gs, err := ctrl.gameServerLister.GameServers(gs.ObjectMeta.Namespace).Get(gs.ObjectMeta.Name)
   265  		assert.NoError(t, err)
   266  		return gs.Status.Players.Count == 1
   267  	}, 5*time.Second, time.Second)
   268  	ctrl.collect()
   269  }
   270  
   271  func setupGameServerStateDuration(_ *testing.T, ctrl *fakeController) {
   272  	creationTimestamp := metav1.Now()
   273  	currentTime := creationTimestamp.Local()
   274  	// Add one second each time Duration is calculated
   275  	ctrl.now = func() time.Time {
   276  		currentTime = currentTime.Add(1 * time.Second)
   277  		return currentTime
   278  	}
   279  
   280  	gs1 := gameServerWithFleetStateCreationTimestamp("test-fleet", "exampleGameServer1", "", creationTimestamp)
   281  	gs2 := gameServerWithFleetStateCreationTimestamp("test-fleet", "exampleGameServer1", agonesv1.GameServerStateCreating, creationTimestamp)
   282  
   283  	ctrl.gsWatch.Modify(gs1)
   284  	ctrl.gsWatch.Modify(gs2)
   285  	ctrl.collect()
   286  }
   287  
   288  // getMetricNames returns all metric view names.
   289  func getMetricNames() []string {
   290  	var metricNames []string
   291  	for _, v := range stateViews {
   292  		metricName := "agones_" + v.Name
   293  
   294  		// Check if the aggregation type is Distribution
   295  		if v.Aggregation.Type == view.AggTypeDistribution {
   296  			// If it's a distribution, we append _bucket, _sum, and _count
   297  			metricNames = append(metricNames,
   298  				metricName+"_bucket",
   299  				metricName+"_sum",
   300  				metricName+"_count",
   301  			)
   302  		} else {
   303  			metricNames = append(metricNames, metricName)
   304  
   305  		}
   306  	}
   307  	return metricNames
   308  }
   309  
   310  func collectMetricNames(resp *http.Response) map[string]bool {
   311  	metrics := make(map[string]bool)
   312  	scanner := bufio.NewScanner(resp.Body)
   313  	for scanner.Scan() {
   314  		line := scanner.Text()
   315  		if strings.HasPrefix(line, "#") || line == "" {
   316  			continue
   317  		}
   318  		fields := strings.Fields(line)
   319  		if len(fields) > 0 {
   320  			// Extract only the metric name, excluding labels
   321  			metricName := fields[0]
   322  			if idx := strings.Index(metricName, "{"); idx != -1 {
   323  				metricName = metricName[:idx]
   324  			}
   325  			metrics[metricName] = true
   326  		}
   327  	}
   328  	return metrics
   329  }