agones.dev/agones@v1.54.0/test/e2e/controller/crash_test.go (about)

     1  // Copyright 2020 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package controller
    16  
    17  import (
    18  	"context"
    19  	"testing"
    20  	"time"
    21  
    22  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    23  	e2eframework "agones.dev/agones/test/e2e/framework"
    24  	"github.com/stretchr/testify/assert"
    25  	"github.com/stretchr/testify/require"
    26  	corev1 "k8s.io/api/core/v1"
    27  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/labels"
    29  	"k8s.io/apimachinery/pkg/util/wait"
    30  )
    31  
    32  func TestGameServerUnhealthyAfterDeletingPodWhileControllerDown(t *testing.T) {
    33  	logger := e2eframework.TestLogger(t)
    34  	gs := framework.DefaultGameServer(defaultNs)
    35  	ctx := context.Background()
    36  
    37  	readyGs, err := framework.CreateGameServerAndWaitUntilReady(t, defaultNs, gs)
    38  	if err != nil {
    39  		t.Fatalf("Could not get a GameServer ready: %v", err)
    40  	}
    41  	logger.WithField("gsKey", readyGs.ObjectMeta.Name).Info("GameServer Ready")
    42  
    43  	gsClient := framework.AgonesClient.AgonesV1().GameServers(defaultNs)
    44  	podClient := framework.KubeClient.CoreV1().Pods(defaultNs)
    45  	defer gsClient.Delete(ctx, readyGs.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint: errcheck
    46  
    47  	pod, err := podClient.Get(ctx, readyGs.ObjectMeta.Name, metav1.GetOptions{})
    48  	assert.NoError(t, err)
    49  	assert.True(t, metav1.IsControlledBy(pod, readyGs))
    50  
    51  	err = deleteAgonesControllerPods(ctx)
    52  	assert.NoError(t, err)
    53  	err = podClient.Delete(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{})
    54  	assert.NoError(t, err)
    55  
    56  	_, err = framework.WaitForGameServerState(t, readyGs, agonesv1.GameServerStateUnhealthy, 3*time.Minute)
    57  	assert.NoError(t, err)
    58  	logger.Info("waiting for Agones controller to come back to running")
    59  	assert.NoError(t, waitForAgonesControllerRunning(ctx, -1))
    60  }
    61  
    62  func TestLeaderElectionAfterDeletingLeader(t *testing.T) {
    63  	logger := e2eframework.TestLogger(t)
    64  	gs := framework.DefaultGameServer(defaultNs)
    65  	ctx := context.Background()
    66  
    67  	err := waitForAgonesControllerRunning(ctx, -1)
    68  	require.NoError(t, err, "Could not ensure controller running")
    69  
    70  	list, err := getAgonesControllerPods(ctx)
    71  	require.NoError(t, err, "Could not get list of controller pods")
    72  	if len(list.Items) == 1 {
    73  		t.Skip("Skip test. Leader Election is not enabled since there is only 1 controller")
    74  	}
    75  
    76  	replication := len(list.Items)
    77  
    78  	// Deleting other controller pods to make sure that the last one becomes leader
    79  	willBeLeader := list.Items[0].ObjectMeta.Name
    80  	for _, pod := range list.Items[1:] {
    81  		err = deleteAgonesControllerPod(ctx, pod.ObjectMeta.Name)
    82  		require.NoError(t, err, "Could not delete controller pod")
    83  	}
    84  
    85  	err = waitForAgonesControllerRunning(ctx, replication)
    86  	require.NoError(t, err, "Could not get controller ready after delete")
    87  
    88  	err = deleteAgonesControllerPod(ctx, willBeLeader)
    89  	require.NoError(t, err, "Could not delete leader controller pod")
    90  
    91  	readyGs, err := framework.CreateGameServerAndWaitUntilReady(t, defaultNs, gs)
    92  	if err != nil {
    93  		t.Fatalf("Could not get a GameServer ready: %v", err)
    94  	}
    95  	logger.WithField("gsKey", readyGs.ObjectMeta.Name).Info("GameServer Ready")
    96  }
    97  
    98  // deleteAgonesControllerPods deletes all the Controller pods for the Agones controller,
    99  // faking a controller crash.
   100  func deleteAgonesControllerPods(ctx context.Context) error {
   101  	list, err := getAgonesControllerPods(ctx)
   102  	if err != nil {
   103  		return err
   104  	}
   105  
   106  	for i := range list.Items {
   107  		err = deleteAgonesControllerPod(ctx, list.Items[i].ObjectMeta.Name)
   108  		if err != nil {
   109  			return err
   110  		}
   111  	}
   112  	return nil
   113  }
   114  
   115  func waitForAgonesControllerRunning(ctx context.Context, wantReplicas int) error {
   116  	return wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
   117  		list, err := getAgonesControllerPods(ctx)
   118  		if err != nil {
   119  			return true, err
   120  		}
   121  
   122  		if wantReplicas != -1 && len(list.Items) != wantReplicas {
   123  			return false, nil
   124  		}
   125  
   126  		for i := range list.Items {
   127  			for _, c := range list.Items[i].Status.ContainerStatuses {
   128  				if c.State.Running == nil {
   129  					return false, nil
   130  				}
   131  			}
   132  		}
   133  
   134  		return true, nil
   135  	})
   136  }
   137  
   138  // getAgonesControllerPods returns all the Agones controller pods
   139  func getAgonesControllerPods(ctx context.Context) (*corev1.PodList, error) {
   140  	opts := metav1.ListOptions{LabelSelector: labels.Set{"agones.dev/role": "controller"}.String()}
   141  	return framework.KubeClient.CoreV1().Pods("agones-system").List(ctx, opts)
   142  }
   143  
   144  // deleteAgonesControllerPod deletes a Agones controller pod
   145  func deleteAgonesControllerPod(ctx context.Context, podName string) error {
   146  	policy := metav1.DeletePropagationBackground
   147  	err := framework.KubeClient.CoreV1().Pods("agones-system").Delete(ctx, podName,
   148  		metav1.DeleteOptions{PropagationPolicy: &policy})
   149  	return err
   150  }