agones.dev/agones@v1.54.0/test/e2e/controller/crash_test.go (about) 1 // Copyright 2020 Google LLC All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package controller 16 17 import ( 18 "context" 19 "testing" 20 "time" 21 22 agonesv1 "agones.dev/agones/pkg/apis/agones/v1" 23 e2eframework "agones.dev/agones/test/e2e/framework" 24 "github.com/stretchr/testify/assert" 25 "github.com/stretchr/testify/require" 26 corev1 "k8s.io/api/core/v1" 27 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/apimachinery/pkg/labels" 29 "k8s.io/apimachinery/pkg/util/wait" 30 ) 31 32 func TestGameServerUnhealthyAfterDeletingPodWhileControllerDown(t *testing.T) { 33 logger := e2eframework.TestLogger(t) 34 gs := framework.DefaultGameServer(defaultNs) 35 ctx := context.Background() 36 37 readyGs, err := framework.CreateGameServerAndWaitUntilReady(t, defaultNs, gs) 38 if err != nil { 39 t.Fatalf("Could not get a GameServer ready: %v", err) 40 } 41 logger.WithField("gsKey", readyGs.ObjectMeta.Name).Info("GameServer Ready") 42 43 gsClient := framework.AgonesClient.AgonesV1().GameServers(defaultNs) 44 podClient := framework.KubeClient.CoreV1().Pods(defaultNs) 45 defer gsClient.Delete(ctx, readyGs.ObjectMeta.Name, metav1.DeleteOptions{}) // nolint: errcheck 46 47 pod, err := podClient.Get(ctx, readyGs.ObjectMeta.Name, metav1.GetOptions{}) 48 assert.NoError(t, err) 49 assert.True(t, metav1.IsControlledBy(pod, readyGs)) 50 51 err = deleteAgonesControllerPods(ctx) 52 assert.NoError(t, err) 53 err = podClient.Delete(ctx, pod.ObjectMeta.Name, metav1.DeleteOptions{}) 54 assert.NoError(t, err) 55 56 _, err = framework.WaitForGameServerState(t, readyGs, agonesv1.GameServerStateUnhealthy, 3*time.Minute) 57 assert.NoError(t, err) 58 logger.Info("waiting for Agones controller to come back to running") 59 assert.NoError(t, waitForAgonesControllerRunning(ctx, -1)) 60 } 61 62 func TestLeaderElectionAfterDeletingLeader(t *testing.T) { 63 logger := e2eframework.TestLogger(t) 64 gs := framework.DefaultGameServer(defaultNs) 65 ctx := context.Background() 66 67 err := waitForAgonesControllerRunning(ctx, -1) 68 require.NoError(t, err, "Could not ensure controller running") 69 70 list, err := getAgonesControllerPods(ctx) 71 require.NoError(t, err, "Could not get list of controller pods") 72 if len(list.Items) == 1 { 73 t.Skip("Skip test. Leader Election is not enabled since there is only 1 controller") 74 } 75 76 replication := len(list.Items) 77 78 // Deleting other controller pods to make sure that the last one becomes leader 79 willBeLeader := list.Items[0].ObjectMeta.Name 80 for _, pod := range list.Items[1:] { 81 err = deleteAgonesControllerPod(ctx, pod.ObjectMeta.Name) 82 require.NoError(t, err, "Could not delete controller pod") 83 } 84 85 err = waitForAgonesControllerRunning(ctx, replication) 86 require.NoError(t, err, "Could not get controller ready after delete") 87 88 err = deleteAgonesControllerPod(ctx, willBeLeader) 89 require.NoError(t, err, "Could not delete leader controller pod") 90 91 readyGs, err := framework.CreateGameServerAndWaitUntilReady(t, defaultNs, gs) 92 if err != nil { 93 t.Fatalf("Could not get a GameServer ready: %v", err) 94 } 95 logger.WithField("gsKey", readyGs.ObjectMeta.Name).Info("GameServer Ready") 96 } 97 98 // deleteAgonesControllerPods deletes all the Controller pods for the Agones controller, 99 // faking a controller crash. 100 func deleteAgonesControllerPods(ctx context.Context) error { 101 list, err := getAgonesControllerPods(ctx) 102 if err != nil { 103 return err 104 } 105 106 for i := range list.Items { 107 err = deleteAgonesControllerPod(ctx, list.Items[i].ObjectMeta.Name) 108 if err != nil { 109 return err 110 } 111 } 112 return nil 113 } 114 115 func waitForAgonesControllerRunning(ctx context.Context, wantReplicas int) error { 116 return wait.PollUntilContextTimeout(ctx, time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) { 117 list, err := getAgonesControllerPods(ctx) 118 if err != nil { 119 return true, err 120 } 121 122 if wantReplicas != -1 && len(list.Items) != wantReplicas { 123 return false, nil 124 } 125 126 for i := range list.Items { 127 for _, c := range list.Items[i].Status.ContainerStatuses { 128 if c.State.Running == nil { 129 return false, nil 130 } 131 } 132 } 133 134 return true, nil 135 }) 136 } 137 138 // getAgonesControllerPods returns all the Agones controller pods 139 func getAgonesControllerPods(ctx context.Context) (*corev1.PodList, error) { 140 opts := metav1.ListOptions{LabelSelector: labels.Set{"agones.dev/role": "controller"}.String()} 141 return framework.KubeClient.CoreV1().Pods("agones-system").List(ctx, opts) 142 } 143 144 // deleteAgonesControllerPod deletes a Agones controller pod 145 func deleteAgonesControllerPod(ctx context.Context, podName string) error { 146 policy := metav1.DeletePropagationBackground 147 err := framework.KubeClient.CoreV1().Pods("agones-system").Delete(ctx, podName, 148 metav1.DeleteOptions{PropagationPolicy: &policy}) 149 return err 150 }