k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/test/e2e/cloud/gcp/ha_master.go (about) 1 /* 2 Copyright 2015 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package gcp 18 19 import ( 20 "context" 21 "fmt" 22 "os/exec" 23 "path" 24 "regexp" 25 "strconv" 26 "strings" 27 "time" 28 29 "github.com/onsi/ginkgo/v2" 30 v1 "k8s.io/api/core/v1" 31 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 clientset "k8s.io/client-go/kubernetes" 33 "k8s.io/kubernetes/test/e2e/common" 34 "k8s.io/kubernetes/test/e2e/feature" 35 "k8s.io/kubernetes/test/e2e/framework" 36 e2enode "k8s.io/kubernetes/test/e2e/framework/node" 37 e2epod "k8s.io/kubernetes/test/e2e/framework/pod" 38 e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper" 39 admissionapi "k8s.io/pod-security-admission/api" 40 ) 41 42 func addMasterReplica(zone string) error { 43 framework.Logf(fmt.Sprintf("Adding a new master replica, zone: %s", zone)) 44 _, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-grow-cluster.sh"), zone, "true", "true", "false") 45 if err != nil { 46 return err 47 } 48 return nil 49 } 50 51 func removeMasterReplica(zone string) error { 52 framework.Logf(fmt.Sprintf("Removing an existing master replica, zone: %s", zone)) 53 _, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-shrink-cluster.sh"), zone, "true", "false", "false") 54 if err != nil { 55 return err 56 } 57 return nil 58 } 59 60 func addWorkerNodes(zone string) error { 61 framework.Logf(fmt.Sprintf("Adding worker nodes, zone: %s", zone)) 62 _, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-grow-cluster.sh"), zone, "true", "false", "true") 63 if err != nil { 64 return err 65 } 66 return nil 67 } 68 69 func removeWorkerNodes(zone string) error { 70 framework.Logf(fmt.Sprintf("Removing worker nodes, zone: %s", zone)) 71 _, _, err := framework.RunCmd(path.Join(framework.TestContext.RepoRoot, "hack/e2e-internal/e2e-shrink-cluster.sh"), zone, "true", "true", "true") 72 if err != nil { 73 return err 74 } 75 return nil 76 } 77 78 func verifyRCs(ctx context.Context, c clientset.Interface, ns string, names []string) { 79 for _, name := range names { 80 framework.ExpectNoError(e2epod.VerifyPods(ctx, c, ns, name, true, 1)) 81 } 82 } 83 84 func createNewRC(c clientset.Interface, ns string, name string) { 85 _, err := common.NewRCByName(c, ns, name, 1, nil, nil) 86 framework.ExpectNoError(err) 87 } 88 89 func findRegionForZone(zone string) string { 90 region, err := exec.Command("gcloud", "compute", "zones", "list", zone, "--quiet", "--format=csv[no-heading](region)").Output() 91 framework.ExpectNoError(err) 92 if string(region) == "" { 93 framework.Failf("Region not found; zone: %s", zone) 94 } 95 return string(region) 96 } 97 98 func findZonesForRegion(region string) []string { 99 output, err := exec.Command("gcloud", "compute", "zones", "list", "--filter=region="+region, 100 "--quiet", "--format=csv[no-heading](name)").Output() 101 framework.ExpectNoError(err) 102 zones := strings.Split(string(output), "\n") 103 return zones 104 } 105 106 // removeZoneFromZones removes zone from zones slide. 107 // Please note that entries in zones can be repeated. In such situation only one replica is removed. 108 func removeZoneFromZones(zones []string, zone string) []string { 109 idx := -1 110 for j, z := range zones { 111 if z == zone { 112 idx = j 113 break 114 } 115 } 116 if idx >= 0 { 117 return zones[:idx+copy(zones[idx:], zones[idx+1:])] 118 } 119 return zones 120 } 121 122 // generateMasterRegexp returns a regex for matching master node name. 123 func generateMasterRegexp(prefix string) string { 124 return prefix + "(-...)?" 125 } 126 127 // waitForMasters waits until the cluster has the desired number of ready masters in it. 128 func waitForMasters(ctx context.Context, masterPrefix string, c clientset.Interface, size int, timeout time.Duration) error { 129 for start := time.Now(); time.Since(start) < timeout; time.Sleep(20 * time.Second) { 130 nodes, err := c.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) 131 if err != nil { 132 framework.Logf("Failed to list nodes: %v", err) 133 continue 134 } 135 136 // Filter out nodes that are not master replicas 137 e2enode.Filter(nodes, func(node v1.Node) bool { 138 res, err := regexp.Match(generateMasterRegexp(masterPrefix), ([]byte)(node.Name)) 139 if err != nil { 140 framework.Logf("Failed to match regexp to node name: %v", err) 141 return false 142 } 143 return res 144 }) 145 146 numNodes := len(nodes.Items) 147 148 // Filter out not-ready nodes. 149 e2enode.Filter(nodes, func(node v1.Node) bool { 150 return e2enode.IsConditionSetAsExpected(&node, v1.NodeReady, true) 151 }) 152 153 numReady := len(nodes.Items) 154 155 if numNodes == size && numReady == size { 156 framework.Logf("Cluster has reached the desired number of masters %d", size) 157 return nil 158 } 159 framework.Logf("Waiting for the number of masters %d, current %d, not ready master nodes %d", size, numNodes, numNodes-numReady) 160 } 161 return fmt.Errorf("timeout waiting %v for the number of masters to be %d", timeout, size) 162 } 163 164 var _ = SIGDescribe("HA-master", feature.HAMaster, func() { 165 f := framework.NewDefaultFramework("ha-master") 166 f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged 167 var c clientset.Interface 168 var ns string 169 var additionalReplicaZones []string 170 var additionalNodesZones []string 171 var existingRCs []string 172 173 ginkgo.BeforeEach(func(ctx context.Context) { 174 e2eskipper.SkipUnlessProviderIs("gce") 175 c = f.ClientSet 176 ns = f.Namespace.Name 177 framework.ExpectNoError(waitForMasters(ctx, framework.TestContext.CloudConfig.MasterName, c, 1, 10*time.Minute)) 178 additionalReplicaZones = make([]string, 0) 179 existingRCs = make([]string, 0) 180 }) 181 182 ginkgo.AfterEach(func(ctx context.Context) { 183 // Clean-up additional worker nodes if the test execution was broken. 184 for _, zone := range additionalNodesZones { 185 removeWorkerNodes(zone) 186 } 187 framework.ExpectNoError(e2enode.AllNodesReady(ctx, c, 5*time.Minute)) 188 189 // Clean-up additional master replicas if the test execution was broken. 190 for _, zone := range additionalReplicaZones { 191 removeMasterReplica(zone) 192 } 193 framework.ExpectNoError(waitForMasters(ctx, framework.TestContext.CloudConfig.MasterName, c, 1, 10*time.Minute)) 194 }) 195 196 type Action int 197 const ( 198 None Action = iota 199 AddReplica 200 RemoveReplica 201 AddNodes 202 RemoveNodes 203 ) 204 205 step := func(ctx context.Context, action Action, zone string) { 206 switch action { 207 case None: 208 case AddReplica: 209 framework.ExpectNoError(addMasterReplica(zone)) 210 additionalReplicaZones = append(additionalReplicaZones, zone) 211 case RemoveReplica: 212 framework.ExpectNoError(removeMasterReplica(zone)) 213 additionalReplicaZones = removeZoneFromZones(additionalReplicaZones, zone) 214 case AddNodes: 215 framework.ExpectNoError(addWorkerNodes(zone)) 216 additionalNodesZones = append(additionalNodesZones, zone) 217 case RemoveNodes: 218 framework.ExpectNoError(removeWorkerNodes(zone)) 219 additionalNodesZones = removeZoneFromZones(additionalNodesZones, zone) 220 } 221 framework.ExpectNoError(waitForMasters(ctx, framework.TestContext.CloudConfig.MasterName, c, len(additionalReplicaZones)+1, 10*time.Minute)) 222 framework.ExpectNoError(e2enode.AllNodesReady(ctx, c, 5*time.Minute)) 223 224 // Verify that API server works correctly with HA master. 225 rcName := "ha-master-" + strconv.Itoa(len(existingRCs)) 226 createNewRC(c, ns, rcName) 227 existingRCs = append(existingRCs, rcName) 228 verifyRCs(ctx, c, ns, existingRCs) 229 } 230 231 f.It("survive addition/removal replicas same zone", f.WithSerial(), f.WithDisruptive(), func(ctx context.Context) { 232 zone := framework.TestContext.CloudConfig.Zone 233 step(ctx, None, "") 234 numAdditionalReplicas := 2 235 for i := 0; i < numAdditionalReplicas; i++ { 236 step(ctx, AddReplica, zone) 237 } 238 for i := 0; i < numAdditionalReplicas; i++ { 239 step(ctx, RemoveReplica, zone) 240 } 241 }) 242 243 f.It("survive addition/removal replicas different zones", f.WithSerial(), f.WithDisruptive(), func(ctx context.Context) { 244 zone := framework.TestContext.CloudConfig.Zone 245 region := findRegionForZone(zone) 246 zones := findZonesForRegion(region) 247 zones = removeZoneFromZones(zones, zone) 248 249 step(ctx, None, "") 250 // If numAdditionalReplicas is larger then the number of remaining zones in the region, 251 // we create a few masters in the same zone and zone entry is repeated in additionalReplicaZones. 252 numAdditionalReplicas := 2 253 for i := 0; i < numAdditionalReplicas; i++ { 254 step(ctx, AddReplica, zones[i%len(zones)]) 255 } 256 for i := 0; i < numAdditionalReplicas; i++ { 257 step(ctx, RemoveReplica, zones[i%len(zones)]) 258 } 259 }) 260 261 f.It("survive addition/removal replicas multizone workers", f.WithSerial(), f.WithDisruptive(), func(ctx context.Context) { 262 zone := framework.TestContext.CloudConfig.Zone 263 region := findRegionForZone(zone) 264 zones := findZonesForRegion(region) 265 zones = removeZoneFromZones(zones, zone) 266 267 step(ctx, None, "") 268 numAdditionalReplicas := 2 269 270 // Add worker nodes. 271 for i := 0; i < numAdditionalReplicas && i < len(zones); i++ { 272 step(ctx, AddNodes, zones[i]) 273 } 274 275 // Add master repilcas. 276 // 277 // If numAdditionalReplicas is larger then the number of remaining zones in the region, 278 // we create a few masters in the same zone and zone entry is repeated in additionalReplicaZones. 279 for i := 0; i < numAdditionalReplicas; i++ { 280 step(ctx, AddReplica, zones[i%len(zones)]) 281 } 282 283 // Remove master repilcas. 284 for i := 0; i < numAdditionalReplicas; i++ { 285 step(ctx, RemoveReplica, zones[i%len(zones)]) 286 } 287 288 // Remove worker nodes. 289 for i := 0; i < numAdditionalReplicas && i < len(zones); i++ { 290 step(ctx, RemoveNodes, zones[i]) 291 } 292 }) 293 })