agones.dev/agones@v1.53.0/pkg/fleets/controller_rollingupdatefix.go (about) 1 package fleets 2 3 import ( 4 "context" 5 "fmt" 6 7 agonesv1 "agones.dev/agones/pkg/apis/agones/v1" 8 "github.com/pkg/errors" 9 corev1 "k8s.io/api/core/v1" 10 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 "k8s.io/apimachinery/pkg/util/intstr" 12 "k8s.io/utils/integer" 13 ) 14 15 func (c *Controller) cleanupUnhealthyReplicasRollingUpdateFix(ctx context.Context, rest []*agonesv1.GameServerSet, 16 fleet *agonesv1.Fleet, maxCleanupCount int32) ([]*agonesv1.GameServerSet, int32, error) { 17 18 // Safely scale down all old GameServerSets with unhealthy replicas. 19 totalScaledDown := int32(0) 20 for i, gsSet := range rest { 21 healthy := gsSet.Status.ReadyReplicas + gsSet.Status.AllocatedReplicas + gsSet.Status.ReservedReplicas 22 23 if totalScaledDown >= maxCleanupCount { 24 break 25 } 26 if gsSet.Spec.Replicas == 0 { 27 // cannot scale down this replica set. 28 continue 29 } 30 if gsSet.Spec.Replicas <= healthy { 31 // no unhealthy replicas found, no scaling required. 32 continue 33 } 34 35 scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(gsSet.Spec.Replicas-healthy))) 36 newReplicasCount := gsSet.Spec.Replicas - scaledDownCount 37 if newReplicasCount > gsSet.Spec.Replicas { 38 return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", gsSet.Namespace, gsSet.Name, gsSet.Spec.Replicas, newReplicasCount) 39 } 40 41 gsSetCopy := gsSet.DeepCopy() 42 gsSetCopy.Spec.Replicas = newReplicasCount 43 totalScaledDown += scaledDownCount 44 if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil { 45 return nil, totalScaledDown, errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name) 46 } 47 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet", 48 "Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas) 49 50 rest[i] = gsSetCopy 51 } 52 return rest, totalScaledDown, nil 53 } 54 55 func (c *Controller) rollingUpdateRestFixedOnReadyRollingUpdateFix(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) error { 56 if len(rest) == 0 { 57 return nil 58 } 59 60 // Look at Kubernetes Deployment util ResolveFenceposts() function 61 r, err := intstr.GetValueFromIntOrPercent(fleet.Spec.Strategy.RollingUpdate.MaxUnavailable, int(fleet.Status.ReadyReplicas), false) 62 if err != nil { 63 return errors.Wrapf(err, "error parsing MaxUnavailable value: %s", fleet.ObjectMeta.Name) 64 } 65 if r == 0 { 66 r = 1 67 } 68 if r > int(fleet.Spec.Replicas) { 69 r = int(fleet.Spec.Replicas) 70 } 71 unavailable := int32(r) 72 73 totalAlreadyScaledDown := int32(0) 74 75 totalScaleDownCount := int32(0) 76 // Check if we can scale down. 77 allGSS := rest 78 allGSS = append(allGSS, active) 79 readyReplicasCount := agonesv1.GetReadyReplicaCountForGameServerSets(allGSS) 80 minAvailable := fleet.Status.ReadyReplicas - unavailable 81 if minAvailable > fleet.Spec.Replicas { 82 minAvailable = fleet.Spec.Replicas 83 } 84 85 // Check if we are ready to scale down 86 newGSSUnavailablePodCount := active.Spec.Replicas - active.Status.ReadyReplicas - active.Status.ReservedReplicas - 87 active.Status.AllocatedReplicas - active.Status.ShutdownReplicas 88 maxScaledDown := readyReplicasCount - minAvailable - newGSSUnavailablePodCount 89 90 if maxScaledDown <= 0 { 91 return nil 92 } 93 rest, _, err = c.cleanupUnhealthyReplicasRollingUpdateFix(ctx, rest, fleet, maxScaledDown) 94 if err != nil { 95 loggerForFleet(fleet, c.baseLogger).WithField("fleet", fleet.ObjectMeta.Name).WithField("maxScaledDown", maxScaledDown). 96 Debug("Can not cleanup Unhealth Replicas") 97 // There could be the case when GameServerSet would be updated from another place, say Status or Spec would be updated 98 // We don't want to propagate such errors further 99 // And this set in sync with reconcileOldReplicaSets() Kubernetes code 100 return nil 101 } 102 // Resulting value is readyReplicasCount + unavailable - fleet.Spec.Replicas 103 totalScaleDownCount = readyReplicasCount - minAvailable 104 if readyReplicasCount <= minAvailable { 105 // Cannot scale down. 106 return nil 107 } 108 for _, gsSet := range rest { 109 if totalAlreadyScaledDown >= totalScaleDownCount { 110 // No further scaling required. 111 break 112 } 113 114 // Crucial fix if we are using wrong configuration of a fleet, 115 // that would lead to Status.Replicas being 0 but number of GameServers would be in a Scheduled or Unhealthy state. 116 // Compare with scaleDownOldReplicaSetsForRollingUpdate() for loop. 117 // if the Spec.Replicas are less than or equal to 0, then that means we are done 118 // scaling this GameServerSet down, and can therefore exit/move to the next one. 119 if gsSet.Spec.Replicas <= 0 { 120 continue 121 } 122 123 // If the Spec.Replicas does not equal the Status.Replicas for this GameServerSet, this means 124 // that the rolling down process is currently ongoing, and we should therefore exit so we can wait for it to finish 125 if gsSet.Spec.Replicas != gsSet.Status.Replicas { 126 break 127 } 128 gsSetCopy := gsSet.DeepCopy() 129 if gsSet.Status.ShutdownReplicas == 0 { 130 // Wait for new GameServers to become Ready before scaling down Inactive GameServerset 131 // Scale down. 132 scaleDownCount := int32(integer.IntMin(int(gsSet.Spec.Replicas), int(totalScaleDownCount-totalAlreadyScaledDown))) 133 134 newReplicasCount := gsSet.Spec.Replicas - scaleDownCount 135 if newReplicasCount > gsSet.Spec.Replicas { 136 return fmt.Errorf("when scaling down old GameServerSet, got invalid request to scale down %s/%s %d -> %d", gsSet.Namespace, gsSet.Name, gsSet.Spec.Replicas, newReplicasCount) 137 } 138 139 switch { 140 case gsSet.Status.Replicas == gsSet.Status.AllocatedReplicas: 141 gsSetCopy.Spec.Replicas = 0 142 case newReplicasCount == gsSet.Spec.Replicas: 143 // No updates on GameServerSet 144 continue 145 default: 146 gsSetCopy.Spec.Replicas = newReplicasCount 147 } 148 loggerForFleet(fleet, c.baseLogger).WithField("gameserverset", gsSet.ObjectMeta.Name).WithField("replicas", gsSetCopy.Spec.Replicas). 149 Debug("applying rolling update to inactive gameserverset") 150 151 if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil { 152 return errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name) 153 } 154 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet", 155 "Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas) 156 157 totalAlreadyScaledDown += scaleDownCount 158 } 159 } 160 return nil 161 }