agones.dev/agones@v1.54.0/pkg/fleets/controller.go (about) 1 // Copyright 2018 Google LLC All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package fleets 16 17 import ( 18 "context" 19 "encoding/json" 20 "fmt" 21 "time" 22 23 "agones.dev/agones/pkg/apis/agones" 24 agonesv1 "agones.dev/agones/pkg/apis/agones/v1" 25 "agones.dev/agones/pkg/client/clientset/versioned" 26 getterv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1" 27 "agones.dev/agones/pkg/client/informers/externalversions" 28 listerv1 "agones.dev/agones/pkg/client/listers/agones/v1" 29 "agones.dev/agones/pkg/util/crd" 30 "agones.dev/agones/pkg/util/logfields" 31 "agones.dev/agones/pkg/util/runtime" 32 "agones.dev/agones/pkg/util/webhooks" 33 "agones.dev/agones/pkg/util/workerqueue" 34 "github.com/google/go-cmp/cmp" 35 "github.com/heptiolabs/healthcheck" 36 "github.com/pkg/errors" 37 "github.com/sirupsen/logrus" 38 "gomodules.xyz/jsonpatch/v2" 39 admissionv1 "k8s.io/api/admission/v1" 40 appsv1 "k8s.io/api/apps/v1" 41 corev1 "k8s.io/api/core/v1" 42 extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" 43 apiextclientv1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1" 44 apiequality "k8s.io/apimachinery/pkg/api/equality" 45 k8serrors "k8s.io/apimachinery/pkg/api/errors" 46 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 47 runtimeschema "k8s.io/apimachinery/pkg/runtime/schema" 48 "k8s.io/apimachinery/pkg/util/intstr" 49 "k8s.io/client-go/kubernetes" 50 "k8s.io/client-go/kubernetes/scheme" 51 typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1" 52 "k8s.io/client-go/tools/cache" 53 "k8s.io/client-go/tools/record" 54 "k8s.io/utils/integer" 55 ) 56 57 // Extensions struct contains what is needed to bind webhook handlers 58 type Extensions struct { 59 baseLogger *logrus.Entry 60 apiHooks agonesv1.APIHooks 61 } 62 63 // Controller is a the GameServerSet controller 64 type Controller struct { 65 baseLogger *logrus.Entry 66 crdGetter apiextclientv1.CustomResourceDefinitionInterface 67 gameServerSetGetter getterv1.GameServerSetsGetter 68 gameServerSetLister listerv1.GameServerSetLister 69 gameServerSetSynced cache.InformerSynced 70 fleetGetter getterv1.FleetsGetter 71 fleetLister listerv1.FleetLister 72 fleetSynced cache.InformerSynced 73 workerqueue *workerqueue.WorkerQueue 74 recorder record.EventRecorder 75 } 76 77 // NewController returns a new fleets crd controller 78 func NewController( 79 health healthcheck.Handler, 80 kubeClient kubernetes.Interface, 81 extClient extclientset.Interface, 82 agonesClient versioned.Interface, 83 agonesInformerFactory externalversions.SharedInformerFactory) *Controller { 84 85 gameServerSets := agonesInformerFactory.Agones().V1().GameServerSets() 86 gsSetInformer := gameServerSets.Informer() 87 88 fleets := agonesInformerFactory.Agones().V1().Fleets() 89 fInformer := fleets.Informer() 90 91 c := &Controller{ 92 crdGetter: extClient.ApiextensionsV1().CustomResourceDefinitions(), 93 gameServerSetGetter: agonesClient.AgonesV1(), 94 gameServerSetLister: gameServerSets.Lister(), 95 gameServerSetSynced: gsSetInformer.HasSynced, 96 fleetGetter: agonesClient.AgonesV1(), 97 fleetLister: fleets.Lister(), 98 fleetSynced: fInformer.HasSynced, 99 } 100 101 c.baseLogger = runtime.NewLoggerWithType(c) 102 c.workerqueue = workerqueue.NewWorkerQueueWithRateLimiter(c.syncFleet, c.baseLogger, logfields.FleetKey, agones.GroupName+".FleetController", workerqueue.FastRateLimiter(3*time.Second)) 103 health.AddLivenessCheck("fleet-workerqueue", healthcheck.Check(c.workerqueue.Healthy)) 104 105 eventBroadcaster := record.NewBroadcaster() 106 eventBroadcaster.StartLogging(c.baseLogger.Debugf) 107 eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) 108 c.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "fleet-controller"}) 109 110 _, _ = fInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ 111 AddFunc: c.workerqueue.Enqueue, 112 UpdateFunc: func(_, newObj interface{}) { 113 c.workerqueue.Enqueue(newObj) 114 }, 115 }) 116 117 _, _ = gsSetInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ 118 AddFunc: c.gameServerSetEventHandler, 119 UpdateFunc: func(_, newObj interface{}) { 120 gsSet := newObj.(*agonesv1.GameServerSet) 121 // ignore if already being deleted 122 if gsSet.ObjectMeta.DeletionTimestamp.IsZero() { 123 c.gameServerSetEventHandler(gsSet) 124 } 125 }, 126 }) 127 128 return c 129 } 130 131 // NewExtensions binds the handlers to the webhook outside the initialization of the controller 132 // initializes a new logger for extensions. 133 func NewExtensions(apiHooks agonesv1.APIHooks, wh *webhooks.WebHook) *Extensions { 134 ext := &Extensions{apiHooks: apiHooks} 135 136 ext.baseLogger = runtime.NewLoggerWithType(ext) 137 138 wh.AddHandler("/mutate", agonesv1.Kind("Fleet"), admissionv1.Create, ext.creationMutationHandler) 139 wh.AddHandler("/validate", agonesv1.Kind("Fleet"), admissionv1.Create, ext.creationValidationHandler) 140 wh.AddHandler("/validate", agonesv1.Kind("Fleet"), admissionv1.Update, ext.creationValidationHandler) 141 142 return ext 143 } 144 145 // creationMutationHandler is the handler for the mutating webhook that sets the 146 // the default values on the Fleet 147 // Should only be called on fleet create operations. 148 // nolint:dupl 149 func (ext *Extensions) creationMutationHandler(review admissionv1.AdmissionReview) (admissionv1.AdmissionReview, error) { 150 ext.baseLogger.WithField("review", review).Debug("creationMutationHandler") 151 152 obj := review.Request.Object 153 fleet := &agonesv1.Fleet{} 154 err := json.Unmarshal(obj.Raw, fleet) 155 if err != nil { 156 // If the JSON is invalid during mutation, fall through to validation. This allows OpenAPI schema validation 157 // to proceed, resulting in a more user friendly error message. 158 return review, nil 159 } 160 161 // This is the main logic of this function 162 // the rest is really just json plumbing 163 fleet.ApplyDefaults() 164 165 newFleet, err := json.Marshal(fleet) 166 if err != nil { 167 return review, errors.Wrapf(err, "error marshalling default applied Fleet %s to json", fleet.ObjectMeta.Name) 168 } 169 170 patch, err := jsonpatch.CreatePatch(obj.Raw, newFleet) 171 if err != nil { 172 return review, errors.Wrapf(err, "error creating patch for Fleet %s", fleet.ObjectMeta.Name) 173 } 174 175 jsn, err := json.Marshal(patch) 176 if err != nil { 177 return review, errors.Wrapf(err, "error creating json for patch for Fleet %s", fleet.ObjectMeta.Name) 178 } 179 180 loggerForFleet(fleet, ext.baseLogger).WithField("patch", string(jsn)).Debug("patch created!") 181 182 pt := admissionv1.PatchTypeJSONPatch 183 review.Response.PatchType = &pt 184 review.Response.Patch = jsn 185 186 return review, nil 187 } 188 189 // creationValidationHandler that validates a Fleet when it is created 190 // Should only be called on Fleet create and Update operations. 191 func (ext *Extensions) creationValidationHandler(review admissionv1.AdmissionReview) (admissionv1.AdmissionReview, error) { 192 ext.baseLogger.WithField("review", review).Debug("creationValidationHandler") 193 194 obj := review.Request.Object 195 fleet := &agonesv1.Fleet{} 196 err := json.Unmarshal(obj.Raw, fleet) 197 if err != nil { 198 return review, errors.Wrapf(err, "error unmarshalling Fleet json after schema validation: %s", obj.Raw) 199 } 200 201 if errs := fleet.Validate(ext.apiHooks); len(errs) > 0 { 202 kind := runtimeschema.GroupKind{ 203 Group: review.Request.Kind.Group, 204 Kind: review.Request.Kind.Kind, 205 } 206 statusErr := k8serrors.NewInvalid(kind, review.Request.Name, errs) 207 review.Response.Allowed = false 208 review.Response.Result = &statusErr.ErrStatus 209 loggerForFleet(fleet, ext.baseLogger).WithField("review", review).Debug("Invalid Fleet") 210 } 211 212 return review, nil 213 } 214 215 // Run the Fleet controller. Will block until stop is closed. 216 // Runs threadiness number workers to process the rate limited queue 217 func (c *Controller) Run(ctx context.Context, workers int) error { 218 err := crd.WaitForEstablishedCRD(ctx, c.crdGetter, "fleets.agones.dev", c.baseLogger) 219 if err != nil { 220 return err 221 } 222 223 c.baseLogger.Debug("Wait for cache sync") 224 if !cache.WaitForCacheSync(ctx.Done(), c.gameServerSetSynced, c.fleetSynced) { 225 return errors.New("failed to wait for caches to sync") 226 } 227 228 c.workerqueue.Run(ctx, workers) 229 return nil 230 } 231 232 func loggerForFleetKey(key string, logger *logrus.Entry) *logrus.Entry { 233 return logfields.AugmentLogEntry(logger, logfields.FleetKey, key) 234 } 235 236 func loggerForFleet(f *agonesv1.Fleet, logger *logrus.Entry) *logrus.Entry { 237 fleetName := "NilFleet" 238 if f != nil { 239 fleetName = f.ObjectMeta.Namespace + "/" + f.ObjectMeta.Name 240 } 241 return loggerForFleetKey(fleetName, logger).WithField("fleet", f) 242 } 243 244 // gameServerSetEventHandler enqueues the owning Fleet for this GameServerSet, 245 // assuming that it has one 246 func (c *Controller) gameServerSetEventHandler(obj interface{}) { 247 gsSet := obj.(*agonesv1.GameServerSet) 248 ref := metav1.GetControllerOf(gsSet) 249 if ref == nil { 250 return 251 } 252 253 fleet, err := c.fleetLister.Fleets(gsSet.ObjectMeta.Namespace).Get(ref.Name) 254 if err != nil { 255 if k8serrors.IsNotFound(err) { 256 c.baseLogger.WithField("ref", ref).Warn("Owner Fleet no longer available for syncing") 257 } else { 258 runtime.HandleError(loggerForFleet(fleet, c.baseLogger).WithField("ref", ref), 259 errors.Wrap(err, "error retrieving GameServerSet owner")) 260 } 261 return 262 } 263 c.workerqueue.Enqueue(fleet) 264 } 265 266 // syncFleet synchronised the fleet CRDs and configures/updates 267 // backing GameServerSets 268 func (c *Controller) syncFleet(ctx context.Context, key string) error { 269 loggerForFleetKey(key, c.baseLogger).Debug("Synchronising") 270 271 // Convert the namespace/name string into a distinct namespace and name 272 namespace, name, err := cache.SplitMetaNamespaceKey(key) 273 if err != nil { 274 // don't return an error, as we don't want this retried 275 runtime.HandleError(loggerForFleetKey(key, c.baseLogger), errors.Wrapf(err, "invalid resource key")) 276 return nil 277 } 278 279 fleet, err := c.fleetLister.Fleets(namespace).Get(name) 280 if err != nil { 281 if k8serrors.IsNotFound(err) { 282 loggerForFleetKey(key, c.baseLogger).Debug("Fleet is no longer available for syncing") 283 return nil 284 } 285 return errors.Wrapf(err, "error retrieving fleet %s from namespace %s", name, namespace) 286 } 287 288 // If Fleet is marked for deletion don't do anything. 289 if !fleet.DeletionTimestamp.IsZero() { 290 return nil 291 } 292 293 gameServerSetNamespacedLister := c.gameServerSetLister.GameServerSets(fleet.ObjectMeta.Namespace) 294 list, err := ListGameServerSetsByFleetOwner(gameServerSetNamespacedLister, fleet) 295 if err != nil { 296 return err 297 } 298 299 active, rest := c.filterGameServerSetByActive(fleet, list) 300 301 // if there isn't an active gameServerSet, create one (but don't persist yet) 302 if active == nil { 303 loggerForFleet(fleet, c.baseLogger).Debug("could not find active GameServerSet, creating") 304 active = fleet.GameServerSet() 305 } 306 307 replicas, err := c.applyDeploymentStrategy(ctx, fleet, active, rest) 308 if err != nil { 309 return err 310 } 311 if err := c.deleteEmptyGameServerSets(ctx, fleet, rest); err != nil { 312 return err 313 } 314 315 if err := c.upsertGameServerSet(ctx, fleet, active, replicas); err != nil { 316 return err 317 } 318 return c.updateFleetStatus(ctx, fleet) 319 } 320 321 // upsertGameServerSet if the GameServerSet is new, insert it 322 // if the replicas do not match the active 323 // GameServerSet, then update it 324 func (c *Controller) upsertGameServerSet(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, replicas int32) error { 325 if active.ObjectMeta.UID == "" { 326 active.Spec.Replicas = replicas 327 gsSets := c.gameServerSetGetter.GameServerSets(active.ObjectMeta.Namespace) 328 gsSet, err := gsSets.Create(ctx, active, metav1.CreateOptions{}) 329 if err != nil { 330 return errors.Wrapf(err, "error creating gameserverset for fleet %s", fleet.ObjectMeta.Name) 331 } 332 333 // extra step which is needed to set 334 // default values for GameServerSet Status Subresource 335 gsSetCopy := gsSet.DeepCopy() 336 gsSetCopy.Status.ReadyReplicas = 0 337 gsSetCopy.Status.Replicas = 0 338 gsSetCopy.Status.AllocatedReplicas = 0 339 _, err = gsSets.UpdateStatus(ctx, gsSetCopy, metav1.UpdateOptions{}) 340 if err != nil { 341 return errors.Wrapf(err, "error updating status of gameserverset for fleet %s", 342 fleet.ObjectMeta.Name) 343 } 344 345 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "CreatingGameServerSet", 346 "Created GameServerSet %s", gsSet.ObjectMeta.Name) 347 return nil 348 } 349 350 if replicas != active.Spec.Replicas || active.Spec.Scheduling != fleet.Spec.Scheduling { 351 gsSetCopy := active.DeepCopy() 352 gsSetCopy.Spec.Replicas = replicas 353 gsSetCopy.Spec.Scheduling = fleet.Spec.Scheduling 354 gsSetCopy, err := c.gameServerSetGetter.GameServerSets(fleet.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}) 355 if err != nil { 356 return errors.Wrapf(err, "error updating replicas for gameserverset for fleet %s", fleet.ObjectMeta.Name) 357 } 358 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet", 359 "Scaling active GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, active.Spec.Replicas, gsSetCopy.Spec.Replicas) 360 } 361 362 // Update GameServerSet Counts and Lists Priorities if not equal to the Priorities on the Fleet 363 if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 364 if !cmp.Equal(active.Spec.Priorities, fleet.Spec.Priorities) { 365 gsSetCopy := active.DeepCopy() 366 gsSetCopy.Spec.Priorities = fleet.Spec.Priorities 367 _, err := c.gameServerSetGetter.GameServerSets(fleet.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}) 368 if err != nil { 369 return errors.Wrapf(err, "error updating priorities for gameserverset for fleet %s", fleet.ObjectMeta.Name) 370 } 371 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "UpdatingGameServerSet", 372 "Updated GameServerSet %s Priorities", gsSetCopy.ObjectMeta.Name) 373 } 374 } 375 376 return nil 377 } 378 379 // applyDeploymentStrategy applies the Fleet > Spec > Deployment strategy to all the non-active 380 // GameServerSets that are passed in 381 func (c *Controller) applyDeploymentStrategy(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) (int32, error) { 382 // if there is nothing `rest`, then it's either a brand-new Fleet, or we can just jump to the fleet value, 383 // since there is nothing else scaling down at this point 384 if len(rest) == 0 { 385 return fleet.Spec.Replicas, nil 386 } 387 388 // if we do have `rest` but all their spec.replicas is zero, we can just do subtraction against whatever is allocated in `rest`. 389 if agonesv1.SumSpecReplicas(rest) == 0 { 390 blocked := agonesv1.SumGameServerSets(rest, func(gsSet *agonesv1.GameServerSet) int32 { 391 return gsSet.Status.ReservedReplicas + gsSet.Status.AllocatedReplicas 392 }) 393 replicas := fleet.Spec.Replicas - blocked 394 if replicas < 0 { 395 replicas = 0 396 } 397 return replicas, nil 398 } 399 400 switch fleet.Spec.Strategy.Type { 401 case appsv1.RecreateDeploymentStrategyType: 402 return c.recreateDeployment(ctx, fleet, rest) 403 case appsv1.RollingUpdateDeploymentStrategyType: 404 return c.rollingUpdateDeployment(ctx, fleet, active, rest) 405 } 406 407 return 0, errors.Errorf("unexpected deployment strategy type: %s", fleet.Spec.Strategy.Type) 408 } 409 410 // deleteEmptyGameServerSets deletes all GameServerServerSets 411 // That have `Status > Replicas` of 0 412 func (c *Controller) deleteEmptyGameServerSets(ctx context.Context, fleet *agonesv1.Fleet, list []*agonesv1.GameServerSet) error { 413 p := metav1.DeletePropagationBackground 414 for _, gsSet := range list { 415 if gsSet.Status.Replicas == 0 && gsSet.Status.ShutdownReplicas == 0 { 416 err := c.gameServerSetGetter.GameServerSets(gsSet.ObjectMeta.Namespace).Delete(ctx, gsSet.ObjectMeta.Name, metav1.DeleteOptions{PropagationPolicy: &p}) 417 if err != nil { 418 return errors.Wrapf(err, "error updating gameserverset %s", gsSet.ObjectMeta.Name) 419 } 420 421 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "DeletingGameServerSet", "Deleting inactive GameServerSet %s", gsSet.ObjectMeta.Name) 422 } 423 } 424 425 return nil 426 } 427 428 // recreateDeployment applies the recreate deployment strategy to all non-active 429 // GameServerSets, and return the replica count for the active GameServerSet 430 func (c *Controller) recreateDeployment(ctx context.Context, fleet *agonesv1.Fleet, rest []*agonesv1.GameServerSet) (int32, error) { 431 for _, gsSet := range rest { 432 if gsSet.Spec.Replicas == 0 { 433 continue 434 } 435 loggerForFleet(fleet, c.baseLogger).WithField("gameserverset", gsSet.ObjectMeta.Name).Debug("applying recreate deployment: scaling to 0") 436 gsSetCopy := gsSet.DeepCopy() 437 gsSetCopy.Spec.Replicas = 0 438 if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil { 439 return 0, errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name) 440 } 441 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet", 442 "Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas) 443 } 444 445 return fleet.LowerBoundReplicas(fleet.Spec.Replicas - agonesv1.SumStatusAllocatedReplicas(rest)), nil 446 } 447 448 // rollingUpdateDeployment will do the rolling update of the old GameServers 449 // through to the new ones, based on the fleet.Spec.Strategy.RollingUpdate configuration 450 // and return the replica count for the active GameServerSet 451 func (c *Controller) rollingUpdateDeployment(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) (int32, error) { 452 replicas, err := c.rollingUpdateActive(fleet, active, rest) 453 if err != nil { 454 return 0, err 455 } 456 if err := c.rollingUpdateRest(ctx, fleet, active, rest); err != nil { 457 return 0, err 458 } 459 return replicas, nil 460 } 461 462 // rollingUpdateActive applies the rolling update to the active GameServerSet 463 // and returns what its replica value should be set to 464 func (c *Controller) rollingUpdateActive(fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) (int32, error) { 465 replicas := active.Spec.Replicas 466 // always leave room for Allocated GameServers 467 sumAllocated := agonesv1.SumStatusAllocatedReplicas(rest) 468 469 // if the active spec replicas don't equal the active status replicas, this means we are 470 // in the middle of a rolling update, and should wait for it to complete. 471 if active.Spec.Replicas != active.Status.Replicas { 472 return replicas, nil 473 } 474 475 // if the current number replicas from the fleet is zero, the rolling update can be ignored 476 // and the cleanup stage will remove dangling GameServerSets 477 if fleet.Spec.Replicas == 0 { 478 return 0, nil 479 } 480 481 // if the active spec replicas are greater than or equal the fleet spec replicas, then we don't 482 // need to do another rolling update upwards. 483 if active.Spec.Replicas >= (fleet.Spec.Replicas - sumAllocated) { 484 return fleet.Spec.Replicas - sumAllocated, nil 485 } 486 487 r, err := intstr.GetValueFromIntOrPercent(fleet.Spec.Strategy.RollingUpdate.MaxSurge, int(fleet.Spec.Replicas), true) 488 if err != nil { 489 return 0, errors.Wrapf(err, "error parsing MaxSurge value: %s", fleet.ObjectMeta.Name) 490 } 491 surge := int32(r) 492 493 // make sure we don't end up with more than the configured max surge 494 maxSurge := surge + fleet.Spec.Replicas 495 replicas = fleet.UpperBoundReplicas(replicas + surge) 496 total := agonesv1.SumStatusReplicas(rest) + replicas 497 if total > maxSurge { 498 replicas = fleet.LowerBoundReplicas(replicas - (total - maxSurge)) 499 } 500 501 // make room for allocated game servers, but not over the fleet replica count 502 if replicas+sumAllocated > fleet.Spec.Replicas { 503 replicas = fleet.LowerBoundReplicas(fleet.Spec.Replicas - sumAllocated) 504 } 505 506 loggerForFleet(fleet, c.baseLogger).WithField("gameserverset", active.ObjectMeta.Name).WithField("replicas", replicas). 507 Debug("applying rolling update to active gameserverset") 508 509 return replicas, nil 510 } 511 512 func (c *Controller) cleanupUnhealthyReplicas(ctx context.Context, rest []*agonesv1.GameServerSet, 513 fleet *agonesv1.Fleet, maxCleanupCount int32) ([]*agonesv1.GameServerSet, int32, error) { 514 515 // Safely scale down all old GameServerSets with unhealthy replicas. 516 totalScaledDown := int32(0) 517 for i, gsSet := range rest { 518 if totalScaledDown >= maxCleanupCount { 519 break 520 } 521 if gsSet.Spec.Replicas == 0 { 522 // cannot scale down this replica set. 523 continue 524 } 525 if gsSet.Spec.Replicas == gsSet.Status.ReadyReplicas { 526 // no unhealthy replicas found, no scaling required. 527 continue 528 } 529 530 scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(gsSet.Spec.Replicas-gsSet.Status.ReadyReplicas))) 531 newReplicasCount := gsSet.Spec.Replicas - scaledDownCount 532 if newReplicasCount > gsSet.Spec.Replicas { 533 return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", gsSet.Namespace, gsSet.Name, gsSet.Spec.Replicas, newReplicasCount) 534 } 535 536 gsSetCopy := gsSet.DeepCopy() 537 gsSetCopy.Spec.Replicas = newReplicasCount 538 totalScaledDown += scaledDownCount 539 if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil { 540 return nil, totalScaledDown, errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name) 541 } 542 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet", 543 "Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas) 544 545 rest[i] = gsSetCopy 546 } 547 return rest, totalScaledDown, nil 548 } 549 550 func (c *Controller) rollingUpdateRestFixedOnReady(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) error { 551 if len(rest) == 0 { 552 return nil 553 } 554 if runtime.FeatureEnabled(runtime.FeatureRollingUpdateFix) { 555 return c.rollingUpdateRestFixedOnReadyRollingUpdateFix(ctx, fleet, active, rest) 556 } 557 558 // Look at Kubernetes Deployment util ResolveFenceposts() function 559 r, err := intstr.GetValueFromIntOrPercent(fleet.Spec.Strategy.RollingUpdate.MaxUnavailable, int(fleet.Spec.Replicas), false) 560 if err != nil { 561 return errors.Wrapf(err, "error parsing MaxUnavailable value: %s", fleet.ObjectMeta.Name) 562 } 563 if r == 0 { 564 r = 1 565 } 566 if r > int(fleet.Spec.Replicas) { 567 r = int(fleet.Spec.Replicas) 568 } 569 unavailable := int32(r) 570 571 totalAlreadyScaledDown := int32(0) 572 573 totalScaleDownCount := int32(0) 574 // Check if we can scale down. 575 allGSS := rest 576 allGSS = append(allGSS, active) 577 readyReplicasCount := agonesv1.GetReadyReplicaCountForGameServerSets(allGSS) 578 minAvailable := fleet.Spec.Replicas - unavailable 579 580 // Check if we are ready to scale down 581 allPodsCount := agonesv1.SumSpecReplicas(allGSS) 582 newGSSUnavailablePodCount := active.Spec.Replicas - active.Status.ReadyReplicas - active.Status.AllocatedReplicas 583 maxScaledDown := allPodsCount - minAvailable - newGSSUnavailablePodCount 584 585 if maxScaledDown <= 0 { 586 return nil 587 } 588 rest, _, err = c.cleanupUnhealthyReplicas(ctx, rest, fleet, maxScaledDown) 589 if err != nil { 590 loggerForFleet(fleet, c.baseLogger).WithField("fleet", fleet.ObjectMeta.Name).WithField("maxScaledDown", maxScaledDown). 591 Debug("Can not cleanup Unhealth Replicas") 592 // There could be the case when GameServerSet would be updated from another place, say Status or Spec would be updated 593 // We don't want to propagate such errors further 594 // And this set in sync with reconcileOldReplicaSets() Kubernetes code 595 return nil 596 } 597 // Resulting value is readyReplicasCount + unavailable - fleet.Spec.Replicas 598 totalScaleDownCount = readyReplicasCount - minAvailable 599 if readyReplicasCount <= minAvailable { 600 // Cannot scale down. 601 return nil 602 } 603 for _, gsSet := range rest { 604 if totalAlreadyScaledDown >= totalScaleDownCount { 605 // No further scaling required. 606 break 607 } 608 609 // Crucial fix if we are using wrong configuration of a fleet, 610 // that would lead to Status.Replicas being 0 but number of GameServers would be in a Scheduled or Unhealthy state. 611 // Compare with scaleDownOldReplicaSetsForRollingUpdate() for loop. 612 // if the Spec.Replicas are less than or equal to 0, then that means we are done 613 // scaling this GameServerSet down, and can therefore exit/move to the next one. 614 if gsSet.Spec.Replicas <= 0 { 615 continue 616 } 617 618 // If the Spec.Replicas does not equal the Status.Replicas for this GameServerSet, this means 619 // that the rolling down process is currently ongoing, and we should therefore exit so we can wait for it to finish 620 if gsSet.Spec.Replicas != gsSet.Status.Replicas { 621 break 622 } 623 gsSetCopy := gsSet.DeepCopy() 624 if gsSet.Status.ShutdownReplicas == 0 { 625 // Wait for new GameServers to become Ready before scaling down Inactive GameServerset 626 // Scale down. 627 scaleDownCount := int32(integer.IntMin(int(gsSet.Spec.Replicas), int(totalScaleDownCount-totalAlreadyScaledDown))) 628 629 newReplicasCount := gsSet.Spec.Replicas - scaleDownCount 630 if newReplicasCount > gsSet.Spec.Replicas { 631 return fmt.Errorf("when scaling down old GameServerSet, got invalid request to scale down %s/%s %d -> %d", gsSet.Namespace, gsSet.Name, gsSet.Spec.Replicas, newReplicasCount) 632 } 633 634 // No updates on GameServerSet 635 if newReplicasCount == gsSet.Spec.Replicas { 636 continue 637 } 638 639 gsSetCopy.Spec.Replicas = newReplicasCount 640 loggerForFleet(fleet, c.baseLogger).WithField("gameserverset", gsSet.ObjectMeta.Name).WithField("replicas", gsSetCopy.Spec.Replicas). 641 Debug("applying rolling update to inactive gameserverset") 642 643 if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil { 644 return errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name) 645 } 646 c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet", 647 "Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas) 648 649 totalAlreadyScaledDown += scaleDownCount 650 } 651 } 652 return nil 653 } 654 655 // rollingUpdateRest applies the rolling update to the inactive GameServerSets 656 func (c *Controller) rollingUpdateRest(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) error { 657 return c.rollingUpdateRestFixedOnReady(ctx, fleet, active, rest) 658 } 659 660 // updateFleetStatus gets the GameServerSets for this Fleet and then 661 // calculates the counts for the status, and updates the Fleet 662 func (c *Controller) updateFleetStatus(ctx context.Context, fleet *agonesv1.Fleet) error { 663 loggerForFleet(fleet, c.baseLogger).Debug("Update Fleet Status") 664 665 gameServerSetNamespacedLister := c.gameServerSetLister.GameServerSets(fleet.ObjectMeta.Namespace) 666 list, err := ListGameServerSetsByFleetOwner(gameServerSetNamespacedLister, fleet) 667 if err != nil { 668 return err 669 } 670 671 fCopy, err := c.fleetGetter.Fleets(fleet.ObjectMeta.Namespace).Get(ctx, fleet.ObjectMeta.GetName(), metav1.GetOptions{}) 672 if err != nil { 673 return err 674 } 675 fCopy.Status.Replicas = 0 676 fCopy.Status.ReadyReplicas = 0 677 fCopy.Status.ReservedReplicas = 0 678 fCopy.Status.AllocatedReplicas = 0 679 if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 680 fCopy.Status.Counters = make(map[string]agonesv1.AggregatedCounterStatus) 681 fCopy.Status.Lists = make(map[string]agonesv1.AggregatedListStatus) 682 } 683 // Drop Counters and Lists status if the feature flag has been set to false 684 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 685 if len(fCopy.Status.Counters) != 0 || len(fCopy.Status.Lists) != 0 { 686 fCopy.Status.Counters = map[string]agonesv1.AggregatedCounterStatus{} 687 fCopy.Status.Lists = map[string]agonesv1.AggregatedListStatus{} 688 } 689 } 690 691 for _, gsSet := range list { 692 fCopy.Status.Replicas += gsSet.Status.Replicas 693 fCopy.Status.ReadyReplicas += gsSet.Status.ReadyReplicas 694 fCopy.Status.ReservedReplicas += gsSet.Status.ReservedReplicas 695 fCopy.Status.AllocatedReplicas += gsSet.Status.AllocatedReplicas 696 if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 697 fCopy.Status.Counters = mergeCounters(fCopy.Status.Counters, gsSet.Status.Counters) 698 fCopy.Status.Lists = mergeLists(fCopy.Status.Lists, gsSet.Status.Lists) 699 } 700 } 701 if runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 702 // to make this code simpler, while the feature gate is in place, 703 // we will loop around the gsSet list twice. 704 fCopy.Status.Players = &agonesv1.AggregatedPlayerStatus{} 705 // TODO: integrate this extra loop into the above for loop when PlayerTracking moves to GA 706 for _, gsSet := range list { 707 if gsSet.Status.Players != nil { 708 fCopy.Status.Players.Count += gsSet.Status.Players.Count 709 fCopy.Status.Players.Capacity += gsSet.Status.Players.Capacity 710 } 711 } 712 } 713 714 _, err = c.fleetGetter.Fleets(fCopy.ObjectMeta.Namespace).UpdateStatus(ctx, fCopy, metav1.UpdateOptions{}) 715 return errors.Wrapf(err, "error updating status of fleet %s", fCopy.ObjectMeta.Name) 716 } 717 718 // filterGameServerSetByActive returns the active GameServerSet (or nil if it 719 // doesn't exist) and then the rest of the GameServerSets that are controlled 720 // by this Fleet 721 func (c *Controller) filterGameServerSetByActive(fleet *agonesv1.Fleet, list []*agonesv1.GameServerSet) (*agonesv1.GameServerSet, []*agonesv1.GameServerSet) { 722 var active *agonesv1.GameServerSet 723 var rest []*agonesv1.GameServerSet 724 725 for _, gsSet := range list { 726 if apiequality.Semantic.DeepEqual(gsSet.Spec.Template, fleet.Spec.Template) { 727 active = gsSet 728 } else { 729 rest = append(rest, gsSet) 730 } 731 } 732 733 return active, rest 734 } 735 736 // mergeCounters adds the contents of AggregatedCounterStatus c2 into c1. 737 func mergeCounters(c1, c2 map[string]agonesv1.AggregatedCounterStatus) map[string]agonesv1.AggregatedCounterStatus { 738 if c1 == nil { 739 c1 = make(map[string]agonesv1.AggregatedCounterStatus) 740 } 741 742 for key, val := range c2 { 743 // If the Counter exists in both maps, aggregate the values. 744 if counter, ok := c1[key]; ok { 745 counter.AllocatedCapacity = agonesv1.SafeAdd(counter.AllocatedCapacity, val.AllocatedCapacity) 746 counter.AllocatedCount = agonesv1.SafeAdd(counter.AllocatedCount, val.AllocatedCount) 747 counter.Capacity = agonesv1.SafeAdd(counter.Capacity, val.Capacity) 748 counter.Count = agonesv1.SafeAdd(counter.Count, val.Count) 749 c1[key] = counter 750 } else { 751 c1[key] = *val.DeepCopy() 752 } 753 } 754 755 return c1 756 } 757 758 // mergeLists adds the contents of AggregatedListStatus l2 into l1. 759 func mergeLists(l1, l2 map[string]agonesv1.AggregatedListStatus) map[string]agonesv1.AggregatedListStatus { 760 if l1 == nil { 761 l1 = make(map[string]agonesv1.AggregatedListStatus) 762 } 763 764 for key, val := range l2 { 765 // If the List exists in both maps, aggregate the values. 766 if list, ok := l1[key]; ok { 767 list.AllocatedCapacity += val.AllocatedCapacity 768 list.AllocatedCount += val.AllocatedCount 769 list.Capacity += val.Capacity 770 list.Count += val.Count 771 l1[key] = list 772 } else { 773 l1[key] = *val.DeepCopy() 774 } 775 } 776 777 return l1 778 }