agones.dev/agones@v1.54.0/pkg/fleets/controller.go (about)

     1  // Copyright 2018 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package fleets
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"time"
    22  
    23  	"agones.dev/agones/pkg/apis/agones"
    24  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    25  	"agones.dev/agones/pkg/client/clientset/versioned"
    26  	getterv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1"
    27  	"agones.dev/agones/pkg/client/informers/externalversions"
    28  	listerv1 "agones.dev/agones/pkg/client/listers/agones/v1"
    29  	"agones.dev/agones/pkg/util/crd"
    30  	"agones.dev/agones/pkg/util/logfields"
    31  	"agones.dev/agones/pkg/util/runtime"
    32  	"agones.dev/agones/pkg/util/webhooks"
    33  	"agones.dev/agones/pkg/util/workerqueue"
    34  	"github.com/google/go-cmp/cmp"
    35  	"github.com/heptiolabs/healthcheck"
    36  	"github.com/pkg/errors"
    37  	"github.com/sirupsen/logrus"
    38  	"gomodules.xyz/jsonpatch/v2"
    39  	admissionv1 "k8s.io/api/admission/v1"
    40  	appsv1 "k8s.io/api/apps/v1"
    41  	corev1 "k8s.io/api/core/v1"
    42  	extclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset"
    43  	apiextclientv1 "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/typed/apiextensions/v1"
    44  	apiequality "k8s.io/apimachinery/pkg/api/equality"
    45  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    46  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    47  	runtimeschema "k8s.io/apimachinery/pkg/runtime/schema"
    48  	"k8s.io/apimachinery/pkg/util/intstr"
    49  	"k8s.io/client-go/kubernetes"
    50  	"k8s.io/client-go/kubernetes/scheme"
    51  	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
    52  	"k8s.io/client-go/tools/cache"
    53  	"k8s.io/client-go/tools/record"
    54  	"k8s.io/utils/integer"
    55  )
    56  
// Extensions holds what is needed to bind the Fleet webhook handlers:
// the base logger the handlers log through, and the API hooks that are
// handed to Fleet validation.
type Extensions struct {
	baseLogger *logrus.Entry
	apiHooks   agonesv1.APIHooks
}
    62  
// Controller is the Fleet controller. It holds the getters and listers
// (with their cache-synced checks) for Fleets and for the GameServerSets that back them,
// the CRD getter used to wait for the Fleet CRD on startup, the work queue that
// drives syncFleet, and the recorder used to emit events against Fleets.
type Controller struct {
	baseLogger          *logrus.Entry
	crdGetter           apiextclientv1.CustomResourceDefinitionInterface
	gameServerSetGetter getterv1.GameServerSetsGetter
	gameServerSetLister listerv1.GameServerSetLister
	gameServerSetSynced cache.InformerSynced
	fleetGetter         getterv1.FleetsGetter
	fleetLister         listerv1.FleetLister
	fleetSynced         cache.InformerSynced
	workerqueue         *workerqueue.WorkerQueue
	recorder            record.EventRecorder
}
    76  
    77  // NewController returns a new fleets crd controller
    78  func NewController(
    79  	health healthcheck.Handler,
    80  	kubeClient kubernetes.Interface,
    81  	extClient extclientset.Interface,
    82  	agonesClient versioned.Interface,
    83  	agonesInformerFactory externalversions.SharedInformerFactory) *Controller {
    84  
    85  	gameServerSets := agonesInformerFactory.Agones().V1().GameServerSets()
    86  	gsSetInformer := gameServerSets.Informer()
    87  
    88  	fleets := agonesInformerFactory.Agones().V1().Fleets()
    89  	fInformer := fleets.Informer()
    90  
    91  	c := &Controller{
    92  		crdGetter:           extClient.ApiextensionsV1().CustomResourceDefinitions(),
    93  		gameServerSetGetter: agonesClient.AgonesV1(),
    94  		gameServerSetLister: gameServerSets.Lister(),
    95  		gameServerSetSynced: gsSetInformer.HasSynced,
    96  		fleetGetter:         agonesClient.AgonesV1(),
    97  		fleetLister:         fleets.Lister(),
    98  		fleetSynced:         fInformer.HasSynced,
    99  	}
   100  
   101  	c.baseLogger = runtime.NewLoggerWithType(c)
   102  	c.workerqueue = workerqueue.NewWorkerQueueWithRateLimiter(c.syncFleet, c.baseLogger, logfields.FleetKey, agones.GroupName+".FleetController", workerqueue.FastRateLimiter(3*time.Second))
   103  	health.AddLivenessCheck("fleet-workerqueue", healthcheck.Check(c.workerqueue.Healthy))
   104  
   105  	eventBroadcaster := record.NewBroadcaster()
   106  	eventBroadcaster.StartLogging(c.baseLogger.Debugf)
   107  	eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
   108  	c.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "fleet-controller"})
   109  
   110  	_, _ = fInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
   111  		AddFunc: c.workerqueue.Enqueue,
   112  		UpdateFunc: func(_, newObj interface{}) {
   113  			c.workerqueue.Enqueue(newObj)
   114  		},
   115  	})
   116  
   117  	_, _ = gsSetInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
   118  		AddFunc: c.gameServerSetEventHandler,
   119  		UpdateFunc: func(_, newObj interface{}) {
   120  			gsSet := newObj.(*agonesv1.GameServerSet)
   121  			// ignore if already being deleted
   122  			if gsSet.ObjectMeta.DeletionTimestamp.IsZero() {
   123  				c.gameServerSetEventHandler(gsSet)
   124  			}
   125  		},
   126  	})
   127  
   128  	return c
   129  }
   130  
   131  // NewExtensions binds the handlers to the webhook outside the initialization of the controller
   132  // initializes a new logger for extensions.
   133  func NewExtensions(apiHooks agonesv1.APIHooks, wh *webhooks.WebHook) *Extensions {
   134  	ext := &Extensions{apiHooks: apiHooks}
   135  
   136  	ext.baseLogger = runtime.NewLoggerWithType(ext)
   137  
   138  	wh.AddHandler("/mutate", agonesv1.Kind("Fleet"), admissionv1.Create, ext.creationMutationHandler)
   139  	wh.AddHandler("/validate", agonesv1.Kind("Fleet"), admissionv1.Create, ext.creationValidationHandler)
   140  	wh.AddHandler("/validate", agonesv1.Kind("Fleet"), admissionv1.Update, ext.creationValidationHandler)
   141  
   142  	return ext
   143  }
   144  
   145  // creationMutationHandler is the handler for the mutating webhook that sets the
   146  // the default values on the Fleet
   147  // Should only be called on fleet create operations.
   148  // nolint:dupl
   149  func (ext *Extensions) creationMutationHandler(review admissionv1.AdmissionReview) (admissionv1.AdmissionReview, error) {
   150  	ext.baseLogger.WithField("review", review).Debug("creationMutationHandler")
   151  
   152  	obj := review.Request.Object
   153  	fleet := &agonesv1.Fleet{}
   154  	err := json.Unmarshal(obj.Raw, fleet)
   155  	if err != nil {
   156  		// If the JSON is invalid during mutation, fall through to validation. This allows OpenAPI schema validation
   157  		// to proceed, resulting in a more user friendly error message.
   158  		return review, nil
   159  	}
   160  
   161  	// This is the main logic of this function
   162  	// the rest is really just json plumbing
   163  	fleet.ApplyDefaults()
   164  
   165  	newFleet, err := json.Marshal(fleet)
   166  	if err != nil {
   167  		return review, errors.Wrapf(err, "error marshalling default applied Fleet %s to json", fleet.ObjectMeta.Name)
   168  	}
   169  
   170  	patch, err := jsonpatch.CreatePatch(obj.Raw, newFleet)
   171  	if err != nil {
   172  		return review, errors.Wrapf(err, "error creating patch for Fleet %s", fleet.ObjectMeta.Name)
   173  	}
   174  
   175  	jsn, err := json.Marshal(patch)
   176  	if err != nil {
   177  		return review, errors.Wrapf(err, "error creating json for patch for Fleet %s", fleet.ObjectMeta.Name)
   178  	}
   179  
   180  	loggerForFleet(fleet, ext.baseLogger).WithField("patch", string(jsn)).Debug("patch created!")
   181  
   182  	pt := admissionv1.PatchTypeJSONPatch
   183  	review.Response.PatchType = &pt
   184  	review.Response.Patch = jsn
   185  
   186  	return review, nil
   187  }
   188  
   189  // creationValidationHandler that validates a Fleet when it is created
   190  // Should only be called on Fleet create and Update operations.
   191  func (ext *Extensions) creationValidationHandler(review admissionv1.AdmissionReview) (admissionv1.AdmissionReview, error) {
   192  	ext.baseLogger.WithField("review", review).Debug("creationValidationHandler")
   193  
   194  	obj := review.Request.Object
   195  	fleet := &agonesv1.Fleet{}
   196  	err := json.Unmarshal(obj.Raw, fleet)
   197  	if err != nil {
   198  		return review, errors.Wrapf(err, "error unmarshalling Fleet json after schema validation: %s", obj.Raw)
   199  	}
   200  
   201  	if errs := fleet.Validate(ext.apiHooks); len(errs) > 0 {
   202  		kind := runtimeschema.GroupKind{
   203  			Group: review.Request.Kind.Group,
   204  			Kind:  review.Request.Kind.Kind,
   205  		}
   206  		statusErr := k8serrors.NewInvalid(kind, review.Request.Name, errs)
   207  		review.Response.Allowed = false
   208  		review.Response.Result = &statusErr.ErrStatus
   209  		loggerForFleet(fleet, ext.baseLogger).WithField("review", review).Debug("Invalid Fleet")
   210  	}
   211  
   212  	return review, nil
   213  }
   214  
   215  // Run the Fleet controller. Will block until stop is closed.
   216  // Runs threadiness number workers to process the rate limited queue
   217  func (c *Controller) Run(ctx context.Context, workers int) error {
   218  	err := crd.WaitForEstablishedCRD(ctx, c.crdGetter, "fleets.agones.dev", c.baseLogger)
   219  	if err != nil {
   220  		return err
   221  	}
   222  
   223  	c.baseLogger.Debug("Wait for cache sync")
   224  	if !cache.WaitForCacheSync(ctx.Done(), c.gameServerSetSynced, c.fleetSynced) {
   225  		return errors.New("failed to wait for caches to sync")
   226  	}
   227  
   228  	c.workerqueue.Run(ctx, workers)
   229  	return nil
   230  }
   231  
   232  func loggerForFleetKey(key string, logger *logrus.Entry) *logrus.Entry {
   233  	return logfields.AugmentLogEntry(logger, logfields.FleetKey, key)
   234  }
   235  
   236  func loggerForFleet(f *agonesv1.Fleet, logger *logrus.Entry) *logrus.Entry {
   237  	fleetName := "NilFleet"
   238  	if f != nil {
   239  		fleetName = f.ObjectMeta.Namespace + "/" + f.ObjectMeta.Name
   240  	}
   241  	return loggerForFleetKey(fleetName, logger).WithField("fleet", f)
   242  }
   243  
   244  // gameServerSetEventHandler enqueues the owning Fleet for this GameServerSet,
   245  // assuming that it has one
   246  func (c *Controller) gameServerSetEventHandler(obj interface{}) {
   247  	gsSet := obj.(*agonesv1.GameServerSet)
   248  	ref := metav1.GetControllerOf(gsSet)
   249  	if ref == nil {
   250  		return
   251  	}
   252  
   253  	fleet, err := c.fleetLister.Fleets(gsSet.ObjectMeta.Namespace).Get(ref.Name)
   254  	if err != nil {
   255  		if k8serrors.IsNotFound(err) {
   256  			c.baseLogger.WithField("ref", ref).Warn("Owner Fleet no longer available for syncing")
   257  		} else {
   258  			runtime.HandleError(loggerForFleet(fleet, c.baseLogger).WithField("ref", ref),
   259  				errors.Wrap(err, "error retrieving GameServerSet owner"))
   260  		}
   261  		return
   262  	}
   263  	c.workerqueue.Enqueue(fleet)
   264  }
   265  
   266  // syncFleet synchronised the fleet CRDs and configures/updates
   267  // backing GameServerSets
   268  func (c *Controller) syncFleet(ctx context.Context, key string) error {
   269  	loggerForFleetKey(key, c.baseLogger).Debug("Synchronising")
   270  
   271  	// Convert the namespace/name string into a distinct namespace and name
   272  	namespace, name, err := cache.SplitMetaNamespaceKey(key)
   273  	if err != nil {
   274  		// don't return an error, as we don't want this retried
   275  		runtime.HandleError(loggerForFleetKey(key, c.baseLogger), errors.Wrapf(err, "invalid resource key"))
   276  		return nil
   277  	}
   278  
   279  	fleet, err := c.fleetLister.Fleets(namespace).Get(name)
   280  	if err != nil {
   281  		if k8serrors.IsNotFound(err) {
   282  			loggerForFleetKey(key, c.baseLogger).Debug("Fleet is no longer available for syncing")
   283  			return nil
   284  		}
   285  		return errors.Wrapf(err, "error retrieving fleet %s from namespace %s", name, namespace)
   286  	}
   287  
   288  	// If Fleet is marked for deletion don't do anything.
   289  	if !fleet.DeletionTimestamp.IsZero() {
   290  		return nil
   291  	}
   292  
   293  	gameServerSetNamespacedLister := c.gameServerSetLister.GameServerSets(fleet.ObjectMeta.Namespace)
   294  	list, err := ListGameServerSetsByFleetOwner(gameServerSetNamespacedLister, fleet)
   295  	if err != nil {
   296  		return err
   297  	}
   298  
   299  	active, rest := c.filterGameServerSetByActive(fleet, list)
   300  
   301  	// if there isn't an active gameServerSet, create one (but don't persist yet)
   302  	if active == nil {
   303  		loggerForFleet(fleet, c.baseLogger).Debug("could not find active GameServerSet, creating")
   304  		active = fleet.GameServerSet()
   305  	}
   306  
   307  	replicas, err := c.applyDeploymentStrategy(ctx, fleet, active, rest)
   308  	if err != nil {
   309  		return err
   310  	}
   311  	if err := c.deleteEmptyGameServerSets(ctx, fleet, rest); err != nil {
   312  		return err
   313  	}
   314  
   315  	if err := c.upsertGameServerSet(ctx, fleet, active, replicas); err != nil {
   316  		return err
   317  	}
   318  	return c.updateFleetStatus(ctx, fleet)
   319  }
   320  
   321  // upsertGameServerSet if the GameServerSet is new, insert it
   322  // if the replicas do not match the active
   323  // GameServerSet, then update it
   324  func (c *Controller) upsertGameServerSet(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, replicas int32) error {
   325  	if active.ObjectMeta.UID == "" {
   326  		active.Spec.Replicas = replicas
   327  		gsSets := c.gameServerSetGetter.GameServerSets(active.ObjectMeta.Namespace)
   328  		gsSet, err := gsSets.Create(ctx, active, metav1.CreateOptions{})
   329  		if err != nil {
   330  			return errors.Wrapf(err, "error creating gameserverset for fleet %s", fleet.ObjectMeta.Name)
   331  		}
   332  
   333  		// extra step which is needed to set
   334  		// default values for GameServerSet Status Subresource
   335  		gsSetCopy := gsSet.DeepCopy()
   336  		gsSetCopy.Status.ReadyReplicas = 0
   337  		gsSetCopy.Status.Replicas = 0
   338  		gsSetCopy.Status.AllocatedReplicas = 0
   339  		_, err = gsSets.UpdateStatus(ctx, gsSetCopy, metav1.UpdateOptions{})
   340  		if err != nil {
   341  			return errors.Wrapf(err, "error updating status of gameserverset for fleet %s",
   342  				fleet.ObjectMeta.Name)
   343  		}
   344  
   345  		c.recorder.Eventf(fleet, corev1.EventTypeNormal, "CreatingGameServerSet",
   346  			"Created GameServerSet %s", gsSet.ObjectMeta.Name)
   347  		return nil
   348  	}
   349  
   350  	if replicas != active.Spec.Replicas || active.Spec.Scheduling != fleet.Spec.Scheduling {
   351  		gsSetCopy := active.DeepCopy()
   352  		gsSetCopy.Spec.Replicas = replicas
   353  		gsSetCopy.Spec.Scheduling = fleet.Spec.Scheduling
   354  		gsSetCopy, err := c.gameServerSetGetter.GameServerSets(fleet.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{})
   355  		if err != nil {
   356  			return errors.Wrapf(err, "error updating replicas for gameserverset for fleet %s", fleet.ObjectMeta.Name)
   357  		}
   358  		c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet",
   359  			"Scaling active GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, active.Spec.Replicas, gsSetCopy.Spec.Replicas)
   360  	}
   361  
   362  	// Update GameServerSet Counts and Lists Priorities if not equal to the Priorities on the Fleet
   363  	if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   364  		if !cmp.Equal(active.Spec.Priorities, fleet.Spec.Priorities) {
   365  			gsSetCopy := active.DeepCopy()
   366  			gsSetCopy.Spec.Priorities = fleet.Spec.Priorities
   367  			_, err := c.gameServerSetGetter.GameServerSets(fleet.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{})
   368  			if err != nil {
   369  				return errors.Wrapf(err, "error updating priorities for gameserverset for fleet %s", fleet.ObjectMeta.Name)
   370  			}
   371  			c.recorder.Eventf(fleet, corev1.EventTypeNormal, "UpdatingGameServerSet",
   372  				"Updated GameServerSet %s Priorities", gsSetCopy.ObjectMeta.Name)
   373  		}
   374  	}
   375  
   376  	return nil
   377  }
   378  
   379  // applyDeploymentStrategy applies the Fleet > Spec > Deployment strategy to all the non-active
   380  // GameServerSets that are passed in
   381  func (c *Controller) applyDeploymentStrategy(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) (int32, error) {
   382  	// if there is nothing `rest`, then it's either a brand-new Fleet, or we can just jump to the fleet value,
   383  	// since there is nothing else scaling down at this point
   384  	if len(rest) == 0 {
   385  		return fleet.Spec.Replicas, nil
   386  	}
   387  
   388  	// if we do have `rest` but all their spec.replicas is zero, we can just do subtraction against whatever is allocated in `rest`.
   389  	if agonesv1.SumSpecReplicas(rest) == 0 {
   390  		blocked := agonesv1.SumGameServerSets(rest, func(gsSet *agonesv1.GameServerSet) int32 {
   391  			return gsSet.Status.ReservedReplicas + gsSet.Status.AllocatedReplicas
   392  		})
   393  		replicas := fleet.Spec.Replicas - blocked
   394  		if replicas < 0 {
   395  			replicas = 0
   396  		}
   397  		return replicas, nil
   398  	}
   399  
   400  	switch fleet.Spec.Strategy.Type {
   401  	case appsv1.RecreateDeploymentStrategyType:
   402  		return c.recreateDeployment(ctx, fleet, rest)
   403  	case appsv1.RollingUpdateDeploymentStrategyType:
   404  		return c.rollingUpdateDeployment(ctx, fleet, active, rest)
   405  	}
   406  
   407  	return 0, errors.Errorf("unexpected deployment strategy type: %s", fleet.Spec.Strategy.Type)
   408  }
   409  
   410  // deleteEmptyGameServerSets deletes all GameServerServerSets
   411  // That have `Status > Replicas` of 0
   412  func (c *Controller) deleteEmptyGameServerSets(ctx context.Context, fleet *agonesv1.Fleet, list []*agonesv1.GameServerSet) error {
   413  	p := metav1.DeletePropagationBackground
   414  	for _, gsSet := range list {
   415  		if gsSet.Status.Replicas == 0 && gsSet.Status.ShutdownReplicas == 0 {
   416  			err := c.gameServerSetGetter.GameServerSets(gsSet.ObjectMeta.Namespace).Delete(ctx, gsSet.ObjectMeta.Name, metav1.DeleteOptions{PropagationPolicy: &p})
   417  			if err != nil {
   418  				return errors.Wrapf(err, "error updating gameserverset %s", gsSet.ObjectMeta.Name)
   419  			}
   420  
   421  			c.recorder.Eventf(fleet, corev1.EventTypeNormal, "DeletingGameServerSet", "Deleting inactive GameServerSet %s", gsSet.ObjectMeta.Name)
   422  		}
   423  	}
   424  
   425  	return nil
   426  }
   427  
   428  // recreateDeployment applies the recreate deployment strategy to all non-active
   429  // GameServerSets, and return the replica count for the active GameServerSet
   430  func (c *Controller) recreateDeployment(ctx context.Context, fleet *agonesv1.Fleet, rest []*agonesv1.GameServerSet) (int32, error) {
   431  	for _, gsSet := range rest {
   432  		if gsSet.Spec.Replicas == 0 {
   433  			continue
   434  		}
   435  		loggerForFleet(fleet, c.baseLogger).WithField("gameserverset", gsSet.ObjectMeta.Name).Debug("applying recreate deployment: scaling to 0")
   436  		gsSetCopy := gsSet.DeepCopy()
   437  		gsSetCopy.Spec.Replicas = 0
   438  		if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil {
   439  			return 0, errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name)
   440  		}
   441  		c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet",
   442  			"Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas)
   443  	}
   444  
   445  	return fleet.LowerBoundReplicas(fleet.Spec.Replicas - agonesv1.SumStatusAllocatedReplicas(rest)), nil
   446  }
   447  
   448  // rollingUpdateDeployment will do the rolling update of the old GameServers
   449  // through to the new ones, based on the fleet.Spec.Strategy.RollingUpdate configuration
   450  // and return the replica count for the active GameServerSet
   451  func (c *Controller) rollingUpdateDeployment(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) (int32, error) {
   452  	replicas, err := c.rollingUpdateActive(fleet, active, rest)
   453  	if err != nil {
   454  		return 0, err
   455  	}
   456  	if err := c.rollingUpdateRest(ctx, fleet, active, rest); err != nil {
   457  		return 0, err
   458  	}
   459  	return replicas, nil
   460  }
   461  
   462  // rollingUpdateActive applies the rolling update to the active GameServerSet
   463  // and returns what its replica value should be set to
   464  func (c *Controller) rollingUpdateActive(fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) (int32, error) {
   465  	replicas := active.Spec.Replicas
   466  	// always leave room for Allocated GameServers
   467  	sumAllocated := agonesv1.SumStatusAllocatedReplicas(rest)
   468  
   469  	// if the active spec replicas don't equal the active status replicas, this means we are
   470  	// in the middle of a rolling update, and should wait for it to complete.
   471  	if active.Spec.Replicas != active.Status.Replicas {
   472  		return replicas, nil
   473  	}
   474  
   475  	// if the current number replicas from the fleet is zero, the rolling update can be ignored
   476  	// and the cleanup stage will remove dangling GameServerSets
   477  	if fleet.Spec.Replicas == 0 {
   478  		return 0, nil
   479  	}
   480  
   481  	// if the active spec replicas are greater than or equal the fleet spec replicas, then we don't
   482  	// need to do another rolling update upwards.
   483  	if active.Spec.Replicas >= (fleet.Spec.Replicas - sumAllocated) {
   484  		return fleet.Spec.Replicas - sumAllocated, nil
   485  	}
   486  
   487  	r, err := intstr.GetValueFromIntOrPercent(fleet.Spec.Strategy.RollingUpdate.MaxSurge, int(fleet.Spec.Replicas), true)
   488  	if err != nil {
   489  		return 0, errors.Wrapf(err, "error parsing MaxSurge value: %s", fleet.ObjectMeta.Name)
   490  	}
   491  	surge := int32(r)
   492  
   493  	// make sure we don't end up with more than the configured max surge
   494  	maxSurge := surge + fleet.Spec.Replicas
   495  	replicas = fleet.UpperBoundReplicas(replicas + surge)
   496  	total := agonesv1.SumStatusReplicas(rest) + replicas
   497  	if total > maxSurge {
   498  		replicas = fleet.LowerBoundReplicas(replicas - (total - maxSurge))
   499  	}
   500  
   501  	// make room for allocated game servers, but not over the fleet replica count
   502  	if replicas+sumAllocated > fleet.Spec.Replicas {
   503  		replicas = fleet.LowerBoundReplicas(fleet.Spec.Replicas - sumAllocated)
   504  	}
   505  
   506  	loggerForFleet(fleet, c.baseLogger).WithField("gameserverset", active.ObjectMeta.Name).WithField("replicas", replicas).
   507  		Debug("applying rolling update to active gameserverset")
   508  
   509  	return replicas, nil
   510  }
   511  
   512  func (c *Controller) cleanupUnhealthyReplicas(ctx context.Context, rest []*agonesv1.GameServerSet,
   513  	fleet *agonesv1.Fleet, maxCleanupCount int32) ([]*agonesv1.GameServerSet, int32, error) {
   514  
   515  	// Safely scale down all old GameServerSets with unhealthy replicas.
   516  	totalScaledDown := int32(0)
   517  	for i, gsSet := range rest {
   518  		if totalScaledDown >= maxCleanupCount {
   519  			break
   520  		}
   521  		if gsSet.Spec.Replicas == 0 {
   522  			// cannot scale down this replica set.
   523  			continue
   524  		}
   525  		if gsSet.Spec.Replicas == gsSet.Status.ReadyReplicas {
   526  			// no unhealthy replicas found, no scaling required.
   527  			continue
   528  		}
   529  
   530  		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(gsSet.Spec.Replicas-gsSet.Status.ReadyReplicas)))
   531  		newReplicasCount := gsSet.Spec.Replicas - scaledDownCount
   532  		if newReplicasCount > gsSet.Spec.Replicas {
   533  			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", gsSet.Namespace, gsSet.Name, gsSet.Spec.Replicas, newReplicasCount)
   534  		}
   535  
   536  		gsSetCopy := gsSet.DeepCopy()
   537  		gsSetCopy.Spec.Replicas = newReplicasCount
   538  		totalScaledDown += scaledDownCount
   539  		if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil {
   540  			return nil, totalScaledDown, errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name)
   541  		}
   542  		c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet",
   543  			"Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas)
   544  
   545  		rest[i] = gsSetCopy
   546  	}
   547  	return rest, totalScaledDown, nil
   548  }
   549  
   550  func (c *Controller) rollingUpdateRestFixedOnReady(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) error {
   551  	if len(rest) == 0 {
   552  		return nil
   553  	}
   554  	if runtime.FeatureEnabled(runtime.FeatureRollingUpdateFix) {
   555  		return c.rollingUpdateRestFixedOnReadyRollingUpdateFix(ctx, fleet, active, rest)
   556  	}
   557  
   558  	// Look at Kubernetes Deployment util ResolveFenceposts() function
   559  	r, err := intstr.GetValueFromIntOrPercent(fleet.Spec.Strategy.RollingUpdate.MaxUnavailable, int(fleet.Spec.Replicas), false)
   560  	if err != nil {
   561  		return errors.Wrapf(err, "error parsing MaxUnavailable value: %s", fleet.ObjectMeta.Name)
   562  	}
   563  	if r == 0 {
   564  		r = 1
   565  	}
   566  	if r > int(fleet.Spec.Replicas) {
   567  		r = int(fleet.Spec.Replicas)
   568  	}
   569  	unavailable := int32(r)
   570  
   571  	totalAlreadyScaledDown := int32(0)
   572  
   573  	totalScaleDownCount := int32(0)
   574  	// Check if we can scale down.
   575  	allGSS := rest
   576  	allGSS = append(allGSS, active)
   577  	readyReplicasCount := agonesv1.GetReadyReplicaCountForGameServerSets(allGSS)
   578  	minAvailable := fleet.Spec.Replicas - unavailable
   579  
   580  	// Check if we are ready to scale down
   581  	allPodsCount := agonesv1.SumSpecReplicas(allGSS)
   582  	newGSSUnavailablePodCount := active.Spec.Replicas - active.Status.ReadyReplicas - active.Status.AllocatedReplicas
   583  	maxScaledDown := allPodsCount - minAvailable - newGSSUnavailablePodCount
   584  
   585  	if maxScaledDown <= 0 {
   586  		return nil
   587  	}
   588  	rest, _, err = c.cleanupUnhealthyReplicas(ctx, rest, fleet, maxScaledDown)
   589  	if err != nil {
   590  		loggerForFleet(fleet, c.baseLogger).WithField("fleet", fleet.ObjectMeta.Name).WithField("maxScaledDown", maxScaledDown).
   591  			Debug("Can not cleanup Unhealth Replicas")
   592  		// There could be the case when GameServerSet would be updated from another place, say Status or Spec would be updated
   593  		// We don't want to propagate such errors further
   594  		// And this set in sync with reconcileOldReplicaSets() Kubernetes code
   595  		return nil
   596  	}
   597  	// Resulting value is readyReplicasCount + unavailable - fleet.Spec.Replicas
   598  	totalScaleDownCount = readyReplicasCount - minAvailable
   599  	if readyReplicasCount <= minAvailable {
   600  		// Cannot scale down.
   601  		return nil
   602  	}
   603  	for _, gsSet := range rest {
   604  		if totalAlreadyScaledDown >= totalScaleDownCount {
   605  			// No further scaling required.
   606  			break
   607  		}
   608  
   609  		// Crucial fix if we are using wrong configuration of a fleet,
   610  		// that would lead to Status.Replicas being 0 but number of GameServers would be in a Scheduled or Unhealthy state.
   611  		// Compare with scaleDownOldReplicaSetsForRollingUpdate() for loop.
   612  		// if the Spec.Replicas are less than or equal to 0, then that means we are done
   613  		// scaling this GameServerSet down, and can therefore exit/move to the next one.
   614  		if gsSet.Spec.Replicas <= 0 {
   615  			continue
   616  		}
   617  
   618  		// If the Spec.Replicas does not equal the Status.Replicas for this GameServerSet, this means
   619  		// that the rolling down process is currently ongoing, and we should therefore exit so we can wait for it to finish
   620  		if gsSet.Spec.Replicas != gsSet.Status.Replicas {
   621  			break
   622  		}
   623  		gsSetCopy := gsSet.DeepCopy()
   624  		if gsSet.Status.ShutdownReplicas == 0 {
   625  			// Wait for new GameServers to become Ready before scaling down Inactive GameServerset
   626  			// Scale down.
   627  			scaleDownCount := int32(integer.IntMin(int(gsSet.Spec.Replicas), int(totalScaleDownCount-totalAlreadyScaledDown)))
   628  
   629  			newReplicasCount := gsSet.Spec.Replicas - scaleDownCount
   630  			if newReplicasCount > gsSet.Spec.Replicas {
   631  				return fmt.Errorf("when scaling down old GameServerSet, got invalid request to scale down %s/%s %d -> %d", gsSet.Namespace, gsSet.Name, gsSet.Spec.Replicas, newReplicasCount)
   632  			}
   633  
   634  			// No updates on GameServerSet
   635  			if newReplicasCount == gsSet.Spec.Replicas {
   636  				continue
   637  			}
   638  
   639  			gsSetCopy.Spec.Replicas = newReplicasCount
   640  			loggerForFleet(fleet, c.baseLogger).WithField("gameserverset", gsSet.ObjectMeta.Name).WithField("replicas", gsSetCopy.Spec.Replicas).
   641  				Debug("applying rolling update to inactive gameserverset")
   642  
   643  			if _, err := c.gameServerSetGetter.GameServerSets(gsSetCopy.ObjectMeta.Namespace).Update(ctx, gsSetCopy, metav1.UpdateOptions{}); err != nil {
   644  				return errors.Wrapf(err, "error updating gameserverset %s", gsSetCopy.ObjectMeta.Name)
   645  			}
   646  			c.recorder.Eventf(fleet, corev1.EventTypeNormal, "ScalingGameServerSet",
   647  				"Scaling inactive GameServerSet %s from %d to %d", gsSetCopy.ObjectMeta.Name, gsSet.Spec.Replicas, gsSetCopy.Spec.Replicas)
   648  
   649  			totalAlreadyScaledDown += scaleDownCount
   650  		}
   651  	}
   652  	return nil
   653  }
   654  
// rollingUpdateRest applies the rolling update to the inactive GameServerSets.
// It is a direct pass-through to rollingUpdateRestFixedOnReady, with the same
// arguments and the same returned error.
func (c *Controller) rollingUpdateRest(ctx context.Context, fleet *agonesv1.Fleet, active *agonesv1.GameServerSet, rest []*agonesv1.GameServerSet) error {
	return c.rollingUpdateRestFixedOnReady(ctx, fleet, active, rest)
}
   659  
   660  // updateFleetStatus gets the GameServerSets for this Fleet and then
   661  // calculates the counts for the status, and updates the Fleet
   662  func (c *Controller) updateFleetStatus(ctx context.Context, fleet *agonesv1.Fleet) error {
   663  	loggerForFleet(fleet, c.baseLogger).Debug("Update Fleet Status")
   664  
   665  	gameServerSetNamespacedLister := c.gameServerSetLister.GameServerSets(fleet.ObjectMeta.Namespace)
   666  	list, err := ListGameServerSetsByFleetOwner(gameServerSetNamespacedLister, fleet)
   667  	if err != nil {
   668  		return err
   669  	}
   670  
   671  	fCopy, err := c.fleetGetter.Fleets(fleet.ObjectMeta.Namespace).Get(ctx, fleet.ObjectMeta.GetName(), metav1.GetOptions{})
   672  	if err != nil {
   673  		return err
   674  	}
   675  	fCopy.Status.Replicas = 0
   676  	fCopy.Status.ReadyReplicas = 0
   677  	fCopy.Status.ReservedReplicas = 0
   678  	fCopy.Status.AllocatedReplicas = 0
   679  	if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   680  		fCopy.Status.Counters = make(map[string]agonesv1.AggregatedCounterStatus)
   681  		fCopy.Status.Lists = make(map[string]agonesv1.AggregatedListStatus)
   682  	}
   683  	// Drop Counters and Lists status if the feature flag has been set to false
   684  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   685  		if len(fCopy.Status.Counters) != 0 || len(fCopy.Status.Lists) != 0 {
   686  			fCopy.Status.Counters = map[string]agonesv1.AggregatedCounterStatus{}
   687  			fCopy.Status.Lists = map[string]agonesv1.AggregatedListStatus{}
   688  		}
   689  	}
   690  
   691  	for _, gsSet := range list {
   692  		fCopy.Status.Replicas += gsSet.Status.Replicas
   693  		fCopy.Status.ReadyReplicas += gsSet.Status.ReadyReplicas
   694  		fCopy.Status.ReservedReplicas += gsSet.Status.ReservedReplicas
   695  		fCopy.Status.AllocatedReplicas += gsSet.Status.AllocatedReplicas
   696  		if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   697  			fCopy.Status.Counters = mergeCounters(fCopy.Status.Counters, gsSet.Status.Counters)
   698  			fCopy.Status.Lists = mergeLists(fCopy.Status.Lists, gsSet.Status.Lists)
   699  		}
   700  	}
   701  	if runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   702  		// to make this code simpler, while the feature gate is in place,
   703  		// we will loop around the gsSet list twice.
   704  		fCopy.Status.Players = &agonesv1.AggregatedPlayerStatus{}
   705  		// TODO: integrate this extra loop into the above for loop when PlayerTracking moves to GA
   706  		for _, gsSet := range list {
   707  			if gsSet.Status.Players != nil {
   708  				fCopy.Status.Players.Count += gsSet.Status.Players.Count
   709  				fCopy.Status.Players.Capacity += gsSet.Status.Players.Capacity
   710  			}
   711  		}
   712  	}
   713  
   714  	_, err = c.fleetGetter.Fleets(fCopy.ObjectMeta.Namespace).UpdateStatus(ctx, fCopy, metav1.UpdateOptions{})
   715  	return errors.Wrapf(err, "error updating status of fleet %s", fCopy.ObjectMeta.Name)
   716  }
   717  
   718  // filterGameServerSetByActive returns the active GameServerSet (or nil if it
   719  // doesn't exist) and then the rest of the GameServerSets that are controlled
   720  // by this Fleet
   721  func (c *Controller) filterGameServerSetByActive(fleet *agonesv1.Fleet, list []*agonesv1.GameServerSet) (*agonesv1.GameServerSet, []*agonesv1.GameServerSet) {
   722  	var active *agonesv1.GameServerSet
   723  	var rest []*agonesv1.GameServerSet
   724  
   725  	for _, gsSet := range list {
   726  		if apiequality.Semantic.DeepEqual(gsSet.Spec.Template, fleet.Spec.Template) {
   727  			active = gsSet
   728  		} else {
   729  			rest = append(rest, gsSet)
   730  		}
   731  	}
   732  
   733  	return active, rest
   734  }
   735  
   736  // mergeCounters adds the contents of AggregatedCounterStatus c2 into c1.
   737  func mergeCounters(c1, c2 map[string]agonesv1.AggregatedCounterStatus) map[string]agonesv1.AggregatedCounterStatus {
   738  	if c1 == nil {
   739  		c1 = make(map[string]agonesv1.AggregatedCounterStatus)
   740  	}
   741  
   742  	for key, val := range c2 {
   743  		// If the Counter exists in both maps, aggregate the values.
   744  		if counter, ok := c1[key]; ok {
   745  			counter.AllocatedCapacity = agonesv1.SafeAdd(counter.AllocatedCapacity, val.AllocatedCapacity)
   746  			counter.AllocatedCount = agonesv1.SafeAdd(counter.AllocatedCount, val.AllocatedCount)
   747  			counter.Capacity = agonesv1.SafeAdd(counter.Capacity, val.Capacity)
   748  			counter.Count = agonesv1.SafeAdd(counter.Count, val.Count)
   749  			c1[key] = counter
   750  		} else {
   751  			c1[key] = *val.DeepCopy()
   752  		}
   753  	}
   754  
   755  	return c1
   756  }
   757  
   758  // mergeLists adds the contents of AggregatedListStatus l2 into l1.
   759  func mergeLists(l1, l2 map[string]agonesv1.AggregatedListStatus) map[string]agonesv1.AggregatedListStatus {
   760  	if l1 == nil {
   761  		l1 = make(map[string]agonesv1.AggregatedListStatus)
   762  	}
   763  
   764  	for key, val := range l2 {
   765  		// If the List exists in both maps, aggregate the values.
   766  		if list, ok := l1[key]; ok {
   767  			list.AllocatedCapacity += val.AllocatedCapacity
   768  			list.AllocatedCount += val.AllocatedCount
   769  			list.Capacity += val.Capacity
   770  			list.Count += val.Count
   771  			l1[key] = list
   772  		} else {
   773  			l1[key] = *val.DeepCopy()
   774  		}
   775  	}
   776  
   777  	return l1
   778  }