agones.dev/agones@v1.54.0/pkg/fleetautoscalers/fleetautoscalers.go (about)

     1  /*
     2   * Copyright 2018 Google LLC All Rights Reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package fleetautoscalers
    18  
    19  import (
    20  	"context"
    21  	"crypto/tls"
    22  	"crypto/x509"
    23  	"encoding/json"
    24  	"fmt"
    25  	"io"
    26  	"math"
    27  	"net/http"
    28  	"net/url"
    29  	"strings"
    30  	"time"
    31  
    32  	extism "github.com/extism/go-sdk"
    33  	"github.com/pkg/errors"
    34  	"github.com/robfig/cron/v3"
    35  	corev1 "k8s.io/api/core/v1"
    36  	"k8s.io/apimachinery/pkg/util/intstr"
    37  	"k8s.io/apimachinery/pkg/util/uuid"
    38  
    39  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    40  	autoscalingv1 "agones.dev/agones/pkg/apis/autoscaling/v1"
    41  	listeragonesv1 "agones.dev/agones/pkg/client/listers/agones/v1"
    42  	"agones.dev/agones/pkg/fleets"
    43  	"agones.dev/agones/pkg/gameservers"
    44  	gssets "agones.dev/agones/pkg/gameserversets"
    45  	"agones.dev/agones/pkg/util/runtime"
    46  )
    47  
    48  const (
    49  	maxDuration = "2540400h" // 290 Years
    50  )
    51  
    52  // InactiveScheduleError denotes an error for schedules that are not currently active.
    53  type InactiveScheduleError struct{}
    54  
    55  func (InactiveScheduleError) Error() string {
    56  	return "inactive schedule, policy not applicable"
    57  }
    58  
    59  // computeDesiredFleetSize computes the new desired size of the given fleet
    60  func computeDesiredFleetSize(ctx context.Context, state *fasState, pol autoscalingv1.FleetAutoscalerPolicy, f *agonesv1.Fleet,
    61  	gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, fasLog *FasLogger) (int32, bool, error) {
    62  
    63  	var (
    64  		replicas int32
    65  		limited  bool
    66  		err      error
    67  	)
    68  
    69  	switch pol.Type {
    70  	case autoscalingv1.BufferPolicyType:
    71  		replicas, limited, err = applyBufferPolicy(state, pol.Buffer, f, fasLog)
    72  	case autoscalingv1.WebhookPolicyType:
    73  		replicas, limited, err = applyWebhookPolicy(state, pol.Webhook, f, fasLog)
    74  	case autoscalingv1.CounterPolicyType:
    75  		replicas, limited, err = applyCounterOrListPolicyWrapper(state, pol.Counter, nil, f, gameServerNamespacedLister, nodeCounts, fasLog)
    76  	case autoscalingv1.ListPolicyType:
    77  		replicas, limited, err = applyCounterOrListPolicyWrapper(state, nil, pol.List, f, gameServerNamespacedLister, nodeCounts, fasLog)
    78  	case autoscalingv1.SchedulePolicyType:
    79  		replicas, limited, err = applySchedulePolicy(ctx, state, pol.Schedule, f, gameServerNamespacedLister, nodeCounts, time.Now(), fasLog)
    80  	case autoscalingv1.ChainPolicyType:
    81  		replicas, limited, err = applyChainPolicy(ctx, state, pol.Chain, f, gameServerNamespacedLister, nodeCounts, time.Now(), fasLog)
    82  	case autoscalingv1.WasmPolicyType:
    83  		replicas, limited, err = applyWasmPolicy(ctx, state, pol.Wasm, f, fasLog)
    84  
    85  	default:
    86  		err = errors.New("wrong policy type, should be one of: Buffer, Webhook, Counter, List, Schedule, Chain")
    87  	}
    88  
    89  	if err != nil && !errors.Is(err, InactiveScheduleError{}) {
    90  		loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).
    91  			Debugf("Failed to apply policy type %q: %v", pol.Type, err)
    92  	}
    93  
    94  	return replicas, limited, err
    95  }
    96  
    97  func applyWasmPolicy(ctx context.Context, state *fasState, wp *autoscalingv1.WasmPolicy, f *agonesv1.Fleet, log *FasLogger) (int32, bool, error) {
    98  	if !runtime.FeatureEnabled(runtime.FeatureWasmAutoscaler) {
    99  		return 0, false, errors.Errorf("cannot apply WasmPolicy unless feature flag %s is enabled", runtime.FeatureWasmAutoscaler)
   100  	}
   101  
   102  	if wp == nil {
   103  		return 0, false, errors.New("wasmPolicy parameter must not be nil")
   104  	}
   105  
   106  	if f == nil {
   107  		return 0, false, errors.New("fleet parameter must not be nil")
   108  	}
   109  
   110  	if state.wasmPlugin == nil {
   111  		// Build URL from the WasmPolicy
   112  		u, err := buildURLFromConfiguration(state, wp.From.URL)
   113  		if err != nil {
   114  			return 0, false, err
   115  		}
   116  
   117  		if state.httpClient == nil {
   118  			return 0, false, errors.New("http client not set")
   119  		}
   120  
   121  		res, err := state.httpClient.Get(u.String())
   122  		if err != nil {
   123  			return 0, false, errors.Wrapf(err, "failed to fetch Wasm module from %s", u.String())
   124  		}
   125  		defer res.Body.Close() //nolint:errcheck
   126  
   127  		if res.StatusCode != http.StatusOK {
   128  			return 0, false, fmt.Errorf("bad status code %d from the server: %s", res.StatusCode, u.String())
   129  		}
   130  
   131  		b, err := io.ReadAll(res.Body)
   132  		if err != nil {
   133  			return 0, false, errors.Wrapf(err, "failed to read Wasm module from %s", u.String())
   134  		}
   135  
   136  		data := extism.WasmData{Data: b}
   137  		if len(wp.Hash) > 0 {
   138  			data.Hash = wp.Hash
   139  		}
   140  		manifest := extism.Manifest{
   141  			Wasm: []extism.Wasm{
   142  				data,
   143  			},
   144  			Config: wp.Config,
   145  		}
   146  
   147  		config := extism.PluginConfig{
   148  			EnableWasi: true,
   149  		}
   150  		plugin, err := extism.NewPlugin(ctx, manifest, config, []extism.HostFunction{})
   151  		if err != nil {
   152  			return 0, false, errors.Wrapf(err, "failed to create Wasm plugin from %s", u.String())
   153  		}
   154  		state.wasmPlugin = plugin // Store the plugin in the state map
   155  	}
   156  
   157  	// Create FleetAutoscaleReview
   158  	review := autoscalingv1.FleetAutoscaleReview{
   159  		Request: &autoscalingv1.FleetAutoscaleRequest{
   160  			UID:       uuid.NewUUID(),
   161  			Name:      f.Name,
   162  			Namespace: f.Namespace,
   163  			Status:    f.Status,
   164  		},
   165  		Response: nil,
   166  	}
   167  
   168  	if runtime.FeatureEnabled(runtime.FeatureFleetAutoscaleRequestMetaData) {
   169  		review.Request.Annotations = f.ObjectMeta.Annotations
   170  		review.Request.Labels = f.ObjectMeta.Labels
   171  	}
   172  
   173  	b, err := json.Marshal(review)
   174  	if err != nil {
   175  		return 0, false, errors.Wrap(err, "failed to marshal autoscaling request")
   176  	}
   177  
   178  	_, b, err = state.wasmPlugin.CallWithContext(ctx, wp.Function, b)
   179  	if err != nil {
   180  		return 0, false, errors.Wrapf(err, "failed to call Wasm plugin function %s", wp.Function)
   181  	}
   182  
   183  	if err := json.Unmarshal(b, &review); err != nil {
   184  		return 0, false, errors.Wrap(err, "failed to unmarshal autoscaling response")
   185  	}
   186  
   187  	loggerForFleetAutoscalerKey(log.fas.ObjectMeta.Name, log.baseLogger).Debugf(
   188  		"Fleet Autoscaler operation completed for fleet: %s, with was function: %s", f.ObjectMeta.Name, wp.Function)
   189  
   190  	if review.Response.Scale {
   191  		return review.Response.Replicas, false, nil
   192  	}
   193  
   194  	return f.Status.Replicas, false, nil
   195  }
   196  
   197  // buildURLFromConfiguration - build URL for Webhook and set CARoot for client Transport
   198  func buildURLFromConfiguration(state *fasState, w *autoscalingv1.URLConfiguration) (u *url.URL, err error) {
   199  	if w.URL != nil && w.Service != nil {
   200  		return nil, errors.New("service and URL cannot be used simultaneously")
   201  	}
   202  
   203  	// if we haven't created the http state yet, let's create the http client, with appropriate tls configuration.
   204  	if state.httpClient == nil {
   205  		config := &tls.Config{}
   206  		state.httpClient = &http.Client{
   207  			Timeout: 15 * time.Second,
   208  			Transport: &http.Transport{
   209  				TLSClientConfig: config,
   210  			},
   211  		}
   212  
   213  		if w.CABundle != nil {
   214  			if err := setCABundle(config, w.CABundle); err != nil {
   215  				return nil, err
   216  			}
   217  		}
   218  	}
   219  
   220  	scheme := "http"
   221  	if w.CABundle != nil {
   222  		scheme = "https"
   223  	}
   224  
   225  	if w.URL != nil {
   226  		if *w.URL == "" {
   227  			return nil, errors.New("URL was not provided")
   228  		}
   229  
   230  		return url.ParseRequestURI(*w.URL)
   231  	}
   232  
   233  	if w.Service == nil {
   234  		return nil, errors.New("service was not provided, either URL or Service must be provided")
   235  	}
   236  
   237  	if w.Service.Name == "" {
   238  		return nil, errors.New("service name was not provided")
   239  	}
   240  
   241  	if w.Service.Path == nil {
   242  		empty := ""
   243  		w.Service.Path = &empty
   244  	}
   245  
   246  	if w.Service.Namespace == "" {
   247  		w.Service.Namespace = "default"
   248  	}
   249  
   250  	return createURL(scheme, w.Service.Name, w.Service.Namespace, *w.Service.Path, w.Service.Port), nil
   251  }
   252  
   253  // moved to a separate method to cover it with unit tests and check that URL corresponds to a proper pattern
   254  func createURL(scheme, name, namespace, path string, port *int32) *url.URL {
   255  	var hostPort int32 = 8000
   256  	if port != nil {
   257  		hostPort = *port
   258  	}
   259  
   260  	return &url.URL{
   261  		Scheme: scheme,
   262  		Host:   fmt.Sprintf("%s.%s.svc:%d", name, namespace, hostPort),
   263  		Path:   path,
   264  	}
   265  }
   266  
   267  func setCABundle(tls *tls.Config, caBundle []byte) error {
   268  	rootCAs := x509.NewCertPool()
   269  	if ok := rootCAs.AppendCertsFromPEM(caBundle); !ok {
   270  		return errors.New("no certs were appended from caBundle")
   271  	}
   272  	tls.RootCAs = rootCAs
   273  	return nil
   274  }
   275  
   276  func applyWebhookPolicy(state *fasState, w *autoscalingv1.URLConfiguration, f *agonesv1.Fleet, fasLog *FasLogger) (replicas int32, limited bool, err error) {
   277  	if w == nil {
   278  		return 0, false, errors.New("webhookPolicy parameter must not be nil")
   279  	}
   280  
   281  	if f == nil {
   282  		return 0, false, errors.New("fleet parameter must not be nil")
   283  	}
   284  
   285  	u, err := buildURLFromConfiguration(state, w)
   286  	if err != nil {
   287  		return 0, false, err
   288  	}
   289  	if state.httpClient == nil {
   290  		return 0, false, errors.New("http client not set")
   291  	}
   292  
   293  	faReq := autoscalingv1.FleetAutoscaleReview{
   294  		Request: &autoscalingv1.FleetAutoscaleRequest{
   295  			UID:       uuid.NewUUID(),
   296  			Name:      f.Name,
   297  			Namespace: f.Namespace,
   298  			Status:    f.Status,
   299  		},
   300  		Response: nil,
   301  	}
   302  
   303  	if runtime.FeatureEnabled(runtime.FeatureFleetAutoscaleRequestMetaData) {
   304  		faReq.Request.Annotations = f.ObjectMeta.Annotations
   305  		faReq.Request.Labels = f.ObjectMeta.Labels
   306  	}
   307  
   308  	b, err := json.Marshal(faReq)
   309  	if err != nil {
   310  		return 0, false, err
   311  	}
   312  
   313  	res, err := state.httpClient.Post(
   314  		u.String(),
   315  		"application/json",
   316  		strings.NewReader(string(b)),
   317  	)
   318  	if err != nil {
   319  		return 0, false, err
   320  	}
   321  	defer func() {
   322  		if cerr := res.Body.Close(); cerr != nil {
   323  			if err != nil {
   324  				err = errors.Wrap(err, cerr.Error())
   325  			} else {
   326  				err = cerr
   327  			}
   328  		}
   329  	}()
   330  
   331  	if res.StatusCode != http.StatusOK {
   332  		return 0, false, fmt.Errorf("bad status code %d from the server: %s", res.StatusCode, u.String())
   333  	}
   334  	result, err := io.ReadAll(res.Body)
   335  	if err != nil {
   336  		return 0, false, err
   337  	}
   338  
   339  	var faResp autoscalingv1.FleetAutoscaleReview
   340  	err = json.Unmarshal(result, &faResp)
   341  	if err != nil {
   342  		return 0, false, err
   343  	}
   344  
   345  	// Log Fleet Autoscaler operation, handling nil or empty Name in one line
   346  	webhookPolicyName := "<nil>"
   347  	if w.Service != nil && w.Service.Name != "" {
   348  		webhookPolicyName = w.Service.Name
   349  	}
   350  	loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   351  		"Fleet Autoscaler operation completed for fleet: %s, with WebhookPolicy: %s", f.ObjectMeta.Name, webhookPolicyName)
   352  
   353  	if faResp.Response.Scale {
   354  		return faResp.Response.Replicas, false, nil
   355  	}
   356  
   357  	return f.Status.Replicas, false, nil
   358  }
   359  
   360  func applyBufferPolicy(_ *fasState, b *autoscalingv1.BufferPolicy, f *agonesv1.Fleet, fasLog *FasLogger) (int32, bool, error) {
   361  	var replicas int32
   362  
   363  	if b.BufferSize.Type == intstr.Int {
   364  		replicas = f.Status.AllocatedReplicas + int32(b.BufferSize.IntValue())
   365  	} else {
   366  		// the percentage value is a little more complex, as we can't apply
   367  		// the desired percentage to any current value, but to the future one
   368  		// Example: we have 8 allocated replicas, 10 total replicas and bufferSize set to 30%.
   369  		// 30% means that we must have 30% ready instances in the fleet
   370  		// Right now there are 20%, so we must increase the fleet until we reach 30%
   371  		// To compute the new size, we start from the other end: if ready must be 30%
   372  		// it means that allocated must be 70% and adjust the fleet size to make that true.
   373  		bufferPercent, err := intstr.GetValueFromIntOrPercent(&b.BufferSize, 100, true)
   374  		if err != nil {
   375  			return 0, false, err
   376  		}
   377  		// use Math.Ceil to round the result up
   378  		replicas = int32(math.Ceil(float64(f.Status.AllocatedReplicas*100) / float64(100-bufferPercent)))
   379  	}
   380  
   381  	scalingInLimited := false
   382  	scalingOutLimited := false
   383  
   384  	if replicas < b.MinReplicas {
   385  		replicas = b.MinReplicas
   386  		scalingInLimited = true
   387  	}
   388  	if replicas > b.MaxReplicas {
   389  		replicas = b.MaxReplicas
   390  		scalingOutLimited = true
   391  	}
   392  
   393  	loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   394  		"Fleet Autoscaler operation completed for fleet: %s, with BufferPolicy: %v", f.ObjectMeta.Name, b.BufferSize)
   395  
   396  	return replicas, scalingInLimited || scalingOutLimited, nil
   397  }
   398  
   399  // New function to call applyCounterOrListPolicy
   400  func applyCounterOrListPolicyWrapper(_ *fasState, c *autoscalingv1.CounterPolicy, l *autoscalingv1.ListPolicy,
   401  	f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   402  	nodeCounts map[string]gameservers.NodeCount, fasLog *FasLogger) (int32, bool, error) {
   403  
   404  	// Call applyCounterOrListPolicy inside the wrapper
   405  	desiredReplicas, scalingLimited, err := applyCounterOrListPolicy(c, l, f, gameServerNamespacedLister, nodeCounts)
   406  
   407  	if err == nil {
   408  		// Log directly based on which policy is used, with a description of the key
   409  		if c != nil {
   410  			// Log the Key from CounterPolicy with a description
   411  			loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   412  				"Fleet Autoscaler operation completed for fleet: %s, with CounterPolicy - Key: %v", f.ObjectMeta.Name, c.Key)
   413  		} else if l != nil {
   414  			// Log the Key from ListPolicy with a description
   415  			loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   416  				"Fleet Autoscaler operation completed for fleet: %s, with ListPolicy - Key: %v", f.ObjectMeta.Name, l.Key)
   417  		}
   418  	}
   419  
   420  	return desiredReplicas, scalingLimited, err
   421  }
   422  
   423  func applyCounterOrListPolicy(c *autoscalingv1.CounterPolicy, l *autoscalingv1.ListPolicy,
   424  	f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   425  	nodeCounts map[string]gameservers.NodeCount) (int32, bool, error) {
   426  
   427  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   428  		return 0, false, errors.Errorf("cannot apply CounterPolicy unless feature flag %s is enabled", runtime.FeatureCountsAndLists)
   429  	}
   430  
   431  	var isCounter bool          // True if a CounterPolicy False if a ListPolicy
   432  	var key string              // The specified Counter or List
   433  	var count int64             // The Count or number of Values in the template Game Server
   434  	var capacity int64          // The Capacity in the template Game Server
   435  	var aggCount int64          // The Aggregate Count of the specified Counter or List of all GameServers across the GameServerSet in the Fleet
   436  	var aggCapacity int64       // The Aggregate Capacity of the specified Counter or List of all GameServers across the GameServerSet in the Fleet
   437  	var aggAllocatedCount int64 // The Aggregate Count of the specified Counter or List of GameServers in an Allocated state across the GameServerSet in the Fleet
   438  	var minCapacity int64       // The Minimum Aggregate Capacity
   439  	var maxCapacity int64       // The Maximum Aggregate Capacity
   440  	var bufferSize intstr.IntOrString
   441  
   442  	if c != nil {
   443  		isCounter = true
   444  		counter, ok := f.Spec.Template.Spec.Counters[c.Key]
   445  		if !ok {
   446  			return 0, false, errors.Errorf("cannot apply CounterPolicy as Counter key %s does not exist in the Fleet Spec", c.Key)
   447  		}
   448  
   449  		aggCounter, ok := f.Status.Counters[c.Key]
   450  		if !ok {
   451  			return 0, false, errors.Errorf("cannot apply CounterPolicy as Counter key %s does not exist in the Fleet Status", c.Key)
   452  		}
   453  
   454  		key = c.Key
   455  		count = counter.Count
   456  		capacity = counter.Capacity
   457  		aggCount = aggCounter.Count
   458  		aggCapacity = aggCounter.Capacity
   459  		aggAllocatedCount = aggCounter.AllocatedCount
   460  		minCapacity = c.MinCapacity
   461  		maxCapacity = c.MaxCapacity
   462  		bufferSize = c.BufferSize
   463  
   464  	} else {
   465  		isCounter = false
   466  		list, ok := f.Spec.Template.Spec.Lists[l.Key]
   467  		if !ok {
   468  			return 0, false, errors.Errorf("cannot apply ListPolicy as List key %s does not exist in the Fleet Spec", l.Key)
   469  		}
   470  
   471  		aggList, ok := f.Status.Lists[l.Key]
   472  		if !ok {
   473  			return 0, false, errors.Errorf("cannot apply ListPolicy as List key %s does not exist in the Fleet Status", l.Key)
   474  		}
   475  
   476  		key = l.Key
   477  		count = int64(len(list.Values))
   478  		capacity = list.Capacity
   479  		aggCount = aggList.Count
   480  		aggCapacity = aggList.Capacity
   481  		aggAllocatedCount = aggList.AllocatedCount
   482  		minCapacity = l.MinCapacity
   483  		maxCapacity = l.MaxCapacity
   484  		bufferSize = l.BufferSize
   485  	}
   486  
   487  	// Checks if we've limited by TOTAL capacity
   488  	limited, scale := isLimited(aggCapacity, minCapacity, maxCapacity)
   489  
   490  	// Total current number of Replicas
   491  	replicas := f.Status.Replicas
   492  
   493  	// The buffer is the desired available capacity
   494  	var buffer int64
   495  
   496  	switch {
   497  	// Desired replicas based on BufferSize specified as an absolute value (i.e. 5)
   498  	case bufferSize.Type == intstr.Int:
   499  		buffer = int64(bufferSize.IntValue())
   500  	// Desired replicas based on BufferSize specified as a percent (i.e. 5%)
   501  	case bufferSize.Type == intstr.String:
   502  		bufferPercent, err := intstr.GetValueFromIntOrPercent(&bufferSize, 100, isCounter)
   503  		if err != nil {
   504  			return 0, false, err
   505  		}
   506  		// If the Aggregated Allocated Counts is 0 then desired capacity gets calculated as 0. If the
   507  		// capacity of 1 replica is equal to or greater than minimum capacity we can exit early.
   508  		if aggAllocatedCount <= 0 && capacity >= minCapacity {
   509  			return 1, true, nil
   510  		}
   511  
   512  		// The desired TOTAL capacity based on the Aggregated Allocated Counts (see applyBufferPolicy for explanation)
   513  		desiredCapacity := int64(math.Ceil(float64(aggAllocatedCount*100) / float64(100-bufferPercent)))
   514  		// Convert into a desired AVAILABLE capacity aka the buffer
   515  		buffer = desiredCapacity - aggAllocatedCount
   516  	}
   517  
   518  	// Current available capacity across the TOTAL fleet
   519  	switch availableCapacity := aggCapacity - aggCount; {
   520  	case availableCapacity == buffer:
   521  		if limited {
   522  			return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   523  				capacity, aggCapacity, minCapacity, maxCapacity)
   524  		}
   525  		return replicas, false, nil
   526  	case availableCapacity < buffer: // Scale Up
   527  		if limited { // Case where we want to scale up, but we're already limited by MaxCapacity.
   528  			return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   529  				capacity, aggCapacity, minCapacity, maxCapacity)
   530  		}
   531  		return scaleUp(replicas, capacity, count, aggCapacity, availableCapacity, maxCapacity,
   532  			minCapacity, buffer)
   533  	case availableCapacity > buffer: // Scale Down
   534  		if limited && scale == 1 { // Case where we want to scale down but we're already limited by MinCapacity
   535  			return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   536  				capacity, aggCapacity, minCapacity, maxCapacity)
   537  		}
   538  		return scaleDown(f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas, aggCount,
   539  			aggCapacity, minCapacity, buffer)
   540  	}
   541  
   542  	if isCounter {
   543  		return 0, false, errors.Errorf("unable to apply CounterPolicy %v", c)
   544  	}
   545  	return 0, false, errors.Errorf("unable to apply ListPolicy %v", l)
   546  }
   547  
   548  func applySchedulePolicy(ctx context.Context, state *fasState, s *autoscalingv1.SchedulePolicy, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, currentTime time.Time, fasLog *FasLogger) (int32, bool, error) {
   549  	// Ensure the scheduled autoscaler feature gate is enabled
   550  	if !runtime.FeatureEnabled(runtime.FeatureScheduledAutoscaler) {
   551  		return 0, false, errors.Errorf("cannot apply SchedulePolicy unless feature flag %s is enabled", runtime.FeatureScheduledAutoscaler)
   552  	}
   553  
   554  	if isScheduleActive(s, currentTime) {
   555  		return computeDesiredFleetSize(ctx, state, s.Policy, f, gameServerNamespacedLister, nodeCounts, fasLog)
   556  	}
   557  
   558  	loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   559  		"Fleet autoscaler check: Schedule not active for fleet %s", f.ObjectMeta.Name)
   560  
   561  	// If the schedule wasn't active then return the current replica amount of the fleet
   562  	return f.Status.Replicas, false, &InactiveScheduleError{}
   563  }
   564  
   565  func applyChainPolicy(ctx context.Context, state *fasState, c autoscalingv1.ChainPolicy, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, currentTime time.Time, fasLog *FasLogger) (int32, bool, error) {
   566  	// Ensure the scheduled autoscaler feature gate is enabled
   567  	if !runtime.FeatureEnabled(runtime.FeatureScheduledAutoscaler) {
   568  		return 0, false, errors.Errorf("cannot apply ChainPolicy unless feature flag %s is enabled", runtime.FeatureScheduledAutoscaler)
   569  	}
   570  
   571  	replicas := f.Status.Replicas
   572  	var limited bool
   573  	var err error
   574  	var chainEntry autoscalingv1.FleetAutoscalerPolicyType
   575  
   576  	// Loop over all entries in the chain
   577  	for _, entry := range c {
   578  		switch entry.Type {
   579  		case autoscalingv1.SchedulePolicyType:
   580  			replicas, limited, err = applySchedulePolicy(ctx, state, entry.Schedule, f, gameServerNamespacedLister, nodeCounts, currentTime, fasLog)
   581  
   582  			if err != nil {
   583  				loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   584  					"Failed to apply SchedulePolicy ID=%s in ChainPolicy: %v", entry.ID, err)
   585  			}
   586  		case autoscalingv1.WebhookPolicyType:
   587  			replicas, limited, err = applyWebhookPolicy(state, entry.Webhook, f, fasLog)
   588  
   589  			if err != nil {
   590  				loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   591  					"Failed to apply WebhookPolicy ID=%s in ChainPolicy: %v", entry.ID, err)
   592  			}
   593  		default:
   594  			// Every other policy type we just want to compute the desired fleet and return it
   595  			replicas, limited, err = computeDesiredFleetSize(ctx, state, entry.FleetAutoscalerPolicy, f, gameServerNamespacedLister, nodeCounts, fasLog)
   596  
   597  			if err != nil && !errors.Is(err, InactiveScheduleError{}) {
   598  				loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   599  					"Failed to apply %s ID=%s in ChainPolicy: %v", entry.Type, entry.ID, err)
   600  			}
   601  		}
   602  
   603  		// If no error occurred, exit the loop early
   604  		if err == nil {
   605  			chainEntry = autoscalingv1.FleetAutoscalerPolicyType(fmt.Sprintf("%s:%s:%s", autoscalingv1.ChainPolicyType, entry.ID, entry.Type))
   606  			break
   607  		}
   608  	}
   609  
   610  	if err != nil && !errors.Is(err, InactiveScheduleError{}) {
   611  		emitChainPolicyEvent(fasLog, "Unknown", "")
   612  		loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debug("Failed to apply ChainPolicy: no valid policy applied")
   613  		return replicas, limited, err
   614  	}
   615  
   616  	currChainEntry := strings.Split(string(chainEntry), ":")
   617  
   618  	// Handle the final state of the chain and update status if necessary
   619  	if lastAppliedPolicy := fasLog.fas.Status.LastAppliedPolicy; strings.Contains(string(lastAppliedPolicy), string(autoscalingv1.ChainPolicyType)) {
   620  		prevChainEntry := strings.Split(string(lastAppliedPolicy), ":")
   621  
   622  		// Only log if there is a change in the policy
   623  		if len(prevChainEntry) > 2 && (currChainEntry[1] != prevChainEntry[1] || currChainEntry[2] != prevChainEntry[2]) {
   624  			fasLog.currChainEntry = &chainEntry
   625  			emitChainPolicyEvent(fasLog, currChainEntry[1], currChainEntry[2])
   626  		}
   627  	} else {
   628  		fasLog.currChainEntry = &chainEntry
   629  		emitChainPolicyEvent(fasLog, currChainEntry[1], currChainEntry[2])
   630  	}
   631  
   632  	return replicas, limited, nil
   633  }
   634  
   635  // isScheduleActive checks if a chain entry's is active and returns a boolean, true if active, false otherwise
   636  func isScheduleActive(s *autoscalingv1.SchedulePolicy, currentTime time.Time) bool {
   637  	// Used for checking ahead of the schedule for daylight savings purposes
   638  	cronDelta := (time.Minute * -1) + (time.Second * -30)
   639  
   640  	// If the current time is before the start time, the schedule is inactive so return false
   641  	startTime := s.Between.Start.Time
   642  	if currentTime.Before(startTime) {
   643  		return false
   644  	}
   645  
   646  	// If an end time is present and the current time is after the end time, the schedule is inactive so return false
   647  	endTime := s.Between.End.Time
   648  	if !endTime.IsZero() && currentTime.After(endTime) {
   649  		return false
   650  	}
   651  
   652  	// If no startCron field is specified, then it's automatically true (duration is no longer relevant since we're always running)
   653  	if s.ActivePeriod.StartCron == "" {
   654  		return true
   655  	}
   656  
   657  	// Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled
   658  	location, _ := time.LoadLocation(s.ActivePeriod.Timezone)
   659  
   660  	// Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled
   661  	startCron, _ := cron.ParseStandard(s.ActivePeriod.StartCron)
   662  
   663  	// Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled.
   664  	// If the duration is empty set it to the largest duration possible (290 years)
   665  	duration, _ := time.ParseDuration(s.ActivePeriod.Duration)
   666  	if s.ActivePeriod.Duration == "" {
   667  		duration, _ = time.ParseDuration(maxDuration)
   668  	}
   669  
   670  	// Get the current time - duration
   671  	currentTimeMinusDuration := currentTime.Add(duration * -1)
   672  	// Take (current time - duration) to get the first available start time
   673  	cronStartTime := startCron.Next(currentTimeMinusDuration.In(location))
   674  	// Take the (cronStartTime + duration) to get the end time
   675  	cronEndTime := cronStartTime.Add(duration)
   676  
   677  	// If the current time is after the cronStartTime - 90 seconds (for daylight saving purposes) AND the current time before the cronEndTime
   678  	// then return true
   679  	// Example: startCron = 0 14 * * * // 2:00 PM Everyday | duration = 1 hr | cronDelta = 90 seconds | currentTime = 2024-08-01T14:30:00Z | currentTimeMinusDuration = 2024-08-01T13:30:00Z
   680  	// then cronStartTime = 2024-08-01T14:00:00Z and cronEndTime = 2024-08-01T15:00:00Z
   681  	// and since currentTime > cronStartTime + cronDelta AND currentTime < cronEndTime, we return true
   682  	if currentTime.After(cronStartTime.Add(cronDelta)) && currentTime.Before(cronEndTime) {
   683  		return true
   684  	}
   685  
   686  	return false
   687  }
   688  
   689  // getSortedGameServers returns the list of Game Servers for the Fleet in the order in which the
   690  // Game Servers would be deleted.
   691  func getSortedGameServers(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   692  	nodeCounts map[string]gameservers.NodeCount) ([]*agonesv1.GameServer, error) {
   693  	gsList, err := fleets.ListGameServersByFleetOwner(gameServerNamespacedLister, f)
   694  	if err != nil {
   695  		return nil, err
   696  	}
   697  
   698  	gameServers := gssets.SortGameServersByStrategy(f.Spec.Scheduling, gsList, nodeCounts, f.Spec.Priorities)
   699  	return gameServers, nil
   700  }
   701  
   702  // isLimited indicates that the calculated scale would be above or below the range defined by
   703  // MinCapacity and MaxCapacity in the ListPolicy or CounterPolicy.
   704  // Return 1 if the fleet needs to scale up, -1 if the fleets need to scale down, 0 if the fleet does
   705  // not need to scale, or if the fleet is not limited.
   706  func isLimited(aggCapacity, minCapacity, maxCapacity int64) (bool, int) {
   707  	if aggCapacity < minCapacity { // Scale up
   708  		return true, 1
   709  	}
   710  	if aggCapacity > maxCapacity { // Scale down
   711  		return true, -1
   712  	}
   713  	return false, 0
   714  }
   715  
   716  // scaleUpLimited scales up the fleet to meet the MinCapacity
   717  func scaleUpLimited(replicas int32, capacity, aggCapacity, minCapacity int64) (int32, bool, error) {
   718  	if capacity == 0 {
   719  		return 0, false, errors.Errorf("cannot scale up as Capacity is equal to 0")
   720  	}
   721  	for aggCapacity < minCapacity {
   722  		aggCapacity += capacity
   723  		replicas++
   724  	}
   725  	return replicas, true, nil
   726  }
   727  
   728  // scaleDownLimited scales down the fleet to meet the MaxCapacity
   729  func scaleDownLimited(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   730  	nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32,
   731  	aggCapacity, maxCapacity int64) (int32, bool, error) {
   732  	// Game Servers in order of deletion on scale down
   733  	gameServers, err := getSortedGameServers(f, gameServerNamespacedLister, nodeCounts)
   734  	if err != nil {
   735  		return 0, false, err
   736  	}
   737  	for _, gs := range gameServers {
   738  		if aggCapacity <= maxCapacity {
   739  			break
   740  		}
   741  		switch isCounter {
   742  		case true:
   743  			if counter, ok := gs.Status.Counters[key]; ok {
   744  				aggCapacity -= counter.Capacity
   745  			}
   746  		case false:
   747  			if list, ok := gs.Status.Lists[key]; ok {
   748  				aggCapacity -= list.Capacity
   749  			}
   750  		}
   751  		replicas--
   752  	}
   753  
   754  	// We are not currently able to scale down to zero replicas, so one replica is the minimum allowed
   755  	if replicas < 1 {
   756  		replicas = 1
   757  	}
   758  
   759  	return replicas, true, nil
   760  }
   761  
   762  func scaleLimited(scale int, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   763  	nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32,
   764  	capacity, aggCapacity, minCapacity, maxCapacity int64) (int32, bool, error) {
   765  
   766  	switch scale {
   767  	case 1: // scale up
   768  		return scaleUpLimited(replicas, capacity, aggCapacity, minCapacity)
   769  	case -1: // scale down
   770  		return scaleDownLimited(f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   771  			aggCapacity, maxCapacity)
   772  	case 0:
   773  		return replicas, false, nil
   774  	}
   775  
   776  	return 0, false, errors.Errorf("cannot scale due to error in scaleLimited function")
   777  }
   778  
   779  // scaleUp scales up for either Integer or Percentage Buffer.
   780  func scaleUp(replicas int32, capacity, count, aggCapacity, availableCapacity, maxCapacity,
   781  	minCapacity, buffer int64) (int32, bool, error) {
   782  
   783  	// How much capacity is gained by adding one more replica to the fleet.
   784  	replicaCapacity := capacity - count
   785  	if replicaCapacity <= 0 {
   786  		return 0, false, errors.Errorf("cannot scale up as adding additional replicas does not increase available Capacity")
   787  	}
   788  
   789  	additionalReplicas := int32(math.Ceil((float64(buffer) - float64(availableCapacity)) / float64(replicaCapacity)))
   790  
   791  	// Check to make sure we're not limited (over Max Capacity)
   792  	limited, _ := isLimited(aggCapacity+(int64(additionalReplicas)*capacity), minCapacity, maxCapacity)
   793  	if limited {
   794  		additionalReplicas = int32((maxCapacity - aggCapacity) / capacity)
   795  	}
   796  
   797  	return replicas + additionalReplicas, limited, nil
   798  }
   799  
   800  // scaleDown scales down for either Integer or Percentage Buffer.
   801  func scaleDown(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   802  	nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32,
   803  	aggCount, aggCapacity, minCapacity, buffer int64) (int32, bool, error) {
   804  	// Exit early if we're already at MinCapacity to avoid calling getSortedGameServers if unnecessary
   805  	if aggCapacity == minCapacity {
   806  		return replicas, true, nil
   807  	}
   808  
   809  	// We first need to get the individual game servers in order of deletion on scale down, as any
   810  	// game server may have a unique value for counts and / or capacity.
   811  	gameServers, err := getSortedGameServers(f, gameServerNamespacedLister, nodeCounts)
   812  	if err != nil {
   813  		return 0, false, err
   814  	}
   815  
   816  	var availableCapacity int64
   817  
   818  	// "Remove" one game server at a time in order of potential deletion. (Not actually removed here,
   819  	// that's done later, if possible, by the fleetautoscaler controller.)
   820  	for _, gs := range gameServers {
   821  		replicas--
   822  		switch isCounter {
   823  		case true:
   824  			if counter, ok := gs.Status.Counters[key]; ok {
   825  				aggCount -= counter.Count
   826  				aggCapacity -= counter.Capacity
   827  			} else {
   828  				continue
   829  			}
   830  		case false:
   831  			if list, ok := gs.Status.Lists[key]; ok {
   832  				aggCount -= int64(len(list.Values))
   833  				aggCapacity -= list.Capacity
   834  			} else {
   835  				continue
   836  			}
   837  		}
   838  		availableCapacity = aggCapacity - aggCount
   839  		// Check if we've overshot our buffer
   840  		if availableCapacity < buffer {
   841  			return replicas + 1, false, nil
   842  		}
   843  		// Check if we're Limited (Below MinCapacity)
   844  		if aggCapacity < minCapacity {
   845  			return replicas + 1, true, nil
   846  		}
   847  		// Check if we're at our desired Buffer
   848  		if availableCapacity == buffer {
   849  			return replicas, false, nil
   850  		}
   851  		// Check if we're at Limited
   852  		if aggCapacity == minCapacity {
   853  			return replicas, true, nil
   854  		}
   855  	}
   856  
   857  	// We are not currently able to scale down to zero replicas, so one replica is the minimum allowed.
   858  	if replicas < 1 {
   859  		replicas = 1
   860  	}
   861  
   862  	return replicas, false, nil
   863  }
   864  
   865  func emitChainPolicyEvent(fasLog *FasLogger, chainID string, chainType string) {
   866  	if fasLog.recorder == nil {
   867  		return
   868  	}
   869  
   870  	var eventMessage string
   871  	var eventType string
   872  
   873  	if chainID == "Unknown" {
   874  		eventMessage = fmt.Sprintf("FleetAutoscaler '%s' failed to apply ChainPolicy | ID: %s | Type: %s",
   875  			fasLog.fas.ObjectMeta.Name, chainID, chainType)
   876  		eventType = corev1.EventTypeWarning // Use Warning for failure
   877  	} else {
   878  		eventMessage = fmt.Sprintf("FleetAutoscaler '%s' successfully applied ChainPolicy | ID: %s | Type: %s",
   879  			fasLog.fas.ObjectMeta.Name, chainID, chainType)
   880  		eventType = corev1.EventTypeNormal // Use Normal for success
   881  	}
   882  
   883  	// Emit the event
   884  	fasLog.recorder.Eventf(fasLog.fas, eventType, "ChainPolicy", eventMessage)
   885  }