agones.dev/agones@v1.53.0/pkg/fleetautoscalers/fleetautoscalers.go (about)

     1  /*
     2   * Copyright 2018 Google LLC All Rights Reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package fleetautoscalers
    18  
    19  import (
    20  	"context"
    21  	"crypto/tls"
    22  	"crypto/x509"
    23  	"encoding/json"
    24  	"fmt"
    25  	"io"
    26  	"math"
    27  	"net/http"
    28  	"net/url"
    29  	"strings"
    30  	"time"
    31  
    32  	extism "github.com/extism/go-sdk"
    33  	"github.com/pkg/errors"
    34  	"github.com/robfig/cron/v3"
    35  	corev1 "k8s.io/api/core/v1"
    36  	"k8s.io/apimachinery/pkg/util/intstr"
    37  	"k8s.io/apimachinery/pkg/util/uuid"
    38  
    39  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    40  	autoscalingv1 "agones.dev/agones/pkg/apis/autoscaling/v1"
    41  	listeragonesv1 "agones.dev/agones/pkg/client/listers/agones/v1"
    42  	"agones.dev/agones/pkg/fleets"
    43  	"agones.dev/agones/pkg/gameservers"
    44  	gssets "agones.dev/agones/pkg/gameserversets"
    45  	"agones.dev/agones/pkg/util/runtime"
    46  )
    47  
    48  const (
    49  	maxDuration  = "2540400h" // 290 Years
    50  	wasmStateKey = "wasm"     // Key used to store the Wasm plugin in the state map
    51  )
    52  
    53  var tlsConfig = &tls.Config{}
    54  var client = http.Client{
    55  	Timeout: 15 * time.Second,
    56  	Transport: &http.Transport{
    57  		TLSClientConfig: tlsConfig,
    58  	},
    59  }
    60  
    61  // InactiveScheduleError denotes an error for schedules that are not currently active.
    62  type InactiveScheduleError struct{}
    63  
    64  func (InactiveScheduleError) Error() string {
    65  	return "inactive schedule, policy not applicable"
    66  }
    67  
    68  // computeDesiredFleetSize computes the new desired size of the given fleet
    69  func computeDesiredFleetSize(ctx context.Context, state map[string]any, pol autoscalingv1.FleetAutoscalerPolicy, f *agonesv1.Fleet,
    70  	gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, fasLog *FasLogger) (int32, bool, error) {
    71  
    72  	var (
    73  		replicas int32
    74  		limited  bool
    75  		err      error
    76  	)
    77  
    78  	switch pol.Type {
    79  	case autoscalingv1.BufferPolicyType:
    80  		replicas, limited, err = applyBufferPolicy(pol.Buffer, f, fasLog)
    81  	case autoscalingv1.WebhookPolicyType:
    82  		replicas, limited, err = applyWebhookPolicy(pol.Webhook, f, fasLog)
    83  	case autoscalingv1.CounterPolicyType:
    84  		replicas, limited, err = applyCounterOrListPolicyWrapper(pol.Counter, nil, f, gameServerNamespacedLister, nodeCounts, fasLog)
    85  	case autoscalingv1.ListPolicyType:
    86  		replicas, limited, err = applyCounterOrListPolicyWrapper(nil, pol.List, f, gameServerNamespacedLister, nodeCounts, fasLog)
    87  	case autoscalingv1.SchedulePolicyType:
    88  		replicas, limited, err = applySchedulePolicy(ctx, state, pol.Schedule, f, gameServerNamespacedLister, nodeCounts, time.Now(), fasLog)
    89  	case autoscalingv1.ChainPolicyType:
    90  		replicas, limited, err = applyChainPolicy(ctx, state, pol.Chain, f, gameServerNamespacedLister, nodeCounts, time.Now(), fasLog)
    91  	case autoscalingv1.WasmPolicyType:
    92  		replicas, limited, err = applyWasmPolicy(ctx, state, pol.Wasm, f, fasLog)
    93  
    94  	default:
    95  		err = errors.New("wrong policy type, should be one of: Buffer, Webhook, Counter, List, Schedule, Chain")
    96  	}
    97  
    98  	if err != nil && !errors.Is(err, InactiveScheduleError{}) {
    99  		loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).
   100  			Debugf("Failed to apply policy type %q: %v", pol.Type, err)
   101  	}
   102  
   103  	return replicas, limited, err
   104  }
   105  
   106  func applyWasmPolicy(ctx context.Context, state map[string]any, wp *autoscalingv1.WasmPolicy, f *agonesv1.Fleet, log *FasLogger) (int32, bool, error) {
   107  	if !runtime.FeatureEnabled(runtime.FeatureWasmAutoscaler) {
   108  		return 0, false, errors.Errorf("cannot apply WasmPolicy unless feature flag %s is enabled", runtime.FeatureWasmAutoscaler)
   109  	}
   110  
   111  	if wp == nil {
   112  		return 0, false, errors.New("wasmPolicy parameter must not be nil")
   113  	}
   114  
   115  	if f == nil {
   116  		return 0, false, errors.New("fleet parameter must not be nil")
   117  	}
   118  
   119  	_, ok := state[wasmStateKey]
   120  	if !ok {
   121  		// Build URL from the WasmPolicy
   122  		u, err := buildURLFromWebhookPolicy(wp.From.URL)
   123  		if err != nil {
   124  			return 0, false, err
   125  		}
   126  		res, err := client.Get(u.String())
   127  		if err != nil {
   128  			return 0, false, errors.Wrapf(err, "failed to fetch Wasm module from %s", u.String())
   129  		}
   130  		defer res.Body.Close() //nolint:errcheck
   131  
   132  		if res.StatusCode != http.StatusOK {
   133  			return 0, false, fmt.Errorf("bad status code %d from the server: %s", res.StatusCode, u.String())
   134  		}
   135  
   136  		b, err := io.ReadAll(res.Body)
   137  		if err != nil {
   138  			return 0, false, errors.Wrapf(err, "failed to read Wasm module from %s", u.String())
   139  		}
   140  
   141  		data := extism.WasmData{Data: b}
   142  		if len(wp.Hash) > 0 {
   143  			data.Hash = wp.Hash
   144  		}
   145  		manifest := extism.Manifest{
   146  			Wasm: []extism.Wasm{
   147  				data,
   148  			},
   149  			Config: wp.Config,
   150  		}
   151  
   152  		config := extism.PluginConfig{
   153  			EnableWasi: true,
   154  		}
   155  		plugin, err := extism.NewPlugin(ctx, manifest, config, []extism.HostFunction{})
   156  		if err != nil {
   157  			return 0, false, errors.Wrapf(err, "failed to create Wasm plugin from %s", u.String())
   158  		}
   159  		state[wasmStateKey] = plugin // Store the plugin in the state map
   160  	}
   161  
   162  	// This should never panic as we control what's in the state map
   163  	plugin := state[wasmStateKey].(*extism.Plugin)
   164  
   165  	// Create FleetAutoscaleReview
   166  	review := autoscalingv1.FleetAutoscaleReview{
   167  		Request: &autoscalingv1.FleetAutoscaleRequest{
   168  			UID:       uuid.NewUUID(),
   169  			Name:      f.Name,
   170  			Namespace: f.Namespace,
   171  			Status:    f.Status,
   172  		},
   173  		Response: nil,
   174  	}
   175  
   176  	if runtime.FeatureEnabled(runtime.FeatureFleetAutoscaleRequestMetaData) {
   177  		review.Request.Annotations = f.ObjectMeta.Annotations
   178  		review.Request.Labels = f.ObjectMeta.Labels
   179  	}
   180  
   181  	b, err := json.Marshal(review)
   182  	if err != nil {
   183  		return 0, false, errors.Wrap(err, "failed to marshal autoscaling request")
   184  	}
   185  
   186  	_, b, err = plugin.CallWithContext(ctx, wp.Function, b)
   187  	if err != nil {
   188  		return 0, false, errors.Wrapf(err, "failed to call Wasm plugin function %s", wp.Function)
   189  	}
   190  
   191  	if err := json.Unmarshal(b, &review); err != nil {
   192  		return 0, false, errors.Wrap(err, "failed to unmarshal autoscaling response")
   193  	}
   194  
   195  	loggerForFleetAutoscalerKey(log.fas.ObjectMeta.Name, log.baseLogger).Debugf(
   196  		"Fleet Autoscaler operation completed for fleet: %s, with was function: %s", f.ObjectMeta.Name, wp.Function)
   197  
   198  	if review.Response.Scale {
   199  		return review.Response.Replicas, false, nil
   200  	}
   201  
   202  	return f.Status.Replicas, false, nil
   203  }
   204  
   205  // buildURLFromWebhookPolicy - build URL for Webhook and set CARoot for client Transport
   206  func buildURLFromWebhookPolicy(w *autoscalingv1.URLConfiguration) (u *url.URL, err error) {
   207  	if w.URL != nil && w.Service != nil {
   208  		return nil, errors.New("service and URL cannot be used simultaneously")
   209  	}
   210  
   211  	scheme := "http"
   212  	if w.CABundle != nil {
   213  		scheme = "https"
   214  
   215  		if err := setCABundle(w.CABundle); err != nil {
   216  			return nil, err
   217  		}
   218  	}
   219  
   220  	if w.URL != nil {
   221  		if *w.URL == "" {
   222  			return nil, errors.New("URL was not provided")
   223  		}
   224  
   225  		return url.ParseRequestURI(*w.URL)
   226  	}
   227  
   228  	if w.Service == nil {
   229  		return nil, errors.New("service was not provided, either URL or Service must be provided")
   230  	}
   231  
   232  	if w.Service.Name == "" {
   233  		return nil, errors.New("service name was not provided")
   234  	}
   235  
   236  	if w.Service.Path == nil {
   237  		empty := ""
   238  		w.Service.Path = &empty
   239  	}
   240  
   241  	if w.Service.Namespace == "" {
   242  		w.Service.Namespace = "default"
   243  	}
   244  
   245  	return createURL(scheme, w.Service.Name, w.Service.Namespace, *w.Service.Path, w.Service.Port), nil
   246  }
   247  
   248  // moved to a separate method to cover it with unit tests and check that URL corresponds to a proper pattern
   249  func createURL(scheme, name, namespace, path string, port *int32) *url.URL {
   250  	var hostPort int32 = 8000
   251  	if port != nil {
   252  		hostPort = *port
   253  	}
   254  
   255  	return &url.URL{
   256  		Scheme: scheme,
   257  		Host:   fmt.Sprintf("%s.%s.svc:%d", name, namespace, hostPort),
   258  		Path:   path,
   259  	}
   260  }
   261  
   262  func setCABundle(caBundle []byte) error {
   263  	// We can have multiple fleetautoscalers with different CABundles defined,
   264  	// so we switch client.Transport before each POST request
   265  	rootCAs := x509.NewCertPool()
   266  	if ok := rootCAs.AppendCertsFromPEM(caBundle); !ok {
   267  		return errors.New("no certs were appended from caBundle")
   268  	}
   269  	tlsConfig.RootCAs = rootCAs
   270  	return nil
   271  }
   272  
   273  func applyWebhookPolicy(w *autoscalingv1.URLConfiguration, f *agonesv1.Fleet, fasLog *FasLogger) (replicas int32, limited bool, err error) {
   274  	if w == nil {
   275  		return 0, false, errors.New("webhookPolicy parameter must not be nil")
   276  	}
   277  
   278  	if f == nil {
   279  		return 0, false, errors.New("fleet parameter must not be nil")
   280  	}
   281  
   282  	u, err := buildURLFromWebhookPolicy(w)
   283  	if err != nil {
   284  		return 0, false, err
   285  	}
   286  
   287  	faReq := autoscalingv1.FleetAutoscaleReview{
   288  		Request: &autoscalingv1.FleetAutoscaleRequest{
   289  			UID:       uuid.NewUUID(),
   290  			Name:      f.Name,
   291  			Namespace: f.Namespace,
   292  			Status:    f.Status,
   293  		},
   294  		Response: nil,
   295  	}
   296  
   297  	if runtime.FeatureEnabled(runtime.FeatureFleetAutoscaleRequestMetaData) {
   298  		faReq.Request.Annotations = f.ObjectMeta.Annotations
   299  		faReq.Request.Labels = f.ObjectMeta.Labels
   300  	}
   301  
   302  	b, err := json.Marshal(faReq)
   303  	if err != nil {
   304  		return 0, false, err
   305  	}
   306  
   307  	res, err := client.Post(
   308  		u.String(),
   309  		"application/json",
   310  		strings.NewReader(string(b)),
   311  	)
   312  	if err != nil {
   313  		return 0, false, err
   314  	}
   315  	defer func() {
   316  		if cerr := res.Body.Close(); cerr != nil {
   317  			if err != nil {
   318  				err = errors.Wrap(err, cerr.Error())
   319  			} else {
   320  				err = cerr
   321  			}
   322  		}
   323  	}()
   324  
   325  	if res.StatusCode != http.StatusOK {
   326  		return 0, false, fmt.Errorf("bad status code %d from the server: %s", res.StatusCode, u.String())
   327  	}
   328  	result, err := io.ReadAll(res.Body)
   329  	if err != nil {
   330  		return 0, false, err
   331  	}
   332  
   333  	var faResp autoscalingv1.FleetAutoscaleReview
   334  	err = json.Unmarshal(result, &faResp)
   335  	if err != nil {
   336  		return 0, false, err
   337  	}
   338  
   339  	// Log Fleet Autoscaler operation, handling nil or empty Name in one line
   340  	webhookPolicyName := "<nil>"
   341  	if w.Service != nil && w.Service.Name != "" {
   342  		webhookPolicyName = w.Service.Name
   343  	}
   344  	loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   345  		"Fleet Autoscaler operation completed for fleet: %s, with WebhookPolicy: %s", f.ObjectMeta.Name, webhookPolicyName)
   346  
   347  	if faResp.Response.Scale {
   348  		return faResp.Response.Replicas, false, nil
   349  	}
   350  
   351  	return f.Status.Replicas, false, nil
   352  }
   353  
   354  func applyBufferPolicy(b *autoscalingv1.BufferPolicy, f *agonesv1.Fleet, fasLog *FasLogger) (int32, bool, error) {
   355  	var replicas int32
   356  
   357  	if b.BufferSize.Type == intstr.Int {
   358  		replicas = f.Status.AllocatedReplicas + int32(b.BufferSize.IntValue())
   359  	} else {
   360  		// the percentage value is a little more complex, as we can't apply
   361  		// the desired percentage to any current value, but to the future one
   362  		// Example: we have 8 allocated replicas, 10 total replicas and bufferSize set to 30%
   363  		// 30% means that we must have 30% ready instances in the fleet
   364  		// Right now there are 20%, so we must increase the fleet until we reach 30%
   365  		// To compute the new size, we start from the other end: if ready must be 30%
   366  		// it means that allocated must be 70% and adjust the fleet size to make that true.
   367  		bufferPercent, err := intstr.GetValueFromIntOrPercent(&b.BufferSize, 100, true)
   368  		if err != nil {
   369  			return 0, false, err
   370  		}
   371  		// use Math.Ceil to round the result up
   372  		replicas = int32(math.Ceil(float64(f.Status.AllocatedReplicas*100) / float64(100-bufferPercent)))
   373  	}
   374  
   375  	scalingInLimited := false
   376  	scalingOutLimited := false
   377  
   378  	if replicas < b.MinReplicas {
   379  		replicas = b.MinReplicas
   380  		scalingInLimited = true
   381  	}
   382  	if replicas > b.MaxReplicas {
   383  		replicas = b.MaxReplicas
   384  		scalingOutLimited = true
   385  	}
   386  
   387  	loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   388  		"Fleet Autoscaler operation completed for fleet: %s, with BufferPolicy: %v", f.ObjectMeta.Name, b.BufferSize)
   389  
   390  	return replicas, scalingInLimited || scalingOutLimited, nil
   391  }
   392  
   393  // New function to call applyCounterOrListPolicy
   394  func applyCounterOrListPolicyWrapper(c *autoscalingv1.CounterPolicy, l *autoscalingv1.ListPolicy,
   395  	f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   396  	nodeCounts map[string]gameservers.NodeCount, fasLog *FasLogger) (int32, bool, error) {
   397  
   398  	// Call applyCounterOrListPolicy inside the wrapper
   399  	desiredReplicas, scalingLimited, err := applyCounterOrListPolicy(c, l, f, gameServerNamespacedLister, nodeCounts)
   400  
   401  	if err == nil {
   402  		// Log directly based on which policy is used, with a description of the key
   403  		if c != nil {
   404  			// Log the Key from CounterPolicy with a description
   405  			loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   406  				"Fleet Autoscaler operation completed for fleet: %s, with CounterPolicy - Key: %v", f.ObjectMeta.Name, c.Key)
   407  		} else if l != nil {
   408  			// Log the Key from ListPolicy with a description
   409  			loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   410  				"Fleet Autoscaler operation completed for fleet: %s, with ListPolicy - Key: %v", f.ObjectMeta.Name, l.Key)
   411  		}
   412  	}
   413  
   414  	return desiredReplicas, scalingLimited, err
   415  }
   416  
   417  func applyCounterOrListPolicy(c *autoscalingv1.CounterPolicy, l *autoscalingv1.ListPolicy,
   418  	f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   419  	nodeCounts map[string]gameservers.NodeCount) (int32, bool, error) {
   420  
   421  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   422  		return 0, false, errors.Errorf("cannot apply CounterPolicy unless feature flag %s is enabled", runtime.FeatureCountsAndLists)
   423  	}
   424  
   425  	var isCounter bool          // True if a CounterPolicy False if a ListPolicy
   426  	var key string              // The specified Counter or List
   427  	var count int64             // The Count or number of Values in the template Game Server
   428  	var capacity int64          // The Capacity in the template Game Server
   429  	var aggCount int64          // The Aggregate Count of the specified Counter or List of all GameServers across the GameServerSet in the Fleet
   430  	var aggCapacity int64       // The Aggregate Capacity of the specified Counter or List of all GameServers across the GameServerSet in the Fleet
   431  	var aggAllocatedCount int64 // The Aggregate Count of the specified Counter or List of GameServers in an Allocated state across the GameServerSet in the Fleet
   432  	var minCapacity int64       // The Minimum Aggregate Capacity
   433  	var maxCapacity int64       // The Maximum Aggregate Capacity
   434  	var bufferSize intstr.IntOrString
   435  
   436  	if c != nil {
   437  		isCounter = true
   438  		counter, ok := f.Spec.Template.Spec.Counters[c.Key]
   439  		if !ok {
   440  			return 0, false, errors.Errorf("cannot apply CounterPolicy as Counter key %s does not exist in the Fleet Spec", c.Key)
   441  		}
   442  
   443  		aggCounter, ok := f.Status.Counters[c.Key]
   444  		if !ok {
   445  			return 0, false, errors.Errorf("cannot apply CounterPolicy as Counter key %s does not exist in the Fleet Status", c.Key)
   446  		}
   447  
   448  		key = c.Key
   449  		count = counter.Count
   450  		capacity = counter.Capacity
   451  		aggCount = aggCounter.Count
   452  		aggCapacity = aggCounter.Capacity
   453  		aggAllocatedCount = aggCounter.AllocatedCount
   454  		minCapacity = c.MinCapacity
   455  		maxCapacity = c.MaxCapacity
   456  		bufferSize = c.BufferSize
   457  
   458  	} else {
   459  		isCounter = false
   460  		list, ok := f.Spec.Template.Spec.Lists[l.Key]
   461  		if !ok {
   462  			return 0, false, errors.Errorf("cannot apply ListPolicy as List key %s does not exist in the Fleet Spec", l.Key)
   463  		}
   464  
   465  		aggList, ok := f.Status.Lists[l.Key]
   466  		if !ok {
   467  			return 0, false, errors.Errorf("cannot apply ListPolicy as List key %s does not exist in the Fleet Status", l.Key)
   468  		}
   469  
   470  		key = l.Key
   471  		count = int64(len(list.Values))
   472  		capacity = list.Capacity
   473  		aggCount = aggList.Count
   474  		aggCapacity = aggList.Capacity
   475  		aggAllocatedCount = aggList.AllocatedCount
   476  		minCapacity = l.MinCapacity
   477  		maxCapacity = l.MaxCapacity
   478  		bufferSize = l.BufferSize
   479  	}
   480  
   481  	// Checks if we've limited by TOTAL capacity
   482  	limited, scale := isLimited(aggCapacity, minCapacity, maxCapacity)
   483  
   484  	// Total current number of Replicas
   485  	replicas := f.Status.Replicas
   486  
   487  	// The buffer is the desired available capacity
   488  	var buffer int64
   489  
   490  	switch {
   491  	// Desired replicas based on BufferSize specified as an absolute value (i.e. 5)
   492  	case bufferSize.Type == intstr.Int:
   493  		buffer = int64(bufferSize.IntValue())
   494  	// Desired replicas based on BufferSize specified as a percent (i.e. 5%)
   495  	case bufferSize.Type == intstr.String:
   496  		bufferPercent, err := intstr.GetValueFromIntOrPercent(&bufferSize, 100, isCounter)
   497  		if err != nil {
   498  			return 0, false, err
   499  		}
   500  		// If the Aggregated Allocated Counts is 0 then desired capacity gets calculated as 0. If the
   501  		// capacity of 1 replica is equal to or greater than minimum capacity we can exit early.
   502  		if aggAllocatedCount <= 0 && capacity >= minCapacity {
   503  			return 1, true, nil
   504  		}
   505  
   506  		// The desired TOTAL capacity based on the Aggregated Allocated Counts (see applyBufferPolicy for explanation)
   507  		desiredCapacity := int64(math.Ceil(float64(aggAllocatedCount*100) / float64(100-bufferPercent)))
   508  		// Convert into a desired AVAILABLE capacity aka the buffer
   509  		buffer = desiredCapacity - aggAllocatedCount
   510  	}
   511  
   512  	// Current available capacity across the TOTAL fleet
   513  	switch availableCapacity := aggCapacity - aggCount; {
   514  	case availableCapacity == buffer:
   515  		if limited {
   516  			return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   517  				capacity, aggCapacity, minCapacity, maxCapacity)
   518  		}
   519  		return replicas, false, nil
   520  	case availableCapacity < buffer: // Scale Up
   521  		if limited { // Case where we want to scale up but we're already limited by MaxCapacity.
   522  			return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   523  				capacity, aggCapacity, minCapacity, maxCapacity)
   524  		}
   525  		return scaleUp(replicas, capacity, count, aggCapacity, availableCapacity, maxCapacity,
   526  			minCapacity, buffer)
   527  	case availableCapacity > buffer: // Scale Down
   528  		if limited && scale == 1 { // Case where we want to scale down but we're already limited by MinCapacity
   529  			return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   530  				capacity, aggCapacity, minCapacity, maxCapacity)
   531  		}
   532  		return scaleDown(f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas, aggCount,
   533  			aggCapacity, minCapacity, buffer)
   534  	}
   535  
   536  	if isCounter {
   537  		return 0, false, errors.Errorf("unable to apply CounterPolicy %v", c)
   538  	}
   539  	return 0, false, errors.Errorf("unable to apply ListPolicy %v", l)
   540  }
   541  
   542  func applySchedulePolicy(ctx context.Context, state map[string]any, s *autoscalingv1.SchedulePolicy, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, currentTime time.Time, fasLog *FasLogger) (int32, bool, error) {
   543  	// Ensure the scheduled autoscaler feature gate is enabled
   544  	if !runtime.FeatureEnabled(runtime.FeatureScheduledAutoscaler) {
   545  		return 0, false, errors.Errorf("cannot apply SchedulePolicy unless feature flag %s is enabled", runtime.FeatureScheduledAutoscaler)
   546  	}
   547  
   548  	if isScheduleActive(s, currentTime) {
   549  		return computeDesiredFleetSize(ctx, state, s.Policy, f, gameServerNamespacedLister, nodeCounts, fasLog)
   550  	}
   551  
   552  	loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   553  		"Fleet autoscaler check: Schedule not active for fleet %s", f.ObjectMeta.Name)
   554  
   555  	// If the schedule wasn't active then return the current replica amount of the fleet
   556  	return f.Status.Replicas, false, &InactiveScheduleError{}
   557  }
   558  
   559  func applyChainPolicy(ctx context.Context, state map[string]any, c autoscalingv1.ChainPolicy, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, currentTime time.Time, fasLog *FasLogger) (int32, bool, error) {
   560  	// Ensure the scheduled autoscaler feature gate is enabled
   561  	if !runtime.FeatureEnabled(runtime.FeatureScheduledAutoscaler) {
   562  		return 0, false, errors.Errorf("cannot apply ChainPolicy unless feature flag %s is enabled", runtime.FeatureScheduledAutoscaler)
   563  	}
   564  
   565  	replicas := f.Status.Replicas
   566  	var limited bool
   567  	var err error
   568  	var chainEntry autoscalingv1.FleetAutoscalerPolicyType
   569  
   570  	// Loop over all entries in the chain
   571  	for _, entry := range c {
   572  		switch entry.Type {
   573  		case autoscalingv1.SchedulePolicyType:
   574  			replicas, limited, err = applySchedulePolicy(ctx, state, entry.Schedule, f, gameServerNamespacedLister, nodeCounts, currentTime, fasLog)
   575  
   576  			if err != nil {
   577  				loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   578  					"Failed to apply SchedulePolicy ID=%s in ChainPolicy: %v", entry.ID, err)
   579  			}
   580  		case autoscalingv1.WebhookPolicyType:
   581  			replicas, limited, err = applyWebhookPolicy(entry.Webhook, f, fasLog)
   582  
   583  			if err != nil {
   584  				loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   585  					"Failed to apply WebhookPolicy ID=%s in ChainPolicy: %v", entry.ID, err)
   586  			}
   587  		default:
   588  			// Every other policy type we just want to compute the desired fleet and return it
   589  			replicas, limited, err = computeDesiredFleetSize(ctx, state, entry.FleetAutoscalerPolicy, f, gameServerNamespacedLister, nodeCounts, fasLog)
   590  
   591  			if err != nil && !errors.Is(err, InactiveScheduleError{}) {
   592  				loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf(
   593  					"Failed to apply %s ID=%s in ChainPolicy: %v", entry.Type, entry.ID, err)
   594  			}
   595  		}
   596  
   597  		// If no error occurred, exit the loop early
   598  		if err == nil {
   599  			chainEntry = autoscalingv1.FleetAutoscalerPolicyType(fmt.Sprintf("%s:%s:%s", autoscalingv1.ChainPolicyType, entry.ID, entry.Type))
   600  			break
   601  		}
   602  	}
   603  
   604  	if err != nil && !errors.Is(err, InactiveScheduleError{}) {
   605  		emitChainPolicyEvent(fasLog, "Unknown", "")
   606  		loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debug("Failed to apply ChainPolicy: no valid policy applied")
   607  		return replicas, limited, err
   608  	}
   609  
   610  	currChainEntry := strings.Split(string(chainEntry), ":")
   611  
   612  	// Handle the final state of the chain and update status if necessary
   613  	if lastAppliedPolicy := fasLog.fas.Status.LastAppliedPolicy; strings.Contains(string(lastAppliedPolicy), string(autoscalingv1.ChainPolicyType)) {
   614  		prevChainEntry := strings.Split(string(lastAppliedPolicy), ":")
   615  
   616  		// Only log if there is a change in the policy
   617  		if len(prevChainEntry) > 2 && (currChainEntry[1] != prevChainEntry[1] || currChainEntry[2] != prevChainEntry[2]) {
   618  			fasLog.currChainEntry = &chainEntry
   619  			emitChainPolicyEvent(fasLog, currChainEntry[1], currChainEntry[2])
   620  		}
   621  	} else {
   622  		fasLog.currChainEntry = &chainEntry
   623  		emitChainPolicyEvent(fasLog, currChainEntry[1], currChainEntry[2])
   624  	}
   625  
   626  	return replicas, limited, nil
   627  }
   628  
   629  // isScheduleActive checks if a chain entry's is active and returns a boolean, true if active, false otherwise
   630  func isScheduleActive(s *autoscalingv1.SchedulePolicy, currentTime time.Time) bool {
   631  	// Used for checking ahead of the schedule for daylight savings purposes
   632  	cronDelta := (time.Minute * -1) + (time.Second * -30)
   633  
   634  	// If the current time is before the start time, the schedule is inactive so return false
   635  	startTime := s.Between.Start.Time
   636  	if currentTime.Before(startTime) {
   637  		return false
   638  	}
   639  
   640  	// If an end time is present and the current time is after the end time, the schedule is inactive so return false
   641  	endTime := s.Between.End.Time
   642  	if !endTime.IsZero() && currentTime.After(endTime) {
   643  		return false
   644  	}
   645  
   646  	// If no startCron field is specified, then it's automatically true (duration is no longer relevant since we're always running)
   647  	if s.ActivePeriod.StartCron == "" {
   648  		return true
   649  	}
   650  
   651  	// Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled
   652  	location, _ := time.LoadLocation(s.ActivePeriod.Timezone)
   653  
   654  	// Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled
   655  	startCron, _ := cron.ParseStandard(s.ActivePeriod.StartCron)
   656  
   657  	// Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled.
   658  	// If the duration is empty set it to the largest duration possible (290 years)
   659  	duration, _ := time.ParseDuration(s.ActivePeriod.Duration)
   660  	if s.ActivePeriod.Duration == "" {
   661  		duration, _ = time.ParseDuration(maxDuration)
   662  	}
   663  
   664  	// Get the current time - duration
   665  	currentTimeMinusDuration := currentTime.Add(duration * -1)
   666  	// Take (current time - duration) to get the first available start time
   667  	cronStartTime := startCron.Next(currentTimeMinusDuration.In(location))
   668  	// Take the (cronStartTime + duration) to get the end time
   669  	cronEndTime := cronStartTime.Add(duration)
   670  
   671  	// If the current time is after the cronStartTime - 90 seconds (for daylight saving purposes) AND the current time before the cronEndTime
   672  	// then return true
   673  	// Example: startCron = 0 14 * * * // 2:00 PM Everyday | duration = 1 hr | cronDelta = 90 seconds | currentTime = 2024-08-01T14:30:00Z | currentTimeMinusDuration = 2024-08-01T13:30:00Z
   674  	// then cronStartTime = 2024-08-01T14:00:00Z and cronEndTime = 2024-08-01T15:00:00Z
   675  	// and since currentTime > cronStartTime + cronDelta AND currentTime < cronEndTime, we return true
   676  	if currentTime.After(cronStartTime.Add(cronDelta)) && currentTime.Before(cronEndTime) {
   677  		return true
   678  	}
   679  
   680  	return false
   681  }
   682  
   683  // getSortedGameServers returns the list of Game Servers for the Fleet in the order in which the
   684  // Game Servers would be deleted.
   685  func getSortedGameServers(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   686  	nodeCounts map[string]gameservers.NodeCount) ([]*agonesv1.GameServer, error) {
   687  	gsList, err := fleets.ListGameServersByFleetOwner(gameServerNamespacedLister, f)
   688  	if err != nil {
   689  		return nil, err
   690  	}
   691  
   692  	gameServers := gssets.SortGameServersByStrategy(f.Spec.Scheduling, gsList, nodeCounts, f.Spec.Priorities)
   693  	return gameServers, nil
   694  }
   695  
   696  // isLimited indicates that the calculated scale would be above or below the range defined by
   697  // MinCapacity and MaxCapacity in the ListPolicy or CounterPolicy.
   698  // Return 1 if the fleet needs to scale up, -1 if the fleets need to scale down, 0 if the fleet does
   699  // not need to scale, or if the fleet is not limited.
   700  func isLimited(aggCapacity, minCapacity, maxCapacity int64) (bool, int) {
   701  	if aggCapacity < minCapacity { // Scale up
   702  		return true, 1
   703  	}
   704  	if aggCapacity > maxCapacity { // Scale down
   705  		return true, -1
   706  	}
   707  	return false, 0
   708  }
   709  
   710  // scaleUpLimited scales up the fleet to meet the MinCapacity
   711  func scaleUpLimited(replicas int32, capacity, aggCapacity, minCapacity int64) (int32, bool, error) {
   712  	if capacity == 0 {
   713  		return 0, false, errors.Errorf("cannot scale up as Capacity is equal to 0")
   714  	}
   715  	for aggCapacity < minCapacity {
   716  		aggCapacity += capacity
   717  		replicas++
   718  	}
   719  	return replicas, true, nil
   720  }
   721  
   722  // scaleDownLimited scales down the fleet to meet the MaxCapacity
   723  func scaleDownLimited(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   724  	nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32,
   725  	aggCapacity, maxCapacity int64) (int32, bool, error) {
   726  	// Game Servers in order of deletion on scale down
   727  	gameServers, err := getSortedGameServers(f, gameServerNamespacedLister, nodeCounts)
   728  	if err != nil {
   729  		return 0, false, err
   730  	}
   731  	for _, gs := range gameServers {
   732  		if aggCapacity <= maxCapacity {
   733  			break
   734  		}
   735  		switch isCounter {
   736  		case true:
   737  			if counter, ok := gs.Status.Counters[key]; ok {
   738  				aggCapacity -= counter.Capacity
   739  			}
   740  		case false:
   741  			if list, ok := gs.Status.Lists[key]; ok {
   742  				aggCapacity -= list.Capacity
   743  			}
   744  		}
   745  		replicas--
   746  	}
   747  
   748  	// We are not currently able to scale down to zero replicas, so one replica is the minimum allowed
   749  	if replicas < 1 {
   750  		replicas = 1
   751  	}
   752  
   753  	return replicas, true, nil
   754  }
   755  
   756  func scaleLimited(scale int, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   757  	nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32,
   758  	capacity, aggCapacity, minCapacity, maxCapacity int64) (int32, bool, error) {
   759  
   760  	switch scale {
   761  	case 1: // scale up
   762  		return scaleUpLimited(replicas, capacity, aggCapacity, minCapacity)
   763  	case -1: // scale down
   764  		return scaleDownLimited(f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas,
   765  			aggCapacity, maxCapacity)
   766  	case 0:
   767  		return replicas, false, nil
   768  	}
   769  
   770  	return 0, false, errors.Errorf("cannot scale due to error in scaleLimited function")
   771  }
   772  
   773  // scaleUp scales up for either Integer or Percentage Buffer.
   774  func scaleUp(replicas int32, capacity, count, aggCapacity, availableCapacity, maxCapacity,
   775  	minCapacity, buffer int64) (int32, bool, error) {
   776  
   777  	// How much capacity is gained by adding one more replica to the fleet.
   778  	replicaCapacity := capacity - count
   779  	if replicaCapacity <= 0 {
   780  		return 0, false, errors.Errorf("cannot scale up as adding additional replicas does not increase available Capacity")
   781  	}
   782  
   783  	additionalReplicas := int32(math.Ceil((float64(buffer) - float64(availableCapacity)) / float64(replicaCapacity)))
   784  
   785  	// Check to make sure we're not limited (over Max Capacity)
   786  	limited, _ := isLimited(aggCapacity+(int64(additionalReplicas)*capacity), minCapacity, maxCapacity)
   787  	if limited {
   788  		additionalReplicas = int32((maxCapacity - aggCapacity) / capacity)
   789  	}
   790  
   791  	return replicas + additionalReplicas, limited, nil
   792  }
   793  
   794  // scaleDown scales down for either Integer or Percentage Buffer.
   795  func scaleDown(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister,
   796  	nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32,
   797  	aggCount, aggCapacity, minCapacity, buffer int64) (int32, bool, error) {
   798  	// Exit early if we're already at MinCapacity to avoid calling getSortedGameServers if unnecessary
   799  	if aggCapacity == minCapacity {
   800  		return replicas, true, nil
   801  	}
   802  
   803  	// We first need to get the individual game servers in order of deletion on scale down, as any
   804  	// game server may have a unique value for counts and / or capacity.
   805  	gameServers, err := getSortedGameServers(f, gameServerNamespacedLister, nodeCounts)
   806  	if err != nil {
   807  		return 0, false, err
   808  	}
   809  
   810  	var availableCapacity int64
   811  
   812  	// "Remove" one game server at a time in order of potential deletion. (Not actually removed here,
   813  	// that's done later, if possible, by the fleetautoscaler controller.)
   814  	for _, gs := range gameServers {
   815  		replicas--
   816  		switch isCounter {
   817  		case true:
   818  			if counter, ok := gs.Status.Counters[key]; ok {
   819  				aggCount -= counter.Count
   820  				aggCapacity -= counter.Capacity
   821  			} else {
   822  				continue
   823  			}
   824  		case false:
   825  			if list, ok := gs.Status.Lists[key]; ok {
   826  				aggCount -= int64(len(list.Values))
   827  				aggCapacity -= list.Capacity
   828  			} else {
   829  				continue
   830  			}
   831  		}
   832  		availableCapacity = aggCapacity - aggCount
   833  		// Check if we've overshot our buffer
   834  		if availableCapacity < buffer {
   835  			return replicas + 1, false, nil
   836  		}
   837  		// Check if we're Limited (Below MinCapacity)
   838  		if aggCapacity < minCapacity {
   839  			return replicas + 1, true, nil
   840  		}
   841  		// Check if we're at our desired Buffer
   842  		if availableCapacity == buffer {
   843  			return replicas, false, nil
   844  		}
   845  		// Check if we're at Limited
   846  		if aggCapacity == minCapacity {
   847  			return replicas, true, nil
   848  		}
   849  	}
   850  
   851  	// We are not currently able to scale down to zero replicas, so one replica is the minimum allowed.
   852  	if replicas < 1 {
   853  		replicas = 1
   854  	}
   855  
   856  	return replicas, false, nil
   857  }
   858  
   859  func emitChainPolicyEvent(fasLog *FasLogger, chainID string, chainType string) {
   860  	if fasLog.recorder == nil {
   861  		return
   862  	}
   863  
   864  	var eventMessage string
   865  	var eventType string
   866  
   867  	if chainID == "Unknown" {
   868  		eventMessage = fmt.Sprintf("FleetAutoscaler '%s' failed to apply ChainPolicy | ID: %s | Type: %s",
   869  			fasLog.fas.ObjectMeta.Name, chainID, chainType)
   870  		eventType = corev1.EventTypeWarning // Use Warning for failure
   871  	} else {
   872  		eventMessage = fmt.Sprintf("FleetAutoscaler '%s' successfully applied ChainPolicy | ID: %s | Type: %s",
   873  			fasLog.fas.ObjectMeta.Name, chainID, chainType)
   874  		eventType = corev1.EventTypeNormal // Use Normal for success
   875  	}
   876  
   877  	// Emit the event
   878  	fasLog.recorder.Eventf(fasLog.fas, eventType, "ChainPolicy", eventMessage)
   879  }