agones.dev/agones@v1.53.0/pkg/fleetautoscalers/fleetautoscalers.go (about) 1 /* 2 * Copyright 2018 Google LLC All Rights Reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package fleetautoscalers 18 19 import ( 20 "context" 21 "crypto/tls" 22 "crypto/x509" 23 "encoding/json" 24 "fmt" 25 "io" 26 "math" 27 "net/http" 28 "net/url" 29 "strings" 30 "time" 31 32 extism "github.com/extism/go-sdk" 33 "github.com/pkg/errors" 34 "github.com/robfig/cron/v3" 35 corev1 "k8s.io/api/core/v1" 36 "k8s.io/apimachinery/pkg/util/intstr" 37 "k8s.io/apimachinery/pkg/util/uuid" 38 39 agonesv1 "agones.dev/agones/pkg/apis/agones/v1" 40 autoscalingv1 "agones.dev/agones/pkg/apis/autoscaling/v1" 41 listeragonesv1 "agones.dev/agones/pkg/client/listers/agones/v1" 42 "agones.dev/agones/pkg/fleets" 43 "agones.dev/agones/pkg/gameservers" 44 gssets "agones.dev/agones/pkg/gameserversets" 45 "agones.dev/agones/pkg/util/runtime" 46 ) 47 48 const ( 49 maxDuration = "2540400h" // 290 Years 50 wasmStateKey = "wasm" // Key used to store the Wasm plugin in the state map 51 ) 52 53 var tlsConfig = &tls.Config{} 54 var client = http.Client{ 55 Timeout: 15 * time.Second, 56 Transport: &http.Transport{ 57 TLSClientConfig: tlsConfig, 58 }, 59 } 60 61 // InactiveScheduleError denotes an error for schedules that are not currently active. 62 type InactiveScheduleError struct{} 63 64 func (InactiveScheduleError) Error() string { 65 return "inactive schedule, policy not applicable" 66 } 67 68 // computeDesiredFleetSize computes the new desired size of the given fleet 69 func computeDesiredFleetSize(ctx context.Context, state map[string]any, pol autoscalingv1.FleetAutoscalerPolicy, f *agonesv1.Fleet, 70 gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, fasLog *FasLogger) (int32, bool, error) { 71 72 var ( 73 replicas int32 74 limited bool 75 err error 76 ) 77 78 switch pol.Type { 79 case autoscalingv1.BufferPolicyType: 80 replicas, limited, err = applyBufferPolicy(pol.Buffer, f, fasLog) 81 case autoscalingv1.WebhookPolicyType: 82 replicas, limited, err = applyWebhookPolicy(pol.Webhook, f, fasLog) 83 case autoscalingv1.CounterPolicyType: 84 replicas, limited, err = applyCounterOrListPolicyWrapper(pol.Counter, nil, f, gameServerNamespacedLister, nodeCounts, fasLog) 85 case autoscalingv1.ListPolicyType: 86 replicas, limited, err = applyCounterOrListPolicyWrapper(nil, pol.List, f, gameServerNamespacedLister, nodeCounts, fasLog) 87 case autoscalingv1.SchedulePolicyType: 88 replicas, limited, err = applySchedulePolicy(ctx, state, pol.Schedule, f, gameServerNamespacedLister, nodeCounts, time.Now(), fasLog) 89 case autoscalingv1.ChainPolicyType: 90 replicas, limited, err = applyChainPolicy(ctx, state, pol.Chain, f, gameServerNamespacedLister, nodeCounts, time.Now(), fasLog) 91 case autoscalingv1.WasmPolicyType: 92 replicas, limited, err = applyWasmPolicy(ctx, state, pol.Wasm, f, fasLog) 93 94 default: 95 err = errors.New("wrong policy type, should be one of: Buffer, Webhook, Counter, List, Schedule, Chain") 96 } 97 98 if err != nil && !errors.Is(err, InactiveScheduleError{}) { 99 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger). 100 Debugf("Failed to apply policy type %q: %v", pol.Type, err) 101 } 102 103 return replicas, limited, err 104 } 105 106 func applyWasmPolicy(ctx context.Context, state map[string]any, wp *autoscalingv1.WasmPolicy, f *agonesv1.Fleet, log *FasLogger) (int32, bool, error) { 107 if !runtime.FeatureEnabled(runtime.FeatureWasmAutoscaler) { 108 return 0, false, errors.Errorf("cannot apply WasmPolicy unless feature flag %s is enabled", runtime.FeatureWasmAutoscaler) 109 } 110 111 if wp == nil { 112 return 0, false, errors.New("wasmPolicy parameter must not be nil") 113 } 114 115 if f == nil { 116 return 0, false, errors.New("fleet parameter must not be nil") 117 } 118 119 _, ok := state[wasmStateKey] 120 if !ok { 121 // Build URL from the WasmPolicy 122 u, err := buildURLFromWebhookPolicy(wp.From.URL) 123 if err != nil { 124 return 0, false, err 125 } 126 res, err := client.Get(u.String()) 127 if err != nil { 128 return 0, false, errors.Wrapf(err, "failed to fetch Wasm module from %s", u.String()) 129 } 130 defer res.Body.Close() //nolint:errcheck 131 132 if res.StatusCode != http.StatusOK { 133 return 0, false, fmt.Errorf("bad status code %d from the server: %s", res.StatusCode, u.String()) 134 } 135 136 b, err := io.ReadAll(res.Body) 137 if err != nil { 138 return 0, false, errors.Wrapf(err, "failed to read Wasm module from %s", u.String()) 139 } 140 141 data := extism.WasmData{Data: b} 142 if len(wp.Hash) > 0 { 143 data.Hash = wp.Hash 144 } 145 manifest := extism.Manifest{ 146 Wasm: []extism.Wasm{ 147 data, 148 }, 149 Config: wp.Config, 150 } 151 152 config := extism.PluginConfig{ 153 EnableWasi: true, 154 } 155 plugin, err := extism.NewPlugin(ctx, manifest, config, []extism.HostFunction{}) 156 if err != nil { 157 return 0, false, errors.Wrapf(err, "failed to create Wasm plugin from %s", u.String()) 158 } 159 state[wasmStateKey] = plugin // Store the plugin in the state map 160 } 161 162 // This should never panic as we control what's in the state map 163 plugin := state[wasmStateKey].(*extism.Plugin) 164 165 // Create FleetAutoscaleReview 166 review := autoscalingv1.FleetAutoscaleReview{ 167 Request: &autoscalingv1.FleetAutoscaleRequest{ 168 UID: uuid.NewUUID(), 169 Name: f.Name, 170 Namespace: f.Namespace, 171 Status: f.Status, 172 }, 173 Response: nil, 174 } 175 176 if runtime.FeatureEnabled(runtime.FeatureFleetAutoscaleRequestMetaData) { 177 review.Request.Annotations = f.ObjectMeta.Annotations 178 review.Request.Labels = f.ObjectMeta.Labels 179 } 180 181 b, err := json.Marshal(review) 182 if err != nil { 183 return 0, false, errors.Wrap(err, "failed to marshal autoscaling request") 184 } 185 186 _, b, err = plugin.CallWithContext(ctx, wp.Function, b) 187 if err != nil { 188 return 0, false, errors.Wrapf(err, "failed to call Wasm plugin function %s", wp.Function) 189 } 190 191 if err := json.Unmarshal(b, &review); err != nil { 192 return 0, false, errors.Wrap(err, "failed to unmarshal autoscaling response") 193 } 194 195 loggerForFleetAutoscalerKey(log.fas.ObjectMeta.Name, log.baseLogger).Debugf( 196 "Fleet Autoscaler operation completed for fleet: %s, with was function: %s", f.ObjectMeta.Name, wp.Function) 197 198 if review.Response.Scale { 199 return review.Response.Replicas, false, nil 200 } 201 202 return f.Status.Replicas, false, nil 203 } 204 205 // buildURLFromWebhookPolicy - build URL for Webhook and set CARoot for client Transport 206 func buildURLFromWebhookPolicy(w *autoscalingv1.URLConfiguration) (u *url.URL, err error) { 207 if w.URL != nil && w.Service != nil { 208 return nil, errors.New("service and URL cannot be used simultaneously") 209 } 210 211 scheme := "http" 212 if w.CABundle != nil { 213 scheme = "https" 214 215 if err := setCABundle(w.CABundle); err != nil { 216 return nil, err 217 } 218 } 219 220 if w.URL != nil { 221 if *w.URL == "" { 222 return nil, errors.New("URL was not provided") 223 } 224 225 return url.ParseRequestURI(*w.URL) 226 } 227 228 if w.Service == nil { 229 return nil, errors.New("service was not provided, either URL or Service must be provided") 230 } 231 232 if w.Service.Name == "" { 233 return nil, errors.New("service name was not provided") 234 } 235 236 if w.Service.Path == nil { 237 empty := "" 238 w.Service.Path = &empty 239 } 240 241 if w.Service.Namespace == "" { 242 w.Service.Namespace = "default" 243 } 244 245 return createURL(scheme, w.Service.Name, w.Service.Namespace, *w.Service.Path, w.Service.Port), nil 246 } 247 248 // moved to a separate method to cover it with unit tests and check that URL corresponds to a proper pattern 249 func createURL(scheme, name, namespace, path string, port *int32) *url.URL { 250 var hostPort int32 = 8000 251 if port != nil { 252 hostPort = *port 253 } 254 255 return &url.URL{ 256 Scheme: scheme, 257 Host: fmt.Sprintf("%s.%s.svc:%d", name, namespace, hostPort), 258 Path: path, 259 } 260 } 261 262 func setCABundle(caBundle []byte) error { 263 // We can have multiple fleetautoscalers with different CABundles defined, 264 // so we switch client.Transport before each POST request 265 rootCAs := x509.NewCertPool() 266 if ok := rootCAs.AppendCertsFromPEM(caBundle); !ok { 267 return errors.New("no certs were appended from caBundle") 268 } 269 tlsConfig.RootCAs = rootCAs 270 return nil 271 } 272 273 func applyWebhookPolicy(w *autoscalingv1.URLConfiguration, f *agonesv1.Fleet, fasLog *FasLogger) (replicas int32, limited bool, err error) { 274 if w == nil { 275 return 0, false, errors.New("webhookPolicy parameter must not be nil") 276 } 277 278 if f == nil { 279 return 0, false, errors.New("fleet parameter must not be nil") 280 } 281 282 u, err := buildURLFromWebhookPolicy(w) 283 if err != nil { 284 return 0, false, err 285 } 286 287 faReq := autoscalingv1.FleetAutoscaleReview{ 288 Request: &autoscalingv1.FleetAutoscaleRequest{ 289 UID: uuid.NewUUID(), 290 Name: f.Name, 291 Namespace: f.Namespace, 292 Status: f.Status, 293 }, 294 Response: nil, 295 } 296 297 if runtime.FeatureEnabled(runtime.FeatureFleetAutoscaleRequestMetaData) { 298 faReq.Request.Annotations = f.ObjectMeta.Annotations 299 faReq.Request.Labels = f.ObjectMeta.Labels 300 } 301 302 b, err := json.Marshal(faReq) 303 if err != nil { 304 return 0, false, err 305 } 306 307 res, err := client.Post( 308 u.String(), 309 "application/json", 310 strings.NewReader(string(b)), 311 ) 312 if err != nil { 313 return 0, false, err 314 } 315 defer func() { 316 if cerr := res.Body.Close(); cerr != nil { 317 if err != nil { 318 err = errors.Wrap(err, cerr.Error()) 319 } else { 320 err = cerr 321 } 322 } 323 }() 324 325 if res.StatusCode != http.StatusOK { 326 return 0, false, fmt.Errorf("bad status code %d from the server: %s", res.StatusCode, u.String()) 327 } 328 result, err := io.ReadAll(res.Body) 329 if err != nil { 330 return 0, false, err 331 } 332 333 var faResp autoscalingv1.FleetAutoscaleReview 334 err = json.Unmarshal(result, &faResp) 335 if err != nil { 336 return 0, false, err 337 } 338 339 // Log Fleet Autoscaler operation, handling nil or empty Name in one line 340 webhookPolicyName := "<nil>" 341 if w.Service != nil && w.Service.Name != "" { 342 webhookPolicyName = w.Service.Name 343 } 344 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 345 "Fleet Autoscaler operation completed for fleet: %s, with WebhookPolicy: %s", f.ObjectMeta.Name, webhookPolicyName) 346 347 if faResp.Response.Scale { 348 return faResp.Response.Replicas, false, nil 349 } 350 351 return f.Status.Replicas, false, nil 352 } 353 354 func applyBufferPolicy(b *autoscalingv1.BufferPolicy, f *agonesv1.Fleet, fasLog *FasLogger) (int32, bool, error) { 355 var replicas int32 356 357 if b.BufferSize.Type == intstr.Int { 358 replicas = f.Status.AllocatedReplicas + int32(b.BufferSize.IntValue()) 359 } else { 360 // the percentage value is a little more complex, as we can't apply 361 // the desired percentage to any current value, but to the future one 362 // Example: we have 8 allocated replicas, 10 total replicas and bufferSize set to 30% 363 // 30% means that we must have 30% ready instances in the fleet 364 // Right now there are 20%, so we must increase the fleet until we reach 30% 365 // To compute the new size, we start from the other end: if ready must be 30% 366 // it means that allocated must be 70% and adjust the fleet size to make that true. 367 bufferPercent, err := intstr.GetValueFromIntOrPercent(&b.BufferSize, 100, true) 368 if err != nil { 369 return 0, false, err 370 } 371 // use Math.Ceil to round the result up 372 replicas = int32(math.Ceil(float64(f.Status.AllocatedReplicas*100) / float64(100-bufferPercent))) 373 } 374 375 scalingInLimited := false 376 scalingOutLimited := false 377 378 if replicas < b.MinReplicas { 379 replicas = b.MinReplicas 380 scalingInLimited = true 381 } 382 if replicas > b.MaxReplicas { 383 replicas = b.MaxReplicas 384 scalingOutLimited = true 385 } 386 387 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 388 "Fleet Autoscaler operation completed for fleet: %s, with BufferPolicy: %v", f.ObjectMeta.Name, b.BufferSize) 389 390 return replicas, scalingInLimited || scalingOutLimited, nil 391 } 392 393 // New function to call applyCounterOrListPolicy 394 func applyCounterOrListPolicyWrapper(c *autoscalingv1.CounterPolicy, l *autoscalingv1.ListPolicy, 395 f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, 396 nodeCounts map[string]gameservers.NodeCount, fasLog *FasLogger) (int32, bool, error) { 397 398 // Call applyCounterOrListPolicy inside the wrapper 399 desiredReplicas, scalingLimited, err := applyCounterOrListPolicy(c, l, f, gameServerNamespacedLister, nodeCounts) 400 401 if err == nil { 402 // Log directly based on which policy is used, with a description of the key 403 if c != nil { 404 // Log the Key from CounterPolicy with a description 405 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 406 "Fleet Autoscaler operation completed for fleet: %s, with CounterPolicy - Key: %v", f.ObjectMeta.Name, c.Key) 407 } else if l != nil { 408 // Log the Key from ListPolicy with a description 409 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 410 "Fleet Autoscaler operation completed for fleet: %s, with ListPolicy - Key: %v", f.ObjectMeta.Name, l.Key) 411 } 412 } 413 414 return desiredReplicas, scalingLimited, err 415 } 416 417 func applyCounterOrListPolicy(c *autoscalingv1.CounterPolicy, l *autoscalingv1.ListPolicy, 418 f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, 419 nodeCounts map[string]gameservers.NodeCount) (int32, bool, error) { 420 421 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 422 return 0, false, errors.Errorf("cannot apply CounterPolicy unless feature flag %s is enabled", runtime.FeatureCountsAndLists) 423 } 424 425 var isCounter bool // True if a CounterPolicy False if a ListPolicy 426 var key string // The specified Counter or List 427 var count int64 // The Count or number of Values in the template Game Server 428 var capacity int64 // The Capacity in the template Game Server 429 var aggCount int64 // The Aggregate Count of the specified Counter or List of all GameServers across the GameServerSet in the Fleet 430 var aggCapacity int64 // The Aggregate Capacity of the specified Counter or List of all GameServers across the GameServerSet in the Fleet 431 var aggAllocatedCount int64 // The Aggregate Count of the specified Counter or List of GameServers in an Allocated state across the GameServerSet in the Fleet 432 var minCapacity int64 // The Minimum Aggregate Capacity 433 var maxCapacity int64 // The Maximum Aggregate Capacity 434 var bufferSize intstr.IntOrString 435 436 if c != nil { 437 isCounter = true 438 counter, ok := f.Spec.Template.Spec.Counters[c.Key] 439 if !ok { 440 return 0, false, errors.Errorf("cannot apply CounterPolicy as Counter key %s does not exist in the Fleet Spec", c.Key) 441 } 442 443 aggCounter, ok := f.Status.Counters[c.Key] 444 if !ok { 445 return 0, false, errors.Errorf("cannot apply CounterPolicy as Counter key %s does not exist in the Fleet Status", c.Key) 446 } 447 448 key = c.Key 449 count = counter.Count 450 capacity = counter.Capacity 451 aggCount = aggCounter.Count 452 aggCapacity = aggCounter.Capacity 453 aggAllocatedCount = aggCounter.AllocatedCount 454 minCapacity = c.MinCapacity 455 maxCapacity = c.MaxCapacity 456 bufferSize = c.BufferSize 457 458 } else { 459 isCounter = false 460 list, ok := f.Spec.Template.Spec.Lists[l.Key] 461 if !ok { 462 return 0, false, errors.Errorf("cannot apply ListPolicy as List key %s does not exist in the Fleet Spec", l.Key) 463 } 464 465 aggList, ok := f.Status.Lists[l.Key] 466 if !ok { 467 return 0, false, errors.Errorf("cannot apply ListPolicy as List key %s does not exist in the Fleet Status", l.Key) 468 } 469 470 key = l.Key 471 count = int64(len(list.Values)) 472 capacity = list.Capacity 473 aggCount = aggList.Count 474 aggCapacity = aggList.Capacity 475 aggAllocatedCount = aggList.AllocatedCount 476 minCapacity = l.MinCapacity 477 maxCapacity = l.MaxCapacity 478 bufferSize = l.BufferSize 479 } 480 481 // Checks if we've limited by TOTAL capacity 482 limited, scale := isLimited(aggCapacity, minCapacity, maxCapacity) 483 484 // Total current number of Replicas 485 replicas := f.Status.Replicas 486 487 // The buffer is the desired available capacity 488 var buffer int64 489 490 switch { 491 // Desired replicas based on BufferSize specified as an absolute value (i.e. 5) 492 case bufferSize.Type == intstr.Int: 493 buffer = int64(bufferSize.IntValue()) 494 // Desired replicas based on BufferSize specified as a percent (i.e. 5%) 495 case bufferSize.Type == intstr.String: 496 bufferPercent, err := intstr.GetValueFromIntOrPercent(&bufferSize, 100, isCounter) 497 if err != nil { 498 return 0, false, err 499 } 500 // If the Aggregated Allocated Counts is 0 then desired capacity gets calculated as 0. If the 501 // capacity of 1 replica is equal to or greater than minimum capacity we can exit early. 502 if aggAllocatedCount <= 0 && capacity >= minCapacity { 503 return 1, true, nil 504 } 505 506 // The desired TOTAL capacity based on the Aggregated Allocated Counts (see applyBufferPolicy for explanation) 507 desiredCapacity := int64(math.Ceil(float64(aggAllocatedCount*100) / float64(100-bufferPercent))) 508 // Convert into a desired AVAILABLE capacity aka the buffer 509 buffer = desiredCapacity - aggAllocatedCount 510 } 511 512 // Current available capacity across the TOTAL fleet 513 switch availableCapacity := aggCapacity - aggCount; { 514 case availableCapacity == buffer: 515 if limited { 516 return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas, 517 capacity, aggCapacity, minCapacity, maxCapacity) 518 } 519 return replicas, false, nil 520 case availableCapacity < buffer: // Scale Up 521 if limited { // Case where we want to scale up but we're already limited by MaxCapacity. 522 return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas, 523 capacity, aggCapacity, minCapacity, maxCapacity) 524 } 525 return scaleUp(replicas, capacity, count, aggCapacity, availableCapacity, maxCapacity, 526 minCapacity, buffer) 527 case availableCapacity > buffer: // Scale Down 528 if limited && scale == 1 { // Case where we want to scale down but we're already limited by MinCapacity 529 return scaleLimited(scale, f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas, 530 capacity, aggCapacity, minCapacity, maxCapacity) 531 } 532 return scaleDown(f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas, aggCount, 533 aggCapacity, minCapacity, buffer) 534 } 535 536 if isCounter { 537 return 0, false, errors.Errorf("unable to apply CounterPolicy %v", c) 538 } 539 return 0, false, errors.Errorf("unable to apply ListPolicy %v", l) 540 } 541 542 func applySchedulePolicy(ctx context.Context, state map[string]any, s *autoscalingv1.SchedulePolicy, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, currentTime time.Time, fasLog *FasLogger) (int32, bool, error) { 543 // Ensure the scheduled autoscaler feature gate is enabled 544 if !runtime.FeatureEnabled(runtime.FeatureScheduledAutoscaler) { 545 return 0, false, errors.Errorf("cannot apply SchedulePolicy unless feature flag %s is enabled", runtime.FeatureScheduledAutoscaler) 546 } 547 548 if isScheduleActive(s, currentTime) { 549 return computeDesiredFleetSize(ctx, state, s.Policy, f, gameServerNamespacedLister, nodeCounts, fasLog) 550 } 551 552 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 553 "Fleet autoscaler check: Schedule not active for fleet %s", f.ObjectMeta.Name) 554 555 // If the schedule wasn't active then return the current replica amount of the fleet 556 return f.Status.Replicas, false, &InactiveScheduleError{} 557 } 558 559 func applyChainPolicy(ctx context.Context, state map[string]any, c autoscalingv1.ChainPolicy, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, nodeCounts map[string]gameservers.NodeCount, currentTime time.Time, fasLog *FasLogger) (int32, bool, error) { 560 // Ensure the scheduled autoscaler feature gate is enabled 561 if !runtime.FeatureEnabled(runtime.FeatureScheduledAutoscaler) { 562 return 0, false, errors.Errorf("cannot apply ChainPolicy unless feature flag %s is enabled", runtime.FeatureScheduledAutoscaler) 563 } 564 565 replicas := f.Status.Replicas 566 var limited bool 567 var err error 568 var chainEntry autoscalingv1.FleetAutoscalerPolicyType 569 570 // Loop over all entries in the chain 571 for _, entry := range c { 572 switch entry.Type { 573 case autoscalingv1.SchedulePolicyType: 574 replicas, limited, err = applySchedulePolicy(ctx, state, entry.Schedule, f, gameServerNamespacedLister, nodeCounts, currentTime, fasLog) 575 576 if err != nil { 577 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 578 "Failed to apply SchedulePolicy ID=%s in ChainPolicy: %v", entry.ID, err) 579 } 580 case autoscalingv1.WebhookPolicyType: 581 replicas, limited, err = applyWebhookPolicy(entry.Webhook, f, fasLog) 582 583 if err != nil { 584 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 585 "Failed to apply WebhookPolicy ID=%s in ChainPolicy: %v", entry.ID, err) 586 } 587 default: 588 // Every other policy type we just want to compute the desired fleet and return it 589 replicas, limited, err = computeDesiredFleetSize(ctx, state, entry.FleetAutoscalerPolicy, f, gameServerNamespacedLister, nodeCounts, fasLog) 590 591 if err != nil && !errors.Is(err, InactiveScheduleError{}) { 592 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debugf( 593 "Failed to apply %s ID=%s in ChainPolicy: %v", entry.Type, entry.ID, err) 594 } 595 } 596 597 // If no error occurred, exit the loop early 598 if err == nil { 599 chainEntry = autoscalingv1.FleetAutoscalerPolicyType(fmt.Sprintf("%s:%s:%s", autoscalingv1.ChainPolicyType, entry.ID, entry.Type)) 600 break 601 } 602 } 603 604 if err != nil && !errors.Is(err, InactiveScheduleError{}) { 605 emitChainPolicyEvent(fasLog, "Unknown", "") 606 loggerForFleetAutoscalerKey(fasLog.fas.ObjectMeta.Name, fasLog.baseLogger).Debug("Failed to apply ChainPolicy: no valid policy applied") 607 return replicas, limited, err 608 } 609 610 currChainEntry := strings.Split(string(chainEntry), ":") 611 612 // Handle the final state of the chain and update status if necessary 613 if lastAppliedPolicy := fasLog.fas.Status.LastAppliedPolicy; strings.Contains(string(lastAppliedPolicy), string(autoscalingv1.ChainPolicyType)) { 614 prevChainEntry := strings.Split(string(lastAppliedPolicy), ":") 615 616 // Only log if there is a change in the policy 617 if len(prevChainEntry) > 2 && (currChainEntry[1] != prevChainEntry[1] || currChainEntry[2] != prevChainEntry[2]) { 618 fasLog.currChainEntry = &chainEntry 619 emitChainPolicyEvent(fasLog, currChainEntry[1], currChainEntry[2]) 620 } 621 } else { 622 fasLog.currChainEntry = &chainEntry 623 emitChainPolicyEvent(fasLog, currChainEntry[1], currChainEntry[2]) 624 } 625 626 return replicas, limited, nil 627 } 628 629 // isScheduleActive checks if a chain entry's is active and returns a boolean, true if active, false otherwise 630 func isScheduleActive(s *autoscalingv1.SchedulePolicy, currentTime time.Time) bool { 631 // Used for checking ahead of the schedule for daylight savings purposes 632 cronDelta := (time.Minute * -1) + (time.Second * -30) 633 634 // If the current time is before the start time, the schedule is inactive so return false 635 startTime := s.Between.Start.Time 636 if currentTime.Before(startTime) { 637 return false 638 } 639 640 // If an end time is present and the current time is after the end time, the schedule is inactive so return false 641 endTime := s.Between.End.Time 642 if !endTime.IsZero() && currentTime.After(endTime) { 643 return false 644 } 645 646 // If no startCron field is specified, then it's automatically true (duration is no longer relevant since we're always running) 647 if s.ActivePeriod.StartCron == "" { 648 return true 649 } 650 651 // Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled 652 location, _ := time.LoadLocation(s.ActivePeriod.Timezone) 653 654 // Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled 655 startCron, _ := cron.ParseStandard(s.ActivePeriod.StartCron) 656 657 // Ignore the error as validation is already done within the validateChainPolicy after being unmarshalled. 658 // If the duration is empty set it to the largest duration possible (290 years) 659 duration, _ := time.ParseDuration(s.ActivePeriod.Duration) 660 if s.ActivePeriod.Duration == "" { 661 duration, _ = time.ParseDuration(maxDuration) 662 } 663 664 // Get the current time - duration 665 currentTimeMinusDuration := currentTime.Add(duration * -1) 666 // Take (current time - duration) to get the first available start time 667 cronStartTime := startCron.Next(currentTimeMinusDuration.In(location)) 668 // Take the (cronStartTime + duration) to get the end time 669 cronEndTime := cronStartTime.Add(duration) 670 671 // If the current time is after the cronStartTime - 90 seconds (for daylight saving purposes) AND the current time before the cronEndTime 672 // then return true 673 // Example: startCron = 0 14 * * * // 2:00 PM Everyday | duration = 1 hr | cronDelta = 90 seconds | currentTime = 2024-08-01T14:30:00Z | currentTimeMinusDuration = 2024-08-01T13:30:00Z 674 // then cronStartTime = 2024-08-01T14:00:00Z and cronEndTime = 2024-08-01T15:00:00Z 675 // and since currentTime > cronStartTime + cronDelta AND currentTime < cronEndTime, we return true 676 if currentTime.After(cronStartTime.Add(cronDelta)) && currentTime.Before(cronEndTime) { 677 return true 678 } 679 680 return false 681 } 682 683 // getSortedGameServers returns the list of Game Servers for the Fleet in the order in which the 684 // Game Servers would be deleted. 685 func getSortedGameServers(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, 686 nodeCounts map[string]gameservers.NodeCount) ([]*agonesv1.GameServer, error) { 687 gsList, err := fleets.ListGameServersByFleetOwner(gameServerNamespacedLister, f) 688 if err != nil { 689 return nil, err 690 } 691 692 gameServers := gssets.SortGameServersByStrategy(f.Spec.Scheduling, gsList, nodeCounts, f.Spec.Priorities) 693 return gameServers, nil 694 } 695 696 // isLimited indicates that the calculated scale would be above or below the range defined by 697 // MinCapacity and MaxCapacity in the ListPolicy or CounterPolicy. 698 // Return 1 if the fleet needs to scale up, -1 if the fleets need to scale down, 0 if the fleet does 699 // not need to scale, or if the fleet is not limited. 700 func isLimited(aggCapacity, minCapacity, maxCapacity int64) (bool, int) { 701 if aggCapacity < minCapacity { // Scale up 702 return true, 1 703 } 704 if aggCapacity > maxCapacity { // Scale down 705 return true, -1 706 } 707 return false, 0 708 } 709 710 // scaleUpLimited scales up the fleet to meet the MinCapacity 711 func scaleUpLimited(replicas int32, capacity, aggCapacity, minCapacity int64) (int32, bool, error) { 712 if capacity == 0 { 713 return 0, false, errors.Errorf("cannot scale up as Capacity is equal to 0") 714 } 715 for aggCapacity < minCapacity { 716 aggCapacity += capacity 717 replicas++ 718 } 719 return replicas, true, nil 720 } 721 722 // scaleDownLimited scales down the fleet to meet the MaxCapacity 723 func scaleDownLimited(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, 724 nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32, 725 aggCapacity, maxCapacity int64) (int32, bool, error) { 726 // Game Servers in order of deletion on scale down 727 gameServers, err := getSortedGameServers(f, gameServerNamespacedLister, nodeCounts) 728 if err != nil { 729 return 0, false, err 730 } 731 for _, gs := range gameServers { 732 if aggCapacity <= maxCapacity { 733 break 734 } 735 switch isCounter { 736 case true: 737 if counter, ok := gs.Status.Counters[key]; ok { 738 aggCapacity -= counter.Capacity 739 } 740 case false: 741 if list, ok := gs.Status.Lists[key]; ok { 742 aggCapacity -= list.Capacity 743 } 744 } 745 replicas-- 746 } 747 748 // We are not currently able to scale down to zero replicas, so one replica is the minimum allowed 749 if replicas < 1 { 750 replicas = 1 751 } 752 753 return replicas, true, nil 754 } 755 756 func scaleLimited(scale int, f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, 757 nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32, 758 capacity, aggCapacity, minCapacity, maxCapacity int64) (int32, bool, error) { 759 760 switch scale { 761 case 1: // scale up 762 return scaleUpLimited(replicas, capacity, aggCapacity, minCapacity) 763 case -1: // scale down 764 return scaleDownLimited(f, gameServerNamespacedLister, nodeCounts, key, isCounter, replicas, 765 aggCapacity, maxCapacity) 766 case 0: 767 return replicas, false, nil 768 } 769 770 return 0, false, errors.Errorf("cannot scale due to error in scaleLimited function") 771 } 772 773 // scaleUp scales up for either Integer or Percentage Buffer. 774 func scaleUp(replicas int32, capacity, count, aggCapacity, availableCapacity, maxCapacity, 775 minCapacity, buffer int64) (int32, bool, error) { 776 777 // How much capacity is gained by adding one more replica to the fleet. 778 replicaCapacity := capacity - count 779 if replicaCapacity <= 0 { 780 return 0, false, errors.Errorf("cannot scale up as adding additional replicas does not increase available Capacity") 781 } 782 783 additionalReplicas := int32(math.Ceil((float64(buffer) - float64(availableCapacity)) / float64(replicaCapacity))) 784 785 // Check to make sure we're not limited (over Max Capacity) 786 limited, _ := isLimited(aggCapacity+(int64(additionalReplicas)*capacity), minCapacity, maxCapacity) 787 if limited { 788 additionalReplicas = int32((maxCapacity - aggCapacity) / capacity) 789 } 790 791 return replicas + additionalReplicas, limited, nil 792 } 793 794 // scaleDown scales down for either Integer or Percentage Buffer. 795 func scaleDown(f *agonesv1.Fleet, gameServerNamespacedLister listeragonesv1.GameServerNamespaceLister, 796 nodeCounts map[string]gameservers.NodeCount, key string, isCounter bool, replicas int32, 797 aggCount, aggCapacity, minCapacity, buffer int64) (int32, bool, error) { 798 // Exit early if we're already at MinCapacity to avoid calling getSortedGameServers if unnecessary 799 if aggCapacity == minCapacity { 800 return replicas, true, nil 801 } 802 803 // We first need to get the individual game servers in order of deletion on scale down, as any 804 // game server may have a unique value for counts and / or capacity. 805 gameServers, err := getSortedGameServers(f, gameServerNamespacedLister, nodeCounts) 806 if err != nil { 807 return 0, false, err 808 } 809 810 var availableCapacity int64 811 812 // "Remove" one game server at a time in order of potential deletion. (Not actually removed here, 813 // that's done later, if possible, by the fleetautoscaler controller.) 814 for _, gs := range gameServers { 815 replicas-- 816 switch isCounter { 817 case true: 818 if counter, ok := gs.Status.Counters[key]; ok { 819 aggCount -= counter.Count 820 aggCapacity -= counter.Capacity 821 } else { 822 continue 823 } 824 case false: 825 if list, ok := gs.Status.Lists[key]; ok { 826 aggCount -= int64(len(list.Values)) 827 aggCapacity -= list.Capacity 828 } else { 829 continue 830 } 831 } 832 availableCapacity = aggCapacity - aggCount 833 // Check if we've overshot our buffer 834 if availableCapacity < buffer { 835 return replicas + 1, false, nil 836 } 837 // Check if we're Limited (Below MinCapacity) 838 if aggCapacity < minCapacity { 839 return replicas + 1, true, nil 840 } 841 // Check if we're at our desired Buffer 842 if availableCapacity == buffer { 843 return replicas, false, nil 844 } 845 // Check if we're at Limited 846 if aggCapacity == minCapacity { 847 return replicas, true, nil 848 } 849 } 850 851 // We are not currently able to scale down to zero replicas, so one replica is the minimum allowed. 852 if replicas < 1 { 853 replicas = 1 854 } 855 856 return replicas, false, nil 857 } 858 859 func emitChainPolicyEvent(fasLog *FasLogger, chainID string, chainType string) { 860 if fasLog.recorder == nil { 861 return 862 } 863 864 var eventMessage string 865 var eventType string 866 867 if chainID == "Unknown" { 868 eventMessage = fmt.Sprintf("FleetAutoscaler '%s' failed to apply ChainPolicy | ID: %s | Type: %s", 869 fasLog.fas.ObjectMeta.Name, chainID, chainType) 870 eventType = corev1.EventTypeWarning // Use Warning for failure 871 } else { 872 eventMessage = fmt.Sprintf("FleetAutoscaler '%s' successfully applied ChainPolicy | ID: %s | Type: %s", 873 fasLog.fas.ObjectMeta.Name, chainID, chainType) 874 eventType = corev1.EventTypeNormal // Use Normal for success 875 } 876 877 // Emit the event 878 fasLog.recorder.Eventf(fasLog.fas, eventType, "ChainPolicy", eventMessage) 879 }