agones.dev/agones@v1.54.0/pkg/sdkserver/sdkserver.go (about) 1 // Copyright 2018 Google LLC All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package sdkserver 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "net/http" 22 "slices" 23 "strings" 24 "sync" 25 "time" 26 27 "github.com/mennanov/fmutils" 28 "github.com/pkg/errors" 29 "github.com/sirupsen/logrus" 30 corev1 "k8s.io/api/core/v1" 31 apiequality "k8s.io/apimachinery/pkg/api/equality" 32 k8serrors "k8s.io/apimachinery/pkg/api/errors" 33 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 34 "k8s.io/apimachinery/pkg/fields" 35 "k8s.io/apimachinery/pkg/types" 36 "k8s.io/apimachinery/pkg/util/wait" 37 "k8s.io/client-go/kubernetes" 38 "k8s.io/client-go/kubernetes/scheme" 39 k8sv1 "k8s.io/client-go/kubernetes/typed/core/v1" 40 "k8s.io/client-go/tools/cache" 41 "k8s.io/client-go/tools/record" 42 "k8s.io/utils/clock" 43 44 "agones.dev/agones/pkg/apis/agones" 45 agonesv1 "agones.dev/agones/pkg/apis/agones/v1" 46 "agones.dev/agones/pkg/client/clientset/versioned" 47 typedv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1" 48 "agones.dev/agones/pkg/client/informers/externalversions" 49 listersv1 "agones.dev/agones/pkg/client/listers/agones/v1" 50 "agones.dev/agones/pkg/gameserverallocations" 51 "agones.dev/agones/pkg/sdk" 52 "agones.dev/agones/pkg/sdk/alpha" 53 "agones.dev/agones/pkg/sdk/beta" 54 "agones.dev/agones/pkg/util/apiserver" 55 "agones.dev/agones/pkg/util/logfields" 56 "agones.dev/agones/pkg/util/runtime" 57 "agones.dev/agones/pkg/util/workerqueue" 58 ) 59 60 // Operation is a synchronisation action 61 type Operation string 62 63 const ( 64 updateState Operation = "updateState" 65 updateLabel Operation = "updateLabel" 66 updateAnnotation Operation = "updateAnnotation" 67 updatePlayerCapacity Operation = "updatePlayerCapacity" 68 updateConnectedPlayers Operation = "updateConnectedPlayers" 69 updateCounters Operation = "updateCounters" 70 updateLists Operation = "updateLists" 71 updatePeriod time.Duration = time.Second 72 ) 73 74 var ( 75 _ sdk.SDKServer = &SDKServer{} 76 _ alpha.SDKServer = &SDKServer{} 77 _ beta.SDKServer = &SDKServer{} 78 ) 79 80 type counterUpdateRequest struct { 81 // Capacity of the Counter as set by capacitySet. 82 capacitySet *int64 83 // Count of the Counter as set by countSet. 84 countSet *int64 85 // Tracks the sum of CountIncrement, CountDecrement, and/or CountSet requests from the client SDK. 86 diff int64 87 // Counter as retreived from the GameServer 88 counter agonesv1.CounterStatus 89 } 90 91 type listUpdateRequest struct { 92 // Capacity of the List as set by capacitySet. 93 capacitySet *int64 94 // String keys are the Values to remove from the List 95 valuesToDelete map[string]bool 96 // Values to add to the List 97 valuesToAppend []string 98 } 99 100 // SDKServer is a gRPC server, that is meant to be a sidecar 101 // for a GameServer that will update the game server status on SDK requests 102 // 103 //nolint:govet // ignore fieldalignment, singleton 104 type SDKServer struct { 105 logger *logrus.Entry 106 gameServerName string 107 namespace string 108 informerFactory externalversions.SharedInformerFactory 109 gameServerGetter typedv1.GameServersGetter 110 gameServerLister listersv1.GameServerLister 111 gameServerSynced cache.InformerSynced 112 connected bool 113 server *http.Server 114 clock clock.Clock 115 health agonesv1.Health 116 healthTimeout time.Duration 117 healthMutex sync.RWMutex 118 healthLastUpdated time.Time 119 healthFailureCount int32 120 healthChecksRunning sync.Once 121 workerqueue *workerqueue.WorkerQueue 122 streamMutex sync.RWMutex 123 connectedStreams []sdk.SDK_WatchGameServerServer 124 ctx context.Context 125 recorder record.EventRecorder 126 gsLabels map[string]string 127 gsAnnotations map[string]string 128 gsState agonesv1.GameServerState 129 gsStateChannel chan agonesv1.GameServerState 130 gsUpdateMutex sync.RWMutex 131 gsWaitForSync sync.WaitGroup 132 reserveTimer *time.Timer 133 gsReserveDuration *time.Duration 134 gsPlayerCapacity int64 135 gsConnectedPlayers []string 136 gsCounterUpdates map[string]counterUpdateRequest 137 gsListUpdates map[string]listUpdateRequest 138 gsCopy *agonesv1.GameServer 139 } 140 141 // NewSDKServer creates a SDKServer that sets up an 142 // InClusterConfig for Kubernetes 143 func NewSDKServer(gameServerName, namespace string, kubeClient kubernetes.Interface, 144 agonesClient versioned.Interface, logLevel logrus.Level, healthPort int, requestsRateLimit time.Duration) (*SDKServer, error) { 145 mux := http.NewServeMux() 146 resync := 0 * time.Second 147 148 // limit the informer to only working with the gameserver that the sdk is attached to 149 tweakListOptions := func(opts *metav1.ListOptions) { 150 s1 := fields.OneTermEqualSelector("metadata.name", gameServerName) 151 opts.FieldSelector = s1.String() 152 } 153 factory := externalversions.NewSharedInformerFactoryWithOptions(agonesClient, resync, externalversions.WithNamespace(namespace), externalversions.WithTweakListOptions(tweakListOptions)) 154 gameServers := factory.Agones().V1().GameServers() 155 156 s := &SDKServer{ 157 gameServerName: gameServerName, 158 namespace: namespace, 159 gameServerGetter: agonesClient.AgonesV1(), 160 gameServerLister: gameServers.Lister(), 161 gameServerSynced: gameServers.Informer().HasSynced, 162 server: &http.Server{ 163 Addr: fmt.Sprintf(":%d", healthPort), 164 Handler: mux, 165 }, 166 clock: clock.RealClock{}, 167 healthMutex: sync.RWMutex{}, 168 healthFailureCount: 0, 169 streamMutex: sync.RWMutex{}, 170 gsLabels: map[string]string{}, 171 gsAnnotations: map[string]string{}, 172 gsUpdateMutex: sync.RWMutex{}, 173 gsWaitForSync: sync.WaitGroup{}, 174 gsConnectedPlayers: []string{}, 175 gsStateChannel: make(chan agonesv1.GameServerState, 2), 176 } 177 178 if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 179 // Once FeatureCountsAndLists is in GA, move this into SDKServer creation above. 180 s.gsCounterUpdates = map[string]counterUpdateRequest{} 181 s.gsListUpdates = map[string]listUpdateRequest{} 182 } 183 184 s.informerFactory = factory 185 s.logger = runtime.NewLoggerWithType(s).WithField("gsKey", namespace+"/"+gameServerName) 186 s.logger.Logger.SetLevel(logLevel) 187 188 _, _ = gameServers.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 189 UpdateFunc: func(_, newObj interface{}) { 190 gs := newObj.(*agonesv1.GameServer) 191 s.sendGameServerUpdate(gs) 192 }, 193 }) 194 195 eventBroadcaster := record.NewBroadcaster() 196 eventBroadcaster.StartLogging(s.logger.Debugf) 197 eventBroadcaster.StartRecordingToSink(&k8sv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")}) 198 s.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "gameserver-sidecar"}) 199 200 mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) { 201 _, err := w.Write([]byte("ok")) 202 if err != nil { 203 s.logger.WithError(err).Error("could not send ok response on healthz") 204 w.WriteHeader(http.StatusInternalServerError) 205 } 206 }) 207 mux.HandleFunc("/gshealthz", func(w http.ResponseWriter, _ *http.Request) { 208 s.ensureHealthChecksRunning() 209 if s.healthy() { 210 _, err := w.Write([]byte("ok")) 211 if err != nil { 212 s.logger.WithError(err).Error("could not send ok response on gshealthz") 213 w.WriteHeader(http.StatusInternalServerError) 214 } 215 } else { 216 w.WriteHeader(http.StatusInternalServerError) 217 } 218 }) 219 220 // we haven't synced yet 221 s.gsWaitForSync.Add(1) 222 s.workerqueue = workerqueue.NewWorkerQueueWithRateLimiter( 223 s.syncGameServer, 224 s.logger, 225 logfields.GameServerKey, 226 strings.Join([]string{agones.GroupName, s.namespace, s.gameServerName}, "."), 227 workerqueue.ConstantRateLimiter(requestsRateLimit)) 228 229 s.logger.Info("Created GameServer sidecar") 230 231 return s, nil 232 } 233 234 // Run processes the rate limited queue. 235 // Will block until stop is closed 236 func (s *SDKServer) Run(ctx context.Context) error { 237 s.informerFactory.Start(ctx.Done()) 238 if !cache.WaitForCacheSync(ctx.Done(), s.gameServerSynced) { 239 return errors.New("failed to wait for caches to sync") 240 } 241 242 // need this for streaming gRPC commands 243 s.ctx = ctx 244 // we have the gameserver details now 245 s.gsWaitForSync.Done() 246 247 gs, err := s.gameServer() 248 if err != nil { 249 return err 250 } 251 252 s.health = gs.Spec.Health 253 s.logger.WithField("health", s.health).Debug("Setting health configuration") 254 s.healthTimeout = time.Duration(gs.Spec.Health.PeriodSeconds) * time.Second 255 s.touchHealthLastUpdated() 256 257 if gs.Status.State == agonesv1.GameServerStateReserved && gs.Status.ReservedUntil != nil { 258 s.gsUpdateMutex.Lock() 259 s.resetReserveAfter(context.Background(), time.Until(gs.Status.ReservedUntil.Time)) 260 s.gsUpdateMutex.Unlock() 261 } 262 263 // populate player tracking values 264 if runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 265 s.gsUpdateMutex.Lock() 266 if gs.Status.Players != nil { 267 s.gsPlayerCapacity = gs.Status.Players.Capacity 268 s.gsConnectedPlayers = gs.Status.Players.IDs 269 } 270 s.gsUpdateMutex.Unlock() 271 } 272 273 // then start the http endpoints 274 s.logger.Debug("Starting SDKServer http health check...") 275 go func() { 276 if err := s.server.ListenAndServe(); err != nil { 277 if err == http.ErrServerClosed { 278 s.logger.WithError(err).Error("Health check: http server closed") 279 } else { 280 err = errors.Wrap(err, "Could not listen on :8080") 281 runtime.HandleError(s.logger.WithError(err), err) 282 } 283 } 284 }() 285 defer s.server.Close() // nolint: errcheck 286 287 s.workerqueue.Run(ctx, 1) 288 return nil 289 } 290 291 // WaitForConnection attempts a GameServer GET every 3s until the client responds. 292 // This is a workaround for the informer hanging indefinitely on first LIST due 293 // to a flaky network to the Kubernetes service endpoint. 294 func (s *SDKServer) WaitForConnection(ctx context.Context) error { 295 // In normal operaiton, waitForConnection is called exactly once in Run(). 296 // In unit tests, waitForConnection() can be called before Run() to ensure 297 // that connected is true when Run() is called, otherwise the List() below 298 // may race with a test that changes a mock. (Despite the fact that we drop 299 // the data on the ground, the Go race detector will pereceive a data race.) 300 if s.connected { 301 return nil 302 } 303 304 try := 0 305 return wait.PollUntilContextCancel(ctx, 4*time.Second, true, func(ctx context.Context) (bool, error) { 306 ctx, cancel := context.WithTimeout(ctx, 3*time.Second) 307 defer cancel() 308 309 // Specifically use gameServerGetter since it's the raw client (gameServerLister is the informer). 310 // We use List here to avoid needing permission to Get(). 311 _, err := s.gameServerGetter.GameServers(s.namespace).List(ctx, metav1.ListOptions{ 312 FieldSelector: fields.OneTermEqualSelector("metadata.name", s.gameServerName).String(), 313 }) 314 if err != nil { 315 s.logger.WithField("try", try).WithError(err).Info("Connection to Kubernetes service failed") 316 try++ 317 return false, nil 318 } 319 s.logger.WithField("try", try).Info("Connection to Kubernetes service established") 320 s.connected = true 321 return true, nil 322 }) 323 } 324 325 // syncGameServer synchronises the GameServer with the requested operations. 326 // The format of the key is {operation}. To prevent old operation data from 327 // overwriting the new one, the operation data is persisted in SDKServer. 328 func (s *SDKServer) syncGameServer(ctx context.Context, key string) error { 329 switch Operation(key) { 330 case updateState: 331 return s.updateState(ctx) 332 case updateLabel: 333 return s.updateLabels(ctx) 334 case updateAnnotation: 335 return s.updateAnnotations(ctx) 336 case updatePlayerCapacity: 337 return s.updatePlayerCapacity(ctx) 338 case updateConnectedPlayers: 339 return s.updateConnectedPlayers(ctx) 340 case updateCounters: 341 return s.updateCounter(ctx) 342 case updateLists: 343 return s.updateList(ctx) 344 } 345 346 return errors.Errorf("could not sync game server key: %s", key) 347 } 348 349 // updateState sets the GameServer Status's state to the one persisted in SDKServer, 350 // i.e. SDKServer.gsState. 351 func (s *SDKServer) updateState(ctx context.Context) error { 352 s.gsUpdateMutex.RLock() 353 s.logger.WithField("state", s.gsState).Debug("Updating state") 354 if len(s.gsState) == 0 { 355 s.gsUpdateMutex.RUnlock() 356 return errors.Errorf("could not update GameServer %s/%s to empty state", s.namespace, s.gameServerName) 357 } 358 s.gsUpdateMutex.RUnlock() 359 360 gs, err := s.gameServer() 361 if err != nil { 362 return err 363 } 364 365 // If we are currently in shutdown/being deleted, there is no escaping. 366 if gs.IsBeingDeleted() { 367 s.logger.Debug("GameServerState being shutdown. Skipping update.") 368 369 // Explicitly update gsStateChannel if current state is Shutdown since sendGameServerUpdate will not triggered. 370 if s.gsState == agonesv1.GameServerStateShutdown && gs.Status.State != agonesv1.GameServerStateShutdown { 371 go func() { 372 s.gsStateChannel <- agonesv1.GameServerStateShutdown 373 }() 374 } 375 376 return nil 377 } 378 379 // If the state is currently unhealthy, you can't go back to Ready. 380 if gs.Status.State == agonesv1.GameServerStateUnhealthy { 381 s.logger.Debug("GameServerState already unhealthy. Skipping update.") 382 return nil 383 } 384 385 s.gsUpdateMutex.RLock() 386 gsCopy := gs.DeepCopy() 387 gsCopy.Status.State = s.gsState 388 389 // If we are setting the Reserved status, check for the duration, and set that too. 390 if gsCopy.Status.State == agonesv1.GameServerStateReserved && s.gsReserveDuration != nil { 391 n := metav1.NewTime(time.Now().Add(*s.gsReserveDuration)) 392 gsCopy.Status.ReservedUntil = &n 393 } else { 394 gsCopy.Status.ReservedUntil = nil 395 } 396 s.gsUpdateMutex.RUnlock() 397 398 // If we are setting the Allocated status, set the last-allocated annotation as well. 399 if gsCopy.Status.State == agonesv1.GameServerStateAllocated { 400 ts, err := s.clock.Now().MarshalText() 401 if err != nil { 402 return err 403 } 404 if gsCopy.ObjectMeta.Annotations == nil { 405 gsCopy.ObjectMeta.Annotations = map[string]string{} 406 } 407 gsCopy.ObjectMeta.Annotations[gameserverallocations.LastAllocatedAnnotationKey] = string(ts) 408 } 409 410 gs, err = s.patchGameServer(ctx, gs, gsCopy) 411 if err != nil { 412 return errors.Wrapf(err, "could not update GameServer %s/%s to state %s", s.namespace, s.gameServerName, gsCopy.Status.State) 413 } 414 415 message := "SDK state change" 416 level := corev1.EventTypeNormal 417 // post state specific work here 418 switch gs.Status.State { 419 case agonesv1.GameServerStateUnhealthy: 420 level = corev1.EventTypeWarning 421 message = "Health check failure" 422 case agonesv1.GameServerStateReserved: 423 s.gsUpdateMutex.Lock() 424 if s.gsReserveDuration != nil { 425 message += fmt.Sprintf(", for %s", s.gsReserveDuration) 426 s.resetReserveAfter(context.Background(), *s.gsReserveDuration) 427 } 428 s.gsUpdateMutex.Unlock() 429 } 430 431 s.recorder.Event(gs, level, string(gs.Status.State), message) 432 433 return nil 434 } 435 436 // Gets the GameServer from the cache, or from the local SDKServer if that version is more recent. 437 func (s *SDKServer) gameServer() (*agonesv1.GameServer, error) { 438 // this ensure that if we get requests for the gameserver before the cache has been synced, 439 // they will block here until it's ready 440 s.gsWaitForSync.Wait() 441 gs, err := s.gameServerLister.GameServers(s.namespace).Get(s.gameServerName) 442 if err != nil { 443 return gs, errors.Wrapf(err, "could not retrieve GameServer %s/%s", s.namespace, s.gameServerName) 444 } 445 s.gsUpdateMutex.RLock() 446 defer s.gsUpdateMutex.RUnlock() 447 if s.gsCopy != nil && gs.ObjectMeta.Generation < s.gsCopy.Generation { 448 return s.gsCopy, nil 449 } 450 return gs, nil 451 } 452 453 // patchGameServer is a helper function to create and apply a patch update, so the changes in 454 // gsCopy are applied to the original gs. 455 func (s *SDKServer) patchGameServer(ctx context.Context, gs, gsCopy *agonesv1.GameServer) (*agonesv1.GameServer, error) { 456 patch, err := gs.Patch(gsCopy) 457 if err != nil { 458 return nil, err 459 } 460 461 gs, err = s.gameServerGetter.GameServers(s.namespace).Patch(ctx, gs.GetObjectMeta().GetName(), types.JSONPatchType, patch, metav1.PatchOptions{}) 462 // if the test operation fails, no reason to error log 463 if err != nil && k8serrors.IsInvalid(err) { 464 err = workerqueue.NewTraceError(err) 465 } 466 return gs, errors.Wrapf(err, "error attempting to patch gameserver: %s/%s", gsCopy.ObjectMeta.Namespace, gsCopy.ObjectMeta.Name) 467 } 468 469 // updateLabels updates the labels on this GameServer to the ones persisted in SDKServer, 470 // i.e. SDKServer.gsLabels, with the prefix of "agones.dev/sdk-" 471 func (s *SDKServer) updateLabels(ctx context.Context) error { 472 s.logger.WithField("labels", s.gsLabels).Debug("Updating label") 473 gs, err := s.gameServer() 474 if err != nil { 475 return err 476 } 477 478 gsCopy := gs.DeepCopy() 479 480 s.gsUpdateMutex.RLock() 481 if len(s.gsLabels) > 0 && gsCopy.ObjectMeta.Labels == nil { 482 gsCopy.ObjectMeta.Labels = map[string]string{} 483 } 484 for k, v := range s.gsLabels { 485 gsCopy.ObjectMeta.Labels[metadataPrefix+k] = v 486 } 487 s.gsUpdateMutex.RUnlock() 488 489 _, err = s.patchGameServer(ctx, gs, gsCopy) 490 return err 491 } 492 493 // updateAnnotations updates the Annotations on this GameServer to the ones persisted in SDKServer, 494 // i.e. SDKServer.gsAnnotations, with the prefix of "agones.dev/sdk-" 495 func (s *SDKServer) updateAnnotations(ctx context.Context) error { 496 s.logger.WithField("annotations", s.gsAnnotations).Debug("Updating annotation") 497 gs, err := s.gameServer() 498 if err != nil { 499 return err 500 } 501 502 gsCopy := gs.DeepCopy() 503 504 s.gsUpdateMutex.RLock() 505 if len(s.gsAnnotations) > 0 && gsCopy.ObjectMeta.Annotations == nil { 506 gsCopy.ObjectMeta.Annotations = map[string]string{} 507 } 508 for k, v := range s.gsAnnotations { 509 gsCopy.ObjectMeta.Annotations[metadataPrefix+k] = v 510 } 511 s.gsUpdateMutex.RUnlock() 512 513 _, err = s.patchGameServer(ctx, gs, gsCopy) 514 return err 515 } 516 517 // enqueueState enqueue a State change request into the 518 // workerqueue 519 func (s *SDKServer) enqueueState(state agonesv1.GameServerState) { 520 s.gsUpdateMutex.Lock() 521 // Update cached state, but prevent transitions out of `Unhealthy` by the SDK. 522 if s.gsState != agonesv1.GameServerStateUnhealthy { 523 s.gsState = state 524 } 525 s.gsUpdateMutex.Unlock() 526 s.workerqueue.Enqueue(cache.ExplicitKey(string(updateState))) 527 } 528 529 // Ready enters the RequestReady state change for this GameServer into 530 // the workqueue so it can be updated 531 func (s *SDKServer) Ready(_ context.Context, e *sdk.Empty) (*sdk.Empty, error) { 532 s.logger.Debug("Received Ready request, adding to queue") 533 s.stopReserveTimer() 534 s.enqueueState(agonesv1.GameServerStateRequestReady) 535 return e, nil 536 } 537 538 // Allocate enters an Allocate state change into the workqueue, so it can be updated 539 func (s *SDKServer) Allocate(_ context.Context, e *sdk.Empty) (*sdk.Empty, error) { 540 s.stopReserveTimer() 541 s.enqueueState(agonesv1.GameServerStateAllocated) 542 return e, nil 543 } 544 545 // Shutdown enters the Shutdown state change for this GameServer into 546 // the workqueue so it can be updated. If gracefulTermination feature is enabled, 547 // Shutdown will block on GameServer being shutdown. 548 func (s *SDKServer) Shutdown(_ context.Context, e *sdk.Empty) (*sdk.Empty, error) { 549 s.logger.Debug("Received Shutdown request, adding to queue") 550 s.stopReserveTimer() 551 s.enqueueState(agonesv1.GameServerStateShutdown) 552 553 return e, nil 554 } 555 556 // Health receives each health ping, and tracks the last time the health 557 // check was received, to track if a GameServer is healthy 558 func (s *SDKServer) Health(stream sdk.SDK_HealthServer) error { 559 for { 560 _, err := stream.Recv() 561 if err == io.EOF { 562 s.logger.Debug("Health stream closed.") 563 return stream.SendAndClose(&sdk.Empty{}) 564 } 565 if err != nil { 566 return errors.Wrap(err, "Error with Health check") 567 } 568 s.logger.Debug("Health Ping Received") 569 s.touchHealthLastUpdated() 570 } 571 } 572 573 // SetLabel adds the Key/Value to be used to set the label with the metadataPrefix to the `GameServer` 574 // metdata 575 func (s *SDKServer) SetLabel(_ context.Context, kv *sdk.KeyValue) (*sdk.Empty, error) { 576 s.logger.WithField("values", kv).Debug("Adding SetLabel to queue") 577 578 s.gsUpdateMutex.Lock() 579 s.gsLabels[kv.Key] = kv.Value 580 s.gsUpdateMutex.Unlock() 581 582 s.workerqueue.Enqueue(cache.ExplicitKey(string(updateLabel))) 583 return &sdk.Empty{}, nil 584 } 585 586 // SetAnnotation adds the Key/Value to be used to set the annotations with the metadataPrefix to the `GameServer` 587 // metdata 588 func (s *SDKServer) SetAnnotation(_ context.Context, kv *sdk.KeyValue) (*sdk.Empty, error) { 589 s.logger.WithField("values", kv).Debug("Adding SetAnnotation to queue") 590 591 s.gsUpdateMutex.Lock() 592 s.gsAnnotations[kv.Key] = kv.Value 593 s.gsUpdateMutex.Unlock() 594 595 s.workerqueue.Enqueue(cache.ExplicitKey(string(updateAnnotation))) 596 return &sdk.Empty{}, nil 597 } 598 599 // GetGameServer returns the current GameServer configuration and state from the backing GameServer CRD 600 func (s *SDKServer) GetGameServer(context.Context, *sdk.Empty) (*sdk.GameServer, error) { 601 s.logger.Debug("Received GetGameServer request") 602 gs, err := s.gameServer() 603 if err != nil { 604 return nil, err 605 } 606 return convert(gs), nil 607 } 608 609 // WatchGameServer sends events through the stream when changes occur to the 610 // backing GameServer configuration / status 611 func (s *SDKServer) WatchGameServer(_ *sdk.Empty, stream sdk.SDK_WatchGameServerServer) error { 612 s.logger.Debug("Received WatchGameServer request, adding stream to connectedStreams") 613 614 gs, err := s.GetGameServer(context.Background(), &sdk.Empty{}) 615 if err != nil { 616 return err 617 } 618 619 if err := stream.Send(gs); err != nil { 620 return err 621 } 622 623 s.streamMutex.Lock() 624 s.connectedStreams = append(s.connectedStreams, stream) 625 s.streamMutex.Unlock() 626 // don't exit until we shutdown, because that will close the stream 627 <-s.ctx.Done() 628 return nil 629 } 630 631 // Reserve moves this GameServer to the Reserved state for the Duration specified 632 func (s *SDKServer) Reserve(_ context.Context, d *sdk.Duration) (*sdk.Empty, error) { 633 s.stopReserveTimer() 634 635 e := &sdk.Empty{} 636 637 // 0 is forever. 638 if d.Seconds > 0 { 639 duration := time.Duration(d.Seconds) * time.Second 640 s.gsUpdateMutex.Lock() 641 s.gsReserveDuration = &duration 642 s.gsUpdateMutex.Unlock() 643 } 644 645 s.logger.Debug("Received Reserve request, adding to queue") 646 s.enqueueState(agonesv1.GameServerStateReserved) 647 648 return e, nil 649 } 650 651 // resetReserveAfter will move the GameServer back to being ready after the specified duration. 652 // This function should be wrapped in a s.gsUpdateMutex lock when being called. 653 func (s *SDKServer) resetReserveAfter(ctx context.Context, duration time.Duration) { 654 if s.reserveTimer != nil { 655 s.reserveTimer.Stop() 656 } 657 658 s.reserveTimer = time.AfterFunc(duration, func() { 659 if _, err := s.Ready(ctx, &sdk.Empty{}); err != nil { 660 s.logger.WithError(errors.WithStack(err)).Error("error returning to Ready after reserved") 661 } 662 }) 663 } 664 665 // stopReserveTimer stops the reserve timer. This is a no-op and safe to call if the timer is nil 666 func (s *SDKServer) stopReserveTimer() { 667 s.gsUpdateMutex.Lock() 668 defer s.gsUpdateMutex.Unlock() 669 670 if s.reserveTimer != nil { 671 s.reserveTimer.Stop() 672 } 673 s.gsReserveDuration = nil 674 } 675 676 // PlayerConnect should be called when a player connects. 677 // [Stage:Alpha] 678 // [FeatureFlag:PlayerTracking] 679 func (s *SDKServer) PlayerConnect(_ context.Context, id *alpha.PlayerID) (*alpha.Bool, error) { 680 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 681 return &alpha.Bool{Bool: false}, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 682 } 683 s.logger.WithField("playerID", id.PlayerID).Debug("Player Connected") 684 685 s.gsUpdateMutex.Lock() 686 defer s.gsUpdateMutex.Unlock() 687 688 // the player is already connected, return false. 689 for _, playerID := range s.gsConnectedPlayers { 690 if playerID == id.PlayerID { 691 return &alpha.Bool{Bool: false}, nil 692 } 693 } 694 695 if int64(len(s.gsConnectedPlayers)) >= s.gsPlayerCapacity { 696 return &alpha.Bool{Bool: false}, errors.New("players are already at capacity") 697 } 698 699 // let's retain the original order, as it should be a smaller patch on data change 700 s.gsConnectedPlayers = append(s.gsConnectedPlayers, id.PlayerID) 701 s.workerqueue.EnqueueAfter(cache.ExplicitKey(string(updateConnectedPlayers)), updatePeriod) 702 703 return &alpha.Bool{Bool: true}, nil 704 } 705 706 // PlayerDisconnect should be called when a player disconnects. 707 // [Stage:Alpha] 708 // [FeatureFlag:PlayerTracking] 709 func (s *SDKServer) PlayerDisconnect(_ context.Context, id *alpha.PlayerID) (*alpha.Bool, error) { 710 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 711 return &alpha.Bool{Bool: false}, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 712 } 713 s.logger.WithField("playerID", id.PlayerID).Debug("Player Disconnected") 714 715 s.gsUpdateMutex.Lock() 716 defer s.gsUpdateMutex.Unlock() 717 718 found := -1 719 for i, playerID := range s.gsConnectedPlayers { 720 if playerID == id.PlayerID { 721 found = i 722 break 723 } 724 } 725 if found == -1 { 726 return &alpha.Bool{Bool: false}, nil 727 } 728 729 // let's retain the original order, as it should be a smaller patch on data change 730 s.gsConnectedPlayers = append(s.gsConnectedPlayers[:found], s.gsConnectedPlayers[found+1:]...) 731 s.workerqueue.EnqueueAfter(cache.ExplicitKey(string(updateConnectedPlayers)), updatePeriod) 732 733 return &alpha.Bool{Bool: true}, nil 734 } 735 736 // IsPlayerConnected returns if the playerID is currently connected to the GameServer. 737 // This is always accurate, even if the value hasn’t been updated to the GameServer status yet. 738 // [Stage:Alpha] 739 // [FeatureFlag:PlayerTracking] 740 func (s *SDKServer) IsPlayerConnected(_ context.Context, id *alpha.PlayerID) (*alpha.Bool, error) { 741 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 742 return &alpha.Bool{Bool: false}, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 743 } 744 s.gsUpdateMutex.RLock() 745 defer s.gsUpdateMutex.RUnlock() 746 747 result := &alpha.Bool{Bool: false} 748 749 for _, playerID := range s.gsConnectedPlayers { 750 if playerID == id.PlayerID { 751 result.Bool = true 752 break 753 } 754 } 755 756 return result, nil 757 } 758 759 // GetConnectedPlayers returns the list of the currently connected player ids. 760 // This is always accurate, even if the value hasn’t been updated to the GameServer status yet. 761 // [Stage:Alpha] 762 // [FeatureFlag:PlayerTracking] 763 func (s *SDKServer) GetConnectedPlayers(_ context.Context, _ *alpha.Empty) (*alpha.PlayerIDList, error) { 764 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 765 return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 766 } 767 s.gsUpdateMutex.RLock() 768 defer s.gsUpdateMutex.RUnlock() 769 770 return &alpha.PlayerIDList{List: s.gsConnectedPlayers}, nil 771 } 772 773 // GetPlayerCount returns the current player count. 774 // [Stage:Alpha] 775 // [FeatureFlag:PlayerTracking] 776 func (s *SDKServer) GetPlayerCount(_ context.Context, _ *alpha.Empty) (*alpha.Count, error) { 777 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 778 return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 779 } 780 s.gsUpdateMutex.RLock() 781 defer s.gsUpdateMutex.RUnlock() 782 return &alpha.Count{Count: int64(len(s.gsConnectedPlayers))}, nil 783 } 784 785 // SetPlayerCapacity to change the game server's player capacity. 786 // [Stage:Alpha] 787 // [FeatureFlag:PlayerTracking] 788 func (s *SDKServer) SetPlayerCapacity(_ context.Context, count *alpha.Count) (*alpha.Empty, error) { 789 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 790 return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 791 } 792 s.gsUpdateMutex.Lock() 793 s.gsPlayerCapacity = count.Count 794 s.gsUpdateMutex.Unlock() 795 s.workerqueue.Enqueue(cache.ExplicitKey(string(updatePlayerCapacity))) 796 797 return &alpha.Empty{}, nil 798 } 799 800 // GetPlayerCapacity returns the current player capacity, as set by SDK.SetPlayerCapacity() 801 // [Stage:Alpha] 802 // [FeatureFlag:PlayerTracking] 803 func (s *SDKServer) GetPlayerCapacity(_ context.Context, _ *alpha.Empty) (*alpha.Count, error) { 804 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 805 return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 806 } 807 s.gsUpdateMutex.RLock() 808 defer s.gsUpdateMutex.RUnlock() 809 return &alpha.Count{Count: s.gsPlayerCapacity}, nil 810 } 811 812 // GetCounter returns a Counter. Returns error if the counter does not exist. 813 // [Stage:Beta] 814 // [FeatureFlag:CountsAndLists] 815 func (s *SDKServer) GetCounter(_ context.Context, in *beta.GetCounterRequest) (*beta.Counter, error) { 816 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 817 return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists) 818 } 819 820 s.logger.WithField("name", in.Name).Debug("Getting Counter") 821 822 gs, err := s.gameServer() 823 if err != nil { 824 return nil, err 825 } 826 827 s.gsUpdateMutex.RLock() 828 defer s.gsUpdateMutex.RUnlock() 829 830 counter, ok := gs.Status.Counters[in.Name] 831 if !ok { 832 return nil, errors.Errorf("counter not found: %s", in.Name) 833 } 834 s.logger.WithField("Get Counter", counter).Debugf("Got Counter %s", in.Name) 835 protoCounter := &beta.Counter{Name: in.Name, Count: counter.Count, Capacity: counter.Capacity} 836 // If there are batched changes that have not yet been applied, apply them to the Counter. 837 // This does NOT validate batched the changes. 838 if counterUpdate, ok := s.gsCounterUpdates[in.Name]; ok { 839 if counterUpdate.capacitySet != nil { 840 protoCounter.Capacity = *counterUpdate.capacitySet 841 } 842 if counterUpdate.countSet != nil { 843 protoCounter.Count = *counterUpdate.countSet 844 } 845 protoCounter.Count += counterUpdate.diff 846 if protoCounter.Count < 0 { 847 protoCounter.Count = 0 848 s.logger.Debug("truncating Count in Get Counter request to 0") 849 } 850 if protoCounter.Count > protoCounter.Capacity { 851 protoCounter.Count = protoCounter.Capacity 852 s.logger.Debug("truncating Count in Get Counter request to Capacity") 853 } 854 s.logger.WithField("Get Counter", counter).Debugf("Applied Batched Counter Updates %v", counterUpdate) 855 } 856 857 return protoCounter, nil 858 } 859 860 // UpdateCounter collapses all UpdateCounterRequests for a given Counter into a single request. 861 // UpdateCounterRequest must be one and only one of Capacity, Count, or CountDiff. 862 // Returns error if the Counter does not exist (name cannot be updated). 863 // Returns error if the Count is out of range [0,Capacity]. 864 // [Stage:Beta] 865 // [FeatureFlag:CountsAndLists] 866 func (s *SDKServer) UpdateCounter(_ context.Context, in *beta.UpdateCounterRequest) (*beta.Counter, error) { 867 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 868 return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists) 869 } 870 871 if in.CounterUpdateRequest == nil { 872 return nil, errors.Errorf("invalid argument. CounterUpdateRequest: %v cannot be nil", in.CounterUpdateRequest) 873 } 874 if in.CounterUpdateRequest.CountDiff == 0 && in.CounterUpdateRequest.Count == nil && in.CounterUpdateRequest.Capacity == nil { 875 return nil, errors.Errorf("invalid argument. Malformed CounterUpdateRequest: %v", in.CounterUpdateRequest) 876 } 877 878 s.logger.WithField("name", in.CounterUpdateRequest.Name).Debug("Update Counter Request") 879 880 gs, err := s.gameServer() 881 if err != nil { 882 return nil, err 883 } 884 885 s.gsUpdateMutex.Lock() 886 defer s.gsUpdateMutex.Unlock() 887 888 // Check if we already have a batch request started for this Counter. If not, add new request to 889 // the gsCounterUpdates map. 890 name := in.CounterUpdateRequest.Name 891 batchCounter := s.gsCounterUpdates[name] 892 893 counter, ok := gs.Status.Counters[name] 894 // We didn't find the Counter named key in the gameserver. 895 if !ok { 896 return nil, errors.Errorf("counter not found: %s", name) 897 } 898 899 batchCounter.counter = *counter.DeepCopy() 900 901 // Updated based on if client call is CapacitySet 902 if in.CounterUpdateRequest.Capacity != nil { 903 if in.CounterUpdateRequest.Capacity.GetValue() < 0 { 904 return nil, errors.Errorf("out of range. Capacity must be greater than or equal to 0. Found Capacity: %d", in.CounterUpdateRequest.Capacity.GetValue()) 905 } 906 capacitySet := in.CounterUpdateRequest.Capacity.GetValue() 907 batchCounter.capacitySet = &capacitySet 908 } 909 910 // Update based on if Client call is CountSet 911 if in.CounterUpdateRequest.Count != nil { 912 // Verify that 0 <= Count >= Capacity 913 countSet := in.CounterUpdateRequest.Count.GetValue() 914 capacity := batchCounter.counter.Capacity 915 if batchCounter.capacitySet != nil { 916 capacity = *batchCounter.capacitySet 917 } 918 if countSet < 0 || countSet > capacity { 919 return nil, errors.Errorf("out of range. Count must be within range [0,Capacity]. Found Count: %d, Capacity: %d", countSet, capacity) 920 } 921 batchCounter.countSet = &countSet 922 // Clear any previous CountIncrement or CountDecrement requests, and add the CountSet as the first item. 923 batchCounter.diff = 0 924 } 925 926 // Update based on if Client call is CountIncrement or CountDecrement 927 if in.CounterUpdateRequest.CountDiff != 0 { 928 count := batchCounter.counter.Count 929 if batchCounter.countSet != nil { 930 count = *batchCounter.countSet 931 } 932 count += batchCounter.diff + in.CounterUpdateRequest.CountDiff 933 // Verify that 0 <= Count >= Capacity 934 capacity := batchCounter.counter.Capacity 935 if batchCounter.capacitySet != nil { 936 capacity = *batchCounter.capacitySet 937 } 938 if count < 0 || count > capacity { 939 return nil, errors.Errorf("out of range. Count must be within range [0,Capacity]. Found Count: %d, Capacity: %d", count, capacity) 940 } 941 batchCounter.diff += in.CounterUpdateRequest.CountDiff 942 } 943 944 s.gsCounterUpdates[name] = batchCounter 945 946 // Queue up the Update for later batch processing by updateCounters. 947 s.workerqueue.Enqueue(cache.ExplicitKey(updateCounters)) 948 return projectCounterState(name, batchCounter), nil 949 } 950 951 // projectCounterState calculates the final expected Counter state after applying batched updates. 952 func projectCounterState(name string, batchCounter counterUpdateRequest) *beta.Counter { 953 currentCapacity := batchCounter.counter.Capacity 954 if batchCounter.capacitySet != nil { 955 currentCapacity = *batchCounter.capacitySet 956 } 957 currentCount := batchCounter.counter.Count 958 if batchCounter.countSet != nil { 959 currentCount = *batchCounter.countSet 960 } 961 currentCount += batchCounter.diff 962 if currentCount < 0 { 963 currentCount = 0 964 } 965 if currentCount > currentCapacity { 966 currentCount = currentCapacity 967 } 968 return &beta.Counter{ 969 Name: name, 970 Count: currentCount, 971 Capacity: currentCapacity, 972 } 973 } 974 975 // updateCounter updates the Counters in the GameServer's Status with the batched update requests. 976 func (s *SDKServer) updateCounter(ctx context.Context) error { 977 gs, err := s.gameServer() 978 if err != nil { 979 return err 980 } 981 gsCopy := gs.DeepCopy() 982 983 s.logger.WithField("batchCounterUpdates", s.gsCounterUpdates).Debug("Batch updating Counter(s)") 984 s.gsUpdateMutex.Lock() 985 defer s.gsUpdateMutex.Unlock() 986 987 names := []string{} 988 989 for name, ctrReq := range s.gsCounterUpdates { 990 counter, ok := gsCopy.Status.Counters[name] 991 if !ok { 992 continue 993 } 994 // Changes may have been made to the Counter since we validated the incoming changes in 995 // UpdateCounter, and we need to verify if the batched changes can be fully applied, partially 996 // applied, or cannot be applied. 997 if ctrReq.capacitySet != nil { 998 counter.Capacity = *ctrReq.capacitySet 999 } 1000 if ctrReq.countSet != nil { 1001 counter.Count = *ctrReq.countSet 1002 } 1003 newCnt := counter.Count + ctrReq.diff 1004 if newCnt < 0 { 1005 newCnt = 0 1006 s.logger.Debug("truncating Count in Update Counter request to 0") 1007 } 1008 if newCnt > counter.Capacity { 1009 newCnt = counter.Capacity 1010 s.logger.Debug("truncating Count in Update Counter request to Capacity") 1011 } 1012 counter.Count = newCnt 1013 gsCopy.Status.Counters[name] = counter 1014 names = append(names, name) 1015 } 1016 1017 gs, err = s.patchGameServer(ctx, gs, gsCopy) 1018 if err != nil { 1019 return err 1020 } 1021 1022 // Record an event per update Counter 1023 for _, name := range names { 1024 s.recorder.Event(gs, corev1.EventTypeNormal, "UpdateCounter", 1025 fmt.Sprintf("Counter %s updated to Count:%d Capacity:%d", 1026 name, gs.Status.Counters[name].Count, gs.Status.Counters[name].Capacity)) 1027 } 1028 1029 // Cache a copy of the successfully updated gameserver 1030 s.gsCopy = gs 1031 // Clear the gsCounterUpdates 1032 s.gsCounterUpdates = map[string]counterUpdateRequest{} 1033 1034 return nil 1035 } 1036 1037 // GetList returns a List. Returns not found if the List does not exist. 1038 // [Stage:Beta] 1039 // [FeatureFlag:CountsAndLists] 1040 func (s *SDKServer) GetList(_ context.Context, in *beta.GetListRequest) (*beta.List, error) { 1041 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 1042 return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists) 1043 } 1044 if in == nil { 1045 return nil, errors.Errorf("GetListRequest cannot be nil") 1046 } 1047 s.logger.WithField("name", in.Name).Debug("Getting List") 1048 1049 gs, err := s.gameServer() 1050 if err != nil { 1051 return nil, err 1052 } 1053 1054 s.gsUpdateMutex.RLock() 1055 defer s.gsUpdateMutex.RUnlock() 1056 1057 list, ok := gs.Status.Lists[in.Name] 1058 if !ok { 1059 return nil, errors.Errorf("list not found: %s", in.Name) 1060 } 1061 1062 s.logger.WithField("Get List", list).Debugf("Got List %s", in.Name) 1063 protoList := beta.List{Name: in.Name, Values: list.Values, Capacity: list.Capacity} 1064 // If there are batched changes that have not yet been applied, apply them to the List. 1065 // This does NOT validate batched the changes, and does NOT modify the List. 1066 if listUpdate, ok := s.gsListUpdates[in.Name]; ok { 1067 if listUpdate.capacitySet != nil { 1068 protoList.Capacity = *listUpdate.capacitySet 1069 } 1070 if len(listUpdate.valuesToDelete) != 0 { 1071 protoList.Values = deleteValues(protoList.Values, listUpdate.valuesToDelete) 1072 } 1073 if len(listUpdate.valuesToAppend) != 0 { 1074 protoList.Values = agonesv1.MergeRemoveDuplicates(protoList.Values, listUpdate.valuesToAppend) 1075 } 1076 // Truncates Values to less than or equal to Capacity 1077 if len(protoList.Values) > int(protoList.Capacity) { 1078 protoList.Values = append([]string{}, protoList.Values[:protoList.Capacity]...) 1079 } 1080 s.logger.WithField("Get List", list).Debugf("Applied Batched List Updates %v", listUpdate) 1081 } 1082 1083 return &protoList, nil 1084 } 1085 1086 // UpdateList collapses all update capacity requests for a given List into a single UpdateList request. 1087 // This function currently only updates the Capacity of a List. 1088 // Returns error if the List does not exist (name cannot be updated). 1089 // Returns error if the List update capacity is out of range [0,1000]. 1090 // [Stage:Beta] 1091 // [FeatureFlag:CountsAndLists] 1092 func (s *SDKServer) UpdateList(ctx context.Context, in *beta.UpdateListRequest) (*beta.List, error) { 1093 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 1094 return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists) 1095 } 1096 if in == nil { 1097 return nil, errors.Errorf("UpdateListRequest cannot be nil") 1098 } 1099 if in.List == nil || in.UpdateMask == nil { 1100 return nil, errors.Errorf("invalid argument. List: %v and UpdateMask %v cannot be nil", in.List, in.UpdateMask) 1101 } 1102 if !in.UpdateMask.IsValid(in.List.ProtoReflect().Interface()) { 1103 return nil, errors.Errorf("invalid argument. Field Mask Path(s): %v are invalid for List. Use valid field name(s): %v", in.UpdateMask.GetPaths(), in.List.ProtoReflect().Descriptor().Fields()) 1104 } 1105 1106 if in.List.Capacity < 0 || in.List.Capacity > apiserver.ListMaxCapacity { 1107 return nil, errors.Errorf("out of range. Capacity must be within range [0,1000]. Found Capacity: %d", in.List.Capacity) 1108 } 1109 1110 list, err := s.GetList(ctx, &beta.GetListRequest{Name: in.List.Name}) 1111 if err != nil { 1112 1113 return nil, errors.Errorf("not found. %s List not found", list.Name) 1114 } 1115 1116 s.gsUpdateMutex.Lock() 1117 defer s.gsUpdateMutex.Unlock() 1118 1119 // Removes any fields from the request object that are not included in the FieldMask Paths. 1120 fmutils.Filter(in.List, in.UpdateMask.Paths) 1121 1122 // The list will allow the current list to be overwritten 1123 batchList := listUpdateRequest{} 1124 1125 // Only set the capacity if its included in the update mask paths 1126 if slices.Contains(in.UpdateMask.Paths, "capacity") { 1127 batchList.capacitySet = &in.List.Capacity 1128 } 1129 1130 // Only change the values if its included in the update mask paths 1131 if slices.Contains(in.UpdateMask.Paths, "values") { 1132 currList := list 1133 1134 // Find values to remove from the current list 1135 valuesToDelete := map[string]bool{} 1136 for _, value := range currList.Values { 1137 valueFound := false 1138 for _, element := range in.List.Values { 1139 if value == element { 1140 valueFound = true 1141 } 1142 } 1143 1144 if !valueFound { 1145 valuesToDelete[value] = true 1146 } 1147 } 1148 batchList.valuesToDelete = valuesToDelete 1149 1150 // Find values that need to be added to the current list from the incomming list 1151 valuesToAdd := []string{} 1152 for _, value := range in.List.Values { 1153 valueFound := false 1154 for _, element := range currList.Values { 1155 if value == element { 1156 valueFound = true 1157 } 1158 } 1159 1160 if !valueFound { 1161 valuesToAdd = append(valuesToAdd, value) 1162 } 1163 } 1164 batchList.valuesToAppend = valuesToAdd 1165 } 1166 1167 // Queue up the Update for later batch processing by updateLists. 1168 s.gsListUpdates[list.Name] = batchList 1169 s.workerqueue.Enqueue(cache.ExplicitKey(updateLists)) 1170 return &beta.List{}, nil 1171 1172 } 1173 1174 // AddListValue collapses all append a value to the end of a List requests into a single UpdateList request. 1175 // Returns not found if the List does not exist. 1176 // Returns already exists if the value is already in the List. 1177 // Returns out of range if the List is already at Capacity. 1178 // [Stage:Beta] 1179 // [FeatureFlag:CountsAndLists] 1180 func (s *SDKServer) AddListValue(ctx context.Context, in *beta.AddListValueRequest) (*beta.List, error) { 1181 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 1182 return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists) 1183 } 1184 if in == nil { 1185 return nil, errors.Errorf("AddListValueRequest cannot be nil") 1186 } 1187 s.logger.WithField("name", in.Name).Debug("Add List Value") 1188 1189 list, err := s.GetList(ctx, &beta.GetListRequest{Name: in.Name}) 1190 if err != nil { 1191 return nil, err 1192 } 1193 1194 s.gsUpdateMutex.Lock() 1195 defer s.gsUpdateMutex.Unlock() 1196 1197 // Verify room to add another value 1198 if int(list.Capacity) <= len(list.Values) { 1199 return nil, errors.Errorf("out of range. No available capacity. Current Capacity: %d, List Size: %d", list.Capacity, len(list.Values)) 1200 } 1201 // Verify value does not already exist in the list 1202 for _, val := range list.Values { 1203 if in.Value == val { 1204 return nil, errors.Errorf("already exists. Value: %s already in List: %s", in.Value, in.Name) 1205 } 1206 } 1207 list.Values = append(list.Values, in.Value) 1208 batchList := s.gsListUpdates[in.Name] 1209 batchList.valuesToAppend = append(batchList.valuesToAppend, in.Value) 1210 s.gsListUpdates[in.Name] = batchList 1211 // Queue up the Update for later batch processing by updateLists. 1212 s.workerqueue.Enqueue(cache.ExplicitKey(updateLists)) 1213 return list, nil 1214 } 1215 1216 // RemoveListValue collapses all remove a value from a List requests into a single UpdateList request. 1217 // Returns not found if the List does not exist. 1218 // Returns not found if the value is not in the List. 1219 // [Stage:Beta] 1220 // [FeatureFlag:CountsAndLists] 1221 func (s *SDKServer) RemoveListValue(ctx context.Context, in *beta.RemoveListValueRequest) (*beta.List, error) { 1222 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 1223 return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists) 1224 } 1225 if in == nil { 1226 return nil, errors.Errorf("RemoveListValueRequest cannot be nil") 1227 } 1228 s.logger.WithField("name", in.Name).WithField("value", in.Value).Debug("Remove List Value") 1229 1230 list, err := s.GetList(ctx, &beta.GetListRequest{Name: in.Name}) 1231 if err != nil { 1232 return nil, err 1233 } 1234 1235 s.gsUpdateMutex.Lock() 1236 defer s.gsUpdateMutex.Unlock() 1237 1238 // Track this removal for batch persistence to K8s 1239 batchList := s.gsListUpdates[in.Name] 1240 1241 removedFromBatch := false 1242 if len(batchList.valuesToAppend) > 0 { 1243 newAppend := make([]string, 0, len(batchList.valuesToAppend)) 1244 for _, v := range batchList.valuesToAppend { 1245 if v == in.Value { 1246 removedFromBatch = true 1247 continue // skip value 1248 } 1249 newAppend = append(newAppend, v) 1250 } 1251 batchList.valuesToAppend = newAppend 1252 } 1253 if !removedFromBatch { 1254 found := false 1255 newValues := make([]string, 0, len(list.Values)) 1256 for _, val := range list.Values { 1257 if val == in.Value { 1258 found = true 1259 continue 1260 } 1261 newValues = append(newValues, val) 1262 } 1263 if !found { 1264 return nil, fmt.Errorf("not found: value %s not in list %s", in.Value, in.Name) 1265 } 1266 list.Values = newValues 1267 // Track deletions 1268 if batchList.valuesToDelete == nil { 1269 batchList.valuesToDelete = make(map[string]bool) 1270 } 1271 batchList.valuesToDelete[in.Value] = true 1272 } 1273 s.gsListUpdates[in.Name] = batchList 1274 // Queue up the Update for later batch processing by updateLists. 1275 s.workerqueue.Enqueue(cache.ExplicitKey(updateLists)) 1276 return list, nil 1277 } 1278 1279 // updateList updates the Lists in the GameServer's Status with the batched update list requests. 1280 // Includes all SetCapacity, AddValue, and RemoveValue requests in the batched request. 1281 func (s *SDKServer) updateList(ctx context.Context) error { 1282 gs, err := s.gameServer() 1283 if err != nil { 1284 return err 1285 } 1286 gsCopy := gs.DeepCopy() 1287 1288 s.gsUpdateMutex.Lock() 1289 defer s.gsUpdateMutex.Unlock() 1290 1291 s.logger.WithField("batchListUpdates", s.gsListUpdates).Debug("Batch updating List(s)") 1292 1293 names := []string{} 1294 1295 for name, listReq := range s.gsListUpdates { 1296 list, ok := gsCopy.Status.Lists[name] 1297 if !ok { 1298 continue 1299 } 1300 if listReq.capacitySet != nil { 1301 list.Capacity = *listReq.capacitySet 1302 } 1303 if len(listReq.valuesToDelete) != 0 { 1304 list.Values = deleteValues(list.Values, listReq.valuesToDelete) 1305 } 1306 if len(listReq.valuesToAppend) != 0 { 1307 list.Values = agonesv1.MergeRemoveDuplicates(list.Values, listReq.valuesToAppend) 1308 } 1309 1310 if int64(len(list.Values)) > list.Capacity { 1311 s.logger.Debugf("truncating Values in Update List request to List Capacity %d", list.Capacity) 1312 list.Values = append([]string{}, list.Values[:list.Capacity]...) 1313 } 1314 gsCopy.Status.Lists[name] = list 1315 names = append(names, name) 1316 } 1317 1318 gs, err = s.patchGameServer(ctx, gs, gsCopy) 1319 if err != nil { 1320 return err 1321 } 1322 1323 // Record an event per List update 1324 for _, name := range names { 1325 s.recorder.Event(gs, corev1.EventTypeNormal, "UpdateList", fmt.Sprintf("List %s updated", name)) 1326 s.logger.Debugf("List %s updated to List Capacity: %d, Values: %v", 1327 name, gs.Status.Lists[name].Capacity, gs.Status.Lists[name].Values) 1328 } 1329 1330 // Cache a copy of the successfully updated gameserver 1331 s.gsCopy = gs 1332 // Clear the gsListUpdates 1333 s.gsListUpdates = map[string]listUpdateRequest{} 1334 1335 return nil 1336 } 1337 1338 // Returns a new string list with the string keys in toDeleteValues removed from valuesList. 1339 func deleteValues(valuesList []string, toDeleteValues map[string]bool) []string { 1340 newValuesList := []string{} 1341 for _, value := range valuesList { 1342 if _, ok := toDeleteValues[value]; ok { 1343 continue 1344 } 1345 newValuesList = append(newValuesList, value) 1346 } 1347 return newValuesList 1348 } 1349 1350 // sendGameServerUpdate sends a watch game server event 1351 func (s *SDKServer) sendGameServerUpdate(gs *agonesv1.GameServer) { 1352 s.logger.Debug("Sending GameServer Event to connectedStreams") 1353 1354 s.streamMutex.Lock() 1355 defer s.streamMutex.Unlock() 1356 1357 // Filter the slice of streams sharing the same backing array and capacity as the original 1358 // so that storage is reused and no memory allocations are made. This modifies the original 1359 // slice. 1360 // 1361 // See https://go.dev/wiki/SliceTricks#filtering-without-allocating 1362 remainingStreams := s.connectedStreams[:0] 1363 for _, stream := range s.connectedStreams { 1364 select { 1365 case <-stream.Context().Done(): 1366 s.logger.Debug("Dropping stream") 1367 1368 err := stream.Context().Err() 1369 switch { 1370 case err != nil: 1371 s.logger.WithError(errors.WithStack(err)).Error("stream closed with error") 1372 default: 1373 s.logger.Debug("Stream closed") 1374 } 1375 default: 1376 s.logger.Debug("Keeping stream") 1377 remainingStreams = append(remainingStreams, stream) 1378 1379 if err := stream.Send(convert(gs)); err != nil { 1380 s.logger.WithError(errors.WithStack(err)). 1381 Error("error sending game server update event") 1382 } 1383 } 1384 } 1385 s.connectedStreams = remainingStreams 1386 1387 if gs.Status.State == agonesv1.GameServerStateShutdown { 1388 // Wrap this in a go func(), just in case pushing to this channel deadlocks since there is only one instance of 1389 // a receiver. In theory, This could leak goroutines a bit, but since we're shuttling down everything anyway, 1390 // it shouldn't matter. 1391 go func() { 1392 s.gsStateChannel <- agonesv1.GameServerStateShutdown 1393 }() 1394 } 1395 } 1396 1397 // checkHealthUpdateState checks the health as part of the /gshealthz hook, and if not 1398 // healthy will push the Unhealthy state into the queue so it can be updated. 1399 func (s *SDKServer) checkHealthUpdateState() { 1400 s.checkHealth() 1401 if !s.healthy() { 1402 s.logger.WithField("gameServerName", s.gameServerName).Warn("GameServer has failed health check") 1403 s.enqueueState(agonesv1.GameServerStateUnhealthy) 1404 } 1405 } 1406 1407 // touchHealthLastUpdated sets the healthLastUpdated 1408 // value to now in UTC 1409 func (s *SDKServer) touchHealthLastUpdated() { 1410 s.healthMutex.Lock() 1411 defer s.healthMutex.Unlock() 1412 s.healthLastUpdated = s.clock.Now().UTC() 1413 s.healthFailureCount = 0 1414 } 1415 1416 func (s *SDKServer) ensureHealthChecksRunning() { 1417 if s.health.Disabled { 1418 return 1419 } 1420 s.healthChecksRunning.Do(func() { 1421 // start health checking running 1422 s.logger.Debug("Starting GameServer health checking") 1423 go wait.Until(s.checkHealthUpdateState, s.healthTimeout, s.ctx.Done()) 1424 }) 1425 } 1426 1427 // checkHealth checks the healthLastUpdated value 1428 // and if it is outside the timeout value, logger and 1429 // count a failure 1430 func (s *SDKServer) checkHealth() { 1431 s.healthMutex.Lock() 1432 defer s.healthMutex.Unlock() 1433 1434 timeout := s.healthLastUpdated.Add(s.healthTimeout) 1435 if timeout.Before(s.clock.Now().UTC()) { 1436 s.healthFailureCount++ 1437 s.logger.WithField("failureCount", s.healthFailureCount).Warn("GameServer Health Check failed") 1438 } 1439 } 1440 1441 // healthy returns if the GameServer is 1442 // currently healthy or not based on the configured 1443 // failure count vs failure threshold 1444 func (s *SDKServer) healthy() bool { 1445 if s.health.Disabled { 1446 return true 1447 } 1448 1449 s.healthMutex.RLock() 1450 defer s.healthMutex.RUnlock() 1451 return s.healthFailureCount < s.health.FailureThreshold 1452 } 1453 1454 // updatePlayerCapacity updates the Player Capacity field in the GameServer's Status. 1455 func (s *SDKServer) updatePlayerCapacity(ctx context.Context) error { 1456 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 1457 return errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 1458 } 1459 s.logger.WithField("capacity", s.gsPlayerCapacity).Debug("updating player capacity") 1460 gs, err := s.gameServer() 1461 if err != nil { 1462 return err 1463 } 1464 1465 gsCopy := gs.DeepCopy() 1466 1467 s.gsUpdateMutex.RLock() 1468 gsCopy.Status.Players.Capacity = s.gsPlayerCapacity 1469 s.gsUpdateMutex.RUnlock() 1470 1471 gs, err = s.patchGameServer(ctx, gs, gsCopy) 1472 if err == nil { 1473 s.recorder.Event(gs, corev1.EventTypeNormal, "PlayerCapacity", fmt.Sprintf("Set to %d", gs.Status.Players.Capacity)) 1474 } 1475 1476 return err 1477 } 1478 1479 // updateConnectedPlayers updates the Player IDs and Count fields in the GameServer's Status. 1480 func (s *SDKServer) updateConnectedPlayers(ctx context.Context) error { 1481 if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) { 1482 return errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking) 1483 } 1484 gs, err := s.gameServer() 1485 if err != nil { 1486 return err 1487 } 1488 1489 gsCopy := gs.DeepCopy() 1490 same := false 1491 s.gsUpdateMutex.RLock() 1492 s.logger.WithField("playerIDs", s.gsConnectedPlayers).Debug("updating connected players") 1493 same = apiequality.Semantic.DeepEqual(gsCopy.Status.Players.IDs, s.gsConnectedPlayers) 1494 gsCopy.Status.Players.IDs = s.gsConnectedPlayers 1495 gsCopy.Status.Players.Count = int64(len(s.gsConnectedPlayers)) 1496 s.gsUpdateMutex.RUnlock() 1497 // if there is no change, then don't update 1498 // since it's possible this could fire quite a lot, let's reduce the 1499 // amount of requests as much as possible. 1500 if same { 1501 return nil 1502 } 1503 1504 gs, err = s.patchGameServer(ctx, gs, gsCopy) 1505 if err == nil { 1506 s.recorder.Event(gs, corev1.EventTypeNormal, "PlayerCount", fmt.Sprintf("Set to %d", gs.Status.Players.Count)) 1507 } 1508 1509 return err 1510 } 1511 1512 // NewSDKServerContext returns a Context that cancels when SIGTERM or os.Interrupt 1513 // is received and the GameServer's Status is shutdown 1514 func (s *SDKServer) NewSDKServerContext(ctx context.Context) context.Context { 1515 sdkCtx, cancel := context.WithCancel(context.Background()) 1516 go func() { 1517 <-ctx.Done() 1518 1519 keepWaiting := true 1520 s.gsUpdateMutex.RLock() 1521 if len(s.gsState) > 0 { 1522 s.logger.WithField("state", s.gsState).Info("SDK server shutdown requested, waiting for game server shutdown") 1523 } else { 1524 s.logger.Info("SDK server state never updated by game server, shutting down sdk server without waiting") 1525 keepWaiting = false 1526 } 1527 s.gsUpdateMutex.RUnlock() 1528 1529 for keepWaiting { 1530 gsState := <-s.gsStateChannel 1531 if gsState == agonesv1.GameServerStateShutdown { 1532 keepWaiting = false 1533 } 1534 } 1535 1536 cancel() 1537 }() 1538 return sdkCtx 1539 } 1540 1541 func (s *SDKServer) gsListUpdatesLen() int { 1542 s.gsUpdateMutex.RLock() 1543 defer s.gsUpdateMutex.RUnlock() 1544 return len(s.gsListUpdates) 1545 }