agones.dev/agones@v1.54.0/pkg/sdkserver/sdkserver.go (about)

     1  // Copyright 2018 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package sdkserver
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"net/http"
    22  	"slices"
    23  	"strings"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/mennanov/fmutils"
    28  	"github.com/pkg/errors"
    29  	"github.com/sirupsen/logrus"
    30  	corev1 "k8s.io/api/core/v1"
    31  	apiequality "k8s.io/apimachinery/pkg/api/equality"
    32  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    33  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    34  	"k8s.io/apimachinery/pkg/fields"
    35  	"k8s.io/apimachinery/pkg/types"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	"k8s.io/client-go/kubernetes"
    38  	"k8s.io/client-go/kubernetes/scheme"
    39  	k8sv1 "k8s.io/client-go/kubernetes/typed/core/v1"
    40  	"k8s.io/client-go/tools/cache"
    41  	"k8s.io/client-go/tools/record"
    42  	"k8s.io/utils/clock"
    43  
    44  	"agones.dev/agones/pkg/apis/agones"
    45  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    46  	"agones.dev/agones/pkg/client/clientset/versioned"
    47  	typedv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1"
    48  	"agones.dev/agones/pkg/client/informers/externalversions"
    49  	listersv1 "agones.dev/agones/pkg/client/listers/agones/v1"
    50  	"agones.dev/agones/pkg/gameserverallocations"
    51  	"agones.dev/agones/pkg/sdk"
    52  	"agones.dev/agones/pkg/sdk/alpha"
    53  	"agones.dev/agones/pkg/sdk/beta"
    54  	"agones.dev/agones/pkg/util/apiserver"
    55  	"agones.dev/agones/pkg/util/logfields"
    56  	"agones.dev/agones/pkg/util/runtime"
    57  	"agones.dev/agones/pkg/util/workerqueue"
    58  )
    59  
// Operation is a synchronisation action. Its string value is what gets
// enqueued on the workerqueue (see enqueueState) and what syncGameServer
// switches on to pick the matching update method.
type Operation string

// The Operation values below are the keys syncGameServer knows how to handle.
// updatePeriod is the delay handed to EnqueueAfter when a player connect or
// disconnect is queued.
const (
	updateState            Operation     = "updateState"
	updateLabel            Operation     = "updateLabel"
	updateAnnotation       Operation     = "updateAnnotation"
	updatePlayerCapacity   Operation     = "updatePlayerCapacity"
	updateConnectedPlayers Operation     = "updateConnectedPlayers"
	updateCounters         Operation     = "updateCounters"
	updateLists            Operation     = "updateLists"
	updatePeriod           time.Duration = time.Second
)
    73  
// Compile-time checks that *SDKServer satisfies the stable, alpha and beta
// SDKServer interfaces.
var (
	_ sdk.SDKServer   = &SDKServer{}
	_ alpha.SDKServer = &SDKServer{}
	_ beta.SDKServer  = &SDKServer{}
)
    79  
// counterUpdateRequest holds the changes requested for one Counter. SDKServer
// stores these in gsCounterUpdates, keyed by string (presumably the Counter
// name — confirm against the code that fills the map).
type counterUpdateRequest struct {
	// Capacity of the Counter as set by capacitySet.
	capacitySet *int64
	// Count of the Counter as set by countSet.
	countSet *int64
	// Tracks the sum of CountIncrement, CountDecrement, and/or CountSet requests from the client SDK.
	diff int64
	// Counter as retrieved from the GameServer
	counter agonesv1.CounterStatus
}
    90  
// listUpdateRequest holds the changes requested for one List. SDKServer stores
// these in gsListUpdates, keyed by string (presumably the List name — confirm
// against the code that fills the map).
type listUpdateRequest struct {
	// Capacity of the List as set by capacitySet.
	capacitySet *int64
	// String keys are the Values to remove from the List
	valuesToDelete map[string]bool
	// Values to add to the List
	valuesToAppend []string
}
    99  
// SDKServer is a gRPC server, that is meant to be a sidecar
// for a GameServer that will update the game server status on SDK requests
//
// SDK calls store the requested value on the struct (gsState, gsLabels,
// gsAnnotations, gsPlayerCapacity, gsConnectedPlayers, ...) and enqueue an
// Operation; syncGameServer later applies the stored value to the GameServer.
//
// Locking, as used by the methods in this file:
//   - gsUpdateMutex guards gsState, gsLabels, gsAnnotations, gsReserveDuration,
//     reserveTimer, gsPlayerCapacity, gsConnectedPlayers and gsCopy.
//   - streamMutex guards connectedStreams.
//   - gsWaitForSync is incremented in NewSDKServer and released in Run once the
//     informer cache has synced; gameServer() waits on it.
//   - healthMutex presumably guards the health* fields — TODO confirm against
//     the health check code.
//
// ctx is assigned in Run and read by WatchGameServer.
//
//nolint:govet // ignore fieldalignment, singleton
type SDKServer struct {
	logger              *logrus.Entry
	gameServerName      string
	namespace           string
	informerFactory     externalversions.SharedInformerFactory
	gameServerGetter    typedv1.GameServersGetter
	gameServerLister    listersv1.GameServerLister
	gameServerSynced    cache.InformerSynced
	connected           bool
	server              *http.Server
	clock               clock.Clock
	health              agonesv1.Health
	healthTimeout       time.Duration
	healthMutex         sync.RWMutex
	healthLastUpdated   time.Time
	healthFailureCount  int32
	healthChecksRunning sync.Once
	workerqueue         *workerqueue.WorkerQueue
	streamMutex         sync.RWMutex
	connectedStreams    []sdk.SDK_WatchGameServerServer
	ctx                 context.Context
	recorder            record.EventRecorder
	gsLabels            map[string]string
	gsAnnotations       map[string]string
	gsState             agonesv1.GameServerState
	gsStateChannel      chan agonesv1.GameServerState
	gsUpdateMutex       sync.RWMutex
	gsWaitForSync       sync.WaitGroup
	reserveTimer        *time.Timer
	gsReserveDuration   *time.Duration
	gsPlayerCapacity    int64
	gsConnectedPlayers  []string
	gsCounterUpdates    map[string]counterUpdateRequest
	gsListUpdates       map[string]listUpdateRequest
	gsCopy              *agonesv1.GameServer
}
   140  
   141  // NewSDKServer creates a SDKServer that sets up an
   142  // InClusterConfig for Kubernetes
   143  func NewSDKServer(gameServerName, namespace string, kubeClient kubernetes.Interface,
   144  	agonesClient versioned.Interface, logLevel logrus.Level, healthPort int, requestsRateLimit time.Duration) (*SDKServer, error) {
   145  	mux := http.NewServeMux()
   146  	resync := 0 * time.Second
   147  
   148  	// limit the informer to only working with the gameserver that the sdk is attached to
   149  	tweakListOptions := func(opts *metav1.ListOptions) {
   150  		s1 := fields.OneTermEqualSelector("metadata.name", gameServerName)
   151  		opts.FieldSelector = s1.String()
   152  	}
   153  	factory := externalversions.NewSharedInformerFactoryWithOptions(agonesClient, resync, externalversions.WithNamespace(namespace), externalversions.WithTweakListOptions(tweakListOptions))
   154  	gameServers := factory.Agones().V1().GameServers()
   155  
   156  	s := &SDKServer{
   157  		gameServerName:   gameServerName,
   158  		namespace:        namespace,
   159  		gameServerGetter: agonesClient.AgonesV1(),
   160  		gameServerLister: gameServers.Lister(),
   161  		gameServerSynced: gameServers.Informer().HasSynced,
   162  		server: &http.Server{
   163  			Addr:    fmt.Sprintf(":%d", healthPort),
   164  			Handler: mux,
   165  		},
   166  		clock:              clock.RealClock{},
   167  		healthMutex:        sync.RWMutex{},
   168  		healthFailureCount: 0,
   169  		streamMutex:        sync.RWMutex{},
   170  		gsLabels:           map[string]string{},
   171  		gsAnnotations:      map[string]string{},
   172  		gsUpdateMutex:      sync.RWMutex{},
   173  		gsWaitForSync:      sync.WaitGroup{},
   174  		gsConnectedPlayers: []string{},
   175  		gsStateChannel:     make(chan agonesv1.GameServerState, 2),
   176  	}
   177  
   178  	if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   179  		// Once FeatureCountsAndLists is in GA, move this into SDKServer creation above.
   180  		s.gsCounterUpdates = map[string]counterUpdateRequest{}
   181  		s.gsListUpdates = map[string]listUpdateRequest{}
   182  	}
   183  
   184  	s.informerFactory = factory
   185  	s.logger = runtime.NewLoggerWithType(s).WithField("gsKey", namespace+"/"+gameServerName)
   186  	s.logger.Logger.SetLevel(logLevel)
   187  
   188  	_, _ = gameServers.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   189  		UpdateFunc: func(_, newObj interface{}) {
   190  			gs := newObj.(*agonesv1.GameServer)
   191  			s.sendGameServerUpdate(gs)
   192  		},
   193  	})
   194  
   195  	eventBroadcaster := record.NewBroadcaster()
   196  	eventBroadcaster.StartLogging(s.logger.Debugf)
   197  	eventBroadcaster.StartRecordingToSink(&k8sv1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
   198  	s.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "gameserver-sidecar"})
   199  
   200  	mux.HandleFunc("/healthz", func(w http.ResponseWriter, _ *http.Request) {
   201  		_, err := w.Write([]byte("ok"))
   202  		if err != nil {
   203  			s.logger.WithError(err).Error("could not send ok response on healthz")
   204  			w.WriteHeader(http.StatusInternalServerError)
   205  		}
   206  	})
   207  	mux.HandleFunc("/gshealthz", func(w http.ResponseWriter, _ *http.Request) {
   208  		s.ensureHealthChecksRunning()
   209  		if s.healthy() {
   210  			_, err := w.Write([]byte("ok"))
   211  			if err != nil {
   212  				s.logger.WithError(err).Error("could not send ok response on gshealthz")
   213  				w.WriteHeader(http.StatusInternalServerError)
   214  			}
   215  		} else {
   216  			w.WriteHeader(http.StatusInternalServerError)
   217  		}
   218  	})
   219  
   220  	// we haven't synced yet
   221  	s.gsWaitForSync.Add(1)
   222  	s.workerqueue = workerqueue.NewWorkerQueueWithRateLimiter(
   223  		s.syncGameServer,
   224  		s.logger,
   225  		logfields.GameServerKey,
   226  		strings.Join([]string{agones.GroupName, s.namespace, s.gameServerName}, "."),
   227  		workerqueue.ConstantRateLimiter(requestsRateLimit))
   228  
   229  	s.logger.Info("Created GameServer sidecar")
   230  
   231  	return s, nil
   232  }
   233  
   234  // Run processes the rate limited queue.
   235  // Will block until stop is closed
   236  func (s *SDKServer) Run(ctx context.Context) error {
   237  	s.informerFactory.Start(ctx.Done())
   238  	if !cache.WaitForCacheSync(ctx.Done(), s.gameServerSynced) {
   239  		return errors.New("failed to wait for caches to sync")
   240  	}
   241  
   242  	// need this for streaming gRPC commands
   243  	s.ctx = ctx
   244  	// we have the gameserver details now
   245  	s.gsWaitForSync.Done()
   246  
   247  	gs, err := s.gameServer()
   248  	if err != nil {
   249  		return err
   250  	}
   251  
   252  	s.health = gs.Spec.Health
   253  	s.logger.WithField("health", s.health).Debug("Setting health configuration")
   254  	s.healthTimeout = time.Duration(gs.Spec.Health.PeriodSeconds) * time.Second
   255  	s.touchHealthLastUpdated()
   256  
   257  	if gs.Status.State == agonesv1.GameServerStateReserved && gs.Status.ReservedUntil != nil {
   258  		s.gsUpdateMutex.Lock()
   259  		s.resetReserveAfter(context.Background(), time.Until(gs.Status.ReservedUntil.Time))
   260  		s.gsUpdateMutex.Unlock()
   261  	}
   262  
   263  	// populate player tracking values
   264  	if runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   265  		s.gsUpdateMutex.Lock()
   266  		if gs.Status.Players != nil {
   267  			s.gsPlayerCapacity = gs.Status.Players.Capacity
   268  			s.gsConnectedPlayers = gs.Status.Players.IDs
   269  		}
   270  		s.gsUpdateMutex.Unlock()
   271  	}
   272  
   273  	// then start the http endpoints
   274  	s.logger.Debug("Starting SDKServer http health check...")
   275  	go func() {
   276  		if err := s.server.ListenAndServe(); err != nil {
   277  			if err == http.ErrServerClosed {
   278  				s.logger.WithError(err).Error("Health check: http server closed")
   279  			} else {
   280  				err = errors.Wrap(err, "Could not listen on :8080")
   281  				runtime.HandleError(s.logger.WithError(err), err)
   282  			}
   283  		}
   284  	}()
   285  	defer s.server.Close() // nolint: errcheck
   286  
   287  	s.workerqueue.Run(ctx, 1)
   288  	return nil
   289  }
   290  
   291  // WaitForConnection attempts a GameServer GET every 3s until the client responds.
   292  // This is a workaround for the informer hanging indefinitely on first LIST due
   293  // to a flaky network to the Kubernetes service endpoint.
   294  func (s *SDKServer) WaitForConnection(ctx context.Context) error {
   295  	// In normal operaiton, waitForConnection is called exactly once in Run().
   296  	// In unit tests, waitForConnection() can be called before Run() to ensure
   297  	// that connected is true when Run() is called, otherwise the List() below
   298  	// may race with a test that changes a mock. (Despite the fact that we drop
   299  	// the data on the ground, the Go race detector will pereceive a data race.)
   300  	if s.connected {
   301  		return nil
   302  	}
   303  
   304  	try := 0
   305  	return wait.PollUntilContextCancel(ctx, 4*time.Second, true, func(ctx context.Context) (bool, error) {
   306  		ctx, cancel := context.WithTimeout(ctx, 3*time.Second)
   307  		defer cancel()
   308  
   309  		// Specifically use gameServerGetter since it's the raw client (gameServerLister is the informer).
   310  		// We use List here to avoid needing permission to Get().
   311  		_, err := s.gameServerGetter.GameServers(s.namespace).List(ctx, metav1.ListOptions{
   312  			FieldSelector: fields.OneTermEqualSelector("metadata.name", s.gameServerName).String(),
   313  		})
   314  		if err != nil {
   315  			s.logger.WithField("try", try).WithError(err).Info("Connection to Kubernetes service failed")
   316  			try++
   317  			return false, nil
   318  		}
   319  		s.logger.WithField("try", try).Info("Connection to Kubernetes service established")
   320  		s.connected = true
   321  		return true, nil
   322  	})
   323  }
   324  
   325  // syncGameServer synchronises the GameServer with the requested operations.
   326  // The format of the key is {operation}. To prevent old operation data from
   327  // overwriting the new one, the operation data is persisted in SDKServer.
   328  func (s *SDKServer) syncGameServer(ctx context.Context, key string) error {
   329  	switch Operation(key) {
   330  	case updateState:
   331  		return s.updateState(ctx)
   332  	case updateLabel:
   333  		return s.updateLabels(ctx)
   334  	case updateAnnotation:
   335  		return s.updateAnnotations(ctx)
   336  	case updatePlayerCapacity:
   337  		return s.updatePlayerCapacity(ctx)
   338  	case updateConnectedPlayers:
   339  		return s.updateConnectedPlayers(ctx)
   340  	case updateCounters:
   341  		return s.updateCounter(ctx)
   342  	case updateLists:
   343  		return s.updateList(ctx)
   344  	}
   345  
   346  	return errors.Errorf("could not sync game server key: %s", key)
   347  }
   348  
   349  // updateState sets the GameServer Status's state to the one persisted in SDKServer,
   350  // i.e. SDKServer.gsState.
   351  func (s *SDKServer) updateState(ctx context.Context) error {
   352  	s.gsUpdateMutex.RLock()
   353  	s.logger.WithField("state", s.gsState).Debug("Updating state")
   354  	if len(s.gsState) == 0 {
   355  		s.gsUpdateMutex.RUnlock()
   356  		return errors.Errorf("could not update GameServer %s/%s to empty state", s.namespace, s.gameServerName)
   357  	}
   358  	s.gsUpdateMutex.RUnlock()
   359  
   360  	gs, err := s.gameServer()
   361  	if err != nil {
   362  		return err
   363  	}
   364  
   365  	// If we are currently in shutdown/being deleted, there is no escaping.
   366  	if gs.IsBeingDeleted() {
   367  		s.logger.Debug("GameServerState being shutdown. Skipping update.")
   368  
   369  		// Explicitly update gsStateChannel if current state is Shutdown since sendGameServerUpdate will not triggered.
   370  		if s.gsState == agonesv1.GameServerStateShutdown && gs.Status.State != agonesv1.GameServerStateShutdown {
   371  			go func() {
   372  				s.gsStateChannel <- agonesv1.GameServerStateShutdown
   373  			}()
   374  		}
   375  
   376  		return nil
   377  	}
   378  
   379  	// If the state is currently unhealthy, you can't go back to Ready.
   380  	if gs.Status.State == agonesv1.GameServerStateUnhealthy {
   381  		s.logger.Debug("GameServerState already unhealthy. Skipping update.")
   382  		return nil
   383  	}
   384  
   385  	s.gsUpdateMutex.RLock()
   386  	gsCopy := gs.DeepCopy()
   387  	gsCopy.Status.State = s.gsState
   388  
   389  	// If we are setting the Reserved status, check for the duration, and set that too.
   390  	if gsCopy.Status.State == agonesv1.GameServerStateReserved && s.gsReserveDuration != nil {
   391  		n := metav1.NewTime(time.Now().Add(*s.gsReserveDuration))
   392  		gsCopy.Status.ReservedUntil = &n
   393  	} else {
   394  		gsCopy.Status.ReservedUntil = nil
   395  	}
   396  	s.gsUpdateMutex.RUnlock()
   397  
   398  	// If we are setting the Allocated status, set the last-allocated annotation as well.
   399  	if gsCopy.Status.State == agonesv1.GameServerStateAllocated {
   400  		ts, err := s.clock.Now().MarshalText()
   401  		if err != nil {
   402  			return err
   403  		}
   404  		if gsCopy.ObjectMeta.Annotations == nil {
   405  			gsCopy.ObjectMeta.Annotations = map[string]string{}
   406  		}
   407  		gsCopy.ObjectMeta.Annotations[gameserverallocations.LastAllocatedAnnotationKey] = string(ts)
   408  	}
   409  
   410  	gs, err = s.patchGameServer(ctx, gs, gsCopy)
   411  	if err != nil {
   412  		return errors.Wrapf(err, "could not update GameServer %s/%s to state %s", s.namespace, s.gameServerName, gsCopy.Status.State)
   413  	}
   414  
   415  	message := "SDK state change"
   416  	level := corev1.EventTypeNormal
   417  	// post state specific work here
   418  	switch gs.Status.State {
   419  	case agonesv1.GameServerStateUnhealthy:
   420  		level = corev1.EventTypeWarning
   421  		message = "Health check failure"
   422  	case agonesv1.GameServerStateReserved:
   423  		s.gsUpdateMutex.Lock()
   424  		if s.gsReserveDuration != nil {
   425  			message += fmt.Sprintf(", for %s", s.gsReserveDuration)
   426  			s.resetReserveAfter(context.Background(), *s.gsReserveDuration)
   427  		}
   428  		s.gsUpdateMutex.Unlock()
   429  	}
   430  
   431  	s.recorder.Event(gs, level, string(gs.Status.State), message)
   432  
   433  	return nil
   434  }
   435  
   436  // Gets the GameServer from the cache, or from the local SDKServer if that version is more recent.
   437  func (s *SDKServer) gameServer() (*agonesv1.GameServer, error) {
   438  	// this ensure that if we get requests for the gameserver before the cache has been synced,
   439  	// they will block here until it's ready
   440  	s.gsWaitForSync.Wait()
   441  	gs, err := s.gameServerLister.GameServers(s.namespace).Get(s.gameServerName)
   442  	if err != nil {
   443  		return gs, errors.Wrapf(err, "could not retrieve GameServer %s/%s", s.namespace, s.gameServerName)
   444  	}
   445  	s.gsUpdateMutex.RLock()
   446  	defer s.gsUpdateMutex.RUnlock()
   447  	if s.gsCopy != nil && gs.ObjectMeta.Generation < s.gsCopy.Generation {
   448  		return s.gsCopy, nil
   449  	}
   450  	return gs, nil
   451  }
   452  
   453  // patchGameServer is a helper function to create and apply a patch update, so the changes in
   454  // gsCopy are applied to the original gs.
   455  func (s *SDKServer) patchGameServer(ctx context.Context, gs, gsCopy *agonesv1.GameServer) (*agonesv1.GameServer, error) {
   456  	patch, err := gs.Patch(gsCopy)
   457  	if err != nil {
   458  		return nil, err
   459  	}
   460  
   461  	gs, err = s.gameServerGetter.GameServers(s.namespace).Patch(ctx, gs.GetObjectMeta().GetName(), types.JSONPatchType, patch, metav1.PatchOptions{})
   462  	// if the test operation fails, no reason to error log
   463  	if err != nil && k8serrors.IsInvalid(err) {
   464  		err = workerqueue.NewTraceError(err)
   465  	}
   466  	return gs, errors.Wrapf(err, "error attempting to patch gameserver: %s/%s", gsCopy.ObjectMeta.Namespace, gsCopy.ObjectMeta.Name)
   467  }
   468  
   469  // updateLabels updates the labels on this GameServer to the ones persisted in SDKServer,
   470  // i.e. SDKServer.gsLabels, with the prefix of "agones.dev/sdk-"
   471  func (s *SDKServer) updateLabels(ctx context.Context) error {
   472  	s.logger.WithField("labels", s.gsLabels).Debug("Updating label")
   473  	gs, err := s.gameServer()
   474  	if err != nil {
   475  		return err
   476  	}
   477  
   478  	gsCopy := gs.DeepCopy()
   479  
   480  	s.gsUpdateMutex.RLock()
   481  	if len(s.gsLabels) > 0 && gsCopy.ObjectMeta.Labels == nil {
   482  		gsCopy.ObjectMeta.Labels = map[string]string{}
   483  	}
   484  	for k, v := range s.gsLabels {
   485  		gsCopy.ObjectMeta.Labels[metadataPrefix+k] = v
   486  	}
   487  	s.gsUpdateMutex.RUnlock()
   488  
   489  	_, err = s.patchGameServer(ctx, gs, gsCopy)
   490  	return err
   491  }
   492  
   493  // updateAnnotations updates the Annotations on this GameServer to the ones persisted in SDKServer,
   494  // i.e. SDKServer.gsAnnotations, with the prefix of "agones.dev/sdk-"
   495  func (s *SDKServer) updateAnnotations(ctx context.Context) error {
   496  	s.logger.WithField("annotations", s.gsAnnotations).Debug("Updating annotation")
   497  	gs, err := s.gameServer()
   498  	if err != nil {
   499  		return err
   500  	}
   501  
   502  	gsCopy := gs.DeepCopy()
   503  
   504  	s.gsUpdateMutex.RLock()
   505  	if len(s.gsAnnotations) > 0 && gsCopy.ObjectMeta.Annotations == nil {
   506  		gsCopy.ObjectMeta.Annotations = map[string]string{}
   507  	}
   508  	for k, v := range s.gsAnnotations {
   509  		gsCopy.ObjectMeta.Annotations[metadataPrefix+k] = v
   510  	}
   511  	s.gsUpdateMutex.RUnlock()
   512  
   513  	_, err = s.patchGameServer(ctx, gs, gsCopy)
   514  	return err
   515  }
   516  
   517  // enqueueState enqueue a State change request into the
   518  // workerqueue
   519  func (s *SDKServer) enqueueState(state agonesv1.GameServerState) {
   520  	s.gsUpdateMutex.Lock()
   521  	// Update cached state, but prevent transitions out of `Unhealthy` by the SDK.
   522  	if s.gsState != agonesv1.GameServerStateUnhealthy {
   523  		s.gsState = state
   524  	}
   525  	s.gsUpdateMutex.Unlock()
   526  	s.workerqueue.Enqueue(cache.ExplicitKey(string(updateState)))
   527  }
   528  
// Ready enters the RequestReady state change for this GameServer into
// the workqueue so it can be updated. Any pending reserve timer is stopped
// first. The Empty that was passed in is returned as the response; the error
// is always nil.
func (s *SDKServer) Ready(_ context.Context, e *sdk.Empty) (*sdk.Empty, error) {
	s.logger.Debug("Received Ready request, adding to queue")
	s.stopReserveTimer()
	s.enqueueState(agonesv1.GameServerStateRequestReady)
	return e, nil
}
   537  
// Allocate enters an Allocate state change into the workqueue, so it can be updated.
// Any pending reserve timer is stopped first. The Empty that was passed in is
// returned as the response; the error is always nil.
func (s *SDKServer) Allocate(_ context.Context, e *sdk.Empty) (*sdk.Empty, error) {
	s.stopReserveTimer()
	s.enqueueState(agonesv1.GameServerStateAllocated)
	return e, nil
}
   544  
// Shutdown enters the Shutdown state change for this GameServer into
// the workqueue so it can be updated. Any pending reserve timer is stopped
// first. This method only queues the change and returns straight away; it does
// not itself wait for the GameServer to reach Shutdown. The Empty that was
// passed in is returned as the response; the error is always nil.
func (s *SDKServer) Shutdown(_ context.Context, e *sdk.Empty) (*sdk.Empty, error) {
	s.logger.Debug("Received Shutdown request, adding to queue")
	s.stopReserveTimer()
	s.enqueueState(agonesv1.GameServerStateShutdown)

	return e, nil
}
   555  
   556  // Health receives each health ping, and tracks the last time the health
   557  // check was received, to track if a GameServer is healthy
   558  func (s *SDKServer) Health(stream sdk.SDK_HealthServer) error {
   559  	for {
   560  		_, err := stream.Recv()
   561  		if err == io.EOF {
   562  			s.logger.Debug("Health stream closed.")
   563  			return stream.SendAndClose(&sdk.Empty{})
   564  		}
   565  		if err != nil {
   566  			return errors.Wrap(err, "Error with Health check")
   567  		}
   568  		s.logger.Debug("Health Ping Received")
   569  		s.touchHealthLastUpdated()
   570  	}
   571  }
   572  
   573  // SetLabel adds the Key/Value to be used to set the label with the metadataPrefix to the `GameServer`
   574  // metdata
   575  func (s *SDKServer) SetLabel(_ context.Context, kv *sdk.KeyValue) (*sdk.Empty, error) {
   576  	s.logger.WithField("values", kv).Debug("Adding SetLabel to queue")
   577  
   578  	s.gsUpdateMutex.Lock()
   579  	s.gsLabels[kv.Key] = kv.Value
   580  	s.gsUpdateMutex.Unlock()
   581  
   582  	s.workerqueue.Enqueue(cache.ExplicitKey(string(updateLabel)))
   583  	return &sdk.Empty{}, nil
   584  }
   585  
   586  // SetAnnotation adds the Key/Value to be used to set the annotations with the metadataPrefix to the `GameServer`
   587  // metdata
   588  func (s *SDKServer) SetAnnotation(_ context.Context, kv *sdk.KeyValue) (*sdk.Empty, error) {
   589  	s.logger.WithField("values", kv).Debug("Adding SetAnnotation to queue")
   590  
   591  	s.gsUpdateMutex.Lock()
   592  	s.gsAnnotations[kv.Key] = kv.Value
   593  	s.gsUpdateMutex.Unlock()
   594  
   595  	s.workerqueue.Enqueue(cache.ExplicitKey(string(updateAnnotation)))
   596  	return &sdk.Empty{}, nil
   597  }
   598  
// GetGameServer returns the current GameServer configuration and state from the backing GameServer CRD.
// The GameServer comes from s.gameServer(), which waits for the informer cache
// to sync first, and is passed through convert to build the SDK response.
// Any error from retrieving the GameServer is returned as is.
func (s *SDKServer) GetGameServer(context.Context, *sdk.Empty) (*sdk.GameServer, error) {
	s.logger.Debug("Received GetGameServer request")
	gs, err := s.gameServer()
	if err != nil {
		return nil, err
	}
	return convert(gs), nil
}
   608  
// WatchGameServer sends events through the stream when changes occur to the
// backing GameServer configuration / status.
//
// The current GameServer is sent once up front; only if that succeeds is the
// stream added to connectedStreams. The call then blocks until s.ctx (the
// context given to Run) is done and returns nil. An error is returned only
// when the initial fetch or send fails.
//
// NOTE(review): the stream's own context is not watched here, so a client that
// goes away does not end this call; presumably sendGameServerUpdate deals with
// streams that can no longer be written to — confirm.
func (s *SDKServer) WatchGameServer(_ *sdk.Empty, stream sdk.SDK_WatchGameServerServer) error {
	s.logger.Debug("Received WatchGameServer request, adding stream to connectedStreams")

	gs, err := s.GetGameServer(context.Background(), &sdk.Empty{})
	if err != nil {
		return err
	}

	if err := stream.Send(gs); err != nil {
		return err
	}

	// connectedStreams is shared with other goroutines, so append under streamMutex
	s.streamMutex.Lock()
	s.connectedStreams = append(s.connectedStreams, stream)
	s.streamMutex.Unlock()
	// don't exit until we shutdown, because that will close the stream
	<-s.ctx.Done()
	return nil
}
   630  
   631  // Reserve moves this GameServer to the Reserved state for the Duration specified
   632  func (s *SDKServer) Reserve(_ context.Context, d *sdk.Duration) (*sdk.Empty, error) {
   633  	s.stopReserveTimer()
   634  
   635  	e := &sdk.Empty{}
   636  
   637  	// 0 is forever.
   638  	if d.Seconds > 0 {
   639  		duration := time.Duration(d.Seconds) * time.Second
   640  		s.gsUpdateMutex.Lock()
   641  		s.gsReserveDuration = &duration
   642  		s.gsUpdateMutex.Unlock()
   643  	}
   644  
   645  	s.logger.Debug("Received Reserve request, adding to queue")
   646  	s.enqueueState(agonesv1.GameServerStateReserved)
   647  
   648  	return e, nil
   649  }
   650  
   651  // resetReserveAfter will move the GameServer back to being ready after the specified duration.
   652  // This function should be wrapped in a s.gsUpdateMutex lock when being called.
   653  func (s *SDKServer) resetReserveAfter(ctx context.Context, duration time.Duration) {
   654  	if s.reserveTimer != nil {
   655  		s.reserveTimer.Stop()
   656  	}
   657  
   658  	s.reserveTimer = time.AfterFunc(duration, func() {
   659  		if _, err := s.Ready(ctx, &sdk.Empty{}); err != nil {
   660  			s.logger.WithError(errors.WithStack(err)).Error("error returning to Ready after reserved")
   661  		}
   662  	})
   663  }
   664  
   665  // stopReserveTimer stops the reserve timer. This is a no-op and safe to call if the timer is nil
   666  func (s *SDKServer) stopReserveTimer() {
   667  	s.gsUpdateMutex.Lock()
   668  	defer s.gsUpdateMutex.Unlock()
   669  
   670  	if s.reserveTimer != nil {
   671  		s.reserveTimer.Stop()
   672  	}
   673  	s.gsReserveDuration = nil
   674  }
   675  
   676  // PlayerConnect should be called when a player connects.
   677  // [Stage:Alpha]
   678  // [FeatureFlag:PlayerTracking]
   679  func (s *SDKServer) PlayerConnect(_ context.Context, id *alpha.PlayerID) (*alpha.Bool, error) {
   680  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   681  		return &alpha.Bool{Bool: false}, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
   682  	}
   683  	s.logger.WithField("playerID", id.PlayerID).Debug("Player Connected")
   684  
   685  	s.gsUpdateMutex.Lock()
   686  	defer s.gsUpdateMutex.Unlock()
   687  
   688  	// the player is already connected, return false.
   689  	for _, playerID := range s.gsConnectedPlayers {
   690  		if playerID == id.PlayerID {
   691  			return &alpha.Bool{Bool: false}, nil
   692  		}
   693  	}
   694  
   695  	if int64(len(s.gsConnectedPlayers)) >= s.gsPlayerCapacity {
   696  		return &alpha.Bool{Bool: false}, errors.New("players are already at capacity")
   697  	}
   698  
   699  	// let's retain the original order, as it should be a smaller patch on data change
   700  	s.gsConnectedPlayers = append(s.gsConnectedPlayers, id.PlayerID)
   701  	s.workerqueue.EnqueueAfter(cache.ExplicitKey(string(updateConnectedPlayers)), updatePeriod)
   702  
   703  	return &alpha.Bool{Bool: true}, nil
   704  }
   705  
   706  // PlayerDisconnect should be called when a player disconnects.
   707  // [Stage:Alpha]
   708  // [FeatureFlag:PlayerTracking]
   709  func (s *SDKServer) PlayerDisconnect(_ context.Context, id *alpha.PlayerID) (*alpha.Bool, error) {
   710  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   711  		return &alpha.Bool{Bool: false}, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
   712  	}
   713  	s.logger.WithField("playerID", id.PlayerID).Debug("Player Disconnected")
   714  
   715  	s.gsUpdateMutex.Lock()
   716  	defer s.gsUpdateMutex.Unlock()
   717  
   718  	found := -1
   719  	for i, playerID := range s.gsConnectedPlayers {
   720  		if playerID == id.PlayerID {
   721  			found = i
   722  			break
   723  		}
   724  	}
   725  	if found == -1 {
   726  		return &alpha.Bool{Bool: false}, nil
   727  	}
   728  
   729  	// let's retain the original order, as it should be a smaller patch on data change
   730  	s.gsConnectedPlayers = append(s.gsConnectedPlayers[:found], s.gsConnectedPlayers[found+1:]...)
   731  	s.workerqueue.EnqueueAfter(cache.ExplicitKey(string(updateConnectedPlayers)), updatePeriod)
   732  
   733  	return &alpha.Bool{Bool: true}, nil
   734  }
   735  
   736  // IsPlayerConnected returns if the playerID is currently connected to the GameServer.
   737  // This is always accurate, even if the value hasn’t been updated to the GameServer status yet.
   738  // [Stage:Alpha]
   739  // [FeatureFlag:PlayerTracking]
   740  func (s *SDKServer) IsPlayerConnected(_ context.Context, id *alpha.PlayerID) (*alpha.Bool, error) {
   741  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   742  		return &alpha.Bool{Bool: false}, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
   743  	}
   744  	s.gsUpdateMutex.RLock()
   745  	defer s.gsUpdateMutex.RUnlock()
   746  
   747  	result := &alpha.Bool{Bool: false}
   748  
   749  	for _, playerID := range s.gsConnectedPlayers {
   750  		if playerID == id.PlayerID {
   751  			result.Bool = true
   752  			break
   753  		}
   754  	}
   755  
   756  	return result, nil
   757  }
   758  
   759  // GetConnectedPlayers returns the list of the currently connected player ids.
   760  // This is always accurate, even if the value hasn’t been updated to the GameServer status yet.
   761  // [Stage:Alpha]
   762  // [FeatureFlag:PlayerTracking]
   763  func (s *SDKServer) GetConnectedPlayers(_ context.Context, _ *alpha.Empty) (*alpha.PlayerIDList, error) {
   764  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   765  		return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
   766  	}
   767  	s.gsUpdateMutex.RLock()
   768  	defer s.gsUpdateMutex.RUnlock()
   769  
   770  	return &alpha.PlayerIDList{List: s.gsConnectedPlayers}, nil
   771  }
   772  
   773  // GetPlayerCount returns the current player count.
   774  // [Stage:Alpha]
   775  // [FeatureFlag:PlayerTracking]
   776  func (s *SDKServer) GetPlayerCount(_ context.Context, _ *alpha.Empty) (*alpha.Count, error) {
   777  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   778  		return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
   779  	}
   780  	s.gsUpdateMutex.RLock()
   781  	defer s.gsUpdateMutex.RUnlock()
   782  	return &alpha.Count{Count: int64(len(s.gsConnectedPlayers))}, nil
   783  }
   784  
   785  // SetPlayerCapacity to change the game server's player capacity.
   786  // [Stage:Alpha]
   787  // [FeatureFlag:PlayerTracking]
   788  func (s *SDKServer) SetPlayerCapacity(_ context.Context, count *alpha.Count) (*alpha.Empty, error) {
   789  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   790  		return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
   791  	}
   792  	s.gsUpdateMutex.Lock()
   793  	s.gsPlayerCapacity = count.Count
   794  	s.gsUpdateMutex.Unlock()
   795  	s.workerqueue.Enqueue(cache.ExplicitKey(string(updatePlayerCapacity)))
   796  
   797  	return &alpha.Empty{}, nil
   798  }
   799  
   800  // GetPlayerCapacity returns the current player capacity, as set by SDK.SetPlayerCapacity()
   801  // [Stage:Alpha]
   802  // [FeatureFlag:PlayerTracking]
   803  func (s *SDKServer) GetPlayerCapacity(_ context.Context, _ *alpha.Empty) (*alpha.Count, error) {
   804  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
   805  		return nil, errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
   806  	}
   807  	s.gsUpdateMutex.RLock()
   808  	defer s.gsUpdateMutex.RUnlock()
   809  	return &alpha.Count{Count: s.gsPlayerCapacity}, nil
   810  }
   811  
   812  // GetCounter returns a Counter. Returns error if the counter does not exist.
   813  // [Stage:Beta]
   814  // [FeatureFlag:CountsAndLists]
   815  func (s *SDKServer) GetCounter(_ context.Context, in *beta.GetCounterRequest) (*beta.Counter, error) {
   816  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   817  		return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists)
   818  	}
   819  
   820  	s.logger.WithField("name", in.Name).Debug("Getting Counter")
   821  
   822  	gs, err := s.gameServer()
   823  	if err != nil {
   824  		return nil, err
   825  	}
   826  
   827  	s.gsUpdateMutex.RLock()
   828  	defer s.gsUpdateMutex.RUnlock()
   829  
   830  	counter, ok := gs.Status.Counters[in.Name]
   831  	if !ok {
   832  		return nil, errors.Errorf("counter not found: %s", in.Name)
   833  	}
   834  	s.logger.WithField("Get Counter", counter).Debugf("Got Counter %s", in.Name)
   835  	protoCounter := &beta.Counter{Name: in.Name, Count: counter.Count, Capacity: counter.Capacity}
   836  	// If there are batched changes that have not yet been applied, apply them to the Counter.
   837  	// This does NOT validate batched the changes.
   838  	if counterUpdate, ok := s.gsCounterUpdates[in.Name]; ok {
   839  		if counterUpdate.capacitySet != nil {
   840  			protoCounter.Capacity = *counterUpdate.capacitySet
   841  		}
   842  		if counterUpdate.countSet != nil {
   843  			protoCounter.Count = *counterUpdate.countSet
   844  		}
   845  		protoCounter.Count += counterUpdate.diff
   846  		if protoCounter.Count < 0 {
   847  			protoCounter.Count = 0
   848  			s.logger.Debug("truncating Count in Get Counter request to 0")
   849  		}
   850  		if protoCounter.Count > protoCounter.Capacity {
   851  			protoCounter.Count = protoCounter.Capacity
   852  			s.logger.Debug("truncating Count in Get Counter request to Capacity")
   853  		}
   854  		s.logger.WithField("Get Counter", counter).Debugf("Applied Batched Counter Updates %v", counterUpdate)
   855  	}
   856  
   857  	return protoCounter, nil
   858  }
   859  
   860  // UpdateCounter collapses all UpdateCounterRequests for a given Counter into a single request.
   861  // UpdateCounterRequest must be one and only one of Capacity, Count, or CountDiff.
   862  // Returns error if the Counter does not exist (name cannot be updated).
   863  // Returns error if the Count is out of range [0,Capacity].
   864  // [Stage:Beta]
   865  // [FeatureFlag:CountsAndLists]
   866  func (s *SDKServer) UpdateCounter(_ context.Context, in *beta.UpdateCounterRequest) (*beta.Counter, error) {
   867  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   868  		return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists)
   869  	}
   870  
   871  	if in.CounterUpdateRequest == nil {
   872  		return nil, errors.Errorf("invalid argument. CounterUpdateRequest: %v cannot be nil", in.CounterUpdateRequest)
   873  	}
   874  	if in.CounterUpdateRequest.CountDiff == 0 && in.CounterUpdateRequest.Count == nil && in.CounterUpdateRequest.Capacity == nil {
   875  		return nil, errors.Errorf("invalid argument. Malformed CounterUpdateRequest: %v", in.CounterUpdateRequest)
   876  	}
   877  
   878  	s.logger.WithField("name", in.CounterUpdateRequest.Name).Debug("Update Counter Request")
   879  
   880  	gs, err := s.gameServer()
   881  	if err != nil {
   882  		return nil, err
   883  	}
   884  
   885  	s.gsUpdateMutex.Lock()
   886  	defer s.gsUpdateMutex.Unlock()
   887  
   888  	// Check if we already have a batch request started for this Counter. If not, add new request to
   889  	// the gsCounterUpdates map.
   890  	name := in.CounterUpdateRequest.Name
   891  	batchCounter := s.gsCounterUpdates[name]
   892  
   893  	counter, ok := gs.Status.Counters[name]
   894  	// We didn't find the Counter named key in the gameserver.
   895  	if !ok {
   896  		return nil, errors.Errorf("counter not found: %s", name)
   897  	}
   898  
   899  	batchCounter.counter = *counter.DeepCopy()
   900  
   901  	// Updated based on if client call is CapacitySet
   902  	if in.CounterUpdateRequest.Capacity != nil {
   903  		if in.CounterUpdateRequest.Capacity.GetValue() < 0 {
   904  			return nil, errors.Errorf("out of range. Capacity must be greater than or equal to 0. Found Capacity: %d", in.CounterUpdateRequest.Capacity.GetValue())
   905  		}
   906  		capacitySet := in.CounterUpdateRequest.Capacity.GetValue()
   907  		batchCounter.capacitySet = &capacitySet
   908  	}
   909  
   910  	// Update based on if Client call is CountSet
   911  	if in.CounterUpdateRequest.Count != nil {
   912  		// Verify that 0 <= Count >= Capacity
   913  		countSet := in.CounterUpdateRequest.Count.GetValue()
   914  		capacity := batchCounter.counter.Capacity
   915  		if batchCounter.capacitySet != nil {
   916  			capacity = *batchCounter.capacitySet
   917  		}
   918  		if countSet < 0 || countSet > capacity {
   919  			return nil, errors.Errorf("out of range. Count must be within range [0,Capacity]. Found Count: %d, Capacity: %d", countSet, capacity)
   920  		}
   921  		batchCounter.countSet = &countSet
   922  		// Clear any previous CountIncrement or CountDecrement requests, and add the CountSet as the first item.
   923  		batchCounter.diff = 0
   924  	}
   925  
   926  	// Update based on if Client call is CountIncrement or CountDecrement
   927  	if in.CounterUpdateRequest.CountDiff != 0 {
   928  		count := batchCounter.counter.Count
   929  		if batchCounter.countSet != nil {
   930  			count = *batchCounter.countSet
   931  		}
   932  		count += batchCounter.diff + in.CounterUpdateRequest.CountDiff
   933  		// Verify that 0 <= Count >= Capacity
   934  		capacity := batchCounter.counter.Capacity
   935  		if batchCounter.capacitySet != nil {
   936  			capacity = *batchCounter.capacitySet
   937  		}
   938  		if count < 0 || count > capacity {
   939  			return nil, errors.Errorf("out of range. Count must be within range [0,Capacity]. Found Count: %d, Capacity: %d", count, capacity)
   940  		}
   941  		batchCounter.diff += in.CounterUpdateRequest.CountDiff
   942  	}
   943  
   944  	s.gsCounterUpdates[name] = batchCounter
   945  
   946  	// Queue up the Update for later batch processing by updateCounters.
   947  	s.workerqueue.Enqueue(cache.ExplicitKey(updateCounters))
   948  	return projectCounterState(name, batchCounter), nil
   949  }
   950  
   951  // projectCounterState calculates the final expected Counter state after applying batched updates.
   952  func projectCounterState(name string, batchCounter counterUpdateRequest) *beta.Counter {
   953  	currentCapacity := batchCounter.counter.Capacity
   954  	if batchCounter.capacitySet != nil {
   955  		currentCapacity = *batchCounter.capacitySet
   956  	}
   957  	currentCount := batchCounter.counter.Count
   958  	if batchCounter.countSet != nil {
   959  		currentCount = *batchCounter.countSet
   960  	}
   961  	currentCount += batchCounter.diff
   962  	if currentCount < 0 {
   963  		currentCount = 0
   964  	}
   965  	if currentCount > currentCapacity {
   966  		currentCount = currentCapacity
   967  	}
   968  	return &beta.Counter{
   969  		Name:     name,
   970  		Count:    currentCount,
   971  		Capacity: currentCapacity,
   972  	}
   973  }
   974  
   975  // updateCounter updates the Counters in the GameServer's Status with the batched update requests.
   976  func (s *SDKServer) updateCounter(ctx context.Context) error {
   977  	gs, err := s.gameServer()
   978  	if err != nil {
   979  		return err
   980  	}
   981  	gsCopy := gs.DeepCopy()
   982  
   983  	s.logger.WithField("batchCounterUpdates", s.gsCounterUpdates).Debug("Batch updating Counter(s)")
   984  	s.gsUpdateMutex.Lock()
   985  	defer s.gsUpdateMutex.Unlock()
   986  
   987  	names := []string{}
   988  
   989  	for name, ctrReq := range s.gsCounterUpdates {
   990  		counter, ok := gsCopy.Status.Counters[name]
   991  		if !ok {
   992  			continue
   993  		}
   994  		// Changes may have been made to the Counter since we validated the incoming changes in
   995  		// UpdateCounter, and we need to verify if the batched changes can be fully applied, partially
   996  		// applied, or cannot be applied.
   997  		if ctrReq.capacitySet != nil {
   998  			counter.Capacity = *ctrReq.capacitySet
   999  		}
  1000  		if ctrReq.countSet != nil {
  1001  			counter.Count = *ctrReq.countSet
  1002  		}
  1003  		newCnt := counter.Count + ctrReq.diff
  1004  		if newCnt < 0 {
  1005  			newCnt = 0
  1006  			s.logger.Debug("truncating Count in Update Counter request to 0")
  1007  		}
  1008  		if newCnt > counter.Capacity {
  1009  			newCnt = counter.Capacity
  1010  			s.logger.Debug("truncating Count in Update Counter request to Capacity")
  1011  		}
  1012  		counter.Count = newCnt
  1013  		gsCopy.Status.Counters[name] = counter
  1014  		names = append(names, name)
  1015  	}
  1016  
  1017  	gs, err = s.patchGameServer(ctx, gs, gsCopy)
  1018  	if err != nil {
  1019  		return err
  1020  	}
  1021  
  1022  	// Record an event per update Counter
  1023  	for _, name := range names {
  1024  		s.recorder.Event(gs, corev1.EventTypeNormal, "UpdateCounter",
  1025  			fmt.Sprintf("Counter %s updated to Count:%d Capacity:%d",
  1026  				name, gs.Status.Counters[name].Count, gs.Status.Counters[name].Capacity))
  1027  	}
  1028  
  1029  	// Cache a copy of the successfully updated gameserver
  1030  	s.gsCopy = gs
  1031  	// Clear the gsCounterUpdates
  1032  	s.gsCounterUpdates = map[string]counterUpdateRequest{}
  1033  
  1034  	return nil
  1035  }
  1036  
  1037  // GetList returns a List. Returns not found if the List does not exist.
  1038  // [Stage:Beta]
  1039  // [FeatureFlag:CountsAndLists]
  1040  func (s *SDKServer) GetList(_ context.Context, in *beta.GetListRequest) (*beta.List, error) {
  1041  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
  1042  		return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists)
  1043  	}
  1044  	if in == nil {
  1045  		return nil, errors.Errorf("GetListRequest cannot be nil")
  1046  	}
  1047  	s.logger.WithField("name", in.Name).Debug("Getting List")
  1048  
  1049  	gs, err := s.gameServer()
  1050  	if err != nil {
  1051  		return nil, err
  1052  	}
  1053  
  1054  	s.gsUpdateMutex.RLock()
  1055  	defer s.gsUpdateMutex.RUnlock()
  1056  
  1057  	list, ok := gs.Status.Lists[in.Name]
  1058  	if !ok {
  1059  		return nil, errors.Errorf("list not found: %s", in.Name)
  1060  	}
  1061  
  1062  	s.logger.WithField("Get List", list).Debugf("Got List %s", in.Name)
  1063  	protoList := beta.List{Name: in.Name, Values: list.Values, Capacity: list.Capacity}
  1064  	// If there are batched changes that have not yet been applied, apply them to the List.
  1065  	// This does NOT validate batched the changes, and does NOT modify the List.
  1066  	if listUpdate, ok := s.gsListUpdates[in.Name]; ok {
  1067  		if listUpdate.capacitySet != nil {
  1068  			protoList.Capacity = *listUpdate.capacitySet
  1069  		}
  1070  		if len(listUpdate.valuesToDelete) != 0 {
  1071  			protoList.Values = deleteValues(protoList.Values, listUpdate.valuesToDelete)
  1072  		}
  1073  		if len(listUpdate.valuesToAppend) != 0 {
  1074  			protoList.Values = agonesv1.MergeRemoveDuplicates(protoList.Values, listUpdate.valuesToAppend)
  1075  		}
  1076  		// Truncates Values to less than or equal to Capacity
  1077  		if len(protoList.Values) > int(protoList.Capacity) {
  1078  			protoList.Values = append([]string{}, protoList.Values[:protoList.Capacity]...)
  1079  		}
  1080  		s.logger.WithField("Get List", list).Debugf("Applied Batched List Updates %v", listUpdate)
  1081  	}
  1082  
  1083  	return &protoList, nil
  1084  }
  1085  
  1086  // UpdateList collapses all update capacity requests for a given List into a single UpdateList request.
  1087  // This function currently only updates the Capacity of a List.
  1088  // Returns error if the List does not exist (name cannot be updated).
  1089  // Returns error if the List update capacity is out of range [0,1000].
  1090  // [Stage:Beta]
  1091  // [FeatureFlag:CountsAndLists]
  1092  func (s *SDKServer) UpdateList(ctx context.Context, in *beta.UpdateListRequest) (*beta.List, error) {
  1093  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
  1094  		return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists)
  1095  	}
  1096  	if in == nil {
  1097  		return nil, errors.Errorf("UpdateListRequest cannot be nil")
  1098  	}
  1099  	if in.List == nil || in.UpdateMask == nil {
  1100  		return nil, errors.Errorf("invalid argument. List: %v and UpdateMask %v cannot be nil", in.List, in.UpdateMask)
  1101  	}
  1102  	if !in.UpdateMask.IsValid(in.List.ProtoReflect().Interface()) {
  1103  		return nil, errors.Errorf("invalid argument. Field Mask Path(s): %v are invalid for List. Use valid field name(s): %v", in.UpdateMask.GetPaths(), in.List.ProtoReflect().Descriptor().Fields())
  1104  	}
  1105  
  1106  	if in.List.Capacity < 0 || in.List.Capacity > apiserver.ListMaxCapacity {
  1107  		return nil, errors.Errorf("out of range. Capacity must be within range [0,1000]. Found Capacity: %d", in.List.Capacity)
  1108  	}
  1109  
  1110  	list, err := s.GetList(ctx, &beta.GetListRequest{Name: in.List.Name})
  1111  	if err != nil {
  1112  
  1113  		return nil, errors.Errorf("not found. %s List not found", list.Name)
  1114  	}
  1115  
  1116  	s.gsUpdateMutex.Lock()
  1117  	defer s.gsUpdateMutex.Unlock()
  1118  
  1119  	// Removes any fields from the request object that are not included in the FieldMask Paths.
  1120  	fmutils.Filter(in.List, in.UpdateMask.Paths)
  1121  
  1122  	// The list will allow the current list to be overwritten
  1123  	batchList := listUpdateRequest{}
  1124  
  1125  	// Only set the capacity if its included in the update mask paths
  1126  	if slices.Contains(in.UpdateMask.Paths, "capacity") {
  1127  		batchList.capacitySet = &in.List.Capacity
  1128  	}
  1129  
  1130  	// Only change the values if its included in the update mask paths
  1131  	if slices.Contains(in.UpdateMask.Paths, "values") {
  1132  		currList := list
  1133  
  1134  		// Find values to remove from the current list
  1135  		valuesToDelete := map[string]bool{}
  1136  		for _, value := range currList.Values {
  1137  			valueFound := false
  1138  			for _, element := range in.List.Values {
  1139  				if value == element {
  1140  					valueFound = true
  1141  				}
  1142  			}
  1143  
  1144  			if !valueFound {
  1145  				valuesToDelete[value] = true
  1146  			}
  1147  		}
  1148  		batchList.valuesToDelete = valuesToDelete
  1149  
  1150  		// Find values that need to be added to the current list from the incomming list
  1151  		valuesToAdd := []string{}
  1152  		for _, value := range in.List.Values {
  1153  			valueFound := false
  1154  			for _, element := range currList.Values {
  1155  				if value == element {
  1156  					valueFound = true
  1157  				}
  1158  			}
  1159  
  1160  			if !valueFound {
  1161  				valuesToAdd = append(valuesToAdd, value)
  1162  			}
  1163  		}
  1164  		batchList.valuesToAppend = valuesToAdd
  1165  	}
  1166  
  1167  	// Queue up the Update for later batch processing by updateLists.
  1168  	s.gsListUpdates[list.Name] = batchList
  1169  	s.workerqueue.Enqueue(cache.ExplicitKey(updateLists))
  1170  	return &beta.List{}, nil
  1171  
  1172  }
  1173  
  1174  // AddListValue collapses all append a value to the end of a List requests into a single UpdateList request.
  1175  // Returns not found if the List does not exist.
  1176  // Returns already exists if the value is already in the List.
  1177  // Returns out of range if the List is already at Capacity.
  1178  // [Stage:Beta]
  1179  // [FeatureFlag:CountsAndLists]
  1180  func (s *SDKServer) AddListValue(ctx context.Context, in *beta.AddListValueRequest) (*beta.List, error) {
  1181  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
  1182  		return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists)
  1183  	}
  1184  	if in == nil {
  1185  		return nil, errors.Errorf("AddListValueRequest cannot be nil")
  1186  	}
  1187  	s.logger.WithField("name", in.Name).Debug("Add List Value")
  1188  
  1189  	list, err := s.GetList(ctx, &beta.GetListRequest{Name: in.Name})
  1190  	if err != nil {
  1191  		return nil, err
  1192  	}
  1193  
  1194  	s.gsUpdateMutex.Lock()
  1195  	defer s.gsUpdateMutex.Unlock()
  1196  
  1197  	// Verify room to add another value
  1198  	if int(list.Capacity) <= len(list.Values) {
  1199  		return nil, errors.Errorf("out of range. No available capacity. Current Capacity: %d, List Size: %d", list.Capacity, len(list.Values))
  1200  	}
  1201  	// Verify value does not already exist in the list
  1202  	for _, val := range list.Values {
  1203  		if in.Value == val {
  1204  			return nil, errors.Errorf("already exists. Value: %s already in List: %s", in.Value, in.Name)
  1205  		}
  1206  	}
  1207  	list.Values = append(list.Values, in.Value)
  1208  	batchList := s.gsListUpdates[in.Name]
  1209  	batchList.valuesToAppend = append(batchList.valuesToAppend, in.Value)
  1210  	s.gsListUpdates[in.Name] = batchList
  1211  	// Queue up the Update for later batch processing by updateLists.
  1212  	s.workerqueue.Enqueue(cache.ExplicitKey(updateLists))
  1213  	return list, nil
  1214  }
  1215  
  1216  // RemoveListValue collapses all remove a value from a List requests into a single UpdateList request.
  1217  // Returns not found if the List does not exist.
  1218  // Returns not found if the value is not in the List.
  1219  // [Stage:Beta]
  1220  // [FeatureFlag:CountsAndLists]
  1221  func (s *SDKServer) RemoveListValue(ctx context.Context, in *beta.RemoveListValueRequest) (*beta.List, error) {
  1222  	if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
  1223  		return nil, errors.Errorf("%s not enabled", runtime.FeatureCountsAndLists)
  1224  	}
  1225  	if in == nil {
  1226  		return nil, errors.Errorf("RemoveListValueRequest cannot be nil")
  1227  	}
  1228  	s.logger.WithField("name", in.Name).WithField("value", in.Value).Debug("Remove List Value")
  1229  
  1230  	list, err := s.GetList(ctx, &beta.GetListRequest{Name: in.Name})
  1231  	if err != nil {
  1232  		return nil, err
  1233  	}
  1234  
  1235  	s.gsUpdateMutex.Lock()
  1236  	defer s.gsUpdateMutex.Unlock()
  1237  
  1238  	// Track this removal for batch persistence to K8s
  1239  	batchList := s.gsListUpdates[in.Name]
  1240  
  1241  	removedFromBatch := false
  1242  	if len(batchList.valuesToAppend) > 0 {
  1243  		newAppend := make([]string, 0, len(batchList.valuesToAppend))
  1244  		for _, v := range batchList.valuesToAppend {
  1245  			if v == in.Value {
  1246  				removedFromBatch = true
  1247  				continue // skip value
  1248  			}
  1249  			newAppend = append(newAppend, v)
  1250  		}
  1251  		batchList.valuesToAppend = newAppend
  1252  	}
  1253  	if !removedFromBatch {
  1254  		found := false
  1255  		newValues := make([]string, 0, len(list.Values))
  1256  		for _, val := range list.Values {
  1257  			if val == in.Value {
  1258  				found = true
  1259  				continue
  1260  			}
  1261  			newValues = append(newValues, val)
  1262  		}
  1263  		if !found {
  1264  			return nil, fmt.Errorf("not found: value %s not in list %s", in.Value, in.Name)
  1265  		}
  1266  		list.Values = newValues
  1267  		// Track deletions
  1268  		if batchList.valuesToDelete == nil {
  1269  			batchList.valuesToDelete = make(map[string]bool)
  1270  		}
  1271  		batchList.valuesToDelete[in.Value] = true
  1272  	}
  1273  	s.gsListUpdates[in.Name] = batchList
  1274  	// Queue up the Update for later batch processing by updateLists.
  1275  	s.workerqueue.Enqueue(cache.ExplicitKey(updateLists))
  1276  	return list, nil
  1277  }
  1278  
  1279  // updateList updates the Lists in the GameServer's Status with the batched update list requests.
  1280  // Includes all SetCapacity, AddValue, and RemoveValue requests in the batched request.
  1281  func (s *SDKServer) updateList(ctx context.Context) error {
  1282  	gs, err := s.gameServer()
  1283  	if err != nil {
  1284  		return err
  1285  	}
  1286  	gsCopy := gs.DeepCopy()
  1287  
  1288  	s.gsUpdateMutex.Lock()
  1289  	defer s.gsUpdateMutex.Unlock()
  1290  
  1291  	s.logger.WithField("batchListUpdates", s.gsListUpdates).Debug("Batch updating List(s)")
  1292  
  1293  	names := []string{}
  1294  
  1295  	for name, listReq := range s.gsListUpdates {
  1296  		list, ok := gsCopy.Status.Lists[name]
  1297  		if !ok {
  1298  			continue
  1299  		}
  1300  		if listReq.capacitySet != nil {
  1301  			list.Capacity = *listReq.capacitySet
  1302  		}
  1303  		if len(listReq.valuesToDelete) != 0 {
  1304  			list.Values = deleteValues(list.Values, listReq.valuesToDelete)
  1305  		}
  1306  		if len(listReq.valuesToAppend) != 0 {
  1307  			list.Values = agonesv1.MergeRemoveDuplicates(list.Values, listReq.valuesToAppend)
  1308  		}
  1309  
  1310  		if int64(len(list.Values)) > list.Capacity {
  1311  			s.logger.Debugf("truncating Values in Update List request to List Capacity %d", list.Capacity)
  1312  			list.Values = append([]string{}, list.Values[:list.Capacity]...)
  1313  		}
  1314  		gsCopy.Status.Lists[name] = list
  1315  		names = append(names, name)
  1316  	}
  1317  
  1318  	gs, err = s.patchGameServer(ctx, gs, gsCopy)
  1319  	if err != nil {
  1320  		return err
  1321  	}
  1322  
  1323  	// Record an event per List update
  1324  	for _, name := range names {
  1325  		s.recorder.Event(gs, corev1.EventTypeNormal, "UpdateList", fmt.Sprintf("List %s updated", name))
  1326  		s.logger.Debugf("List %s updated to List Capacity: %d, Values: %v",
  1327  			name, gs.Status.Lists[name].Capacity, gs.Status.Lists[name].Values)
  1328  	}
  1329  
  1330  	// Cache a copy of the successfully updated gameserver
  1331  	s.gsCopy = gs
  1332  	// Clear the gsListUpdates
  1333  	s.gsListUpdates = map[string]listUpdateRequest{}
  1334  
  1335  	return nil
  1336  }
  1337  
  1338  // Returns a new string list with the string keys in toDeleteValues removed from valuesList.
  1339  func deleteValues(valuesList []string, toDeleteValues map[string]bool) []string {
  1340  	newValuesList := []string{}
  1341  	for _, value := range valuesList {
  1342  		if _, ok := toDeleteValues[value]; ok {
  1343  			continue
  1344  		}
  1345  		newValuesList = append(newValuesList, value)
  1346  	}
  1347  	return newValuesList
  1348  }
  1349  
  1350  // sendGameServerUpdate sends a watch game server event
  1351  func (s *SDKServer) sendGameServerUpdate(gs *agonesv1.GameServer) {
  1352  	s.logger.Debug("Sending GameServer Event to connectedStreams")
  1353  
  1354  	s.streamMutex.Lock()
  1355  	defer s.streamMutex.Unlock()
  1356  
  1357  	// Filter the slice of streams sharing the same backing array and capacity as the original
  1358  	// so that storage is reused and no memory allocations are made. This modifies the original
  1359  	// slice.
  1360  	//
  1361  	// See https://go.dev/wiki/SliceTricks#filtering-without-allocating
  1362  	remainingStreams := s.connectedStreams[:0]
  1363  	for _, stream := range s.connectedStreams {
  1364  		select {
  1365  		case <-stream.Context().Done():
  1366  			s.logger.Debug("Dropping stream")
  1367  
  1368  			err := stream.Context().Err()
  1369  			switch {
  1370  			case err != nil:
  1371  				s.logger.WithError(errors.WithStack(err)).Error("stream closed with error")
  1372  			default:
  1373  				s.logger.Debug("Stream closed")
  1374  			}
  1375  		default:
  1376  			s.logger.Debug("Keeping stream")
  1377  			remainingStreams = append(remainingStreams, stream)
  1378  
  1379  			if err := stream.Send(convert(gs)); err != nil {
  1380  				s.logger.WithError(errors.WithStack(err)).
  1381  					Error("error sending game server update event")
  1382  			}
  1383  		}
  1384  	}
  1385  	s.connectedStreams = remainingStreams
  1386  
  1387  	if gs.Status.State == agonesv1.GameServerStateShutdown {
  1388  		// Wrap this in a go func(), just in case pushing to this channel deadlocks since there is only one instance of
  1389  		// a receiver. In theory, This could leak goroutines a bit, but since we're shuttling down everything anyway,
  1390  		// it shouldn't matter.
  1391  		go func() {
  1392  			s.gsStateChannel <- agonesv1.GameServerStateShutdown
  1393  		}()
  1394  	}
  1395  }
  1396  
  1397  // checkHealthUpdateState checks the health as part of the /gshealthz hook, and if not
  1398  // healthy will push the Unhealthy state into the queue so it can be updated.
  1399  func (s *SDKServer) checkHealthUpdateState() {
  1400  	s.checkHealth()
  1401  	if !s.healthy() {
  1402  		s.logger.WithField("gameServerName", s.gameServerName).Warn("GameServer has failed health check")
  1403  		s.enqueueState(agonesv1.GameServerStateUnhealthy)
  1404  	}
  1405  }
  1406  
  1407  // touchHealthLastUpdated sets the healthLastUpdated
  1408  // value to now in UTC
  1409  func (s *SDKServer) touchHealthLastUpdated() {
  1410  	s.healthMutex.Lock()
  1411  	defer s.healthMutex.Unlock()
  1412  	s.healthLastUpdated = s.clock.Now().UTC()
  1413  	s.healthFailureCount = 0
  1414  }
  1415  
  1416  func (s *SDKServer) ensureHealthChecksRunning() {
  1417  	if s.health.Disabled {
  1418  		return
  1419  	}
  1420  	s.healthChecksRunning.Do(func() {
  1421  		// start health checking running
  1422  		s.logger.Debug("Starting GameServer health checking")
  1423  		go wait.Until(s.checkHealthUpdateState, s.healthTimeout, s.ctx.Done())
  1424  	})
  1425  }
  1426  
  1427  // checkHealth checks the healthLastUpdated value
  1428  // and if it is outside the timeout value, logger and
  1429  // count a failure
  1430  func (s *SDKServer) checkHealth() {
  1431  	s.healthMutex.Lock()
  1432  	defer s.healthMutex.Unlock()
  1433  
  1434  	timeout := s.healthLastUpdated.Add(s.healthTimeout)
  1435  	if timeout.Before(s.clock.Now().UTC()) {
  1436  		s.healthFailureCount++
  1437  		s.logger.WithField("failureCount", s.healthFailureCount).Warn("GameServer Health Check failed")
  1438  	}
  1439  }
  1440  
  1441  // healthy returns if the GameServer is
  1442  // currently healthy or not based on the configured
  1443  // failure count vs failure threshold
  1444  func (s *SDKServer) healthy() bool {
  1445  	if s.health.Disabled {
  1446  		return true
  1447  	}
  1448  
  1449  	s.healthMutex.RLock()
  1450  	defer s.healthMutex.RUnlock()
  1451  	return s.healthFailureCount < s.health.FailureThreshold
  1452  }
  1453  
  1454  // updatePlayerCapacity updates the Player Capacity field in the GameServer's Status.
  1455  func (s *SDKServer) updatePlayerCapacity(ctx context.Context) error {
  1456  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
  1457  		return errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
  1458  	}
  1459  	s.logger.WithField("capacity", s.gsPlayerCapacity).Debug("updating player capacity")
  1460  	gs, err := s.gameServer()
  1461  	if err != nil {
  1462  		return err
  1463  	}
  1464  
  1465  	gsCopy := gs.DeepCopy()
  1466  
  1467  	s.gsUpdateMutex.RLock()
  1468  	gsCopy.Status.Players.Capacity = s.gsPlayerCapacity
  1469  	s.gsUpdateMutex.RUnlock()
  1470  
  1471  	gs, err = s.patchGameServer(ctx, gs, gsCopy)
  1472  	if err == nil {
  1473  		s.recorder.Event(gs, corev1.EventTypeNormal, "PlayerCapacity", fmt.Sprintf("Set to %d", gs.Status.Players.Capacity))
  1474  	}
  1475  
  1476  	return err
  1477  }
  1478  
  1479  // updateConnectedPlayers updates the Player IDs and Count fields in the GameServer's Status.
  1480  func (s *SDKServer) updateConnectedPlayers(ctx context.Context) error {
  1481  	if !runtime.FeatureEnabled(runtime.FeaturePlayerTracking) {
  1482  		return errors.Errorf("%s not enabled", runtime.FeaturePlayerTracking)
  1483  	}
  1484  	gs, err := s.gameServer()
  1485  	if err != nil {
  1486  		return err
  1487  	}
  1488  
  1489  	gsCopy := gs.DeepCopy()
  1490  	same := false
  1491  	s.gsUpdateMutex.RLock()
  1492  	s.logger.WithField("playerIDs", s.gsConnectedPlayers).Debug("updating connected players")
  1493  	same = apiequality.Semantic.DeepEqual(gsCopy.Status.Players.IDs, s.gsConnectedPlayers)
  1494  	gsCopy.Status.Players.IDs = s.gsConnectedPlayers
  1495  	gsCopy.Status.Players.Count = int64(len(s.gsConnectedPlayers))
  1496  	s.gsUpdateMutex.RUnlock()
  1497  	// if there is no change, then don't update
  1498  	// since it's possible this could fire quite a lot, let's reduce the
  1499  	// amount of requests as much as possible.
  1500  	if same {
  1501  		return nil
  1502  	}
  1503  
  1504  	gs, err = s.patchGameServer(ctx, gs, gsCopy)
  1505  	if err == nil {
  1506  		s.recorder.Event(gs, corev1.EventTypeNormal, "PlayerCount", fmt.Sprintf("Set to %d", gs.Status.Players.Count))
  1507  	}
  1508  
  1509  	return err
  1510  }
  1511  
  1512  // NewSDKServerContext returns a Context that cancels when SIGTERM or os.Interrupt
  1513  // is received and the GameServer's Status is shutdown
  1514  func (s *SDKServer) NewSDKServerContext(ctx context.Context) context.Context {
  1515  	sdkCtx, cancel := context.WithCancel(context.Background())
  1516  	go func() {
  1517  		<-ctx.Done()
  1518  
  1519  		keepWaiting := true
  1520  		s.gsUpdateMutex.RLock()
  1521  		if len(s.gsState) > 0 {
  1522  			s.logger.WithField("state", s.gsState).Info("SDK server shutdown requested, waiting for game server shutdown")
  1523  		} else {
  1524  			s.logger.Info("SDK server state never updated by game server, shutting down sdk server without waiting")
  1525  			keepWaiting = false
  1526  		}
  1527  		s.gsUpdateMutex.RUnlock()
  1528  
  1529  		for keepWaiting {
  1530  			gsState := <-s.gsStateChannel
  1531  			if gsState == agonesv1.GameServerStateShutdown {
  1532  				keepWaiting = false
  1533  			}
  1534  		}
  1535  
  1536  		cancel()
  1537  	}()
  1538  	return sdkCtx
  1539  }
  1540  
  1541  func (s *SDKServer) gsListUpdatesLen() int {
  1542  	s.gsUpdateMutex.RLock()
  1543  	defer s.gsUpdateMutex.RUnlock()
  1544  	return len(s.gsListUpdates)
  1545  }