agones.dev/agones@v1.54.0/pkg/gameservers/pernodecounter.go (about)

     1  // Copyright 2018 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gameservers
    16  
    17  import (
    18  	"context"
    19  	"sync"
    20  
    21  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    22  	"agones.dev/agones/pkg/client/informers/externalversions"
    23  	listerv1 "agones.dev/agones/pkg/client/listers/agones/v1"
    24  	"agones.dev/agones/pkg/util/runtime"
    25  	"github.com/pkg/errors"
    26  	"github.com/sirupsen/logrus"
    27  	corev1 "k8s.io/api/core/v1"
    28  	"k8s.io/apimachinery/pkg/labels"
    29  	"k8s.io/apimachinery/pkg/types"
    30  	"k8s.io/client-go/informers"
    31  	"k8s.io/client-go/tools/cache"
    32  )
    33  
// PerNodeCounter counts how many Allocated and
// Ready GameServers currently exist on each node.
// This is useful for scheduling allocations and fleet management,
// mostly under a Packed strategy.
//
// The counts are seeded by Run and then kept up to date by the informer
// event handlers registered in NewPerNodeCounter.
//
//nolint:govet // ignore fieldalignment, singleton
type PerNodeCounter struct {
	logger           *logrus.Entry             // typed logger created in NewPerNodeCounter
	gameServerSynced cache.InformerSynced      // reports whether the GameServer informer cache has synced
	gameServerLister listerv1.GameServerLister // lists GameServers for the initial count in Run
	countMutex       sync.RWMutex              // guards counts and processed
	counts           map[string]*NodeCount     // per-node totals, keyed by node name
	processed        map[types.UID]processed   // last handled snapshot per GameServer UID
}
    48  
// processed tracks the last processed state of a GameServer to prevent duplicate event processing.
// Entries are keyed by GameServer UID in PerNodeCounter.processed and written by updateProcessed.
type processed struct {
	resourceVersion string                   // ObjectMeta.ResourceVersion when the snapshot was recorded
	state           agonesv1.GameServerState // Status.State when the snapshot was recorded
	nodeName        string                   // Status.NodeName when the snapshot was recorded
}
    55  
// NodeCount is just a convenience data structure for
// keeping relevant GameServer counts about Nodes.
type NodeCount struct {
	// Ready is the number of GameServers on the node in the Ready state.
	Ready int64
	// Allocated is the number of GameServers on the node in the Allocated state.
	Allocated int64
}
    64  
    65  // NewPerNodeCounter returns a new PerNodeCounter
    66  func NewPerNodeCounter(
    67  	kubeInformerFactory informers.SharedInformerFactory,
    68  	agonesInformerFactory externalversions.SharedInformerFactory) *PerNodeCounter {
    69  
    70  	gameServers := agonesInformerFactory.Agones().V1().GameServers()
    71  	gsInformer := gameServers.Informer()
    72  
    73  	pnc := &PerNodeCounter{
    74  		gameServerSynced: gsInformer.HasSynced,
    75  		gameServerLister: gameServers.Lister(),
    76  		countMutex:       sync.RWMutex{},
    77  		counts:           map[string]*NodeCount{},
    78  		processed:        map[types.UID]processed{},
    79  	}
    80  
    81  	pnc.logger = runtime.NewLoggerWithType(pnc)
    82  
    83  	_, _ = gsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
    84  		AddFunc: func(obj interface{}) {
    85  			gs := obj.(*agonesv1.GameServer)
    86  
    87  			pnc.countMutex.Lock()
    88  			defer pnc.countMutex.Unlock()
    89  
    90  			// Check if we've already processed this GameServer
    91  			if processed, exists := pnc.processed[gs.ObjectMeta.UID]; exists {
    92  				// Skip if same ResourceVersion (when set) and same state
    93  				if processed.resourceVersion == gs.ObjectMeta.ResourceVersion &&
    94  					processed.state == gs.Status.State {
    95  					// Already processed this exact version, skip
    96  					return
    97  				}
    98  
    99  				// If state changed, handle it as an update
   100  				if processed.state != gs.Status.State {
   101  					ready, allocated := pnc.calculateStateTransition(processed.state, gs.Status.State)
   102  					updateProcessed(pnc.processed, gs)
   103  					pnc.inc(gs, ready, allocated)
   104  				}
   105  				return
   106  			}
   107  
   108  			// Track this state
   109  			updateProcessed(pnc.processed, gs)
   110  
   111  			switch gs.Status.State {
   112  			case agonesv1.GameServerStateReady:
   113  				pnc.inc(gs, 1, 0)
   114  			case agonesv1.GameServerStateAllocated:
   115  				pnc.inc(gs, 0, 1)
   116  			}
   117  		},
   118  		UpdateFunc: func(oldObj, newObj interface{}) {
   119  			oldGS := oldObj.(*agonesv1.GameServer)
   120  			newGS := newObj.(*agonesv1.GameServer)
   121  
   122  			pnc.countMutex.Lock()
   123  			defer pnc.countMutex.Unlock()
   124  
   125  			// Check if we've already processed this exact state
   126  			if pnc.isAlreadyProcessed(newGS.ObjectMeta.UID, newGS.ObjectMeta.ResourceVersion) {
   127  				return
   128  			}
   129  
   130  			// Use the tracked previous state instead of oldGS to handle duplicates
   131  			if processed, exists := pnc.processed[newGS.ObjectMeta.UID]; exists {
   132  				oldGS = &agonesv1.GameServer{
   133  					Status: agonesv1.GameServerStatus{
   134  						State:    processed.state,
   135  						NodeName: processed.nodeName,
   136  					},
   137  				}
   138  			}
   139  
   140  			ready, allocated := pnc.calculateStateTransition(oldGS.Status.State, newGS.Status.State)
   141  			updateProcessed(pnc.processed, newGS)
   142  			pnc.inc(newGS, ready, allocated)
   143  		},
   144  		DeleteFunc: func(obj interface{}) {
   145  			gs, ok := obj.(*agonesv1.GameServer)
   146  			if !ok {
   147  				return
   148  			}
   149  
   150  			pnc.countMutex.Lock()
   151  			defer pnc.countMutex.Unlock()
   152  
   153  			// Check if we've tracked this GameServer
   154  			processed, exists := pnc.processed[gs.ObjectMeta.UID]
   155  			if exists {
   156  				// Use the tracked state for accurate counting, as the current state may not be
   157  				// allocated or ready at this point (could very well be Shutdown).
   158  				gs = &agonesv1.GameServer{
   159  					Status: agonesv1.GameServerStatus{
   160  						State:    processed.state,
   161  						NodeName: processed.nodeName,
   162  					},
   163  				}
   164  			}
   165  
   166  			switch gs.Status.State {
   167  			case agonesv1.GameServerStateReady:
   168  				pnc.inc(gs, -1, 0)
   169  			case agonesv1.GameServerStateAllocated:
   170  				pnc.inc(gs, 0, -1)
   171  			}
   172  
   173  			// Remove from tracking since the object is deleted
   174  			delete(pnc.processed, gs.ObjectMeta.UID)
   175  		},
   176  	})
   177  
   178  	// remove the record when the node is deleted
   179  	_, _ = kubeInformerFactory.Core().V1().Nodes().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
   180  		DeleteFunc: func(obj interface{}) {
   181  			node, ok := obj.(*corev1.Node)
   182  			if !ok {
   183  				return
   184  			}
   185  
   186  			pnc.countMutex.Lock()
   187  			defer pnc.countMutex.Unlock()
   188  
   189  			delete(pnc.counts, node.ObjectMeta.Name)
   190  		},
   191  	})
   192  
   193  	return pnc
   194  }
   195  
   196  // Run sets up the current state GameServer counts across nodes
   197  // non blocking Run function.
   198  func (pnc *PerNodeCounter) Run(ctx context.Context, _ int) error {
   199  	pnc.countMutex.Lock()
   200  	defer pnc.countMutex.Unlock()
   201  
   202  	pnc.logger.Debug("Running")
   203  
   204  	if !cache.WaitForCacheSync(ctx.Done(), pnc.gameServerSynced) {
   205  		return errors.New("failed to wait for caches to sync")
   206  	}
   207  
   208  	gsList, err := pnc.gameServerLister.List(labels.Everything())
   209  	if err != nil {
   210  		return errors.Wrap(err, "error attempting to list all GameServers")
   211  	}
   212  
   213  	counts := map[string]*NodeCount{}
   214  	processedGS := map[types.UID]processed{}
   215  
   216  	for _, gs := range gsList {
   217  		_, ok := counts[gs.Status.NodeName]
   218  		if !ok {
   219  			counts[gs.Status.NodeName] = &NodeCount{}
   220  		}
   221  
   222  		switch gs.Status.State {
   223  		case agonesv1.GameServerStateReady:
   224  			counts[gs.Status.NodeName].Ready++
   225  		case agonesv1.GameServerStateAllocated:
   226  			counts[gs.Status.NodeName].Allocated++
   227  		}
   228  
   229  		// Track this GameServer to prevent duplicate processing
   230  		updateProcessed(processedGS, gs)
   231  	}
   232  
   233  	pnc.counts = counts
   234  	pnc.processed = processedGS
   235  	return nil
   236  }
   237  
   238  // Counts returns the NodeCount map in a thread safe way
   239  func (pnc *PerNodeCounter) Counts() map[string]NodeCount {
   240  	pnc.countMutex.RLock()
   241  	defer pnc.countMutex.RUnlock()
   242  
   243  	result := make(map[string]NodeCount, len(pnc.counts))
   244  
   245  	// return a copy, so it's thread safe
   246  	for k, v := range pnc.counts {
   247  		result[k] = *v
   248  	}
   249  
   250  	return result
   251  }
   252  
   253  // incLocked increments the counts for a GameServer without acquiring the lock.
   254  // The caller must hold the countMutex lock.
   255  func (pnc *PerNodeCounter) inc(gs *agonesv1.GameServer, ready, allocated int64) {
   256  	_, ok := pnc.counts[gs.Status.NodeName]
   257  	if !ok {
   258  		pnc.counts[gs.Status.NodeName] = &NodeCount{}
   259  	}
   260  
   261  	pnc.counts[gs.Status.NodeName].Allocated += allocated
   262  	pnc.counts[gs.Status.NodeName].Ready += ready
   263  
   264  	// just in case
   265  	if pnc.counts[gs.Status.NodeName].Allocated < 0 {
   266  		pnc.logger.WithField("node", gs.Status.NodeName).Warn("Allocated count went negative, resetting to 0")
   267  		pnc.counts[gs.Status.NodeName].Allocated = 0
   268  	}
   269  
   270  	if pnc.counts[gs.Status.NodeName].Ready < 0 {
   271  		pnc.counts[gs.Status.NodeName].Ready = 0
   272  	}
   273  }
   274  
   275  // calculateStateTransition calculates the ready and allocated deltas when transitioning
   276  // from oldState to newState.
   277  func (pnc *PerNodeCounter) calculateStateTransition(oldState, newState agonesv1.GameServerState) (ready, allocated int64) {
   278  	if oldState == agonesv1.GameServerStateReady && newState != agonesv1.GameServerStateReady {
   279  		ready = -1
   280  	} else if newState == agonesv1.GameServerStateReady && oldState != agonesv1.GameServerStateReady {
   281  		ready = 1
   282  	}
   283  
   284  	if oldState == agonesv1.GameServerStateAllocated && newState != agonesv1.GameServerStateAllocated {
   285  		allocated = -1
   286  	} else if newState == agonesv1.GameServerStateAllocated && oldState != agonesv1.GameServerStateAllocated {
   287  		allocated = 1
   288  	}
   289  
   290  	return ready, allocated
   291  }
   292  
   293  // isAlreadyProcessed checks if a GameServer with the given UID and ResourceVersion
   294  // has already been processed. The caller must hold the countMutex lock.
   295  func (pnc *PerNodeCounter) isAlreadyProcessed(uid types.UID, resourceVersion string) bool {
   296  	if processed, exists := pnc.processed[uid]; exists {
   297  		if processed.resourceVersion == resourceVersion {
   298  			return true
   299  		}
   300  	}
   301  	return false
   302  }
   303  
   304  // updateProcessed updates the tracking state for a GameServer in the specified map.
   305  // The caller must hold the countMutex lock when updating pnc.processed.
   306  func updateProcessed(processedMap map[types.UID]processed, gs *agonesv1.GameServer) {
   307  	processedMap[gs.ObjectMeta.UID] = processed{
   308  		resourceVersion: gs.ObjectMeta.ResourceVersion,
   309  		state:           gs.Status.State,
   310  		nodeName:        gs.Status.NodeName,
   311  	}
   312  }