agones.dev/agones@v1.54.0/pkg/gameservers/pernodecounter.go (about) 1 // Copyright 2018 Google LLC All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package gameservers 16 17 import ( 18 "context" 19 "sync" 20 21 agonesv1 "agones.dev/agones/pkg/apis/agones/v1" 22 "agones.dev/agones/pkg/client/informers/externalversions" 23 listerv1 "agones.dev/agones/pkg/client/listers/agones/v1" 24 "agones.dev/agones/pkg/util/runtime" 25 "github.com/pkg/errors" 26 "github.com/sirupsen/logrus" 27 corev1 "k8s.io/api/core/v1" 28 "k8s.io/apimachinery/pkg/labels" 29 "k8s.io/apimachinery/pkg/types" 30 "k8s.io/client-go/informers" 31 "k8s.io/client-go/tools/cache" 32 ) 33 34 // PerNodeCounter counts how many Allocated and 35 // Ready GameServers currently exist on each node. 36 // This is useful for scheduling allocations, fleet management 37 // mostly under a Packed strategy 38 // 39 //nolint:govet // ignore fieldalignment, singleton 40 type PerNodeCounter struct { 41 logger *logrus.Entry 42 gameServerSynced cache.InformerSynced 43 gameServerLister listerv1.GameServerLister 44 countMutex sync.RWMutex 45 counts map[string]*NodeCount 46 processed map[types.UID]processed 47 } 48 49 // processed tracks the last processed state of a GameServer to prevent duplicate event processing 50 type processed struct { 51 resourceVersion string 52 state agonesv1.GameServerState 53 nodeName string 54 } 55 56 // NodeCount is just a convenience data structure for 57 // keeping relevant GameServer counts about Nodes 58 type NodeCount struct { 59 // Ready is ready count 60 Ready int64 61 // Allocated is allocated out 62 Allocated int64 63 } 64 65 // NewPerNodeCounter returns a new PerNodeCounter 66 func NewPerNodeCounter( 67 kubeInformerFactory informers.SharedInformerFactory, 68 agonesInformerFactory externalversions.SharedInformerFactory) *PerNodeCounter { 69 70 gameServers := agonesInformerFactory.Agones().V1().GameServers() 71 gsInformer := gameServers.Informer() 72 73 pnc := &PerNodeCounter{ 74 gameServerSynced: gsInformer.HasSynced, 75 gameServerLister: gameServers.Lister(), 76 countMutex: sync.RWMutex{}, 77 counts: map[string]*NodeCount{}, 78 processed: map[types.UID]processed{}, 79 } 80 81 pnc.logger = runtime.NewLoggerWithType(pnc) 82 83 _, _ = gsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ 84 AddFunc: func(obj interface{}) { 85 gs := obj.(*agonesv1.GameServer) 86 87 pnc.countMutex.Lock() 88 defer pnc.countMutex.Unlock() 89 90 // Check if we've already processed this GameServer 91 if processed, exists := pnc.processed[gs.ObjectMeta.UID]; exists { 92 // Skip if same ResourceVersion (when set) and same state 93 if processed.resourceVersion == gs.ObjectMeta.ResourceVersion && 94 processed.state == gs.Status.State { 95 // Already processed this exact version, skip 96 return 97 } 98 99 // If state changed, handle it as an update 100 if processed.state != gs.Status.State { 101 ready, allocated := pnc.calculateStateTransition(processed.state, gs.Status.State) 102 updateProcessed(pnc.processed, gs) 103 pnc.inc(gs, ready, allocated) 104 } 105 return 106 } 107 108 // Track this state 109 updateProcessed(pnc.processed, gs) 110 111 switch gs.Status.State { 112 case agonesv1.GameServerStateReady: 113 pnc.inc(gs, 1, 0) 114 case agonesv1.GameServerStateAllocated: 115 pnc.inc(gs, 0, 1) 116 } 117 }, 118 UpdateFunc: func(oldObj, newObj interface{}) { 119 oldGS := oldObj.(*agonesv1.GameServer) 120 newGS := newObj.(*agonesv1.GameServer) 121 122 pnc.countMutex.Lock() 123 defer pnc.countMutex.Unlock() 124 125 // Check if we've already processed this exact state 126 if pnc.isAlreadyProcessed(newGS.ObjectMeta.UID, newGS.ObjectMeta.ResourceVersion) { 127 return 128 } 129 130 // Use the tracked previous state instead of oldGS to handle duplicates 131 if processed, exists := pnc.processed[newGS.ObjectMeta.UID]; exists { 132 oldGS = &agonesv1.GameServer{ 133 Status: agonesv1.GameServerStatus{ 134 State: processed.state, 135 NodeName: processed.nodeName, 136 }, 137 } 138 } 139 140 ready, allocated := pnc.calculateStateTransition(oldGS.Status.State, newGS.Status.State) 141 updateProcessed(pnc.processed, newGS) 142 pnc.inc(newGS, ready, allocated) 143 }, 144 DeleteFunc: func(obj interface{}) { 145 gs, ok := obj.(*agonesv1.GameServer) 146 if !ok { 147 return 148 } 149 150 pnc.countMutex.Lock() 151 defer pnc.countMutex.Unlock() 152 153 // Check if we've tracked this GameServer 154 processed, exists := pnc.processed[gs.ObjectMeta.UID] 155 if exists { 156 // Use the tracked state for accurate counting, as the current state may not be 157 // allocated or ready at this point (could very well be Shutdown). 158 gs = &agonesv1.GameServer{ 159 Status: agonesv1.GameServerStatus{ 160 State: processed.state, 161 NodeName: processed.nodeName, 162 }, 163 } 164 } 165 166 switch gs.Status.State { 167 case agonesv1.GameServerStateReady: 168 pnc.inc(gs, -1, 0) 169 case agonesv1.GameServerStateAllocated: 170 pnc.inc(gs, 0, -1) 171 } 172 173 // Remove from tracking since the object is deleted 174 delete(pnc.processed, gs.ObjectMeta.UID) 175 }, 176 }) 177 178 // remove the record when the node is deleted 179 _, _ = kubeInformerFactory.Core().V1().Nodes().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ 180 DeleteFunc: func(obj interface{}) { 181 node, ok := obj.(*corev1.Node) 182 if !ok { 183 return 184 } 185 186 pnc.countMutex.Lock() 187 defer pnc.countMutex.Unlock() 188 189 delete(pnc.counts, node.ObjectMeta.Name) 190 }, 191 }) 192 193 return pnc 194 } 195 196 // Run sets up the current state GameServer counts across nodes 197 // non blocking Run function. 198 func (pnc *PerNodeCounter) Run(ctx context.Context, _ int) error { 199 pnc.countMutex.Lock() 200 defer pnc.countMutex.Unlock() 201 202 pnc.logger.Debug("Running") 203 204 if !cache.WaitForCacheSync(ctx.Done(), pnc.gameServerSynced) { 205 return errors.New("failed to wait for caches to sync") 206 } 207 208 gsList, err := pnc.gameServerLister.List(labels.Everything()) 209 if err != nil { 210 return errors.Wrap(err, "error attempting to list all GameServers") 211 } 212 213 counts := map[string]*NodeCount{} 214 processedGS := map[types.UID]processed{} 215 216 for _, gs := range gsList { 217 _, ok := counts[gs.Status.NodeName] 218 if !ok { 219 counts[gs.Status.NodeName] = &NodeCount{} 220 } 221 222 switch gs.Status.State { 223 case agonesv1.GameServerStateReady: 224 counts[gs.Status.NodeName].Ready++ 225 case agonesv1.GameServerStateAllocated: 226 counts[gs.Status.NodeName].Allocated++ 227 } 228 229 // Track this GameServer to prevent duplicate processing 230 updateProcessed(processedGS, gs) 231 } 232 233 pnc.counts = counts 234 pnc.processed = processedGS 235 return nil 236 } 237 238 // Counts returns the NodeCount map in a thread safe way 239 func (pnc *PerNodeCounter) Counts() map[string]NodeCount { 240 pnc.countMutex.RLock() 241 defer pnc.countMutex.RUnlock() 242 243 result := make(map[string]NodeCount, len(pnc.counts)) 244 245 // return a copy, so it's thread safe 246 for k, v := range pnc.counts { 247 result[k] = *v 248 } 249 250 return result 251 } 252 253 // incLocked increments the counts for a GameServer without acquiring the lock. 254 // The caller must hold the countMutex lock. 255 func (pnc *PerNodeCounter) inc(gs *agonesv1.GameServer, ready, allocated int64) { 256 _, ok := pnc.counts[gs.Status.NodeName] 257 if !ok { 258 pnc.counts[gs.Status.NodeName] = &NodeCount{} 259 } 260 261 pnc.counts[gs.Status.NodeName].Allocated += allocated 262 pnc.counts[gs.Status.NodeName].Ready += ready 263 264 // just in case 265 if pnc.counts[gs.Status.NodeName].Allocated < 0 { 266 pnc.logger.WithField("node", gs.Status.NodeName).Warn("Allocated count went negative, resetting to 0") 267 pnc.counts[gs.Status.NodeName].Allocated = 0 268 } 269 270 if pnc.counts[gs.Status.NodeName].Ready < 0 { 271 pnc.counts[gs.Status.NodeName].Ready = 0 272 } 273 } 274 275 // calculateStateTransition calculates the ready and allocated deltas when transitioning 276 // from oldState to newState. 277 func (pnc *PerNodeCounter) calculateStateTransition(oldState, newState agonesv1.GameServerState) (ready, allocated int64) { 278 if oldState == agonesv1.GameServerStateReady && newState != agonesv1.GameServerStateReady { 279 ready = -1 280 } else if newState == agonesv1.GameServerStateReady && oldState != agonesv1.GameServerStateReady { 281 ready = 1 282 } 283 284 if oldState == agonesv1.GameServerStateAllocated && newState != agonesv1.GameServerStateAllocated { 285 allocated = -1 286 } else if newState == agonesv1.GameServerStateAllocated && oldState != agonesv1.GameServerStateAllocated { 287 allocated = 1 288 } 289 290 return ready, allocated 291 } 292 293 // isAlreadyProcessed checks if a GameServer with the given UID and ResourceVersion 294 // has already been processed. The caller must hold the countMutex lock. 295 func (pnc *PerNodeCounter) isAlreadyProcessed(uid types.UID, resourceVersion string) bool { 296 if processed, exists := pnc.processed[uid]; exists { 297 if processed.resourceVersion == resourceVersion { 298 return true 299 } 300 } 301 return false 302 } 303 304 // updateProcessed updates the tracking state for a GameServer in the specified map. 305 // The caller must hold the countMutex lock when updating pnc.processed. 306 func updateProcessed(processedMap map[types.UID]processed, gs *agonesv1.GameServer) { 307 processedMap[gs.ObjectMeta.UID] = processed{ 308 resourceVersion: gs.ObjectMeta.ResourceVersion, 309 state: gs.Status.State, 310 nodeName: gs.Status.NodeName, 311 } 312 }