// Copyright 2019 Google LLC All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gameserverallocations

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	goErrors "errors"
	"fmt"
	"strings"
	"time"

	"agones.dev/agones/pkg/allocation/converters"
	pb "agones.dev/agones/pkg/allocation/go"
	"agones.dev/agones/pkg/apis"
	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
	allocationv1 "agones.dev/agones/pkg/apis/allocation/v1"
	multiclusterv1 "agones.dev/agones/pkg/apis/multicluster/v1"
	getterv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1"
	multiclusterinformerv1 "agones.dev/agones/pkg/client/informers/externalversions/multicluster/v1"
	multiclusterlisterv1 "agones.dev/agones/pkg/client/listers/multicluster/v1"
	"agones.dev/agones/pkg/util/apiserver"
	"agones.dev/agones/pkg/util/logfields"
	"agones.dev/agones/pkg/util/runtime"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"go.opencensus.io/tag"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/status"
	corev1 "k8s.io/api/core/v1"
	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	k8sruntime "k8s.io/apimachinery/pkg/runtime"
	// NOTE: runtime/schema is imported twice (default name and aliased); both
	// names are used below, which is legal Go, if unusual.
	"k8s.io/apimachinery/pkg/runtime/schema"
	runtimeschema "k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/wait"
	informercorev1 "k8s.io/client-go/informers/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
	corev1lister "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
)

// Sentinel errors for the allocation paths. Retry and the callers below compare
// against these exact values, so they must remain stable identities.
var (
	// ErrNoGameServer is returned when there are no Allocatable GameServers
	// available
	ErrNoGameServer = errors.New("Could not find an Allocatable GameServer")
	// ErrConflictInGameServerSelection is returned when the candidate gameserver already allocated
	ErrConflictInGameServerSelection = errors.New("The Gameserver was already allocated")
	// ErrTotalTimeoutExceeded is used to signal that total retry timeout has been exceeded and no additional retries should be made
	ErrTotalTimeoutExceeded = status.Errorf(codes.DeadlineExceeded, "remote allocation total timeout exceeded")
	// ErrGameServerUpdateConflict is returned when the game server selected for applying the allocation cannot be updated
	ErrGameServerUpdateConflict = errors.New("could not update the selected GameServer")
)

const (
	// LastAllocatedAnnotationKey is a GameServer annotation containing an RFC 3339 formatted
	// timestamp of the most recent allocation.
	LastAllocatedAnnotationKey = "agones.dev/last-allocated"

	// Keys into the multi-cluster connection Secret's Data map.
	secretClientCertName = "tls.crt"
	secretClientKeyName  = "tls.key"
	secretCACertName     = "ca.crt"
	// allocatorPort is the default port appended to remote allocation endpoints
	// that do not specify one (see addPort).
	allocatorPort = "443"
	// maxBatchQueue bounds the pendingRequests channel and sizes the update worker pool.
	maxBatchQueue = 100
	// maxBatchBeforeRefresh forces a re-list of sorted GameServers after this
	// many allocations within a single batch.
	maxBatchBeforeRefresh = 100
	localAllocationSource = "local"
)

// allocationRetry is the backoff applied to local allocation attempts that fail
// with a retryable error (e.g. a selection conflict).
var allocationRetry = wait.Backoff{
	Steps:    5,
	Duration: 10 * time.Millisecond,
	Factor:   1.0,
	Jitter:   0.1,
}

// remoteAllocationRetry is the backoff applied to remote (multi-cluster)
// allocation calls.
var remoteAllocationRetry = wait.Backoff{
	Steps:    7,
	Duration: 100 * time.Millisecond,
	Factor:   2.0,
}

// Allocator handles game server allocation
type Allocator struct {
	baseLogger             *logrus.Entry
	allocationPolicyLister multiclusterlisterv1.GameServerAllocationPolicyLister
	allocationPolicySynced cache.InformerSynced
	secretLister           corev1lister.SecretLister
	secretSynced           cache.InformerSynced
	gameServerGetter       getterv1.GameServersGetter
	recorder               record.EventRecorder
	// pendingRequests feeds allocation requests into the batching loop
	// (ListenAndAllocate); buffered to maxBatchQueue.
	pendingRequests chan request
	allocationCache *AllocationCache
	// remoteAllocationCallback performs the gRPC call to a remote allocator
	// endpoint; held as a field, presumably so it can be substituted in tests —
	// not visible from this file.
	remoteAllocationCallback     func(context.Context, string, grpc.DialOption, *pb.AllocationRequest) (*pb.AllocationResponse, error)
	remoteAllocationTimeout      time.Duration
	totalRemoteAllocationTimeout time.Duration
	batchWaitTime                time.Duration
}

// request is an async request for allocation
type request struct {
	gsa      *allocationv1.GameServerAllocation
	response chan response
}

// response is an async response for a matching request
type response struct {
	request request
	gs      *agonesv1.GameServer
	err     error
}

// NewAllocator creates an instance of Allocator
func NewAllocator(policyInformer multiclusterinformerv1.GameServerAllocationPolicyInformer, secretInformer informercorev1.SecretInformer, gameServerGetter getterv1.GameServersGetter,
	kubeClient kubernetes.Interface, allocationCache *AllocationCache, remoteAllocationTimeout time.Duration, totalRemoteAllocationTimeout time.Duration, batchWaitTime time.Duration) *Allocator {
	ah := &Allocator{
		pendingRequests:              make(chan request, maxBatchQueue),
		allocationPolicyLister:       policyInformer.Lister(),
		allocationPolicySynced:       policyInformer.Informer().HasSynced,
		secretLister:                 secretInformer.Lister(),
		secretSynced:                 secretInformer.Informer().HasSynced,
		gameServerGetter:             gameServerGetter,
		allocationCache:              allocationCache,
		batchWaitTime:                batchWaitTime,
		remoteAllocationTimeout:      remoteAllocationTimeout,
		totalRemoteAllocationTimeout: totalRemoteAllocationTimeout,
		// Default remote callback: dial the endpoint, make a single Allocate
		// call bounded by remoteAllocationTimeout, then close the connection.
		remoteAllocationCallback: func(ctx context.Context, endpoint string, dialOpts grpc.DialOption, request *pb.AllocationRequest) (*pb.AllocationResponse, error) {
			conn, err := grpc.NewClient(endpoint, dialOpts)
			if err != nil {
				return nil, err
			}
			defer conn.Close() // nolint: errcheck

			allocationCtx, cancel := context.WithTimeout(ctx, remoteAllocationTimeout)
			defer cancel() // nolint: errcheck
			grpcClient := pb.NewAllocationServiceClient(conn)
			return grpcClient.Allocate(allocationCtx, request)
		},
	}

	ah.baseLogger = runtime.NewLoggerWithType(ah)
	// Wire Kubernetes Event recording for allocation outcomes.
	eventBroadcaster := record.NewBroadcaster()
	eventBroadcaster.StartLogging(ah.baseLogger.Debugf)
	eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
	ah.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "GameServerAllocation-Allocator"})

	return ah
}

// Run initiates the listeners.
169 func (c *Allocator) Run(ctx context.Context) error { 170 if err := c.Sync(ctx); err != nil { 171 return err 172 } 173 174 if err := c.allocationCache.Run(ctx); err != nil { 175 return err 176 } 177 178 // workers and logic for batching allocations 179 go c.ListenAndAllocate(ctx, maxBatchQueue) 180 181 return nil 182 } 183 184 // Sync waits for cache to sync 185 func (c *Allocator) Sync(ctx context.Context) error { 186 c.baseLogger.Debug("Wait for Allocator cache sync") 187 if !cache.WaitForCacheSync(ctx.Done(), c.secretSynced, c.allocationPolicySynced) { 188 return errors.New("failed to wait for caches to sync") 189 } 190 return nil 191 } 192 193 // Allocate CRDHandler for allocating a gameserver. 194 func (c *Allocator) Allocate(ctx context.Context, gsa *allocationv1.GameServerAllocation) (out k8sruntime.Object, err error) { 195 latency := c.newMetrics(ctx) 196 defer func() { 197 if err != nil { 198 latency.setError() 199 } 200 latency.record() 201 }() 202 latency.setRequest(gsa) 203 204 // server side validation 205 if errs := gsa.Validate(); len(errs) > 0 { 206 kind := runtimeschema.GroupKind{ 207 Group: allocationv1.SchemeGroupVersion.Group, 208 Kind: "GameServerAllocation", 209 } 210 statusErr := k8serrors.NewInvalid(kind, gsa.Name, errs) 211 s := &statusErr.ErrStatus 212 var gvks []schema.GroupVersionKind 213 gvks, _, err := apiserver.Scheme.ObjectKinds(s) 214 if err != nil { 215 return nil, errors.Wrap(err, "could not find objectkinds for status") 216 } 217 218 c.loggerForGameServerAllocation(gsa).Debug("GameServerAllocation is invalid") 219 s.TypeMeta = metav1.TypeMeta{Kind: gvks[0].Kind, APIVersion: gvks[0].Version} 220 return s, nil 221 } 222 223 // Convert gsa required and preferred fields to selectors field 224 gsa.Converter() 225 226 // If multi-cluster setting is enabled, allocate base on the multicluster allocation policy. 
227 if gsa.Spec.MultiClusterSetting.Enabled { 228 out, err = c.applyMultiClusterAllocation(ctx, gsa) 229 } else { 230 out, err = c.allocateFromLocalCluster(ctx, gsa) 231 } 232 233 if err != nil { 234 c.loggerForGameServerAllocation(gsa).WithError(err).Error("allocation failed") 235 return nil, err 236 } 237 latency.setResponse(out) 238 239 return out, nil 240 } 241 242 func (c *Allocator) loggerForGameServerAllocationKey(key string) *logrus.Entry { 243 return logfields.AugmentLogEntry(c.baseLogger, logfields.GameServerAllocationKey, key) 244 } 245 246 func (c *Allocator) loggerForGameServerAllocation(gsa *allocationv1.GameServerAllocation) *logrus.Entry { 247 gsaName := "NilGameServerAllocation" 248 if gsa != nil { 249 gsaName = gsa.Namespace + "/" + gsa.Name 250 } 251 return c.loggerForGameServerAllocationKey(gsaName).WithField("gsa", gsa) 252 } 253 254 // allocateFromLocalCluster allocates gameservers from the local cluster. 255 // Registers number of times we retried before getting a success allocation 256 func (c *Allocator) allocateFromLocalCluster(ctx context.Context, gsa *allocationv1.GameServerAllocation) (*allocationv1.GameServerAllocation, error) { 257 var gs *agonesv1.GameServer 258 retry := c.newMetrics(ctx) 259 retryCount := 0 260 err := Retry(allocationRetry, func() error { 261 var err error 262 gs, err = c.allocate(ctx, gsa) 263 retryCount++ 264 265 if err != nil { 266 c.loggerForGameServerAllocation(gsa).WithError(err).Warn("Failed to Allocated. 
Retrying...") 267 } else { 268 retry.recordAllocationRetrySuccess(ctx, retryCount) 269 } 270 return err 271 }) 272 273 if err != nil && err != ErrNoGameServer && err != ErrConflictInGameServerSelection { 274 c.allocationCache.Resync() 275 return nil, err 276 } 277 278 switch err { 279 case ErrNoGameServer, ErrGameServerUpdateConflict: 280 gsa.Status.State = allocationv1.GameServerAllocationUnAllocated 281 case ErrConflictInGameServerSelection: 282 gsa.Status.State = allocationv1.GameServerAllocationContention 283 default: 284 gsa.ObjectMeta.Name = gs.ObjectMeta.Name 285 gsa.Status.State = allocationv1.GameServerAllocationAllocated 286 gsa.Status.GameServerName = gs.ObjectMeta.Name 287 gsa.Status.Ports = gs.Status.Ports 288 gsa.Status.Address = gs.Status.Address 289 gsa.Status.Addresses = append(gsa.Status.Addresses, gs.Status.Addresses...) 290 gsa.Status.NodeName = gs.Status.NodeName 291 gsa.Status.Source = localAllocationSource 292 gsa.Status.Metadata = &allocationv1.GameServerMetadata{ 293 Labels: gs.ObjectMeta.Labels, 294 Annotations: gs.ObjectMeta.Annotations, 295 } 296 if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 297 gsa.Status.Counters = gs.Status.Counters 298 gsa.Status.Lists = gs.Status.Lists 299 } 300 } 301 302 c.loggerForGameServerAllocation(gsa).Debug("Game server allocation") 303 return gsa, nil 304 } 305 306 // applyMultiClusterAllocation retrieves allocation policies and iterate on policies. 307 // Then allocate gameservers from local or remote cluster accordingly. 
func (c *Allocator) applyMultiClusterAllocation(ctx context.Context, gsa *allocationv1.GameServerAllocation) (result *allocationv1.GameServerAllocation, err error) {
	// Match every policy unless the GSA carries an explicit policy selector.
	selector := labels.Everything()
	if len(gsa.Spec.MultiClusterSetting.PolicySelector.MatchLabels)+len(gsa.Spec.MultiClusterSetting.PolicySelector.MatchExpressions) != 0 {
		selector, err = metav1.LabelSelectorAsSelector(&gsa.Spec.MultiClusterSetting.PolicySelector)
		if err != nil {
			return nil, err
		}
	}

	policies, err := c.allocationPolicyLister.GameServerAllocationPolicies(gsa.ObjectMeta.Namespace).List(selector)
	if err != nil {
		return nil, err
	} else if len(policies) == 0 {
		return nil, errors.New("no multi-cluster allocation policy is specified")
	}

	// Walk the connection info produced from the policies (ordering determined
	// by NewConnectionInfoIterator — not visible here) until an allocation
	// succeeds, falling through to the next cluster on failure.
	it := multiclusterv1.NewConnectionInfoIterator(policies)
	for {
		connectionInfo := it.Next()
		if connectionInfo == nil {
			break
		}
		if len(connectionInfo.AllocationEndpoints) == 0 {
			// Change the namespace to the policy namespace and allocate locally
			gsaCopy := gsa
			if gsa.Namespace != connectionInfo.Namespace {
				gsaCopy = gsa.DeepCopy()
				gsaCopy.Namespace = connectionInfo.Namespace
			}
			result, err = c.allocateFromLocalCluster(ctx, gsaCopy)
			if err != nil {
				c.loggerForGameServerAllocation(gsaCopy).WithError(err).Error("self-allocation failed")
			}
		} else {
			result, err = c.allocateFromRemoteCluster(gsa, connectionInfo, gsa.ObjectMeta.Namespace)
			if err != nil {
				c.loggerForGameServerAllocation(gsa).WithField("allocConnInfo", connectionInfo).WithError(err).Error("remote-allocation failed")
			}
		}
		// Only an actual Allocated result short-circuits; UnAllocated/Contention
		// results fall through to the next policy.
		if result != nil && result.Status.State == allocationv1.GameServerAllocationAllocated {
			return result, nil
		}
	}
	// Exhausted all policies: return the last result/error (possibly nil/nil).
	return result, err
}

// allocateFromRemoteCluster allocates gameservers from a remote cluster by making
// an http call to allocation service in that cluster.
func (c *Allocator) allocateFromRemoteCluster(gsa *allocationv1.GameServerAllocation, connectionInfo *multiclusterv1.ClusterConnectionInfo, namespace string) (*allocationv1.GameServerAllocation, error) {
	var allocationResponse *pb.AllocationResponse

	// TODO: cache the client
	dialOpts, err := c.createRemoteClusterDialOption(namespace, connectionInfo)
	if err != nil {
		return nil, err
	}

	// Forward the game server allocation request to another cluster,
	// and disable multicluster settings to avoid the target cluster
	// forward the allocation request again.
	request := converters.ConvertGSAToAllocationRequest(gsa)
	request.MultiClusterSetting.Enabled = false
	request.Namespace = connectionInfo.Namespace

	// The total timeout bounds all retries across all endpoints of this cluster.
	ctx, cancel := context.WithTimeout(context.Background(), c.totalRemoteAllocationTimeout)
	defer cancel() // nolint: errcheck
	// Retry on remote call failures.
	var endpoint string
	err = Retry(remoteAllocationRetry, func() error {
		for i, ip := range connectionInfo.AllocationEndpoints {
			// Bail out (without further retries) once the total timeout expires.
			select {
			case <-ctx.Done():
				return ErrTotalTimeoutExceeded
			default:
			}
			endpoint = addPort(ip)
			c.loggerForGameServerAllocationKey("remote-allocation").WithField("request", request).WithField("endpoint", endpoint).Debug("forwarding allocation request")
			allocationResponse, err = c.remoteAllocationCallback(ctx, endpoint, dialOpts, request)
			if err != nil {
				c.baseLogger.WithError(err).Error("remote allocation failed")
				// If there are multiple endpoints for the allocator connection and the current one is
				// failing, try the next endpoint. Otherwise, return the error response.
				if (i + 1) < len(connectionInfo.AllocationEndpoints) {
					// If there is a server error try a different endpoint
					c.loggerForGameServerAllocationKey("remote-allocation").WithField("request", request).WithError(err).WithField("endpoint", endpoint).Warn("The request failed. Trying next endpoint")
					continue
				}
				return err
			}
			// Success on this endpoint; stop iterating.
			break
		}

		return nil
	})

	return converters.ConvertAllocationResponseToGSA(allocationResponse, endpoint), err
}

// createRemoteClusterDialOption creates a grpc client dial option with proper certs to make a remote call.
func (c *Allocator) createRemoteClusterDialOption(namespace string, connectionInfo *multiclusterv1.ClusterConnectionInfo) (grpc.DialOption, error) {
	// TODO: disableMTLS works for a single cluster; still need to address how the flag interacts with multi-cluster authentication.
	clientCert, clientKey, caCert, err := c.getClientCertificates(namespace, connectionInfo.SecretName)
	if err != nil {
		return nil, err
	}
	if clientCert == nil || clientKey == nil {
		return nil, fmt.Errorf("missing client certificate key pair in secret %s", connectionInfo.SecretName)
	}

	// Load client cert
	cert, err := tls.X509KeyPair(clientCert, clientKey)
	if err != nil {
		return nil, err
	}

	tlsConfig := &tls.Config{Certificates: []tls.Certificate{cert}}
	if len(connectionInfo.ServerCA) != 0 || len(caCert) != 0 {
		// Load CA cert, if provided and trust the server certificate.
		// This is required for self-signed certs.
		tlsConfig.RootCAs = x509.NewCertPool()
		// ServerCA must be valid PEM if supplied; a parse failure is an error.
		if len(connectionInfo.ServerCA) != 0 && !tlsConfig.RootCAs.AppendCertsFromPEM(connectionInfo.ServerCA) {
			return nil, errors.New("only PEM format is accepted for server CA")
		}
		// Add client CA cert, which can be used instead of / as well as the specified ServerCA cert
		if len(caCert) != 0 {
			// Best effort: a malformed ca.crt is ignored rather than failing the dial.
			_ = tlsConfig.RootCAs.AppendCertsFromPEM(caCert)
		}
	}

	return grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)), nil
}

// getClientCertificates returns the client certificates and CA cert for remote allocation cluster call
func (c *Allocator) getClientCertificates(namespace, secretName string) (clientCert, clientKey, caCert []byte, err error) {
	secret, err := c.secretLister.Secrets(namespace).Get(secretName)
	if err != nil {
		return nil, nil, nil, err
	}
	if secret == nil || len(secret.Data) == 0 {
		return nil, nil, nil, fmt.Errorf("secret %s does not have data", secretName)
	}

	// Create http client using cert
	// Missing keys yield nil slices; the caller validates cert/key presence.
	clientCert = secret.Data[secretClientCertName]
	clientKey = secret.Data[secretClientKeyName]
	caCert = secret.Data[secretCACertName]
	return clientCert, clientKey, caCert, nil
}

// allocate allocated a GameServer from a given GameServerAllocation
// this sets up allocation through a batch process.
func (c *Allocator) allocate(ctx context.Context, gsa *allocationv1.GameServerAllocation) (*agonesv1.GameServer, error) {
	// creates an allocation request.
This contains the requested GameServerAllocation, as well as the 461 // channel we expect the return values to come back for this GameServerAllocation 462 req := request{gsa: gsa, response: make(chan response)} 463 464 // this pushes the request into the batching process 465 c.pendingRequests <- req 466 467 select { 468 case res := <-req.response: // wait for the batch to be completed 469 return res.gs, res.err 470 case <-ctx.Done(): 471 return nil, ErrTotalTimeoutExceeded 472 } 473 } 474 475 // ListenAndAllocate is a blocking function that runs in a loop 476 // looking at c.requestBatches for batches of requests that are coming through. 477 func (c *Allocator) ListenAndAllocate(ctx context.Context, updateWorkerCount int) { 478 // setup workers for allocation updates. Push response values into 479 // this queue for concurrent updating of GameServers to Allocated 480 updateQueue := c.allocationUpdateWorkers(ctx, updateWorkerCount) 481 482 // Batch processing strategy: 483 // We constantly loop around the below for loop. If nothing is found in c.pendingRequests, we move to 484 // default: which will wait for half a second, to allow for some requests to backup in c.pendingRequests, 485 // providing us with a batch of Allocation requests in that channel 486 487 // Once we have 1 or more requests in c.pendingRequests (which is buffered to 100), we can start the batch process. 488 489 // Assuming this is the first run (either entirely, or for a while), list will be nil, and therefore the first 490 // thing that will be done is retrieving the Ready GameServers and sorting them for this batch via 491 // c.listSortedReadyGameServers(). This list is maintained as we flow through the batch. 492 493 // We then use findGameServerForAllocation to loop around the sorted list of Ready GameServers to look for matches 494 // against the preferred and required selectors of the GameServerAllocation. 
If there is an error, we immediately 495 // pass that straight back to the response channel for this GameServerAllocation. 496 497 // Assuming we find a matching GameServer to our GameServerAllocation, we remove it from the list and the backing 498 // Ready GameServer cache. 499 500 // We then pass the found GameServers into the updateQueue, where there are updateWorkerCount number of goroutines 501 // waiting to concurrently attempt to move the GameServer into an Allocated state, and return the result to 502 // GameServerAllocation request's response channel 503 504 // Then we get the next item off the batch (c.pendingRequests), and do this all over again, but this time, we have 505 // an already sorted list of GameServers, so we only need to find one that matches our GameServerAllocation 506 // selectors, and put it into updateQueue 507 508 // The tracking of requestCount >= maxBatchBeforeRefresh is necessary, because without it, at high enough load 509 // the list of GameServers that we are using to allocate would never get refreshed (list = nil) with an updated 510 // list of Ready GameServers, and you would eventually never be able to Allocate anything as long as the load 511 // continued. 512 513 var list []*agonesv1.GameServer 514 var sortKey uint64 515 requestCount := 0 516 517 for { 518 select { 519 case req := <-c.pendingRequests: 520 // refresh the list after every 100 allocations made in a single batch 521 if requestCount >= maxBatchBeforeRefresh { 522 list = nil 523 requestCount = 0 524 } 525 526 if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) { 527 // SortKey returns the sorting values (list of Priorities) as a determinstic key. 528 // In case gsa.Spec.Priorities is nil this will still return a sortKey. 529 // In case of error this will return 0 for the sortKey. 
530 newSortKey, err := req.gsa.SortKey() 531 if err != nil { 532 c.baseLogger.WithError(err).Warn("error getting sortKey for GameServerAllocationSpec", err) 533 } 534 // Set sortKey if this is the first request, or the previous request errored on creating a sortKey. 535 if sortKey == uint64(0) { 536 sortKey = newSortKey 537 } 538 539 if newSortKey != sortKey { 540 sortKey = newSortKey 541 list = nil 542 requestCount = 0 543 } 544 } 545 546 requestCount++ 547 548 if list == nil { 549 if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) || req.gsa.Spec.Scheduling == apis.Packed { 550 list = c.allocationCache.ListSortedGameServers(req.gsa) 551 } else { 552 // If FeatureCountsAndLists and Scheduling == Distributed, sort game servers by Priorities 553 list = c.allocationCache.ListSortedGameServersPriorities(req.gsa) 554 } 555 } 556 557 gs, index, err := findGameServerForAllocation(req.gsa, list) 558 if err != nil { 559 req.response <- response{request: req, gs: nil, err: err} 560 continue 561 } 562 // remove the game server that has been allocated 563 list = append(list[:index], list[index+1:]...) 
564 565 if err := c.allocationCache.RemoveGameServer(gs); err != nil { 566 // this seems unlikely, but lets handle it just in case 567 req.response <- response{request: req, gs: nil, err: err} 568 continue 569 } 570 571 updateQueue <- response{request: req, gs: gs.DeepCopy(), err: nil} 572 573 case <-ctx.Done(): 574 return 575 default: 576 list = nil 577 requestCount = 0 578 // slow down cpu churn, and allow items to batch 579 time.Sleep(c.batchWaitTime) 580 } 581 } 582 } 583 584 // allocationUpdateWorkers runs workerCount number of goroutines as workers to 585 // process each GameServer passed into the returned updateQueue 586 // Each worker will concurrently attempt to move the GameServer to an Allocated 587 // state and then respond to the initial request's response channel with the 588 // details of that update 589 func (c *Allocator) allocationUpdateWorkers(ctx context.Context, workerCount int) chan<- response { 590 updateQueue := make(chan response) 591 592 for i := 0; i < workerCount; i++ { 593 go func() { 594 for { 595 select { 596 case res := <-updateQueue: 597 gs, err := c.applyAllocationToGameServer(ctx, res.request.gsa.Spec.MetaPatch, res.gs, res.request.gsa) 598 if err != nil { 599 if !k8serrors.IsConflict(errors.Cause(err)) { 600 // since we could not allocate, we should put it back 601 // but not if it's a conflict, as the cache is no longer up to date, and 602 // we should wait for it to get updated with fresh info. 603 c.allocationCache.AddGameServer(gs) 604 } 605 res.err = ErrGameServerUpdateConflict 606 } else { 607 // put the GameServer back into the cache, so it's immediately around for re-allocation 608 c.allocationCache.AddGameServer(gs) 609 res.gs = gs 610 } 611 612 res.request.response <- res 613 case <-ctx.Done(): 614 return 615 } 616 } 617 }() 618 } 619 620 return updateQueue 621 } 622 623 // applyAllocationToGameServer patches the inputted GameServer with the allocation metadata changes, and updates it to the Allocated State. 
// Returns the updated GameServer.
func (c *Allocator) applyAllocationToGameServer(ctx context.Context, mp allocationv1.MetaPatch, gs *agonesv1.GameServer, gsa *allocationv1.GameServerAllocation) (*agonesv1.GameServer, error) {
	// patch ObjectMeta labels
	if mp.Labels != nil {
		if gs.ObjectMeta.Labels == nil {
			gs.ObjectMeta.Labels = make(map[string]string, len(mp.Labels))
		}
		for key, value := range mp.Labels {
			gs.ObjectMeta.Labels[key] = value
		}
	}

	if gs.ObjectMeta.Annotations == nil {
		gs.ObjectMeta.Annotations = make(map[string]string, len(mp.Annotations))
	}
	// apply annotations patch
	for key, value := range mp.Annotations {
		gs.ObjectMeta.Annotations[key] = value
	}

	// add last allocated, so it always gets updated, even if it is already Allocated
	ts, err := time.Now().MarshalText()
	if err != nil {
		return nil, err
	}
	gs.ObjectMeta.Annotations[LastAllocatedAnnotationKey] = string(ts)
	gs.Status.State = agonesv1.GameServerStateAllocated

	// perform any Counter or List actions, collecting (joining) per-action
	// errors rather than aborting the allocation.
	var counterErrors error
	var listErrors error
	if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
		if gsa.Spec.Counters != nil {
			for counter, ca := range gsa.Spec.Counters {
				counterErrors = goErrors.Join(counterErrors, ca.CounterActions(counter, gs))
			}
		}
		if gsa.Spec.Lists != nil {
			for list, la := range gsa.Spec.Lists {
				listErrors = goErrors.Join(listErrors, la.ListActions(list, gs))
			}
		}
	}

	gsUpdate, updateErr := c.gameServerGetter.GameServers(gs.ObjectMeta.Namespace).Update(ctx, gs, metav1.UpdateOptions{})
	if updateErr != nil {
		return gsUpdate, updateErr
	}

	// If successful Update record any Counter or List action errors as a warning
	if counterErrors != nil {
		c.recorder.Event(gsUpdate, corev1.EventTypeWarning, "CounterActionError", counterErrors.Error())
	}
	if listErrors != nil {
		c.recorder.Event(gsUpdate, corev1.EventTypeWarning, "ListActionError", listErrors.Error())
	}
	c.recorder.Event(gsUpdate, corev1.EventTypeNormal, string(gsUpdate.Status.State), "Allocated")

	return gsUpdate, updateErr
}

// Retry retries fn based on backoff provided.
// nil, ErrNoGameServer, ErrTotalTimeoutExceeded and gRPC ResourceExhausted all
// stop the retry loop immediately; any other error is retried until the backoff
// is exhausted, in which case that last error is returned.
func Retry(backoff wait.Backoff, fn func() error) error {
	var lastConflictErr error
	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
		err := fn()

		// A ResourceExhausted status from a remote allocator is terminal.
		st, ok := status.FromError(err)
		if ok {
			if st.Code() == codes.ResourceExhausted {
				return true, err
			}
		}

		switch {
		case err == nil:
			return true, nil
		case err == ErrNoGameServer:
			return true, err
		case err == ErrTotalTimeoutExceeded:
			return true, err
		default:
			lastConflictErr = err
			return false, nil
		}
	})
	// When the backoff runs out, report the last retryable error instead of the
	// generic wait-timeout error.
	if wait.Interrupted(err) {
		err = lastConflictErr
	}
	return err
}

// newMetrics creates a new gsa latency recorder.
func (c *Allocator) newMetrics(ctx context.Context) *metrics {
	ctx, err := tag.New(ctx, latencyTags...)
	if err != nil {
		// Tagging failure is non-fatal; metrics are still recorded untagged.
		c.baseLogger.WithError(err).Warn("failed to tag latency recorder.")
	}
	return &metrics{
		ctx:              ctx,
		gameServerLister: c.allocationCache.gameServerLister,
		logger:           c.baseLogger,
		start:            time.Now(),
	}
}

// addPort appends the default allocator port to ip when it does not already
// contain a ":" (i.e. no explicit port).
func addPort(ip string) string {
	if strings.Contains(ip, ":") {
		return ip
	}
	return fmt.Sprintf("%s:%s", ip, allocatorPort)
}