agones.dev/agones@v1.53.0/pkg/gameserverallocations/allocator.go

agones.dev/agones@v1.53.0/pkg/gameserverallocations/allocator.go (about)

     1  // Copyright 2019 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package gameserverallocations
    16  
import (
	"context"
	"crypto/tls"
	"crypto/x509"
	goErrors "errors"
	"fmt"
	"net"
	"strings"
	"time"

	"agones.dev/agones/pkg/allocation/converters"
	pb "agones.dev/agones/pkg/allocation/go"
	"agones.dev/agones/pkg/apis"
	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
	allocationv1 "agones.dev/agones/pkg/apis/allocation/v1"
	multiclusterv1 "agones.dev/agones/pkg/apis/multicluster/v1"
	getterv1 "agones.dev/agones/pkg/client/clientset/versioned/typed/agones/v1"
	multiclusterinformerv1 "agones.dev/agones/pkg/client/informers/externalversions/multicluster/v1"
	multiclusterlisterv1 "agones.dev/agones/pkg/client/listers/multicluster/v1"
	"agones.dev/agones/pkg/util/apiserver"
	"agones.dev/agones/pkg/util/logfields"
	"agones.dev/agones/pkg/util/runtime"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"go.opencensus.io/tag"
	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/credentials"
	"google.golang.org/grpc/status"
	corev1 "k8s.io/api/core/v1"
	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/labels"
	k8sruntime "k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	runtimeschema "k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/util/wait"
	informercorev1 "k8s.io/client-go/informers/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
	corev1lister "k8s.io/client-go/listers/core/v1"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/record"
)
    61  
// Sentinel errors used by the allocation flow. They are compared by identity
// (==) in Retry and allocateFromLocalCluster, so they must be returned unwrapped
// for those comparisons to match.
var (
	// ErrNoGameServer is returned when there are no Allocatable GameServers
	// available. Retry stops retrying as soon as it sees this error, and a local
	// allocation reports it as the UnAllocated state instead of failing the call.
	ErrNoGameServer = errors.New("Could not find an Allocatable GameServer")
	// ErrConflictInGameServerSelection is returned when the candidate gameserver already allocated.
	// Retry treats it like any other retryable error; a local allocation that still ends
	// with it is reported as the Contention state.
	ErrConflictInGameServerSelection = errors.New("The Gameserver was already allocated")
	// ErrTotalTimeoutExceeded is used to signal that total retry timeout has been exceeded and no additional retries should be made.
	// It is a gRPC status error carrying codes.DeadlineExceeded, and is also what allocate
	// returns when the caller's context is done before a response arrives.
	ErrTotalTimeoutExceeded = status.Errorf(codes.DeadlineExceeded, "remote allocation total timeout exceeded")
)
    71  
const (
	// LastAllocatedAnnotationKey is a GameServer annotation containing an RFC 3339 formatted
	// timestamp of the most recent allocation.
	LastAllocatedAnnotationKey = "agones.dev/last-allocated"

	// secretClientCertName, secretClientKeyName and secretCACertName are the keys read
	// from the Secret's Data map when building the TLS configuration for a remote cluster.
	// allocatorPort is the port addPort appends to an endpoint.
	// maxBatchQueue is the buffer size of the pending request channel, and is also the
	// number of update workers started by Run.
	// maxBatchBeforeRefresh is how many requests are served from one sorted GameServer
	// list before ListenAndAllocate discards it and fetches a fresh one.
	// localAllocationSource is the Status.Source value set on allocations served locally.
	secretClientCertName  = "tls.crt"
	secretClientKeyName   = "tls.key"
	secretCACertName      = "ca.crt"
	allocatorPort         = "443"
	maxBatchQueue         = 100
	maxBatchBeforeRefresh = 100
	localAllocationSource = "local"
)
    85  
// allocationRetry is the backoff used by allocateFromLocalCluster: up to 5 attempts,
// starting at 10ms with a factor of 1.0 (no growth) and 10% jitter.
var allocationRetry = wait.Backoff{
	Steps:    5,
	Duration: 10 * time.Millisecond,
	Factor:   1.0,
	Jitter:   0.1,
}

// remoteAllocationRetry is the backoff used by allocateFromRemoteCluster: up to 7
// attempts, starting at 100ms and doubling each step. The retries are additionally
// bounded by the Allocator's totalRemoteAllocationTimeout.
var remoteAllocationRetry = wait.Backoff{
	Steps:    7,
	Duration: 100 * time.Millisecond,
	Factor:   2.0,
}
    98  
// Allocator handles game server allocation, either against the local cluster
// (through a batching loop fed by pendingRequests) or by forwarding the request
// to a remote cluster selected by a multi-cluster allocation policy.
type Allocator struct {
	baseLogger                   *logrus.Entry
	allocationPolicyLister       multiclusterlisterv1.GameServerAllocationPolicyLister
	allocationPolicySynced       cache.InformerSynced
	secretLister                 corev1lister.SecretLister
	secretSynced                 cache.InformerSynced
	gameServerGetter             getterv1.GameServersGetter
	recorder                     record.EventRecorder
	// pendingRequests is the queue consumed by ListenAndAllocate; it is buffered to maxBatchQueue.
	pendingRequests              chan request
	allocationCache              *AllocationCache
	// remoteAllocationCallback performs a single remote allocation call against one endpoint.
	remoteAllocationCallback     func(context.Context, string, grpc.DialOption, *pb.AllocationRequest) (*pb.AllocationResponse, error)
	// remoteAllocationTimeout bounds a single remote call; totalRemoteAllocationTimeout
	// bounds all retries of one remote allocation.
	remoteAllocationTimeout      time.Duration
	totalRemoteAllocationTimeout time.Duration
	// batchWaitTime is how long ListenAndAllocate waits when no requests are pending.
	batchWaitTime                time.Duration
}
   115  
// request is an async request for allocation.
// gsa is the GameServerAllocation to satisfy, and response is the channel on
// which the single matching response for this request is delivered.
type request struct {
	gsa      *allocationv1.GameServerAllocation
	response chan response
}
   121  
// response is an async response for a matching request.
// It carries the originating request, the selected (or updated) GameServer,
// and the error, if any, that prevented the allocation.
type response struct {
	request request
	gs      *agonesv1.GameServer
	err     error
}
   128  
   129  // NewAllocator creates an instance of Allocator
   130  func NewAllocator(policyInformer multiclusterinformerv1.GameServerAllocationPolicyInformer, secretInformer informercorev1.SecretInformer, gameServerGetter getterv1.GameServersGetter,
   131  	kubeClient kubernetes.Interface, allocationCache *AllocationCache, remoteAllocationTimeout time.Duration, totalRemoteAllocationTimeout time.Duration, batchWaitTime time.Duration) *Allocator {
   132  	ah := &Allocator{
   133  		pendingRequests:              make(chan request, maxBatchQueue),
   134  		allocationPolicyLister:       policyInformer.Lister(),
   135  		allocationPolicySynced:       policyInformer.Informer().HasSynced,
   136  		secretLister:                 secretInformer.Lister(),
   137  		secretSynced:                 secretInformer.Informer().HasSynced,
   138  		gameServerGetter:             gameServerGetter,
   139  		allocationCache:              allocationCache,
   140  		batchWaitTime:                batchWaitTime,
   141  		remoteAllocationTimeout:      remoteAllocationTimeout,
   142  		totalRemoteAllocationTimeout: totalRemoteAllocationTimeout,
   143  		remoteAllocationCallback: func(ctx context.Context, endpoint string, dialOpts grpc.DialOption, request *pb.AllocationRequest) (*pb.AllocationResponse, error) {
   144  			conn, err := grpc.NewClient(endpoint, dialOpts)
   145  			if err != nil {
   146  				return nil, err
   147  			}
   148  			defer conn.Close() // nolint: errcheck
   149  
   150  			allocationCtx, cancel := context.WithTimeout(ctx, remoteAllocationTimeout)
   151  			defer cancel() // nolint: errcheck
   152  			grpcClient := pb.NewAllocationServiceClient(conn)
   153  			return grpcClient.Allocate(allocationCtx, request)
   154  		},
   155  	}
   156  
   157  	ah.baseLogger = runtime.NewLoggerWithType(ah)
   158  	eventBroadcaster := record.NewBroadcaster()
   159  	eventBroadcaster.StartLogging(ah.baseLogger.Debugf)
   160  	eventBroadcaster.StartRecordingToSink(&typedcorev1.EventSinkImpl{Interface: kubeClient.CoreV1().Events("")})
   161  	ah.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, corev1.EventSource{Component: "GameServerAllocation-Allocator"})
   162  
   163  	return ah
   164  }
   165  
   166  // Run initiates the listeners.
   167  func (c *Allocator) Run(ctx context.Context) error {
   168  	if err := c.Sync(ctx); err != nil {
   169  		return err
   170  	}
   171  
   172  	if err := c.allocationCache.Run(ctx); err != nil {
   173  		return err
   174  	}
   175  
   176  	// workers and logic for batching allocations
   177  	go c.ListenAndAllocate(ctx, maxBatchQueue)
   178  
   179  	return nil
   180  }
   181  
   182  // Sync waits for cache to sync
   183  func (c *Allocator) Sync(ctx context.Context) error {
   184  	c.baseLogger.Debug("Wait for Allocator cache sync")
   185  	if !cache.WaitForCacheSync(ctx.Done(), c.secretSynced, c.allocationPolicySynced) {
   186  		return errors.New("failed to wait for caches to sync")
   187  	}
   188  	return nil
   189  }
   190  
   191  // Allocate CRDHandler for allocating a gameserver.
   192  func (c *Allocator) Allocate(ctx context.Context, gsa *allocationv1.GameServerAllocation) (out k8sruntime.Object, err error) {
   193  	latency := c.newMetrics(ctx)
   194  	defer func() {
   195  		if err != nil {
   196  			latency.setError()
   197  		}
   198  		latency.record()
   199  	}()
   200  	latency.setRequest(gsa)
   201  
   202  	// server side validation
   203  	if errs := gsa.Validate(); len(errs) > 0 {
   204  		kind := runtimeschema.GroupKind{
   205  			Group: allocationv1.SchemeGroupVersion.Group,
   206  			Kind:  "GameServerAllocation",
   207  		}
   208  		statusErr := k8serrors.NewInvalid(kind, gsa.Name, errs)
   209  		s := &statusErr.ErrStatus
   210  		var gvks []schema.GroupVersionKind
   211  		gvks, _, err := apiserver.Scheme.ObjectKinds(s)
   212  		if err != nil {
   213  			return nil, errors.Wrap(err, "could not find objectkinds for status")
   214  		}
   215  
   216  		c.loggerForGameServerAllocation(gsa).Debug("GameServerAllocation is invalid")
   217  		s.TypeMeta = metav1.TypeMeta{Kind: gvks[0].Kind, APIVersion: gvks[0].Version}
   218  		return s, nil
   219  	}
   220  
   221  	// Convert gsa required and preferred fields to selectors field
   222  	gsa.Converter()
   223  
   224  	// If multi-cluster setting is enabled, allocate base on the multicluster allocation policy.
   225  	if gsa.Spec.MultiClusterSetting.Enabled {
   226  		out, err = c.applyMultiClusterAllocation(ctx, gsa)
   227  	} else {
   228  		out, err = c.allocateFromLocalCluster(ctx, gsa)
   229  	}
   230  
   231  	if err != nil {
   232  		c.loggerForGameServerAllocation(gsa).WithError(err).Error("allocation failed")
   233  		return nil, err
   234  	}
   235  	latency.setResponse(out)
   236  
   237  	return out, nil
   238  }
   239  
   240  func (c *Allocator) loggerForGameServerAllocationKey(key string) *logrus.Entry {
   241  	return logfields.AugmentLogEntry(c.baseLogger, logfields.GameServerAllocationKey, key)
   242  }
   243  
   244  func (c *Allocator) loggerForGameServerAllocation(gsa *allocationv1.GameServerAllocation) *logrus.Entry {
   245  	gsaName := "NilGameServerAllocation"
   246  	if gsa != nil {
   247  		gsaName = gsa.Namespace + "/" + gsa.Name
   248  	}
   249  	return c.loggerForGameServerAllocationKey(gsaName).WithField("gsa", gsa)
   250  }
   251  
   252  // allocateFromLocalCluster allocates gameservers from the local cluster.
   253  // Registers number of times we retried before getting a success allocation
   254  func (c *Allocator) allocateFromLocalCluster(ctx context.Context, gsa *allocationv1.GameServerAllocation) (*allocationv1.GameServerAllocation, error) {
   255  	var gs *agonesv1.GameServer
   256  	retry := c.newMetrics(ctx)
   257  	retryCount := 0
   258  	err := Retry(allocationRetry, func() error {
   259  		var err error
   260  		gs, err = c.allocate(ctx, gsa)
   261  		retryCount++
   262  
   263  		if err != nil {
   264  			c.loggerForGameServerAllocation(gsa).WithError(err).Warn("Failed to Allocated. Retrying...")
   265  		} else {
   266  			retry.recordAllocationRetrySuccess(ctx, retryCount)
   267  		}
   268  		return err
   269  	})
   270  
   271  	if err != nil && err != ErrNoGameServer && err != ErrConflictInGameServerSelection {
   272  		c.allocationCache.Resync()
   273  		return nil, err
   274  	}
   275  
   276  	switch err {
   277  	case ErrNoGameServer:
   278  		gsa.Status.State = allocationv1.GameServerAllocationUnAllocated
   279  	case ErrConflictInGameServerSelection:
   280  		gsa.Status.State = allocationv1.GameServerAllocationContention
   281  	default:
   282  		gsa.ObjectMeta.Name = gs.ObjectMeta.Name
   283  		gsa.Status.State = allocationv1.GameServerAllocationAllocated
   284  		gsa.Status.GameServerName = gs.ObjectMeta.Name
   285  		gsa.Status.Ports = gs.Status.Ports
   286  		gsa.Status.Address = gs.Status.Address
   287  		gsa.Status.Addresses = append(gsa.Status.Addresses, gs.Status.Addresses...)
   288  		gsa.Status.NodeName = gs.Status.NodeName
   289  		gsa.Status.Source = localAllocationSource
   290  		gsa.Status.Metadata = &allocationv1.GameServerMetadata{
   291  			Labels:      gs.ObjectMeta.Labels,
   292  			Annotations: gs.ObjectMeta.Annotations,
   293  		}
   294  		if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   295  			gsa.Status.Counters = gs.Status.Counters
   296  			gsa.Status.Lists = gs.Status.Lists
   297  		}
   298  	}
   299  
   300  	c.loggerForGameServerAllocation(gsa).Debug("Game server allocation")
   301  	return gsa, nil
   302  }
   303  
   304  // applyMultiClusterAllocation retrieves allocation policies and iterate on policies.
   305  // Then allocate gameservers from local or remote cluster accordingly.
   306  func (c *Allocator) applyMultiClusterAllocation(ctx context.Context, gsa *allocationv1.GameServerAllocation) (result *allocationv1.GameServerAllocation, err error) {
   307  	selector := labels.Everything()
   308  	if len(gsa.Spec.MultiClusterSetting.PolicySelector.MatchLabels)+len(gsa.Spec.MultiClusterSetting.PolicySelector.MatchExpressions) != 0 {
   309  		selector, err = metav1.LabelSelectorAsSelector(&gsa.Spec.MultiClusterSetting.PolicySelector)
   310  		if err != nil {
   311  			return nil, err
   312  		}
   313  	}
   314  
   315  	policies, err := c.allocationPolicyLister.GameServerAllocationPolicies(gsa.ObjectMeta.Namespace).List(selector)
   316  	if err != nil {
   317  		return nil, err
   318  	} else if len(policies) == 0 {
   319  		return nil, errors.New("no multi-cluster allocation policy is specified")
   320  	}
   321  
   322  	it := multiclusterv1.NewConnectionInfoIterator(policies)
   323  	for {
   324  		connectionInfo := it.Next()
   325  		if connectionInfo == nil {
   326  			break
   327  		}
   328  		if len(connectionInfo.AllocationEndpoints) == 0 {
   329  			// Change the namespace to the policy namespace and allocate locally
   330  			gsaCopy := gsa
   331  			if gsa.Namespace != connectionInfo.Namespace {
   332  				gsaCopy = gsa.DeepCopy()
   333  				gsaCopy.Namespace = connectionInfo.Namespace
   334  			}
   335  			result, err = c.allocateFromLocalCluster(ctx, gsaCopy)
   336  			if err != nil {
   337  				c.loggerForGameServerAllocation(gsaCopy).WithError(err).Error("self-allocation failed")
   338  			}
   339  		} else {
   340  			result, err = c.allocateFromRemoteCluster(gsa, connectionInfo, gsa.ObjectMeta.Namespace)
   341  			if err != nil {
   342  				c.loggerForGameServerAllocation(gsa).WithField("allocConnInfo", connectionInfo).WithError(err).Error("remote-allocation failed")
   343  			}
   344  		}
   345  		if result != nil && result.Status.State == allocationv1.GameServerAllocationAllocated {
   346  			return result, nil
   347  		}
   348  	}
   349  	return result, err
   350  }
   351  
   352  // allocateFromRemoteCluster allocates gameservers from a remote cluster by making
   353  // an http call to allocation service in that cluster.
   354  func (c *Allocator) allocateFromRemoteCluster(gsa *allocationv1.GameServerAllocation, connectionInfo *multiclusterv1.ClusterConnectionInfo, namespace string) (*allocationv1.GameServerAllocation, error) {
   355  	var allocationResponse *pb.AllocationResponse
   356  
   357  	// TODO: cache the client
   358  	dialOpts, err := c.createRemoteClusterDialOption(namespace, connectionInfo)
   359  	if err != nil {
   360  		return nil, err
   361  	}
   362  
   363  	// Forward the game server allocation request to another cluster,
   364  	// and disable multicluster settings to avoid the target cluster
   365  	// forward the allocation request again.
   366  	request := converters.ConvertGSAToAllocationRequest(gsa)
   367  	request.MultiClusterSetting.Enabled = false
   368  	request.Namespace = connectionInfo.Namespace
   369  
   370  	ctx, cancel := context.WithTimeout(context.Background(), c.totalRemoteAllocationTimeout)
   371  	defer cancel() // nolint: errcheck
   372  	// Retry on remote call failures.
   373  	var endpoint string
   374  	err = Retry(remoteAllocationRetry, func() error {
   375  		for i, ip := range connectionInfo.AllocationEndpoints {
   376  			select {
   377  			case <-ctx.Done():
   378  				return ErrTotalTimeoutExceeded
   379  			default:
   380  			}
   381  			endpoint = addPort(ip)
   382  			c.loggerForGameServerAllocationKey("remote-allocation").WithField("request", request).WithField("endpoint", endpoint).Debug("forwarding allocation request")
   383  			allocationResponse, err = c.remoteAllocationCallback(ctx, endpoint, dialOpts, request)
   384  			if err != nil {
   385  				c.baseLogger.WithError(err).Error("remote allocation failed")
   386  				// If there are multiple endpoints for the allocator connection and the current one is
   387  				// failing, try the next endpoint. Otherwise, return the error response.
   388  				if (i + 1) < len(connectionInfo.AllocationEndpoints) {
   389  					// If there is a server error try a different endpoint
   390  					c.loggerForGameServerAllocationKey("remote-allocation").WithField("request", request).WithError(err).WithField("endpoint", endpoint).Warn("The request failed. Trying next endpoint")
   391  					continue
   392  				}
   393  				return err
   394  			}
   395  			break
   396  		}
   397  
   398  		return nil
   399  	})
   400  
   401  	return converters.ConvertAllocationResponseToGSA(allocationResponse, endpoint), err
   402  }
   403  
   404  // createRemoteClusterDialOption creates a grpc client dial option with proper certs to make a remote call.
   405  func (c *Allocator) createRemoteClusterDialOption(namespace string, connectionInfo *multiclusterv1.ClusterConnectionInfo) (grpc.DialOption, error) {
   406  	// TODO: disableMTLS works for a single cluster; still need to address how the flag interacts with multi-cluster authentication.
   407  	clientCert, clientKey, caCert, err := c.getClientCertificates(namespace, connectionInfo.SecretName)
   408  	if err != nil {
   409  		return nil, err
   410  	}
   411  	if clientCert == nil || clientKey == nil {
   412  		return nil, fmt.Errorf("missing client certificate key pair in secret %s", connectionInfo.SecretName)
   413  	}
   414  
   415  	// Load client cert
   416  	cert, err := tls.X509KeyPair(clientCert, clientKey)
   417  	if err != nil {
   418  		return nil, err
   419  	}
   420  
   421  	tlsConfig := &tls.Config{Certificates: []tls.Certificate{cert}}
   422  	if len(connectionInfo.ServerCA) != 0 || len(caCert) != 0 {
   423  		// Load CA cert, if provided and trust the server certificate.
   424  		// This is required for self-signed certs.
   425  		tlsConfig.RootCAs = x509.NewCertPool()
   426  		if len(connectionInfo.ServerCA) != 0 && !tlsConfig.RootCAs.AppendCertsFromPEM(connectionInfo.ServerCA) {
   427  			return nil, errors.New("only PEM format is accepted for server CA")
   428  		}
   429  		// Add client CA cert, which can be used instead of / as well as the specified ServerCA cert
   430  		if len(caCert) != 0 {
   431  			_ = tlsConfig.RootCAs.AppendCertsFromPEM(caCert)
   432  		}
   433  	}
   434  
   435  	return grpc.WithTransportCredentials(credentials.NewTLS(tlsConfig)), nil
   436  }
   437  
   438  // getClientCertificates returns the client certificates and CA cert for remote allocation cluster call
   439  func (c *Allocator) getClientCertificates(namespace, secretName string) (clientCert, clientKey, caCert []byte, err error) {
   440  	secret, err := c.secretLister.Secrets(namespace).Get(secretName)
   441  	if err != nil {
   442  		return nil, nil, nil, err
   443  	}
   444  	if secret == nil || len(secret.Data) == 0 {
   445  		return nil, nil, nil, fmt.Errorf("secret %s does not have data", secretName)
   446  	}
   447  
   448  	// Create http client using cert
   449  	clientCert = secret.Data[secretClientCertName]
   450  	clientKey = secret.Data[secretClientKeyName]
   451  	caCert = secret.Data[secretCACertName]
   452  	return clientCert, clientKey, caCert, nil
   453  }
   454  
   455  // allocate allocated a GameServer from a given GameServerAllocation
   456  // this sets up allocation through a batch process.
   457  func (c *Allocator) allocate(ctx context.Context, gsa *allocationv1.GameServerAllocation) (*agonesv1.GameServer, error) {
   458  	// creates an allocation request. This contains the requested GameServerAllocation, as well as the
   459  	// channel we expect the return values to come back for this GameServerAllocation
   460  	req := request{gsa: gsa, response: make(chan response)}
   461  
   462  	// this pushes the request into the batching process
   463  	c.pendingRequests <- req
   464  
   465  	select {
   466  	case res := <-req.response: // wait for the batch to be completed
   467  		return res.gs, res.err
   468  	case <-ctx.Done():
   469  		return nil, ErrTotalTimeoutExceeded
   470  	}
   471  }
   472  
   473  // ListenAndAllocate is a blocking function that runs in a loop
   474  // looking at c.requestBatches for batches of requests that are coming through.
   475  func (c *Allocator) ListenAndAllocate(ctx context.Context, updateWorkerCount int) {
   476  	// setup workers for allocation updates. Push response values into
   477  	// this queue for concurrent updating of GameServers to Allocated
   478  	updateQueue := c.allocationUpdateWorkers(ctx, updateWorkerCount)
   479  
   480  	// Batch processing strategy:
   481  	// We constantly loop around the below for loop. If nothing is found in c.pendingRequests, we move to
   482  	// default: which will wait for half a second, to allow for some requests to backup in c.pendingRequests,
   483  	// providing us with a batch of Allocation requests in that channel
   484  
   485  	// Once we have 1 or more requests in c.pendingRequests (which is buffered to 100), we can start the batch process.
   486  
   487  	// Assuming this is the first run (either entirely, or for a while), list will be nil, and therefore the first
   488  	// thing that will be done is retrieving the Ready GameServers and sorting them for this batch via
   489  	// c.listSortedReadyGameServers(). This list is maintained as we flow through the batch.
   490  
   491  	// We then use findGameServerForAllocation to loop around the sorted list of Ready GameServers to look for matches
   492  	// against the preferred and required selectors of the GameServerAllocation. If there is an error, we immediately
   493  	// pass that straight back to the response channel for this GameServerAllocation.
   494  
   495  	// Assuming we find a matching GameServer to our GameServerAllocation, we remove it from the list and the backing
   496  	// Ready GameServer cache.
   497  
   498  	// We then pass the found GameServers into the updateQueue, where there are updateWorkerCount number of goroutines
   499  	// waiting to concurrently attempt to move the GameServer into an Allocated state, and return the result to
   500  	// GameServerAllocation request's response channel
   501  
   502  	// Then we get the next item off the batch (c.pendingRequests), and do this all over again, but this time, we have
   503  	// an already sorted list of GameServers, so we only need to find one that matches our GameServerAllocation
   504  	// selectors, and put it into updateQueue
   505  
   506  	// The tracking of requestCount >= maxBatchBeforeRefresh is necessary, because without it, at high enough load
   507  	// the list of GameServers that we are using to allocate would never get refreshed (list = nil) with an updated
   508  	// list of Ready GameServers, and you would eventually never be able to Allocate anything as long as the load
   509  	// continued.
   510  
   511  	var list []*agonesv1.GameServer
   512  	var sortKey uint64
   513  	requestCount := 0
   514  
   515  	for {
   516  		select {
   517  		case req := <-c.pendingRequests:
   518  			// refresh the list after every 100 allocations made in a single batch
   519  			if requestCount >= maxBatchBeforeRefresh {
   520  				list = nil
   521  				requestCount = 0
   522  			}
   523  
   524  			if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   525  				// SortKey returns the sorting values (list of Priorities) as a determinstic key.
   526  				// In case gsa.Spec.Priorities is nil this will still return a sortKey.
   527  				// In case of error this will return 0 for the sortKey.
   528  				newSortKey, err := req.gsa.SortKey()
   529  				if err != nil {
   530  					c.baseLogger.WithError(err).Warn("error getting sortKey for GameServerAllocationSpec", err)
   531  				}
   532  				// Set sortKey if this is the first request, or the previous request errored on creating a sortKey.
   533  				if sortKey == uint64(0) {
   534  					sortKey = newSortKey
   535  				}
   536  
   537  				if newSortKey != sortKey {
   538  					sortKey = newSortKey
   539  					list = nil
   540  					requestCount = 0
   541  				}
   542  			}
   543  
   544  			requestCount++
   545  
   546  			if list == nil {
   547  				if !runtime.FeatureEnabled(runtime.FeatureCountsAndLists) || req.gsa.Spec.Scheduling == apis.Packed {
   548  					list = c.allocationCache.ListSortedGameServers(req.gsa)
   549  				} else {
   550  					// If FeatureCountsAndLists and Scheduling == Distributed, sort game servers by Priorities
   551  					list = c.allocationCache.ListSortedGameServersPriorities(req.gsa)
   552  				}
   553  			}
   554  
   555  			gs, index, err := findGameServerForAllocation(req.gsa, list)
   556  			if err != nil {
   557  				req.response <- response{request: req, gs: nil, err: err}
   558  				continue
   559  			}
   560  			// remove the game server that has been allocated
   561  			list = append(list[:index], list[index+1:]...)
   562  
   563  			if err := c.allocationCache.RemoveGameServer(gs); err != nil {
   564  				// this seems unlikely, but lets handle it just in case
   565  				req.response <- response{request: req, gs: nil, err: err}
   566  				continue
   567  			}
   568  
   569  			updateQueue <- response{request: req, gs: gs.DeepCopy(), err: nil}
   570  
   571  		case <-ctx.Done():
   572  			return
   573  		default:
   574  			list = nil
   575  			requestCount = 0
   576  			// slow down cpu churn, and allow items to batch
   577  			time.Sleep(c.batchWaitTime)
   578  		}
   579  	}
   580  }
   581  
   582  // allocationUpdateWorkers runs workerCount number of goroutines as workers to
   583  // process each GameServer passed into the returned updateQueue
   584  // Each worker will concurrently attempt to move the GameServer to an Allocated
   585  // state and then respond to the initial request's response channel with the
   586  // details of that update
   587  func (c *Allocator) allocationUpdateWorkers(ctx context.Context, workerCount int) chan<- response {
   588  	updateQueue := make(chan response)
   589  
   590  	for i := 0; i < workerCount; i++ {
   591  		go func() {
   592  			for {
   593  				select {
   594  				case res := <-updateQueue:
   595  					gs, err := c.applyAllocationToGameServer(ctx, res.request.gsa.Spec.MetaPatch, res.gs, res.request.gsa)
   596  					if err != nil {
   597  						if !k8serrors.IsConflict(errors.Cause(err)) {
   598  							// since we could not allocate, we should put it back
   599  							// but not if it's a conflict, as the cache is no longer up to date, and
   600  							// we should wait for it to get updated with fresh info.
   601  							c.allocationCache.AddGameServer(gs)
   602  						}
   603  						res.err = errors.Wrap(err, "error updating allocated gameserver")
   604  					} else {
   605  						// put the GameServer back into the cache, so it's immediately around for re-allocation
   606  						c.allocationCache.AddGameServer(gs)
   607  						res.gs = gs
   608  					}
   609  
   610  					res.request.response <- res
   611  				case <-ctx.Done():
   612  					return
   613  				}
   614  			}
   615  		}()
   616  	}
   617  
   618  	return updateQueue
   619  }
   620  
   621  // applyAllocationToGameServer patches the inputted GameServer with the allocation metadata changes, and updates it to the Allocated State.
   622  // Returns the updated GameServer.
   623  func (c *Allocator) applyAllocationToGameServer(ctx context.Context, mp allocationv1.MetaPatch, gs *agonesv1.GameServer, gsa *allocationv1.GameServerAllocation) (*agonesv1.GameServer, error) {
   624  	// patch ObjectMeta labels
   625  	if mp.Labels != nil {
   626  		if gs.ObjectMeta.Labels == nil {
   627  			gs.ObjectMeta.Labels = make(map[string]string, len(mp.Labels))
   628  		}
   629  		for key, value := range mp.Labels {
   630  			gs.ObjectMeta.Labels[key] = value
   631  		}
   632  	}
   633  
   634  	if gs.ObjectMeta.Annotations == nil {
   635  		gs.ObjectMeta.Annotations = make(map[string]string, len(mp.Annotations))
   636  	}
   637  	// apply annotations patch
   638  	for key, value := range mp.Annotations {
   639  		gs.ObjectMeta.Annotations[key] = value
   640  	}
   641  
   642  	// add last allocated, so it always gets updated, even if it is already Allocated
   643  	ts, err := time.Now().MarshalText()
   644  	if err != nil {
   645  		return nil, err
   646  	}
   647  	gs.ObjectMeta.Annotations[LastAllocatedAnnotationKey] = string(ts)
   648  	gs.Status.State = agonesv1.GameServerStateAllocated
   649  
   650  	// perfom any Counter or List actions
   651  	var counterErrors error
   652  	var listErrors error
   653  	if runtime.FeatureEnabled(runtime.FeatureCountsAndLists) {
   654  		if gsa.Spec.Counters != nil {
   655  			for counter, ca := range gsa.Spec.Counters {
   656  				counterErrors = goErrors.Join(counterErrors, ca.CounterActions(counter, gs))
   657  			}
   658  		}
   659  		if gsa.Spec.Lists != nil {
   660  			for list, la := range gsa.Spec.Lists {
   661  				listErrors = goErrors.Join(listErrors, la.ListActions(list, gs))
   662  			}
   663  		}
   664  	}
   665  
   666  	gsUpdate, updateErr := c.gameServerGetter.GameServers(gs.ObjectMeta.Namespace).Update(ctx, gs, metav1.UpdateOptions{})
   667  	if updateErr != nil {
   668  		return gsUpdate, updateErr
   669  	}
   670  
   671  	// If successful Update record any Counter or List action errors as a warning
   672  	if counterErrors != nil {
   673  		c.recorder.Event(gsUpdate, corev1.EventTypeWarning, "CounterActionError", counterErrors.Error())
   674  	}
   675  	if listErrors != nil {
   676  		c.recorder.Event(gsUpdate, corev1.EventTypeWarning, "ListActionError", listErrors.Error())
   677  	}
   678  	c.recorder.Event(gsUpdate, corev1.EventTypeNormal, string(gsUpdate.Status.State), "Allocated")
   679  
   680  	return gsUpdate, updateErr
   681  }
   682  
   683  // Retry retries fn based on backoff provided.
   684  func Retry(backoff wait.Backoff, fn func() error) error {
   685  	var lastConflictErr error
   686  	err := wait.ExponentialBackoff(backoff, func() (bool, error) {
   687  		err := fn()
   688  
   689  		st, ok := status.FromError(err)
   690  		if ok {
   691  			if st.Code() == codes.ResourceExhausted {
   692  				return true, err
   693  			}
   694  		}
   695  
   696  		switch {
   697  		case err == nil:
   698  			return true, nil
   699  		case err == ErrNoGameServer:
   700  			return true, err
   701  		case err == ErrTotalTimeoutExceeded:
   702  			return true, err
   703  		default:
   704  			lastConflictErr = err
   705  			return false, nil
   706  		}
   707  	})
   708  	if wait.Interrupted(err) {
   709  		err = lastConflictErr
   710  	}
   711  	return err
   712  }
   713  
   714  // newMetrics creates a new gsa latency recorder.
   715  func (c *Allocator) newMetrics(ctx context.Context) *metrics {
   716  	ctx, err := tag.New(ctx, latencyTags...)
   717  	if err != nil {
   718  		c.baseLogger.WithError(err).Warn("failed to tag latency recorder.")
   719  	}
   720  	return &metrics{
   721  		ctx:              ctx,
   722  		gameServerLister: c.allocationCache.gameServerLister,
   723  		logger:           c.baseLogger,
   724  		start:            time.Now(),
   725  	}
   726  }
   727  
   728  func addPort(ip string) string {
   729  	if strings.Contains(ip, ":") {
   730  		return ip
   731  	}
   732  	return fmt.Sprintf("%s:%s", ip, allocatorPort)
   733  }