agones.dev/agones@v1.54.0/pkg/portallocator/portallocator.go (about)

     1  // Copyright 2018 Google LLC All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package portallocator
    16  
    17  import (
    18  	"context"
    19  	"sort"
    20  	"sync"
    21  
    22  	agonesv1 "agones.dev/agones/pkg/apis/agones/v1"
    23  	"agones.dev/agones/pkg/client/informers/externalversions"
    24  	listerv1 "agones.dev/agones/pkg/client/listers/agones/v1"
    25  	"agones.dev/agones/pkg/util/runtime"
    26  	"github.com/pkg/errors"
    27  	"github.com/sirupsen/logrus"
    28  	corev1 "k8s.io/api/core/v1"
    29  	"k8s.io/apimachinery/pkg/labels"
    30  	"k8s.io/apimachinery/pkg/types"
    31  	"k8s.io/client-go/informers"
    32  	corelisterv1 "k8s.io/client-go/listers/core/v1"
    33  	"k8s.io/client-go/tools/cache"
    34  )
    35  
// Interface manages the dynamic port allocation strategy.
//
// The portallocator does not currently support mixing static portAllocations (or any pods with defined HostPort)
// within the dynamic port range other than the ones it coordinates.
type Interface interface {
	// Run sets up the current state of port allocations and
	// starts tracking Pod and Node changes. It blocks until the
	// informer caches have synced, returning an error if they do not.
	Run(ctx context.Context) error

	// Allocate assigns a HostPort to every Dynamic or Passthrough port on
	// the GameServer and returns the (mutated) GameServer.
	Allocate(gs *agonesv1.GameServer) *agonesv1.GameServer

	// DeAllocate marks the given ports as no longer allocated
	DeAllocate(gs *agonesv1.GameServer)
}
    51  
// PortRange is a named port range.
// Both bounds are inclusive: every port from MinPort through MaxPort
// is available for allocation.
type PortRange struct {
	// MinPort is the minimum port that can be allocated in this range.
	MinPort int32
	// MaxPort is the maximum port that can be allocated in this range.
	MaxPort int32
}
    59  
// portAllocator implements Interface by delegating to one
// portRangeAllocator per named port range.
type portAllocator struct {
	allocators []*portRangeAllocator
}
    63  
    64  // New returns a new dynamic port allocator. minPort and maxPort are the
    65  // top and bottom portAllocations that can be allocated in the range for
    66  // the game servers.
    67  func New(portRanges map[string]PortRange,
    68  	kubeInformerFactory informers.SharedInformerFactory,
    69  	agonesInformerFactory externalversions.SharedInformerFactory) Interface {
    70  	return newAllocator(portRanges, kubeInformerFactory, agonesInformerFactory)
    71  }
    72  
    73  func newAllocator(portRanges map[string]PortRange,
    74  	kubeInformerFactory informers.SharedInformerFactory,
    75  	agonesInformerFactory externalversions.SharedInformerFactory) *portAllocator {
    76  	allocs := make([]*portRangeAllocator, 0, len(portRanges))
    77  	for name, pr := range portRanges {
    78  		allocs = append(allocs, newRangeAllocator(name, pr.MinPort, pr.MaxPort, kubeInformerFactory, agonesInformerFactory))
    79  	}
    80  
    81  	return &portAllocator{
    82  		allocators: allocs,
    83  	}
    84  }
    85  
    86  // Run sets up the current state of port allocations and starts tracking Pod and Node changes.
    87  func (pa *portAllocator) Run(ctx context.Context) error {
    88  	for _, a := range pa.allocators {
    89  		if err := a.Run(ctx); err != nil {
    90  			return err
    91  		}
    92  	}
    93  	return nil
    94  }
    95  
    96  // Allocate assigns a port to the GameServer and returns it.
    97  func (pa *portAllocator) Allocate(gs *agonesv1.GameServer) *agonesv1.GameServer {
    98  	for _, a := range pa.allocators {
    99  		gs = a.Allocate(gs)
   100  	}
   101  	return gs
   102  }
   103  
   104  // DeAllocate marks the given ports as no longer allocated.
   105  func (pa *portAllocator) DeAllocate(gs *agonesv1.GameServer) {
   106  	for _, a := range pa.allocators {
   107  		a.DeAllocate(gs)
   108  	}
   109  }
   110  
// A set of port allocations for a node: maps a port number to whether it
// is currently taken (true) or available (false).
type portAllocation map[int32]bool
   113  
// portRangeAllocator tracks dynamic port allocation for a single named
// port range across all nodes in the cluster.
//
//nolint:govet // ignore fieldalignment, singleton
type portRangeAllocator struct {
	logger *logrus.Entry
	// name is the port range this allocator manages (the PortRange map key).
	name string
	// mutex guards portAllocations and gameServerRegistry.
	mutex sync.RWMutex
	// portAllocations holds one port→taken table per node (actual, or
	// presumed-to-come for unscheduled GameServers).
	portAllocations []portAllocation
	// gameServerRegistry records the UIDs of GameServers this allocator
	// has assigned ports to, so DeAllocate can skip foreign GameServers.
	gameServerRegistry map[types.UID]bool
	// minPort and maxPort bound the inclusive range of allocatable ports.
	minPort            int32
	maxPort            int32
	gameServerSynced   cache.InformerSynced
	gameServerLister   listerv1.GameServerLister
	gameServerInformer cache.SharedIndexInformer
	nodeSynced         cache.InformerSynced
	nodeLister         corelisterv1.NodeLister
	nodeInformer       cache.SharedIndexInformer
}
   130  
   131  func newRangeAllocator(name string, minPort, maxPort int32,
   132  	kubeInformerFactory informers.SharedInformerFactory,
   133  	agonesInformerFactory externalversions.SharedInformerFactory) *portRangeAllocator {
   134  	v1 := kubeInformerFactory.Core().V1()
   135  	nodes := v1.Nodes()
   136  	gameServers := agonesInformerFactory.Agones().V1().GameServers()
   137  
   138  	pa := &portRangeAllocator{
   139  		name:               name,
   140  		mutex:              sync.RWMutex{},
   141  		minPort:            minPort,
   142  		maxPort:            maxPort,
   143  		gameServerRegistry: map[types.UID]bool{},
   144  		gameServerSynced:   gameServers.Informer().HasSynced,
   145  		gameServerLister:   gameServers.Lister(),
   146  		gameServerInformer: gameServers.Informer(),
   147  		nodeLister:         nodes.Lister(),
   148  		nodeInformer:       nodes.Informer(),
   149  		nodeSynced:         nodes.Informer().HasSynced,
   150  	}
   151  	pa.logger = runtime.NewLoggerWithType(pa).WithField("range", name)
   152  
   153  	_, _ = pa.gameServerInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
   154  		DeleteFunc: pa.syncDeleteGameServer,
   155  	})
   156  
   157  	pa.logger.WithField("minPort", minPort).WithField("maxPort", maxPort).Debug("Starting")
   158  	return pa
   159  }
   160  
   161  // Run sets up the current state of port allocations and
   162  // starts tracking Pod and Node changes
   163  func (pa *portRangeAllocator) Run(ctx context.Context) error {
   164  	pa.logger.Debug("Running")
   165  
   166  	if !cache.WaitForCacheSync(ctx.Done(), pa.gameServerSynced, pa.nodeSynced) {
   167  		return errors.New("failed to wait for caches to sync")
   168  	}
   169  
   170  	// on run, let's make sure we start with a perfect slate straight away
   171  	if err := pa.syncAll(); err != nil {
   172  		return errors.Wrap(err, "error performing initial sync")
   173  	}
   174  
   175  	return nil
   176  }
   177  
// Allocate assigns a port to the GameServer and returns it.
// It finds one free port per Dynamic/Passthrough port the GameServer asks
// for in this range, marks those ports as taken, and records the
// GameServer in the registry. If the current node tables cannot satisfy
// the request, a fresh (presumed future) node's worth of ports is added
// and allocation is retried.
func (pa *portRangeAllocator) Allocate(gs *agonesv1.GameServer) *agonesv1.GameServer {
	pa.mutex.Lock()
	defer pa.mutex.Unlock()

	// pn pairs a node's allocation table with a free port found in it, so
	// the port can later be marked taken in exactly that table.
	type pn struct {
		pa   portAllocation
		port int32
	}

	// we only want this to be called inside the mutex lock
	// so let's define the function here so it can never be called elsewhere.
	// Also the return gives an escape from the double loop
	findOpenPorts := func(amount int) []pn {
		var ports []pn
		if amount <= 0 {
			return ports
		}
		for _, n := range pa.portAllocations {
			for p, taken := range n {
				if !taken {
					ports = append(ports, pn{pa: n, port: p})
					// only allocate as many ports as are asked for by the GameServer
					if len(ports) == amount {
						return ports
					}
				}
			}
		}
		return ports
	}

	// this allows us to do recursion, within the mutex lock
	var allocate func(gs *agonesv1.GameServer) *agonesv1.GameServer
	allocate = func(gs *agonesv1.GameServer) *agonesv1.GameServer {
		// count how many ports this GameServer needs from this allocator:
		// only Dynamic and Passthrough policies consume dynamic ports.
		var amount int
		if runtime.FeatureEnabled(runtime.FeaturePortRanges) {
			// with PortRanges on, only count ports assigned to this named range
			amount = gs.CountPortsForRange(pa.name, func(policy agonesv1.PortPolicy) bool {
				return policy == agonesv1.Dynamic || policy == agonesv1.Passthrough
			})
		} else {
			amount = gs.CountPorts(func(policy agonesv1.PortPolicy) bool {
				return policy == agonesv1.Dynamic || policy == agonesv1.Passthrough
			})
		}
		allocations := findOpenPorts(amount)

		// only proceed if we found every port requested; otherwise fall
		// through to grow the pool and retry.
		if len(allocations) == amount {
			pa.gameServerRegistry[gs.ObjectMeta.UID] = true

			var extraPorts []agonesv1.GameServerPort

			for i, p := range gs.Spec.Ports {
				if p.PortPolicy != agonesv1.Dynamic && p.PortPolicy != agonesv1.Passthrough {
					continue
				}
				// skip ports belonging to a different named range when PortRanges is on
				if runtime.FeatureEnabled(runtime.FeaturePortRanges) && p.Range != pa.name {
					continue
				}
				// pop off allocation
				var a pn
				a, allocations = allocations[0], allocations[1:]
				a.pa[a.port] = true
				gs.Spec.Ports[i].HostPort = a.port

				// Passthrough exposes the same port inside the container
				if p.PortPolicy == agonesv1.Passthrough {
					gs.Spec.Ports[i].ContainerPort = a.port
				}

				// create a port for TCP when using TCPUDP protocol
				if p.Protocol == agonesv1.ProtocolTCPUDP {
					var duplicate = p
					duplicate.HostPort = a.port

					if duplicate.PortPolicy == agonesv1.Passthrough {
						duplicate.ContainerPort = a.port
					}

					extraPorts = append(extraPorts, duplicate)

					// the original entry becomes the TCP half of the pair
					gs.Spec.Ports[i].Name = p.Name + "-tcp"
					gs.Spec.Ports[i].Protocol = corev1.ProtocolTCP
				}
			}

			// create the UDP port when using TCPUDP protocol
			for _, p := range extraPorts {
				p.Name += "-udp"
				p.Protocol = corev1.ProtocolUDP
				gs.Spec.Ports = append(gs.Spec.Ports, p)
			}

			return gs
		}

		// if we get here, we ran out of ports. Add a node, and try again.
		// this is important, because to autoscale scale up, we create GameServers that
		// can't be scheduled on the current set of nodes, so we need to be sure
		// there are always ports available to be allocated.
		pa.portAllocations = append(pa.portAllocations, pa.newPortAllocation())

		return allocate(gs)
	}

	return allocate(gs)
}
   284  
   285  // DeAllocate marks the given ports as no longer allocated
   286  func (pa *portRangeAllocator) DeAllocate(gs *agonesv1.GameServer) {
   287  	// skip if it wasn't previously allocated
   288  
   289  	found := func() bool {
   290  		pa.mutex.RLock()
   291  		defer pa.mutex.RUnlock()
   292  		if _, ok := pa.gameServerRegistry[gs.ObjectMeta.UID]; ok {
   293  			return true
   294  		}
   295  		return false
   296  	}
   297  
   298  	if !found() {
   299  		pa.logger.WithField("gs", gs.ObjectMeta.Name).
   300  			Debug("Did not allocate this GameServer. Ignoring for DeAllocation")
   301  		return
   302  	}
   303  
   304  	pa.mutex.Lock()
   305  	defer pa.mutex.Unlock()
   306  	for _, p := range gs.Spec.Ports {
   307  		if p.HostPort < pa.minPort || p.HostPort > pa.maxPort {
   308  			continue
   309  		}
   310  		pa.portAllocations = setPortAllocation(p.HostPort, pa.portAllocations, false)
   311  	}
   312  
   313  	delete(pa.gameServerRegistry, gs.ObjectMeta.UID)
   314  }
   315  
   316  // syncDeleteGameServer when a GameServer Pod is deleted
   317  // make the HostPort available
   318  func (pa *portRangeAllocator) syncDeleteGameServer(object interface{}) {
   319  	if gs, ok := object.(*agonesv1.GameServer); ok {
   320  		pa.logger.WithField("gs", gs).Debug("Syncing deleted GameServer")
   321  		pa.DeAllocate(gs)
   322  	}
   323  }
   324  
   325  // syncAll syncs the pod, node and gameserver caches then
   326  // traverses all Nodes in the cluster and all looks at GameServers
   327  // and Terminating Pods values make sure those
   328  // portAllocations are marked as taken.
   329  // Locks the mutex while doing this.
   330  // This is basically a stop the world Garbage Collection on port allocations, but it only happens on startup.
   331  func (pa *portRangeAllocator) syncAll() error {
   332  	pa.mutex.Lock()
   333  	defer pa.mutex.Unlock()
   334  
   335  	pa.logger.Debug("Resetting Port Allocation")
   336  
   337  	nodes, err := pa.nodeLister.List(labels.Everything())
   338  	if err != nil {
   339  		return errors.Wrap(err, "error listing all nodes")
   340  	}
   341  
   342  	gameservers, err := pa.gameServerLister.List(labels.Everything())
   343  	if err != nil {
   344  		return errors.Wrapf(err, "error listing all GameServers")
   345  	}
   346  
   347  	gsRegistry := map[types.UID]bool{}
   348  
   349  	// place to put GameServer port allocations that are not ready yet/after the ready state
   350  	allocations, nonReadyNodesPorts := pa.registerExistingGameServerPorts(gameservers, nodes, gsRegistry)
   351  
   352  	// close off the port on the first node you find
   353  	// we actually don't mind what node it is, since we only care
   354  	// that there is a port open *somewhere* as the default scheduler
   355  	// will re-route for us based on HostPort allocation
   356  	for _, p := range nonReadyNodesPorts {
   357  		allocations = setPortAllocation(p, allocations, true)
   358  	}
   359  
   360  	pa.portAllocations = allocations
   361  	pa.gameServerRegistry = gsRegistry
   362  
   363  	return nil
   364  }
   365  
   366  // registerExistingGameServerPorts registers the gameservers against gsRegistry and the ports against nodePorts.
   367  // and returns an ordered list of portAllocations per cluster nodes, and an array of
   368  // any GameServers allocated a port, but not yet assigned a Node will returned as an array of port values.
   369  func (pa *portRangeAllocator) registerExistingGameServerPorts(gameservers []*agonesv1.GameServer, nodes []*corev1.Node, gsRegistry map[types.UID]bool) ([]portAllocation, []int32) {
   370  	// setup blank port values
   371  	nodePortAllocation := pa.nodePortAllocation(nodes)
   372  	nodePortCount := make(map[string]int64, len(nodes))
   373  	for _, n := range nodes {
   374  		nodePortCount[n.ObjectMeta.Name] = 0
   375  	}
   376  
   377  	var nonReadyNodesPorts []int32
   378  
   379  	for _, gs := range gameservers {
   380  		for _, p := range gs.Spec.Ports {
   381  			if p.PortPolicy != agonesv1.Dynamic && p.PortPolicy != agonesv1.Passthrough {
   382  				continue
   383  			}
   384  			// if the port exists in our range, it should be marked as taken.
   385  			if p.HostPort < pa.minPort || p.HostPort > pa.maxPort {
   386  				continue
   387  			}
   388  			gsRegistry[gs.ObjectMeta.UID] = true
   389  
   390  			// if the node doesn't exist, it's likely unscheduled
   391  			_, ok := nodePortAllocation[gs.Status.NodeName]
   392  			if gs.Status.NodeName != "" && ok {
   393  				nodePortAllocation[gs.Status.NodeName][p.HostPort] = true
   394  				nodePortCount[gs.Status.NodeName]++
   395  			} else if p.HostPort != 0 {
   396  				nonReadyNodesPorts = append(nonReadyNodesPorts, p.HostPort)
   397  			}
   398  		}
   399  	}
   400  
   401  	// make a list of the keys
   402  	keys := make([]string, 0, len(nodePortAllocation))
   403  	for k := range nodePortAllocation {
   404  		keys = append(keys, k)
   405  	}
   406  
   407  	// sort, since this is how it would have originally been allocated across the
   408  	// ordered []portAllocation
   409  	sort.Slice(keys, func(i, j int) bool {
   410  		return nodePortCount[keys[i]] > nodePortCount[keys[j]]
   411  	})
   412  
   413  	// this gives us back an ordered node list
   414  	allocations := make([]portAllocation, len(nodePortAllocation))
   415  	for i, k := range keys {
   416  		allocations[i] = nodePortAllocation[k]
   417  
   418  	}
   419  
   420  	return allocations, nonReadyNodesPorts
   421  }
   422  
   423  // nodePortAllocation returns a map of port allocations all set to being available
   424  // with a map key for each node, as well as the node registry record (since we're already looping)
   425  func (pa *portRangeAllocator) nodePortAllocation(nodes []*corev1.Node) map[string]portAllocation {
   426  	nodePorts := map[string]portAllocation{}
   427  
   428  	for _, n := range nodes {
   429  		// ignore unschedulable nodes
   430  		if !n.Spec.Unschedulable {
   431  			nodePorts[n.Name] = pa.newPortAllocation()
   432  		}
   433  	}
   434  
   435  	return nodePorts
   436  }
   437  
   438  func (pa *portRangeAllocator) newPortAllocation() portAllocation {
   439  	p := make(portAllocation, (pa.maxPort-pa.minPort)+1)
   440  	for i := pa.minPort; i <= pa.maxPort; i++ {
   441  		p[i] = false
   442  	}
   443  
   444  	return p
   445  }
   446  
   447  // setPortAllocation takes a port from an all
   448  func setPortAllocation(port int32, allocations []portAllocation, taken bool) []portAllocation {
   449  	for _, np := range allocations {
   450  		if np[port] != taken {
   451  			np[port] = taken
   452  			break
   453  		}
   454  	}
   455  	return allocations
   456  }