github.phpd.cn/cilium/cilium@v1.6.12/pkg/aws/eni/node_manager.go

// Copyright 2019 Authors of Cilium
// Copyright 2017 Lyft, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package eni

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/cilium/cilium/pkg/aws/types"
	"github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/trigger"

	"golang.org/x/sync/semaphore"
)

type k8sAPI interface {
	Update(origResource, newResource *v2.CiliumNode) (*v2.CiliumNode, error)
	UpdateStatus(origResource, newResource *v2.CiliumNode) (*v2.CiliumNode, error)
	Get(name string) (*v2.CiliumNode, error)
}

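// nodeManagerAPI gives the node manager read access to the cached ENI and
// subnet state, a way to force a resync of that cache, and a way to push
// updated ENI information back into the cache via UpdateENI. Within this
// package it is expected to be satisfied by the instances cache.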
type nodeManagerAPI interface {
	GetENI(instanceID string, index int) *v2.ENI
	GetENIs(instanceID string) []*v2.ENI
	GetSubnet(subnetID string) *types.Subnet
	GetSubnets() types.SubnetMap
	FindSubnetByTags(vpcID, availabilityZone string, required types.Tags) *types.Subnet
	Resync() time.Time
	UpdateENI(instanceID string, eni *v2.ENI)
}

type ec2API interface {
	CreateNetworkInterface(toAllocate int64, subnetID, desc string, groups []string) (string, *v2.ENI, error)
	DeleteNetworkInterface(eniID string) error
	AttachNetworkInterface(index int64, instanceID, eniID string) (string, error)
	ModifyNetworkInterface(eniID, attachmentID string, deleteOnTermination bool) error
	AssignPrivateIpAddresses(eniID string, addresses int64) error
	UnassignPrivateIpAddresses(eniID string, addresses []string) error
}

type metricsAPI interface {
	IncENIAllocationAttempt(status, subnetID string)
	AddIPAllocation(subnetID string, allocated int64)
	AddIPRelease(subnetID string, released int64)
	SetAllocatedIPs(typ string, allocated int)
	SetAvailableENIs(available int)
	SetAvailableIPsPerSubnet(subnetID string, availabilityZone string, available int)
	SetNodes(category string, nodes int)
	IncResyncCount()
	PoolMaintainerTrigger() trigger.MetricsObserver
	K8sSyncTrigger() trigger.MetricsObserver
	ResyncTrigger() trigger.MetricsObserver
}
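
// The k8sAPI, ec2API and metricsAPI interfaces above are deliberately narrow:
// they cover only the calls the node manager actually makes, which keeps
// NodeManager testable against simple fakes instead of a live apiserver, the
// EC2 API or a metrics registry. A minimal fake could, for instance, embed
// the interface it stubs (fakeEC2 is a hypothetical name, not part of this
// package):
//
//	type fakeEC2 struct{ ec2API } // any method that is not overridden panics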

// nodeMap is a mapping of node names to ENI nodes
type nodeMap map[string]*Node

// NodeManager manages all nodes with ENIs
type NodeManager struct {
	mutex           lock.RWMutex
	nodes           nodeMap
	instancesAPI    nodeManagerAPI
	ec2API          ec2API
	k8sAPI          k8sAPI
	metricsAPI      metricsAPI
	resyncTrigger   *trigger.Trigger
	parallelWorkers int64
}

// NewNodeManager returns a new NodeManager
func NewNodeManager(instancesAPI nodeManagerAPI, ec2API ec2API, k8sAPI k8sAPI, metrics metricsAPI, parallelWorkers int64) (*NodeManager, error) {
	if parallelWorkers < 1 {
		parallelWorkers = 1
	}

	mngr := &NodeManager{
		nodes:           nodeMap{},
		instancesAPI:    instancesAPI,
		ec2API:          ec2API,
		k8sAPI:          k8sAPI,
		metricsAPI:      metrics,
		parallelWorkers: parallelWorkers,
	}

	resyncTrigger, err := trigger.NewTrigger(trigger.Parameters{
		Name:            "eni-node-manager-resync",
		MinInterval:     10 * time.Millisecond,
		MetricsObserver: metrics.ResyncTrigger(),
		TriggerFunc: func(reasons []string) {
			syncTime := instancesAPI.Resync()
			mngr.Resync(syncTime)
		},
	})
	if err != nil {
		return nil, fmt.Errorf("unable to initialize resync trigger: %s", err)
	}

	mngr.resyncTrigger = resyncTrigger

	return mngr, nil
}
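
// A minimal sketch of how a NodeManager might be wired together and primed
// with a first resync (instances, ec2Client, k8sClient and metrics are
// hypothetical values that satisfy the interfaces above):
//
//	mngr, err := NewNodeManager(instances, ec2Client, k8sClient, metrics, 4)
//	if err != nil {
//		log.WithError(err).Fatal("Unable to create ENI node manager")
//	}
//	mngr.Resync(instances.Resync())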

// GetNames returns the list of all node names
func (n *NodeManager) GetNames() (allNodeNames []string) {
	n.mutex.RLock()
	defer n.mutex.RUnlock()

	allNodeNames = make([]string, 0, len(n.nodes))

	for name := range n.nodes {
		allNodeNames = append(allNodeNames, name)
	}

	return
}

// Update is called whenever a CiliumNode resource has been updated in the
// Kubernetes apiserver
func (n *NodeManager) Update(resource *v2.CiliumNode) bool {
	n.mutex.Lock()
	node, ok := n.nodes[resource.Name]
	if !ok {
		node = &Node{
			name:    resource.Name,
			manager: n,
		}

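		// Each new node gets two rate-limited triggers: a pool maintainer
		// that allocates or releases ENI IPs to reach the configured
		// watermarks, and a k8s-sync trigger that writes the resulting state
		// back to the CiliumNode resource. The triggers coalesce bursts of
		// requests and enforce the MinInterval between runs.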
		poolMaintainer, err := trigger.NewTrigger(trigger.Parameters{
			Name:            fmt.Sprintf("eni-pool-maintainer-%s", resource.Name),
			MinInterval:     10 * time.Millisecond,
			MetricsObserver: n.metricsAPI.PoolMaintainerTrigger(),
			TriggerFunc: func(reasons []string) {
				if err := node.MaintainIpPool(); err != nil {
					node.logger().WithError(err).Warning("Unable to maintain ip pool of node")
				}
			},
		})
		if err != nil {
			node.logger().WithError(err).Error("Unable to create pool-maintainer trigger")
			n.mutex.Unlock()
			return false
		}

		k8sSync, err := trigger.NewTrigger(trigger.Parameters{
			Name:            fmt.Sprintf("eni-node-k8s-sync-%s", resource.Name),
			MinInterval:     10 * time.Millisecond,
			MetricsObserver: n.metricsAPI.K8sSyncTrigger(),
			TriggerFunc: func(reasons []string) {
				node.SyncToAPIServer()
			},
		})
		if err != nil {
			poolMaintainer.Shutdown()
			node.logger().WithError(err).Error("Unable to create k8s-sync trigger")
			n.mutex.Unlock()
			return false
		}

		node.poolMaintainer = poolMaintainer
		node.k8sSync = k8sSync
		n.nodes[node.name] = node

		log.WithField(fieldName, resource.Name).Info("Discovered new CiliumNode custom resource")
	}
	n.mutex.Unlock()

	return node.updatedResource(resource)
}

// Delete is called after a CiliumNode resource has been deleted via the
// Kubernetes apiserver
func (n *NodeManager) Delete(nodeName string) {
	n.mutex.Lock()
	if node, ok := n.nodes[nodeName]; ok {
		if node.poolMaintainer != nil {
			node.poolMaintainer.Shutdown()
		}
		if node.k8sSync != nil {
			node.k8sSync.Shutdown()
		}
	}

	delete(n.nodes, nodeName)
	n.mutex.Unlock()
}

// Get returns the node with the given name
func (n *NodeManager) Get(nodeName string) *Node {
	n.mutex.RLock()
	node := n.nodes[nodeName]
	n.mutex.RUnlock()
	return node
}

// GetNodesByIPWatermark returns all nodes, sorted in descending order by the
// number of addresses that must be allocated or released to reach the
// configured watermarks. The number of addresses to be released is expressed
// as a negative value, so nodes with an IP deficit are resolved first.
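// For example, hypothetical per-node values of +8, +2, -4 and -1 would sort
// to [+8, +2, -4, -1]: the largest deficit first, then, among the nodes that
// need to release addresses, the largest excess first.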
func (n *NodeManager) GetNodesByIPWatermark() []*Node {
	n.mutex.RLock()
	list := make([]*Node, len(n.nodes))
	index := 0
	for _, node := range n.nodes {
		list[index] = node
		index++
	}
	n.mutex.RUnlock()

	sort.Slice(list, func(i, j int) bool {
		valuei := list[i].getNeededAddresses()
		valuej := list[j].getNeededAddresses()
		// The number of addresses to be released is negative; among such
		// nodes, those with the largest excess are released first.
		if valuei < 0 && valuej < 0 {
			return valuei < valuej
		}
		return valuei > valuej
	})

	return list
}

type resyncStats struct {
	mutex               lock.Mutex
	totalUsed           int
	totalAvailable      int
	totalNeeded         int
	remainingInterfaces int
	nodes               int
	nodesAtCapacity     int
	nodesInDeficit      int
}

func (n *NodeManager) resyncNode(node *Node, stats *resyncStats, syncTime time.Time) {
	node.mutex.Lock()

	if syncTime.After(node.resyncNeeded) {
		node.loggerLocked().Debug("Resetting resyncNeeded")
		node.resyncNeeded = time.Time{}
	}

	node.recalculateLocked()
	allocationNeeded := node.allocationNeeded()
	releaseNeeded := node.releaseNeeded()
	if allocationNeeded || releaseNeeded {
		node.waitingForPoolMaintenance = true
		node.poolMaintainer.Trigger()
	}

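	// Fold this node's usage into the shared resync statistics; stats.mutex
	// is taken while node.mutex is still held so that the snapshot of this
	// node's counters is consistent.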
	stats.mutex.Lock()
	stats.totalUsed += node.stats.usedIPs
	availableOnNode := node.stats.availableIPs - node.stats.usedIPs
	stats.totalAvailable += availableOnNode
	stats.totalNeeded += node.stats.neededIPs
	stats.remainingInterfaces += node.stats.remainingInterfaces
	stats.nodes++

	if allocationNeeded {
		stats.nodesInDeficit++
	}

	if node.stats.remainingInterfaces == 0 && availableOnNode == 0 {
		stats.nodesAtCapacity++
	}

	for subnetID, subnet := range n.instancesAPI.GetSubnets() {
		n.metricsAPI.SetAvailableIPsPerSubnet(subnetID, subnet.AvailabilityZone, subnet.AvailableAddresses)
	}

	stats.mutex.Unlock()
	node.mutex.Unlock()

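	// Queue a sync of the updated state to the CiliumNode resource; the
	// trigger runs SyncToAPIServer asynchronously and is only fired after
	// node.mutex has been released.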
	node.k8sSync.Trigger()
}

// Resync attends to all nodes and resolves IP deficits. The order of
// attendance is defined by the number of IPs needed to reach the configured
// watermarks. Any updates to the node resource are synchronized to the
// Kubernetes apiserver.
func (n *NodeManager) Resync(syncTime time.Time) {
	stats := resyncStats{}
	sem := semaphore.NewWeighted(n.parallelWorkers)

	for _, node := range n.GetNodesByIPWatermark() {
		err := sem.Acquire(context.TODO(), 1)
		if err != nil {
			continue
		}
		go func(node *Node, stats *resyncStats) {
			n.resyncNode(node, stats, syncTime)
			sem.Release(1)
		}(node, &stats)
	}

	// Acquiring the full semaphore weight requires all goroutines to have
	// completed and therefore blocks until every node has been resynced
	sem.Acquire(context.TODO(), n.parallelWorkers)

	n.metricsAPI.SetAllocatedIPs("used", stats.totalUsed)
	n.metricsAPI.SetAllocatedIPs("available", stats.totalAvailable)
	n.metricsAPI.SetAllocatedIPs("needed", stats.totalNeeded)
	n.metricsAPI.SetAvailableENIs(stats.remainingInterfaces)
	n.metricsAPI.SetNodes("total", stats.nodes)
	n.metricsAPI.SetNodes("in-deficit", stats.nodesInDeficit)
	n.metricsAPI.SetNodes("at-capacity", stats.nodesAtCapacity)
}
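
// The semaphore above doubles as a concurrency limiter and a completion
// barrier: at most parallelWorkers resyncNode calls run concurrently, and the
// final Acquire of the full weight cannot succeed until every worker has
// released its slot. A stripped-down sketch of the same idiom (items, Item
// and work are hypothetical):
//
//	sem := semaphore.NewWeighted(workers)
//	for _, item := range items {
//		if err := sem.Acquire(context.TODO(), 1); err != nil {
//			continue
//		}
//		go func(item Item) {
//			defer sem.Release(1)
//			work(item)
//		}(item)
//	}
//	sem.Acquire(context.TODO(), workers) // barrier: returns once all workers are done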