github.phpd.cn/cilium/cilium@v1.6.12/pkg/aws/eni/node_manager.go (about) 1 // Copyright 2019 Authors of Cilium 2 // Copyright 2017 Lyft, Inc. 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package eni 17 18 import ( 19 "context" 20 "fmt" 21 "sort" 22 "time" 23 24 "github.com/cilium/cilium/pkg/aws/types" 25 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2" 26 "github.com/cilium/cilium/pkg/lock" 27 "github.com/cilium/cilium/pkg/trigger" 28 29 "golang.org/x/sync/semaphore" 30 ) 31 32 type k8sAPI interface { 33 Update(origResource, newResource *v2.CiliumNode) (*v2.CiliumNode, error) 34 UpdateStatus(origResource, newResource *v2.CiliumNode) (*v2.CiliumNode, error) 35 Get(name string) (*v2.CiliumNode, error) 36 } 37 38 type nodeManagerAPI interface { 39 GetENI(instanceID string, index int) *v2.ENI 40 GetENIs(instanceID string) []*v2.ENI 41 GetSubnet(subnetID string) *types.Subnet 42 GetSubnets() types.SubnetMap 43 FindSubnetByTags(vpcID, availabilityZone string, required types.Tags) *types.Subnet 44 Resync() time.Time 45 UpdateENI(instanceID string, eni *v2.ENI) 46 } 47 48 type ec2API interface { 49 CreateNetworkInterface(toAllocate int64, subnetID, desc string, groups []string) (string, *v2.ENI, error) 50 DeleteNetworkInterface(eniID string) error 51 AttachNetworkInterface(index int64, instanceID, eniID string) (string, error) 52 ModifyNetworkInterface(eniID, attachmentID string, deleteOnTermination bool) error 53 AssignPrivateIpAddresses(eniID string, addresses int64) error 54 UnassignPrivateIpAddresses(eniID string, addresses []string) error 55 } 56 57 type metricsAPI interface { 58 IncENIAllocationAttempt(status, subnetID string) 59 AddIPAllocation(subnetID string, allocated int64) 60 AddIPRelease(subnetID string, released int64) 61 SetAllocatedIPs(typ string, allocated int) 62 SetAvailableENIs(available int) 63 SetAvailableIPsPerSubnet(subnetID string, availabilityZone string, available int) 64 SetNodes(category string, nodes int) 65 IncResyncCount() 66 PoolMaintainerTrigger() trigger.MetricsObserver 67 K8sSyncTrigger() trigger.MetricsObserver 68 ResyncTrigger() trigger.MetricsObserver 69 } 70 71 // nodeMap is a mapping of node names to ENI nodes 72 type nodeMap map[string]*Node 73 74 // NodeManager manages all nodes with ENIs 75 type NodeManager struct { 76 mutex lock.RWMutex 77 nodes nodeMap 78 instancesAPI nodeManagerAPI 79 ec2API ec2API 80 k8sAPI k8sAPI 81 metricsAPI metricsAPI 82 resyncTrigger *trigger.Trigger 83 parallelWorkers int64 84 } 85 86 // NewNodeManager returns a new NodeManager 87 func NewNodeManager(instancesAPI nodeManagerAPI, ec2API ec2API, k8sAPI k8sAPI, metrics metricsAPI, parallelWorkers int64) (*NodeManager, error) { 88 if parallelWorkers < 1 { 89 parallelWorkers = 1 90 } 91 92 mngr := &NodeManager{ 93 nodes: nodeMap{}, 94 instancesAPI: instancesAPI, 95 ec2API: ec2API, 96 k8sAPI: k8sAPI, 97 metricsAPI: metrics, 98 parallelWorkers: parallelWorkers, 99 } 100 101 resyncTrigger, err := trigger.NewTrigger(trigger.Parameters{ 102 Name: "eni-node-manager-resync", 103 MinInterval: 10 * time.Millisecond, 104 MetricsObserver: metrics.ResyncTrigger(), 105 TriggerFunc: func(reasons []string) { 106 syncTime := instancesAPI.Resync() 107 mngr.Resync(syncTime) 108 }, 109 }) 110 if err != nil { 111 return nil, fmt.Errorf("unable to initialize resync trigger: %s", err) 112 } 113 114 mngr.resyncTrigger = resyncTrigger 115 116 return mngr, nil 117 } 118 119 // GetNames returns the list of all node names 120 func (n *NodeManager) GetNames() (allNodeNames []string) { 121 n.mutex.RLock() 122 defer n.mutex.RUnlock() 123 124 allNodeNames = make([]string, 0, len(n.nodes)) 125 126 for name := range n.nodes { 127 allNodeNames = append(allNodeNames, name) 128 } 129 130 return 131 } 132 133 // Update is called whenever a CiliumNode resource has been updated in the 134 // Kubernetes apiserver 135 func (n *NodeManager) Update(resource *v2.CiliumNode) bool { 136 n.mutex.Lock() 137 node, ok := n.nodes[resource.Name] 138 if !ok { 139 node = &Node{ 140 name: resource.Name, 141 manager: n, 142 } 143 144 poolMaintainer, err := trigger.NewTrigger(trigger.Parameters{ 145 Name: fmt.Sprintf("eni-pool-maintainer-%s", resource.Name), 146 MinInterval: 10 * time.Millisecond, 147 MetricsObserver: n.metricsAPI.PoolMaintainerTrigger(), 148 TriggerFunc: func(reasons []string) { 149 if err := node.MaintainIpPool(); err != nil { 150 node.logger().WithError(err).Warning("Unable to maintain ip pool of node") 151 } 152 }, 153 }) 154 if err != nil { 155 node.logger().WithError(err).Error("Unable to create pool-maintainer trigger") 156 return false 157 } 158 159 k8sSync, err := trigger.NewTrigger(trigger.Parameters{ 160 Name: fmt.Sprintf("eni-node-k8s-sync-%s", resource.Name), 161 MinInterval: 10 * time.Millisecond, 162 MetricsObserver: n.metricsAPI.K8sSyncTrigger(), 163 TriggerFunc: func(reasons []string) { 164 node.SyncToAPIServer() 165 }, 166 }) 167 if err != nil { 168 poolMaintainer.Shutdown() 169 node.logger().WithError(err).Error("Unable to create k8s-sync trigger") 170 return false 171 } 172 173 node.poolMaintainer = poolMaintainer 174 node.k8sSync = k8sSync 175 n.nodes[node.name] = node 176 177 log.WithField(fieldName, resource.Name).Info("Discovered new CiliumNode custom resource") 178 } 179 n.mutex.Unlock() 180 181 return node.updatedResource(resource) 182 } 183 184 // Delete is called after a CiliumNode resource has been deleted via the 185 // Kubernetes apiserver 186 func (n *NodeManager) Delete(nodeName string) { 187 n.mutex.Lock() 188 if node, ok := n.nodes[nodeName]; ok { 189 if node.poolMaintainer != nil { 190 node.poolMaintainer.Shutdown() 191 } 192 if node.k8sSync != nil { 193 node.k8sSync.Shutdown() 194 } 195 } 196 197 delete(n.nodes, nodeName) 198 n.mutex.Unlock() 199 } 200 201 // Get returns the node with the given name 202 func (n *NodeManager) Get(nodeName string) *Node { 203 n.mutex.RLock() 204 node := n.nodes[nodeName] 205 n.mutex.RUnlock() 206 return node 207 } 208 209 // GetNodesByIPWatermark returns all nodes that require addresses to be 210 // allocated or released, sorted by the number of addresses needed to be operated 211 // in descending order. Number of addresses to be released is negative value 212 // so that nodes with IP deficit are resolved first 213 func (n *NodeManager) GetNodesByIPWatermark() []*Node { 214 n.mutex.RLock() 215 list := make([]*Node, len(n.nodes)) 216 index := 0 217 for _, node := range n.nodes { 218 list[index] = node 219 index++ 220 } 221 n.mutex.RUnlock() 222 223 sort.Slice(list, func(i, j int) bool { 224 valuei := list[i].getNeededAddresses() 225 valuej := list[j].getNeededAddresses() 226 // Number of addresses to be released is negative value, 227 // nodes with more excess addresses are released earlier 228 if valuei < 0 && valuej < 0 { 229 return valuei < valuej 230 } 231 return valuei > valuej 232 }) 233 234 return list 235 } 236 237 type resyncStats struct { 238 mutex lock.Mutex 239 totalUsed int 240 totalAvailable int 241 totalNeeded int 242 remainingInterfaces int 243 nodes int 244 nodesAtCapacity int 245 nodesInDeficit int 246 } 247 248 func (n *NodeManager) resyncNode(node *Node, stats *resyncStats, syncTime time.Time) { 249 node.mutex.Lock() 250 251 if syncTime.After(node.resyncNeeded) { 252 node.loggerLocked().Debug("Resetting resyncNeeded") 253 node.resyncNeeded = time.Time{} 254 } 255 256 node.recalculateLocked() 257 allocationNeeded := node.allocationNeeded() 258 releaseNeeded := node.releaseNeeded() 259 if allocationNeeded || releaseNeeded { 260 node.waitingForPoolMaintenance = true 261 node.poolMaintainer.Trigger() 262 } 263 264 stats.mutex.Lock() 265 stats.totalUsed += node.stats.usedIPs 266 availableOnNode := node.stats.availableIPs - node.stats.usedIPs 267 stats.totalAvailable += availableOnNode 268 stats.totalNeeded += node.stats.neededIPs 269 stats.remainingInterfaces += node.stats.remainingInterfaces 270 stats.nodes++ 271 272 if allocationNeeded { 273 stats.nodesInDeficit++ 274 } 275 276 if node.stats.remainingInterfaces == 0 && availableOnNode == 0 { 277 stats.nodesAtCapacity++ 278 } 279 280 for subnetID, subnet := range n.instancesAPI.GetSubnets() { 281 n.metricsAPI.SetAvailableIPsPerSubnet(subnetID, subnet.AvailabilityZone, subnet.AvailableAddresses) 282 } 283 284 stats.mutex.Unlock() 285 node.mutex.Unlock() 286 287 node.k8sSync.Trigger() 288 } 289 290 // Resync will attend all nodes and resolves IP deficits. The order of 291 // attendance is defined by the number of IPs needed to reach the configured 292 // watermarks. Any updates to the node resource are synchronized to the 293 // Kubernetes apiserver. 294 func (n *NodeManager) Resync(syncTime time.Time) { 295 stats := resyncStats{} 296 sem := semaphore.NewWeighted(n.parallelWorkers) 297 298 for _, node := range n.GetNodesByIPWatermark() { 299 err := sem.Acquire(context.TODO(), 1) 300 if err != nil { 301 continue 302 } 303 go func(node *Node, stats *resyncStats) { 304 n.resyncNode(node, stats, syncTime) 305 sem.Release(1) 306 }(node, &stats) 307 } 308 309 // Acquire the full semaphore, this requires all go routines to 310 // complete and thus blocks until all nodes are synced 311 sem.Acquire(context.TODO(), n.parallelWorkers) 312 313 n.metricsAPI.SetAllocatedIPs("used", stats.totalUsed) 314 n.metricsAPI.SetAllocatedIPs("available", stats.totalAvailable) 315 n.metricsAPI.SetAllocatedIPs("needed", stats.totalNeeded) 316 n.metricsAPI.SetAvailableENIs(stats.remainingInterfaces) 317 n.metricsAPI.SetNodes("total", stats.nodes) 318 n.metricsAPI.SetNodes("in-deficit", stats.nodesInDeficit) 319 n.metricsAPI.SetNodes("at-capacity", stats.nodesAtCapacity) 320 }