github.com/kubearmor/cilium@v1.6.12/pkg/node/manager/manager.go

// Copyright 2016-2019 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package manager

import (
	"math"
	"net"
	"time"

	"github.com/cilium/cilium/pkg/datapath"
	"github.com/cilium/cilium/pkg/identity"
	"github.com/cilium/cilium/pkg/ipcache"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/node"
	"github.com/cilium/cilium/pkg/node/addressing"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/source"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	baseBackgroundSyncInterval = time.Minute
)

type nodeEntry struct {
	// mutex serves two purposes:
	// 1. Serialize any direct access to the node field in this entry.
	// 2. Serialize all calls to the datapath layer for a particular node.
	//
	// See the description of Manager.mutex for more details.
	//
	// If both the nodeEntry.mutex and Manager.mutex must be held, then the
	// Manager.mutex must *always* be acquired first.
	mutex lock.Mutex
	node  node.Node
}

// Manager is the entity that manages a collection of nodes.
type Manager struct {
	// mutex is the lock protecting access to the nodes map. The mutex must
	// be held for any access of the nodes map.
	//
	// The manager mutex works together with the entry mutex in the
	// following way to minimize the duration the manager mutex is held:
	//
	// 1. Acquire the manager mutex to safely access the nodes map and to
	//    retrieve the node entry.
	// 2. Acquire the mutex of the entry while the manager mutex is still
	//    held. This guarantees that no change to the entry has happened.
	// 3. Release the manager mutex to unblock changes or reads of other
	//    node entries.
	// 4. Change the entry data or perform the datapath interaction.
	// 5. Release the entry mutex.
	//
	// If both the nodeEntry.mutex and Manager.mutex must be held, then the
	// Manager.mutex must *always* be acquired first.
	mutex lock.RWMutex

	// nodes is the list of nodes. Access must be protected via mutex.
	nodes map[node.Identity]*nodeEntry

	// nodeHandlersMu protects the nodeHandlers map against concurrent access.
	nodeHandlersMu lock.RWMutex
	// nodeHandlers contains all node handlers subscribed to node events.
	nodeHandlers map[datapath.NodeHandler]struct{}

	// closeChan is closed when the manager is closed.
	closeChan chan struct{}

	// name is the name of the manager. It must be unique and suitable for
	// use as part of a prometheus metric name.
	name string

	// metricEventsReceived is the prometheus metric to track the number of
	// node events received.
	metricEventsReceived *prometheus.CounterVec

	// metricNumNodes is the prometheus metric to track the number of nodes
	// being managed.
	metricNumNodes prometheus.Gauge

	// metricDatapathValidations is the prometheus metric to track the
	// number of datapath node validation calls.
	metricDatapathValidations prometheus.Counter
}
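
// lockOrderingIllustration is a hypothetical helper shown for illustration
// only (nothing in this package calls it). It spells out the lock ordering
// described in the Manager.mutex comment above: the manager mutex is taken
// first to look up the entry, the entry mutex is acquired while the manager
// mutex is still held, and the manager mutex is released before the
// per-entry work runs.
func (m *Manager) lockOrderingIllustration(id node.Identity, work func(node.Node)) {
	m.mutex.RLock() // 1. protect the lookup in the nodes map
	entry, ok := m.nodes[id]
	if !ok {
		m.mutex.RUnlock()
		return
	}
	entry.mutex.Lock()   // 2. pin the entry while the manager mutex is held
	m.mutex.RUnlock()    // 3. unblock access to other node entries
	work(entry.node)     // 4. access entry data / perform datapath interaction
	entry.mutex.Unlock() // 5. release the entry
}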

// Subscribe subscribes the given node handler to node events.
func (m *Manager) Subscribe(nh datapath.NodeHandler) {
	m.nodeHandlersMu.Lock()
	m.nodeHandlers[nh] = struct{}{}
	m.nodeHandlersMu.Unlock()
	// Add all nodes already received by the manager.
	for _, v := range m.nodes {
		v.mutex.Lock()
		nh.NodeAdd(v.node)
		v.mutex.Unlock()
	}
}

// Unsubscribe unsubscribes the given node handler from node events.
func (m *Manager) Unsubscribe(nh datapath.NodeHandler) {
	m.nodeHandlersMu.Lock()
	delete(m.nodeHandlers, nh)
	m.nodeHandlersMu.Unlock()
}

// Iter executes the given function on all subscribed node handlers.
func (m *Manager) Iter(f func(nh datapath.NodeHandler)) {
	m.nodeHandlersMu.RLock()
	defer m.nodeHandlersMu.RUnlock()

	for nh := range m.nodeHandlers {
		f(nh)
	}
}

// NewManager returns a new node manager.
func NewManager(name string, dp datapath.NodeHandler) (*Manager, error) {
	m := &Manager{
		name:         name,
		nodes:        map[node.Identity]*nodeEntry{},
		nodeHandlers: map[datapath.NodeHandler]struct{}{},
		closeChan:    make(chan struct{}),
	}
	m.Subscribe(dp)

	m.metricEventsReceived = prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: metrics.Namespace,
		Subsystem: "nodes",
		Name:      name + "_events_received_total",
		Help:      "Number of node events received",
	}, []string{"eventType", "source"})

	m.metricNumNodes = prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: metrics.Namespace,
		Subsystem: "nodes",
		Name:      name + "_num",
		Help:      "Number of nodes managed",
	})

	m.metricDatapathValidations = prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: metrics.Namespace,
		Subsystem: "nodes",
		Name:      name + "_datapath_validations_total",
		Help:      "Number of validation calls to implement the datapath implementation of a node",
	})

	err := metrics.RegisterList([]prometheus.Collector{m.metricDatapathValidations, m.metricEventsReceived, m.metricNumNodes})
	if err != nil {
		return nil, err
	}

	go m.backgroundSync()

	return m, nil
}

// Close shuts down a node manager.
func (m *Manager) Close() {
	m.mutex.Lock()
	defer m.mutex.Unlock()

	close(m.closeChan)

	metrics.Unregister(m.metricNumNodes)
	metrics.Unregister(m.metricEventsReceived)
	metrics.Unregister(m.metricDatapathValidations)

	// Delete all nodes to clean up the datapath for each node.
	for _, n := range m.nodes {
		n.mutex.Lock()
		m.Iter(func(nh datapath.NodeHandler) {
			nh.NodeDelete(n.node)
		})
		n.mutex.Unlock()
	}
}
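
// exampleMetricNames is a hypothetical helper shown for illustration only
// (nothing in this package calls it). It sketches the fully qualified metric
// names NewManager registers for a given manager name, assuming the usual
// <namespace>_<subsystem>_<name> Prometheus naming convention.
func exampleMetricNames(name string) []string {
	prefix := metrics.Namespace + "_nodes_" + name
	return []string{
		prefix + "_events_received_total",
		prefix + "_num",
		prefix + "_datapath_validations_total",
	}
}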

// ClusterSizeDependantInterval returns a time.Duration that is dependent on
// the cluster size, i.e. the number of nodes that have been discovered. This
// can be used to control sync intervals of shared or centralized resources to
// avoid overloading these resources as the cluster grows.
//
// Example sync interval with baseInterval = 1 * time.Minute
//
// nodes | sync interval
// ------+-----------------
//     1 | 41.588830833s
//     2 | 1m05.916737320s
//     4 | 1m36.566274746s
//     8 | 2m11.833474640s
//    16 | 2m49.992800643s
//    32 | 3m29.790453687s
//    64 | 4m10.463236193s
//   128 | 4m51.588744261s
//   256 | 5m32.944565093s
//   512 | 6m14.416550710s
//  1024 | 6m55.946873494s
//  2048 | 7m37.506428894s
//  4096 | 8m19.080616652s
//  8192 | 9m00.662124608s
// 16384 | 9m42.247293667s
func (m *Manager) ClusterSizeDependantInterval(baseInterval time.Duration) time.Duration {
	m.mutex.RLock()
	numNodes := len(m.nodes)
	m.mutex.RUnlock()

	// No nodes are being managed, no work will be performed; return
	// baseInterval to check again in a reasonable timeframe.
	if numNodes == 0 {
		return baseInterval
	}

	waitNanoseconds := float64(baseInterval.Nanoseconds()) * math.Log1p(float64(numNodes))
	return time.Duration(int64(waitNanoseconds))
}
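
// clusterIntervalExample is a hypothetical helper shown for illustration only
// (nothing in this package calls it). It reproduces the arithmetic behind the
// table above: the interval is baseInterval * ln(1 + numNodes), so with a base
// interval of one minute a single node yields 60s * ln(2) ≈ 41.59s and
// 16 nodes yield 60s * ln(17) ≈ 2m50s.
func clusterIntervalExample(baseInterval time.Duration, numNodes int) time.Duration {
	if numNodes == 0 {
		// Mirror ClusterSizeDependantInterval: without nodes, fall back
		// to the base interval.
		return baseInterval
	}
	return time.Duration(int64(float64(baseInterval.Nanoseconds()) * math.Log1p(float64(numNodes))))
}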

func (m *Manager) backgroundSyncInterval() time.Duration {
	return m.ClusterSizeDependantInterval(baseBackgroundSyncInterval)
}

func (m *Manager) backgroundSync() {
	for {
		syncInterval := m.backgroundSyncInterval()
		log.WithField("syncInterval", syncInterval.String()).Debug("Performing regular background work")

		// Get a copy of the node identities to avoid locking the
		// entire manager throughout the process of running the
		// datapath validation.
		nodes := m.GetNodeIdentities()
		for _, nodeIdentity := range nodes {
			// Retrieve the latest node information in case any
			// event changed the node since the call to
			// GetNodeIdentities().
			m.mutex.RLock()
			entry, ok := m.nodes[nodeIdentity]
			if !ok {
				m.mutex.RUnlock()
				continue
			}

			entry.mutex.Lock()
			m.mutex.RUnlock()
			m.Iter(func(nh datapath.NodeHandler) {
				nh.NodeValidateImplementation(entry.node)
			})
			entry.mutex.Unlock()

			m.metricDatapathValidations.Inc()
		}

		select {
		case <-m.closeChan:
			return
		case <-time.After(syncInterval):
		}
	}
}

// NodeUpdated is called after the information of a node has been updated. The
// node in the manager is added or updated if the source is allowed to update
// the node. If an update or addition has occurred, NodeUpdate() of the
// datapath interface is invoked.
func (m *Manager) NodeUpdated(n node.Node) {
	log.Debugf("Received node update event from %s: %#v", n.Source, n)
	nodeIdentity := n.Identity()
	var nodeIP, nodeIP4 net.IP
	dpUpdate := true

	for _, address := range n.IPAddresses {
		// Map the Cilium internal IP to the reachable node IP so it
		// can be routed via the overlay. Routing via overlay is always
		// done via the public v4 address, hence n.GetNodeIP(false).
		if address.Type == addressing.NodeCiliumInternalIP {
			nodeIP = n.GetNodeIP(false)
			if address.IP.To4() != nil {
				nodeIP4 = nodeIP
			}
		} else {
			continue
		}

		isOwning := ipcache.IPIdentityCache.Upsert(address.IP.String(), nodeIP, n.EncryptionKey, ipcache.Identity{
			ID:     identity.ReservedIdentityHost,
			Source: n.Source,
		})

		// Upsert() will return true if the ipcache entry is owned by
		// the source of the node update that triggered this node
		// update (kvstore, k8s, ...). The datapath is only updated if
		// that source of truth is updated.
		if !isOwning {
			dpUpdate = false
		}
	}

	if option.Config.EncryptNode {
		for _, address := range n.IPAddresses {
			if address.Type == addressing.NodeCiliumInternalIP {
				continue
			}

			isOwning := ipcache.IPIdentityCache.Upsert(address.IP.String(), nodeIP4, n.EncryptionKey, ipcache.Identity{
				ID:     identity.ReservedIdentityHost,
				Source: n.Source,
			})
			if !isOwning {
				dpUpdate = false
			}
		}
	}

	for _, address := range []net.IP{n.IPv4HealthIP, n.IPv6HealthIP} {
		if address == nil {
			continue
		}
		isOwning := ipcache.IPIdentityCache.Upsert(address.String(), n.GetNodeIP(false), n.EncryptionKey, ipcache.Identity{
			ID:     identity.ReservedIdentityHealth,
			Source: n.Source,
		})
		if !isOwning {
			dpUpdate = false
		}
	}

	m.mutex.Lock()
	entry, oldNodeExists := m.nodes[nodeIdentity]
	if oldNodeExists {
		m.metricEventsReceived.WithLabelValues("update", string(n.Source)).Inc()

		if !source.AllowOverwrite(entry.node.Source, n.Source) {
			m.mutex.Unlock()
			return
		}

		entry.mutex.Lock()
		m.mutex.Unlock()
		oldNode := entry.node
		entry.node = n
		if dpUpdate {
			m.Iter(func(nh datapath.NodeHandler) {
				nh.NodeUpdate(oldNode, entry.node)
			})
		}
		entry.mutex.Unlock()
	} else {
		m.metricEventsReceived.WithLabelValues("add", string(n.Source)).Inc()
		m.metricNumNodes.Inc()

		entry = &nodeEntry{node: n}
		entry.mutex.Lock()
		m.nodes[nodeIdentity] = entry
		m.mutex.Unlock()
		if dpUpdate {
			m.Iter(func(nh datapath.NodeHandler) {
				nh.NodeAdd(entry.node)
			})
		}
		entry.mutex.Unlock()
	}
}

// NodeDeleted is called after a node has been deleted. It removes the node
// from the manager if the node is still owned by the source from which the
// event originates. If the node was removed, NodeDelete() of the datapath
// interface is invoked.
func (m *Manager) NodeDeleted(n node.Node) {
	m.metricEventsReceived.WithLabelValues("delete", string(n.Source)).Inc()

	log.Debugf("Received node delete event from %s", n.Source)

	nodeIdentity := n.Identity()

	m.mutex.Lock()
	entry, oldNodeExists := m.nodes[nodeIdentity]
	if !oldNodeExists {
		m.mutex.Unlock()
		return
	}

	// If the source is Kubernetes and the node is the node we are running
	// on, Kubernetes is giving us a hint that it is about to delete our
	// node. Close down the agent gracefully in this case.
	if n.Source != entry.node.Source {
		m.mutex.Unlock()
		if n.IsLocal() && n.Source == source.Kubernetes {
			log.Debugf("Kubernetes is deleting local node, close manager")
			m.Close()
		} else {
			log.Debugf("Ignoring delete event of node %s from source %s. The node is owned by %s",
				n.Name, n.Source, entry.node.Source)
		}
		return
	}

	for _, address := range entry.node.IPAddresses {
		ipcache.IPIdentityCache.Delete(address.IP.String(), n.Source)
	}

	m.metricNumNodes.Dec()

	entry.mutex.Lock()
	delete(m.nodes, nodeIdentity)
	m.mutex.Unlock()
	m.Iter(func(nh datapath.NodeHandler) {
		nh.NodeDelete(n)
	})
	entry.mutex.Unlock()
}

// Exists returns true if a node with the given identity exists.
func (m *Manager) Exists(id node.Identity) bool {
	m.mutex.RLock()
	defer m.mutex.RUnlock()
	_, ok := m.nodes[id]
	return ok
}
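
// wouldAcceptUpdate is a hypothetical helper shown for illustration only
// (nothing in this package calls it). It sketches the ownership rule that
// NodeUpdated applies before overwriting an existing entry: an update from a
// given source is only accepted if that source is allowed to overwrite the
// source that created the current entry.
func (m *Manager) wouldAcceptUpdate(n node.Node) bool {
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	entry, ok := m.nodes[n.Identity()]
	if !ok {
		// An unknown node would simply be added.
		return true
	}
	return source.AllowOverwrite(entry.node.Source, n.Source)
}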

// GetNodeIdentities returns a list of all node identities stored in the node
// manager.
func (m *Manager) GetNodeIdentities() []node.Identity {
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	nodes := make([]node.Identity, 0, len(m.nodes))
	for nodeIdentity := range m.nodes {
		nodes = append(nodes, nodeIdentity)
	}

	return nodes
}

// GetNodes returns a copy of all of the nodes as a map from Identity to Node.
func (m *Manager) GetNodes() map[node.Identity]node.Node {
	m.mutex.RLock()
	defer m.mutex.RUnlock()

	nodes := make(map[node.Identity]node.Node)
	for nodeIdentity, entry := range m.nodes {
		entry.mutex.Lock()
		nodes[nodeIdentity] = entry.node
		entry.mutex.Unlock()
	}

	return nodes
}

// DeleteAllNodes deletes all nodes from the node manager.
func (m *Manager) DeleteAllNodes() {
	m.mutex.Lock()
	for _, entry := range m.nodes {
		entry.mutex.Lock()
		m.Iter(func(nh datapath.NodeHandler) {
			nh.NodeDelete(entry.node)
		})
		entry.mutex.Unlock()
	}
	m.nodes = map[node.Identity]*nodeEntry{}
	m.mutex.Unlock()
}
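
// countManagedIPv4Nodes is a hypothetical helper shown for illustration only
// (nothing in this package calls it). It demonstrates how the read accessors
// above can be combined: GetNodes() takes a consistent snapshot of all
// managed nodes, after which each node can be inspected without holding any
// manager locks.
func countManagedIPv4Nodes(m *Manager) int {
	count := 0
	for _, n := range m.GetNodes() {
		// GetNodeIP(false) is used elsewhere in this file to obtain
		// the node's (IPv4) node IP for overlay routing.
		if ip := n.GetNodeIP(false); ip != nil && ip.To4() != nil {
			count++
		}
	}
	return count
}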