github.com/kubearmor/cilium@v1.6.12/pkg/node/manager/manager.go

     1  // Copyright 2016-2019 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package manager
    16  
    17  import (
    18  	"math"
    19  	"net"
    20  	"time"
    21  
    22  	"github.com/cilium/cilium/pkg/datapath"
    23  	"github.com/cilium/cilium/pkg/identity"
    24  	"github.com/cilium/cilium/pkg/ipcache"
    25  	"github.com/cilium/cilium/pkg/lock"
    26  	"github.com/cilium/cilium/pkg/metrics"
    27  	"github.com/cilium/cilium/pkg/node"
    28  	"github.com/cilium/cilium/pkg/node/addressing"
    29  	"github.com/cilium/cilium/pkg/option"
    30  	"github.com/cilium/cilium/pkg/source"
    31  
    32  	"github.com/prometheus/client_golang/prometheus"
    33  )
    34  
    35  var (
    36  	baseBackgroundSyncInterval = time.Minute
    37  )
    38  
    39  type nodeEntry struct {
    40  	// mutex serves two purposes:
    41  	// 1. Serialize any direct access to the node field in this entry.
    42  	// 2. Serialize all calls to the datapath layer for a particular node.
    43  	//
    44  	// See the description of Manager.mutex for more details.
    45  	//
    46  	// If both the nodeEntry.mutex and Manager.mutex must be held, then the
    47  	// Manager.mutex must *always* be acquired first.
    48  	mutex lock.Mutex
    49  	node  node.Node
    50  }
    51  
    52  // Manager is the entity that manages a collection of nodes
    53  type Manager struct {
    54  	// mutex is the lock protecting access to the nodes map. The mutex must
    55  	// be held for any access of the nodes map.
    56  	//
    57  	// The manager mutex works together with the entry mutex in the
    58  	// following way to minimize the duration the manager mutex is held:
    59  	//
    60  	// 1. Acquire manager mutex to safely access nodes map and to retrieve
    61  	//    node entry.
    62  	// 2. Acquire mutex of the entry while the manager mutex is still held.
    63  	//    This guarantees that no change to the entry has happened.
    64  	// 3. Release the manager mutex to unblock changes or reads of other
    65  	//    node entries.
    66  	// 4. Change the entry data or perform the datapath interactions.
    67  	// 5. Release the entry mutex.
    68  	//
    69  	// If both the nodeEntry.mutex and Manager.mutex must be held, then the
    70  	// Manager.mutex must *always* be acquired first.
    71  	mutex lock.RWMutex
    72  
    73  	// nodes is the list of nodes. Access must be protected via mutex.
    74  	nodes map[node.Identity]*nodeEntry
    75  
    76  	// nodeHandlersMu protects the nodeHandlers map against concurrent access.
    77  	nodeHandlersMu lock.RWMutex
    78  	// nodeHandlers contains the set of node handlers subscribed to node
    79  	// events.
    80  	nodeHandlers map[datapath.NodeHandler]struct{}
    81  
    82  	// closeChan is closed when the manager is closed
    83  	closeChan chan struct{}
    84  
    85  	// name is the name of the manager. It must be unique and suitable
    86  	// for use as part of a prometheus metric name.
    87  	name string
    88  
    89  	// metricEventsReceived is the prometheus metric to track the number of
    90  	// node events received
    91  	metricEventsReceived *prometheus.CounterVec
    92  
    93  	// metricNumNodes is the prometheus metric to track the number of nodes
    94  	// being managed
    95  	metricNumNodes prometheus.Gauge
    96  
    97  	// metricDatapathValidations is the prometheus metric to track the
    98  	// number of datapath node validation calls
    99  	metricDatapathValidations prometheus.Counter
   100  }
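// The locking protocol described above, as a minimal sketch. The helper name
// withEntry is hypothetical and not part of this package; it only illustrates
// the documented ordering: the Manager.mutex is acquired first, the entry
// mutex is taken while the Manager.mutex is still held, and the Manager.mutex
// is released again before the entry is changed or the datapath is touched.
// This is the same pattern backgroundSync and NodeUpdated follow below.
//
//	func (m *Manager) withEntry(id node.Identity, fn func(*nodeEntry)) {
//		m.mutex.RLock()          // 1. guard the nodes map for the lookup
//		entry, ok := m.nodes[id]
//		if !ok {
//			m.mutex.RUnlock()
//			return
//		}
//		entry.mutex.Lock()   // 2. pin the entry while the manager mutex is held
//		m.mutex.RUnlock()    // 3. unblock changes or reads of other node entries
//		fn(entry)            // 4. change entry data or perform datapath interactions
//		entry.mutex.Unlock() // 5. release the entry mutex
//	}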
   101  
   102  // Subscribe subscribes the given node handler to node events.
   103  func (m *Manager) Subscribe(nh datapath.NodeHandler) {
   104  	m.nodeHandlersMu.Lock()
   105  	m.nodeHandlers[nh] = struct{}{}
   106  	m.nodeHandlersMu.Unlock()
   107  	// Add all nodes already received by the manager.
   108  	for _, v := range m.nodes {
   109  		v.mutex.Lock()
   110  		nh.NodeAdd(v.node)
   111  		v.mutex.Unlock()
   112  	}
   113  }
   114  
   115  // Unsubscribe unsubscribes the given node handler from node events.
   116  func (m *Manager) Unsubscribe(nh datapath.NodeHandler) {
   117  	m.nodeHandlersMu.Lock()
   118  	delete(m.nodeHandlers, nh)
   119  	m.nodeHandlersMu.Unlock()
   120  }
   121  
   122  // Iter executes the given function in all subscribed node handlers.
   123  func (m *Manager) Iter(f func(nh datapath.NodeHandler)) {
   124  	m.nodeHandlersMu.RLock()
   125  	defer m.nodeHandlersMu.RUnlock()
   126  
   127  	for nh := range m.nodeHandlers {
   128  		f(nh)
   129  	}
   130  }
   131  
   132  // NewManager returns a new node manager
   133  func NewManager(name string, dp datapath.NodeHandler) (*Manager, error) {
   134  	m := &Manager{
   135  		name:         name,
   136  		nodes:        map[node.Identity]*nodeEntry{},
   137  		nodeHandlers: map[datapath.NodeHandler]struct{}{},
   138  		closeChan:    make(chan struct{}),
   139  	}
   140  	m.Subscribe(dp)
   141  
   142  	m.metricEventsReceived = prometheus.NewCounterVec(prometheus.CounterOpts{
   143  		Namespace: metrics.Namespace,
   144  		Subsystem: "nodes",
   145  		Name:      name + "_events_received_total",
   146  		Help:      "Number of node events received",
   147  	}, []string{"eventType", "source"})
   148  
   149  	m.metricNumNodes = prometheus.NewGauge(prometheus.GaugeOpts{
   150  		Namespace: metrics.Namespace,
   151  		Subsystem: "nodes",
   152  		Name:      name + "_num",
   153  		Help:      "Number of nodes managed",
   154  	})
   155  
   156  	m.metricDatapathValidations = prometheus.NewCounter(prometheus.CounterOpts{
   157  		Namespace: metrics.Namespace,
   158  		Subsystem: "nodes",
   159  		Name:      name + "_datapath_validations_total",
   160  		Help:      "Number of validation calls to the datapath implementation of a node",
   161  	})
   162  
   163  	err := metrics.RegisterList([]prometheus.Collector{m.metricDatapathValidations, m.metricEventsReceived, m.metricNumNodes})
   164  	if err != nil {
   165  		return nil, err
   166  	}
   167  
   168  	go m.backgroundSync()
   169  
   170  	return m, nil
   171  }
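// A usage sketch for the constructor and the subscription API. dp and
// extraHandler are assumed to be values satisfying datapath.NodeHandler (for
// example the agent's datapath implementation), and the manager name
// "kvstore" is illustrative only; it simply becomes part of the registered
// metric names (e.g. cilium_nodes_kvstore_events_received_total, assuming
// metrics.Namespace is "cilium").
//
//	mngr, err := NewManager("kvstore", dp)
//	if err != nil {
//		return err
//	}
//	defer mngr.Close()
//
//	mngr.Subscribe(extraHandler) // replays NodeAdd() for nodes already known
//	defer mngr.Unsubscribe(extraHandler)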
   172  
   173  // Close shuts down a node manager
   174  func (m *Manager) Close() {
   175  	m.mutex.Lock()
   176  	defer m.mutex.Unlock()
   177  
   178  	close(m.closeChan)
   179  
   180  	metrics.Unregister(m.metricNumNodes)
   181  	metrics.Unregister(m.metricEventsReceived)
   182  	metrics.Unregister(m.metricDatapathValidations)
   183  
   184  	// delete all nodes to clean up the datapath for each node
   185  	for _, n := range m.nodes {
   186  		n.mutex.Lock()
   187  		m.Iter(func(nh datapath.NodeHandler) {
   188  			nh.NodeDelete(n.node)
   189  		})
   190  		n.mutex.Unlock()
   191  	}
   192  }
   193  
   194  // ClusterSizeDependantInterval returns a time.Duration that is dependent on
   195  // the cluster size, i.e. the number of nodes that have been discovered. This
   196  // can be used to control sync intervals of shared or centralized resources to
   197  // avoid overloading these resources as the cluster grows.
   198  //
   199  // Example sync interval with baseInterval = 1 * time.Minute
   200  //
   201  // nodes | sync interval
   202  // ------+-----------------
   203  // 1     |   41.588830833s
   204  // 2     | 1m05.916737320s
   205  // 4     | 1m36.566274746s
   206  // 8     | 2m11.833474640s
   207  // 16    | 2m49.992800643s
   208  // 32    | 3m29.790453687s
   209  // 64    | 4m10.463236193s
   210  // 128   | 4m51.588744261s
   211  // 256   | 5m32.944565093s
   212  // 512   | 6m14.416550710s
   213  // 1024  | 6m55.946873494s
   214  // 2048  | 7m37.506428894s
   215  // 4096  | 8m19.080616652s
   216  // 8192  | 9m00.662124608s
   217  // 16384 | 9m42.247293667s
   218  func (m *Manager) ClusterSizeDependantInterval(baseInterval time.Duration) time.Duration {
   219  	m.mutex.RLock()
   220  	numNodes := len(m.nodes)
   221  	m.mutex.RUnlock()
   222  
   223  	// no nodes are being managed, no work will be performed, return
   224  	// baseInterval to check again in a reasonable timeframe
   225  	if numNodes == 0 {
   226  		return baseInterval
   227  	}
   228  
   229  	waitNanoseconds := float64(baseInterval.Nanoseconds()) * math.Log1p(float64(numNodes))
   230  	return time.Duration(int64(waitNanoseconds))
   231  
   232  }
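// The interval above is simply baseInterval scaled by the natural logarithm
// of (1 + numNodes). As a worked instance matching the first row of the table
// in the ClusterSizeDependantInterval comment, with baseInterval = time.Minute
// and a single managed node the wait is 60s * ln(2) ≈ 41.588830833s:
//
//	interval := time.Duration(float64(time.Minute.Nanoseconds()) * math.Log1p(1))
//	fmt.Println(interval) // prints 41.588830833s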
   233  
   234  func (m *Manager) backgroundSyncInterval() time.Duration {
   235  	return m.ClusterSizeDependantInterval(baseBackgroundSyncInterval)
   236  }
   237  
   238  func (m *Manager) backgroundSync() {
   239  	for {
   240  		syncInterval := m.backgroundSyncInterval()
   241  		log.WithField("syncInterval", syncInterval.String()).Debug("Performing regular background work")
   242  
   243  		// get a copy of the node identities to avoid locking the entire manager
   244  		// throughout the process of running the datapath validation.
   245  		nodes := m.GetNodeIdentities()
   246  		for _, nodeIdentity := range nodes {
   247  			// Retrieve the latest node information in case any event
   248  			// changed the node since the call to GetNodeIdentities()
   249  			m.mutex.RLock()
   250  			entry, ok := m.nodes[nodeIdentity]
   251  			if !ok {
   252  				m.mutex.RUnlock()
   253  				continue
   254  			}
   255  
   256  			entry.mutex.Lock()
   257  			m.mutex.RUnlock()
   258  			m.Iter(func(nh datapath.NodeHandler) {
   259  				nh.NodeValidateImplementation(entry.node)
   260  			})
   261  			entry.mutex.Unlock()
   262  
   263  			m.metricDatapathValidations.Inc()
   264  		}
   265  
   266  		select {
   267  		case <-m.closeChan:
   268  			return
   269  		case <-time.After(syncInterval):
   270  		}
   271  	}
   272  }
   273  
   274  // NodeUpdated is called after the information of a node has been updated. The
   275  // node in the manager is added or updated if the source is allowed to update
   276  // the node. If an addition or update has occurred, NodeAdd() or NodeUpdate()
   277  // of the datapath interface is invoked, respectively.
   278  func (m *Manager) NodeUpdated(n node.Node) {
   279  	log.Debugf("Received node update event from %s: %#v", n.Source, n)
   280  	nodeIdentity := n.Identity()
   281  	var nodeIP, nodeIP4 net.IP
   282  	dpUpdate := true
   283  
   284  	for _, address := range n.IPAddresses {
   285  		// Map the Cilium internal IP to the reachable node IP so it
   286  		// can be routed via the overlay. Routing via the overlay is
   287  		// always done via the public v4 address, hence n.GetNodeIP(false).
   288  		if address.Type == addressing.NodeCiliumInternalIP {
   289  			nodeIP = n.GetNodeIP(false)
   290  			if address.IP.To4() != nil {
   291  				nodeIP4 = nodeIP
   292  			}
   293  		} else {
   294  			continue
   295  		}
   296  
   297  		isOwning := ipcache.IPIdentityCache.Upsert(address.IP.String(), nodeIP, n.EncryptionKey, ipcache.Identity{
   298  			ID:     identity.ReservedIdentityHost,
   299  			Source: n.Source,
   300  		})
   301  
   302  		// Upsert() will return true if the ipcache entry is owned by
   303  		// the source of the node update that triggered this node
   304  		// update (kvstore, k8s, ...). The datapath is only updated if
   305  		// that source of truth is updated.
   306  		if !isOwning {
   307  			dpUpdate = false
   308  		}
   309  	}
   310  	if option.Config.EncryptNode {
   311  		for _, address := range n.IPAddresses {
   312  			if address.Type == addressing.NodeCiliumInternalIP {
   313  				continue
   314  			}
   315  
   316  			isOwning := ipcache.IPIdentityCache.Upsert(address.IP.String(), nodeIP4, n.EncryptionKey, ipcache.Identity{
   317  				ID:     identity.ReservedIdentityHost,
   318  				Source: n.Source,
   319  			})
   320  			if !isOwning {
   321  				dpUpdate = false
   322  			}
   323  		}
   324  	}
   325  
   326  	for _, address := range []net.IP{n.IPv4HealthIP, n.IPv6HealthIP} {
   327  		if address == nil {
   328  			continue
   329  		}
   330  		isOwning := ipcache.IPIdentityCache.Upsert(address.String(), n.GetNodeIP(false), n.EncryptionKey, ipcache.Identity{
   331  			ID:     identity.ReservedIdentityHealth,
   332  			Source: n.Source,
   333  		})
   334  		if !isOwning {
   335  			dpUpdate = false
   336  		}
   337  	}
   338  
   339  	m.mutex.Lock()
   340  	entry, oldNodeExists := m.nodes[nodeIdentity]
   341  	if oldNodeExists {
   342  		m.metricEventsReceived.WithLabelValues("update", string(n.Source)).Inc()
   343  
   344  		if !source.AllowOverwrite(entry.node.Source, n.Source) {
   345  			m.mutex.Unlock()
   346  			return
   347  		}
   348  
   349  		entry.mutex.Lock()
   350  		m.mutex.Unlock()
   351  		oldNode := entry.node
   352  		entry.node = n
   353  		if dpUpdate {
   354  			m.Iter(func(nh datapath.NodeHandler) {
   355  				nh.NodeUpdate(oldNode, entry.node)
   356  			})
   357  		}
   358  		entry.mutex.Unlock()
   359  	} else {
   360  		m.metricEventsReceived.WithLabelValues("add", string(n.Source)).Inc()
   361  		m.metricNumNodes.Inc()
   362  
   363  		entry = &nodeEntry{node: n}
   364  		entry.mutex.Lock()
   365  		m.nodes[nodeIdentity] = entry
   366  		m.mutex.Unlock()
   367  		if dpUpdate {
   368  			m.Iter(func(nh datapath.NodeHandler) {
   369  				nh.NodeAdd(entry.node)
   370  			})
   371  		}
   372  		entry.mutex.Unlock()
   373  	}
   374  }
   375  
   376  // NodeDeleted is called after a node has been deleted. It removes the node
   377  // from the manager if the node is still owned by the source from which the
   378  // event originates. If the node was removed, NodeDelete() of the datapath
   379  // interface is invoked.
   380  func (m *Manager) NodeDeleted(n node.Node) {
   381  	m.metricEventsReceived.WithLabelValues("delete", string(n.Source)).Inc()
   382  
   383  	log.Debugf("Received node delete event from %s", n.Source)
   384  
   385  	nodeIdentity := n.Identity()
   386  
   387  	m.mutex.Lock()
   388  	entry, oldNodeExists := m.nodes[nodeIdentity]
   389  	if !oldNodeExists {
   390  		m.mutex.Unlock()
   391  		return
   392  	}
   393  
   394  	// If the source is Kubernetes and the node is the node we are running
   395  	// on, Kubernetes is giving us a hint that it is about to delete our
   396  	// node. Close down the agent gracefully in this case.
   397  	if n.Source != entry.node.Source {
   398  		m.mutex.Unlock()
   399  		if n.IsLocal() && n.Source == source.Kubernetes {
   400  			log.Debugf("Kubernetes is deleting local node, close manager")
   401  			m.Close()
   402  		} else {
   403  			log.Debugf("Ignoring delete event of node %s from source %s. The node is owned by %s",
   404  				n.Name, n.Source, entry.node.Source)
   405  		}
   406  		return
   407  	}
   408  
   409  	for _, address := range entry.node.IPAddresses {
   410  		ipcache.IPIdentityCache.Delete(address.IP.String(), n.Source)
   411  	}
   412  
   413  	m.metricNumNodes.Dec()
   414  
   415  	entry.mutex.Lock()
   416  	delete(m.nodes, nodeIdentity)
   417  	m.mutex.Unlock()
   418  	m.Iter(func(nh datapath.NodeHandler) {
   419  		nh.NodeDelete(n)
   420  	})
   421  	entry.mutex.Unlock()
   422  }
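// A minimal sketch of how a node discovery source might drive the two event
// entry points above. The field values are illustrative only and assume a
// node.Node populated by that source:
//
//	n := node.Node{
//		Name:   "worker-1",
//		Source: source.Kubernetes,
//	}
//	mngr.NodeUpdated(n) // adds the node, or updates it if already known
//	mngr.NodeDeleted(n) // removes it again if the source still owns it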
   423  
   424  // Exists returns true if a node with the given identity exists
   425  func (m *Manager) Exists(id node.Identity) bool {
   426  	m.mutex.RLock()
   427  	defer m.mutex.RUnlock()
   428  	_, ok := m.nodes[id]
   429  	return ok
   430  }
   431  
   432  // GetNodeIdentities returns a list of all node identities stored in the node
   433  // manager.
   434  func (m *Manager) GetNodeIdentities() []node.Identity {
   435  	m.mutex.RLock()
   436  	defer m.mutex.RUnlock()
   437  
   438  	nodes := make([]node.Identity, 0, len(m.nodes))
   439  	for nodeIdentity := range m.nodes {
   440  		nodes = append(nodes, nodeIdentity)
   441  	}
   442  
   443  	return nodes
   444  }
   445  
   446  // GetNodes returns a copy of all of the nodes as a map from Identity to Node.
   447  func (m *Manager) GetNodes() map[node.Identity]node.Node {
   448  	m.mutex.RLock()
   449  	defer m.mutex.RUnlock()
   450  
   451  	nodes := make(map[node.Identity]node.Node)
   452  	for nodeIdentity, entry := range m.nodes {
   453  		entry.mutex.Lock()
   454  		nodes[nodeIdentity] = entry.node
   455  		entry.mutex.Unlock()
   456  	}
   457  
   458  	return nodes
   459  }
   460  
   461  // DeleteAllNodes deletes all nodes from the node manager.
   462  func (m *Manager) DeleteAllNodes() {
   463  	m.mutex.Lock()
   464  	for _, entry := range m.nodes {
   465  		entry.mutex.Lock()
   466  		m.Iter(func(nh datapath.NodeHandler) {
   467  			nh.NodeDelete(entry.node)
   468  		})
   469  		entry.mutex.Unlock()
   470  	}
   471  	m.nodes = map[node.Identity]*nodeEntry{}
   472  	m.mutex.Unlock()
   473  }