github.com/cilium/cilium@v1.16.2/pkg/datapath/iptables/reconciler.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package iptables
     5  
     6  import (
     7  	"context"
     8  	"net"
     9  	"net/netip"
    10  
    11  	"github.com/cilium/hive/cell"
    12  	"github.com/cilium/stream"
    13  	"github.com/sirupsen/logrus"
    14  	"k8s.io/apimachinery/pkg/util/sets"
    15  
    16  	"github.com/cilium/cilium/pkg/datapath/tables"
    17  	"github.com/cilium/cilium/pkg/logging"
    18  	"github.com/cilium/cilium/pkg/node"
    19  	"github.com/cilium/cilium/pkg/time"
    20  )
    21  
    22  type desiredState struct {
    23  	installRules bool
    24  
    25  	devices       sets.Set[string]
    26  	localNodeInfo localNodeInfo
    27  	proxies       map[string]proxyInfo
    28  	noTrackPods   sets.Set[noTrackPodInfo]
    29  }
    30  
    31  type localNodeInfo struct {
    32  	internalIPv4          net.IP
    33  	internalIPv6          net.IP
    34  	ipv4AllocCIDR         string
    35  	ipv6AllocCIDR         string
    36  	ipv4NativeRoutingCIDR string
    37  	ipv6NativeRoutingCIDR string
    38  }
    39  
    40  func (lni localNodeInfo) equal(other localNodeInfo) bool {
    41  	if lni.internalIPv4.Equal(other.internalIPv4) &&
    42  		lni.internalIPv6.Equal(other.internalIPv6) &&
    43  		lni.ipv4AllocCIDR == other.ipv4AllocCIDR &&
    44  		lni.ipv6AllocCIDR == other.ipv6AllocCIDR &&
    45  		lni.ipv4NativeRoutingCIDR == other.ipv4NativeRoutingCIDR &&
    46  		lni.ipv6NativeRoutingCIDR == other.ipv6NativeRoutingCIDR {
    47  		return true
    48  	}
    49  	return false
    50  }
    51  
    52  func toLocalNodeInfo(n node.LocalNode) localNodeInfo {
    53  	var (
    54  		v4AllocCIDR, v6AllocCIDR                 string
    55  		v4NativeRoutingCIDR, v6NativeRoutingCIDR string
    56  	)
    57  
    58  	if n.IPv4AllocCIDR != nil {
    59  		v4AllocCIDR = n.IPv4AllocCIDR.String()
    60  	}
    61  	if n.IPv6AllocCIDR != nil {
    62  		v6AllocCIDR = n.IPv6AllocCIDR.String()
    63  	}
    64  	if n.IPv4NativeRoutingCIDR != nil {
    65  		v4NativeRoutingCIDR = n.IPv4NativeRoutingCIDR.String()
    66  	}
    67  	if n.IPv6NativeRoutingCIDR != nil {
    68  		v6NativeRoutingCIDR = n.IPv6NativeRoutingCIDR.String()
    69  	}
    70  
    71  	return localNodeInfo{
    72  		internalIPv4:          n.GetCiliumInternalIP(false),
    73  		internalIPv6:          n.GetCiliumInternalIP(true),
    74  		ipv4AllocCIDR:         v4AllocCIDR,
    75  		ipv6AllocCIDR:         v6AllocCIDR,
    76  		ipv4NativeRoutingCIDR: v4NativeRoutingCIDR,
    77  		ipv6NativeRoutingCIDR: v6NativeRoutingCIDR,
    78  	}
    79  }
    80  
    81  // reconciliationRequest is a request to the reconciler to update the
    82  // state with the new info.
    83  // updated is a notification channel that is closed when reconciliation has
    84  // been completed successfully.
    85  type reconciliationRequest[T any] struct {
    86  	info T
    87  
    88  	// closed when the state is reconciled successfully
    89  	updated chan struct{}
    90  }
    91  
    92  type proxyInfo struct {
    93  	name string
    94  	port uint16
    95  }
    96  
    97  type noTrackPodInfo struct {
    98  	ip   netip.Addr
    99  	port uint16
   100  }
   101  
   102  func reconciliationLoop(
   103  	ctx context.Context,
   104  	log logrus.FieldLogger,
   105  	health cell.Health,
   106  	installIptRules bool,
   107  	params *reconcilerParams,
   108  	updateRules func(state desiredState, firstInit bool) error,
   109  	updateProxyRules func(proxyPort uint16, name string) error,
   110  	installNoTrackRules func(addr netip.Addr, port uint16) error,
   111  	removeNoTrackRules func(addr netip.Addr, port uint16) error,
   112  ) error {
   113  	// The minimum interval between reconciliation attempts
   114  	const minReconciliationInterval = 200 * time.Millisecond
   115  
   116  	// log limiter for partial (proxy and no track rules) reconciliation errors
   117  	partialLogLimiter := logging.NewLimiter(10*time.Second, 3)
   118  	// log limiter for full reconciliation errors
   119  	fullLogLimiter := logging.NewLimiter(10*time.Second, 3)
   120  
   121  	state := desiredState{
   122  		installRules: installIptRules,
   123  		proxies:      make(map[string]proxyInfo),
   124  		noTrackPods:  sets.New[noTrackPodInfo](),
   125  	}
   126  
   127  	ctx, cancel := context.WithCancel(ctx)
   128  	defer cancel()
   129  
   130  	localNodeEvents := stream.ToChannel(ctx, params.localNodeStore)
   131  	state.localNodeInfo = toLocalNodeInfo(<-localNodeEvents)
   132  
   133  	devices, devicesWatch := tables.SelectedDevices(params.devices, params.db.ReadTxn())
   134  	state.devices = sets.New(tables.DeviceNames(devices)...)
   135  
   136  	// Use a ticker to limit how often the desired state is reconciled to avoid doing
   137  	// lots of operations when e.g. ipset updates.
   138  	ticker := time.NewTicker(minReconciliationInterval)
   139  	defer ticker.Stop()
   140  
   141  	// stateChanged is true when the desired state has changed or when reconciling it
   142  	// has failed. It's set to false when reconciling succeeds.
   143  	stateChanged := true
   144  
   145  	firstInit := true
   146  
   147  	// Run an initial full reconciliation before listening on partial reconciliation
   148  	// request channels (like proxies and no track rules).
   149  	if err := updateRules(state, firstInit); err != nil {
   150  		health.Degraded("iptables rules update failed", err)
   151  		// Keep stateChanged=true and firstInit=true to try again on the next tick.
   152  	} else {
   153  		health.OK("iptables rules update completed")
   154  		firstInit = false
   155  		stateChanged = false
   156  	}
   157  
   158  	// list of pending channels waiting for reconciliation
   159  	var updatedChs []chan<- struct{}
   160  
   161  stop:
   162  	for {
   163  		select {
   164  		case <-ctx.Done():
   165  			break stop
   166  		case <-devicesWatch:
   167  			devices, devicesWatch = tables.SelectedDevices(params.devices, params.db.ReadTxn())
   168  			newDevices := sets.New(tables.DeviceNames(devices)...)
   169  			if newDevices.Equal(state.devices) {
   170  				continue
   171  			}
   172  			state.devices = newDevices
   173  			stateChanged = true
   174  		case localNode, ok := <-localNodeEvents:
   175  			if !ok {
   176  				break stop
   177  			}
   178  			localNodeInfo := toLocalNodeInfo(localNode)
   179  			if localNodeInfo.equal(state.localNodeInfo) {
   180  				continue
   181  			}
   182  			state.localNodeInfo = localNodeInfo
   183  			stateChanged = true
   184  		case req, ok := <-params.proxies:
   185  			if !ok {
   186  				break stop
   187  			}
   188  			if info, ok := state.proxies[req.info.name]; ok && info == req.info {
   189  				continue
   190  			}
   191  
   192  			// if existing, previous rules related to the previous entry for the same proxy name
   193  			// will be deleted by the manager (see Manager.addProxyRules)
   194  			state.proxies[req.info.name] = req.info
   195  
   196  			if firstInit {
   197  				// first init not yet completed, proxy rules will be updated as part of that
   198  				stateChanged = true
   199  				updatedChs = append(updatedChs, req.updated)
   200  				continue
   201  			}
   202  
   203  			if err := updateProxyRules(req.info.port, req.info.name); err != nil {
   204  				if partialLogLimiter.Allow() {
   205  					log.WithError(err).Error("iptables proxy rules incremental update failed, will retry a full reconciliation")
   206  				}
   207  				// incremental rules update failed, schedule a full iptables reconciliation
   208  				stateChanged = true
   209  				updatedChs = append(updatedChs, req.updated)
   210  			} else {
   211  				close(req.updated)
   212  			}
   213  		case req, ok := <-params.addNoTrackPod:
   214  			if !ok {
   215  				break stop
   216  			}
   217  			if state.noTrackPods.Has(req.info) {
   218  				close(req.updated)
   219  				continue
   220  			}
   221  			state.noTrackPods.Insert(req.info)
   222  
   223  			if firstInit {
   224  				// first init not yet completed, no track pod rules will be updated as part of that
   225  				stateChanged = true
   226  				updatedChs = append(updatedChs, req.updated)
   227  				continue
   228  			}
   229  
   230  			if err := installNoTrackRules(req.info.ip, req.info.port); err != nil {
   231  				if partialLogLimiter.Allow() {
   232  					log.WithError(err).Error("iptables no track rules incremental install failed, will retry a full reconciliation")
   233  				}
   234  				// incremental rules update failed, schedule a full iptables reconciliation
   235  				stateChanged = true
   236  				updatedChs = append(updatedChs, req.updated)
   237  			} else {
   238  				close(req.updated)
   239  			}
   240  		case req, ok := <-params.delNoTrackPod:
   241  			if !ok {
   242  				break stop
   243  			}
   244  			if !state.noTrackPods.Has(req.info) {
   245  				close(req.updated)
   246  				continue
   247  			}
   248  			state.noTrackPods.Delete(req.info)
   249  
   250  			if firstInit {
   251  				// first init not yet completed, no track pod rules will be updated as part of that
   252  				stateChanged = true
   253  				updatedChs = append(updatedChs, req.updated)
   254  				continue
   255  			}
   256  
   257  			if err := removeNoTrackRules(req.info.ip, req.info.port); err != nil {
   258  				if partialLogLimiter.Allow() {
   259  					log.WithError(err).Error("iptables no track rules incremental removal failed, will retry a full reconciliation")
   260  				}
   261  				// incremental rules update failed, schedule a full iptables reconciliation
   262  				stateChanged = true
   263  				updatedChs = append(updatedChs, req.updated)
   264  			} else {
   265  				close(req.updated)
   266  			}
   267  		case <-ticker.C:
   268  			if !stateChanged {
   269  				continue
   270  			}
   271  
   272  			if err := updateRules(state, firstInit); err != nil {
   273  				if fullLogLimiter.Allow() {
   274  					log.WithError(err).Error("iptables rules full reconciliation failed, will retry another one later")
   275  				}
   276  				health.Degraded("iptables rules full reconciliation failed", err)
   277  				// Keep stateChanged=true to try again on the next tick.
   278  			} else {
   279  				health.OK("iptables rules full reconciliation completed")
   280  				firstInit = false
   281  				stateChanged = false
   282  			}
   283  
   284  			// close all channels waiting for reconciliation
   285  			// do this even in case of a failed reconciliation, to avoid
   286  			// blocking consumer goroutines indefinitely.
   287  			for _, ch := range updatedChs {
   288  				close(ch)
   289  			}
   290  			updatedChs = updatedChs[:0]
   291  		}
   292  	}
   293  
   294  	cancel()
   295  
   296  	// close all channels waiting for reconciliation
   297  	for _, ch := range updatedChs {
   298  		close(ch)
   299  	}
   300  
   301  	// drain channels
   302  	for range localNodeEvents {
   303  	}
   304  	for range params.proxies {
   305  	}
   306  	for range params.addNoTrackPod {
   307  	}
   308  	for range params.delNoTrackPod {
   309  	}
   310  
   311  	return nil
   312  }