github.com/cilium/cilium@v1.16.2/pkg/datapath/iptables/reconciler.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package iptables

import (
	"context"
	"net"
	"net/netip"

	"github.com/cilium/hive/cell"
	"github.com/cilium/stream"
	"github.com/sirupsen/logrus"
	"k8s.io/apimachinery/pkg/util/sets"

	"github.com/cilium/cilium/pkg/datapath/tables"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/node"
	"github.com/cilium/cilium/pkg/time"
)

type desiredState struct {
	installRules bool

	devices       sets.Set[string]
	localNodeInfo localNodeInfo
	proxies       map[string]proxyInfo
	noTrackPods   sets.Set[noTrackPodInfo]
}

type localNodeInfo struct {
	internalIPv4          net.IP
	internalIPv6          net.IP
	ipv4AllocCIDR         string
	ipv6AllocCIDR         string
	ipv4NativeRoutingCIDR string
	ipv6NativeRoutingCIDR string
}

func (lni localNodeInfo) equal(other localNodeInfo) bool {
	if lni.internalIPv4.Equal(other.internalIPv4) &&
		lni.internalIPv6.Equal(other.internalIPv6) &&
		lni.ipv4AllocCIDR == other.ipv4AllocCIDR &&
		lni.ipv6AllocCIDR == other.ipv6AllocCIDR &&
		lni.ipv4NativeRoutingCIDR == other.ipv4NativeRoutingCIDR &&
		lni.ipv6NativeRoutingCIDR == other.ipv6NativeRoutingCIDR {
		return true
	}
	return false
}

func toLocalNodeInfo(n node.LocalNode) localNodeInfo {
	var (
		v4AllocCIDR, v6AllocCIDR                 string
		v4NativeRoutingCIDR, v6NativeRoutingCIDR string
	)

	if n.IPv4AllocCIDR != nil {
		v4AllocCIDR = n.IPv4AllocCIDR.String()
	}
	if n.IPv6AllocCIDR != nil {
		v6AllocCIDR = n.IPv6AllocCIDR.String()
	}
	if n.IPv4NativeRoutingCIDR != nil {
		v4NativeRoutingCIDR = n.IPv4NativeRoutingCIDR.String()
	}
	if n.IPv6NativeRoutingCIDR != nil {
		v6NativeRoutingCIDR = n.IPv6NativeRoutingCIDR.String()
	}

	return localNodeInfo{
		internalIPv4:          n.GetCiliumInternalIP(false),
		internalIPv6:          n.GetCiliumInternalIP(true),
		ipv4AllocCIDR:         v4AllocCIDR,
		ipv6AllocCIDR:         v6AllocCIDR,
		ipv4NativeRoutingCIDR: v4NativeRoutingCIDR,
		ipv6NativeRoutingCIDR: v6NativeRoutingCIDR,
	}
}

// reconciliationRequest is a request to the reconciler to update the
// state with the new info.
// updated is a notification channel that is closed when reconciliation has
// been completed successfully.
type reconciliationRequest[T any] struct {
	info T

	// closed when the state is reconciled successfully
	updated chan struct{}
}

type proxyInfo struct {
	name string
	port uint16
}

type noTrackPodInfo struct {
	ip   netip.Addr
	port uint16
}
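// A minimal sketch of the request/acknowledgement protocol implied by the
// types above (illustrative only; proxiesCh and the example proxy values are
// assumptions, not part of this file): a caller builds a reconciliationRequest,
// hands it to the reconciler, and blocks on updated until the request has been
// processed.
//
//	req := reconciliationRequest[proxyInfo]{
//		info:    proxyInfo{name: "example-proxy", port: 10001},
//		updated: make(chan struct{}),
//	}
//	proxiesCh <- req // proxiesCh: hypothetical channel wired to params.proxies
//	<-req.updated    // returns once the reconciler has processed the request
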
func reconciliationLoop(
	ctx context.Context,
	log logrus.FieldLogger,
	health cell.Health,
	installIptRules bool,
	params *reconcilerParams,
	updateRules func(state desiredState, firstInit bool) error,
	updateProxyRules func(proxyPort uint16, name string) error,
	installNoTrackRules func(addr netip.Addr, port uint16) error,
	removeNoTrackRules func(addr netip.Addr, port uint16) error,
) error {
	// The minimum interval between reconciliation attempts
	const minReconciliationInterval = 200 * time.Millisecond

	// log limiter for partial (proxy and no track rules) reconciliation errors
	partialLogLimiter := logging.NewLimiter(10*time.Second, 3)
	// log limiter for full reconciliation errors
	fullLogLimiter := logging.NewLimiter(10*time.Second, 3)

	state := desiredState{
		installRules: installIptRules,
		proxies:      make(map[string]proxyInfo),
		noTrackPods:  sets.New[noTrackPodInfo](),
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	localNodeEvents := stream.ToChannel(ctx, params.localNodeStore)
	state.localNodeInfo = toLocalNodeInfo(<-localNodeEvents)

	devices, devicesWatch := tables.SelectedDevices(params.devices, params.db.ReadTxn())
	state.devices = sets.New(tables.DeviceNames(devices)...)

	// Use a ticker to limit how often the desired state is reconciled to avoid doing
	// lots of operations when e.g. ipset updates.
	ticker := time.NewTicker(minReconciliationInterval)
	defer ticker.Stop()

	// stateChanged is true when the desired state has changed or when reconciling it
	// has failed. It's set to false when reconciling succeeds.
	stateChanged := true

	firstInit := true

	// Run an initial full reconciliation before listening on partial reconciliation
	// request channels (like proxies and no track rules).
	if err := updateRules(state, firstInit); err != nil {
		health.Degraded("iptables rules update failed", err)
		// Keep stateChanged=true and firstInit=true to try again on the next tick.
	} else {
		health.OK("iptables rules update completed")
		firstInit = false
		stateChanged = false
	}

	// list of pending channels waiting for reconciliation
	var updatedChs []chan<- struct{}

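	// Main event loop: react to device and local node changes and to proxy and
	// no-track pod requests. Incremental updates are applied where possible;
	// any change that cannot be applied incrementally (or whose incremental
	// update fails) marks the state as changed so that the next tick performs
	// a full reconciliation.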
stop:
	for {
		select {
		case <-ctx.Done():
			break stop
		case <-devicesWatch:
			devices, devicesWatch = tables.SelectedDevices(params.devices, params.db.ReadTxn())
			newDevices := sets.New(tables.DeviceNames(devices)...)
			if newDevices.Equal(state.devices) {
				continue
			}
			state.devices = newDevices
			stateChanged = true
		case localNode, ok := <-localNodeEvents:
			if !ok {
				break stop
			}
			localNodeInfo := toLocalNodeInfo(localNode)
			if localNodeInfo.equal(state.localNodeInfo) {
				continue
			}
			state.localNodeInfo = localNodeInfo
			stateChanged = true
		case req, ok := <-params.proxies:
			if !ok {
				break stop
			}
			if info, ok := state.proxies[req.info.name]; ok && info == req.info {
				continue
			}

			// if existing, previous rules related to the previous entry for the same proxy name
			// will be deleted by the manager (see Manager.addProxyRules)
			state.proxies[req.info.name] = req.info

			if firstInit {
				// first init not yet completed, proxy rules will be updated as part of that
				stateChanged = true
				updatedChs = append(updatedChs, req.updated)
				continue
			}

			if err := updateProxyRules(req.info.port, req.info.name); err != nil {
				if partialLogLimiter.Allow() {
					log.WithError(err).Error("iptables proxy rules incremental update failed, will retry a full reconciliation")
				}
				// incremental rules update failed, schedule a full iptables reconciliation
				stateChanged = true
				updatedChs = append(updatedChs, req.updated)
			} else {
				close(req.updated)
			}
		case req, ok := <-params.addNoTrackPod:
			if !ok {
				break stop
			}
			if state.noTrackPods.Has(req.info) {
				close(req.updated)
				continue
			}
			state.noTrackPods.Insert(req.info)

			if firstInit {
				// first init not yet completed, no track pod rules will be updated as part of that
				stateChanged = true
				updatedChs = append(updatedChs, req.updated)
				continue
			}

			if err := installNoTrackRules(req.info.ip, req.info.port); err != nil {
				if partialLogLimiter.Allow() {
					log.WithError(err).Error("iptables no track rules incremental install failed, will retry a full reconciliation")
				}
				// incremental rules update failed, schedule a full iptables reconciliation
				stateChanged = true
				updatedChs = append(updatedChs, req.updated)
			} else {
				close(req.updated)
			}
		case req, ok := <-params.delNoTrackPod:
			if !ok {
				break stop
			}
			if !state.noTrackPods.Has(req.info) {
				close(req.updated)
				continue
			}
			state.noTrackPods.Delete(req.info)

			if firstInit {
				// first init not yet completed, no track pod rules will be updated as part of that
				stateChanged = true
				updatedChs = append(updatedChs, req.updated)
				continue
			}

			if err := removeNoTrackRules(req.info.ip, req.info.port); err != nil {
				if partialLogLimiter.Allow() {
					log.WithError(err).Error("iptables no track rules incremental removal failed, will retry a full reconciliation")
				}
				// incremental rules update failed, schedule a full iptables reconciliation
				stateChanged = true
				updatedChs = append(updatedChs, req.updated)
			} else {
				close(req.updated)
			}
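		// Rate-limited full reconciliation: on each tick, if the desired state
		// has changed since the last successful run (or a previous attempt
		// failed), reapply the complete rule set and notify all pending waiters.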
		case <-ticker.C:
			if !stateChanged {
				continue
			}

			if err := updateRules(state, firstInit); err != nil {
				if fullLogLimiter.Allow() {
					log.WithError(err).Error("iptables rules full reconciliation failed, will retry another one later")
				}
				health.Degraded("iptables rules full reconciliation failed", err)
				// Keep stateChanged=true to try again on the next tick.
			} else {
				health.OK("iptables rules full reconciliation completed")
				firstInit = false
				stateChanged = false
			}

			// close all channels waiting for reconciliation
			// do this even in case of a failed reconciliation, to avoid
			// blocking consumer goroutines indefinitely.
			for _, ch := range updatedChs {
				close(ch)
			}
			updatedChs = updatedChs[:0]
		}
	}

	cancel()

	// close all channels waiting for reconciliation
	for _, ch := range updatedChs {
		close(ch)
	}

	// drain channels
	for range localNodeEvents {
	}
	for range params.proxies {
	}
	for range params.addNoTrackPod {
	}
	for range params.delNoTrackPod {
	}

	return nil
}
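
// exampleRequestNoTrackRules is an illustrative sketch, not part of the
// upstream file: it shows how a caller might ask the reconciler to install
// no-track rules for a pod and wait until the request has been processed.
// It assumes, as the loop above suggests, that the add channel carries
// reconciliationRequest[noTrackPodInfo] values (in Cilium this channel is
// provided via reconcilerParams).
func exampleRequestNoTrackRules(ctx context.Context, addNoTrackPod chan<- reconciliationRequest[noTrackPodInfo], addr netip.Addr, port uint16) error {
	req := reconciliationRequest[noTrackPodInfo]{
		info:    noTrackPodInfo{ip: addr, port: port},
		updated: make(chan struct{}),
	}

	// Hand the request to the reconciliation loop.
	select {
	case addNoTrackPod <- req:
	case <-ctx.Done():
		return ctx.Err()
	}

	// Block until the loop has applied the rules incrementally or folded the
	// request into a full reconciliation (the channel is closed in both cases).
	select {
	case <-req.updated:
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}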