github.com/cilium/cilium@v1.16.2/pkg/maps/ctmap/gc/gc.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package gc 5 6 import ( 7 "fmt" 8 "net/netip" 9 "os" 10 11 "github.com/cilium/hive/cell" 12 "github.com/cilium/statedb" 13 "github.com/sirupsen/logrus" 14 15 "github.com/cilium/cilium/pkg/controller" 16 "github.com/cilium/cilium/pkg/datapath/tables" 17 "github.com/cilium/cilium/pkg/datapath/types" 18 "github.com/cilium/cilium/pkg/endpoint" 19 "github.com/cilium/cilium/pkg/inctimer" 20 "github.com/cilium/cilium/pkg/logging/logfields" 21 "github.com/cilium/cilium/pkg/maps/ctmap" 22 "github.com/cilium/cilium/pkg/option" 23 "github.com/cilium/cilium/pkg/time" 24 ) 25 26 type Enabler interface { 27 // Enable enables the connection tracking garbage collection. 28 Enable() 29 } 30 31 // EndpointManager is any type which returns the list of Endpoints which are 32 // globally exposed on the current node. 33 type EndpointManager interface { 34 GetEndpoints() []*endpoint.Endpoint 35 } 36 37 type PerClusterCTMapsRetriever func() []*ctmap.Map 38 39 type parameters struct { 40 cell.In 41 42 Lifecycle cell.Lifecycle 43 Logger logrus.FieldLogger 44 DB *statedb.DB 45 NodeAddrs statedb.Table[tables.NodeAddress] 46 DaemonConfig *option.DaemonConfig 47 EndpointManager EndpointManager 48 Datapath types.Datapath 49 SignalManager SignalHandler 50 51 PerClusterCTMapsRetriever PerClusterCTMapsRetriever `optional:"true"` 52 } 53 54 type GC struct { 55 logger logrus.FieldLogger 56 57 ipv4 bool 58 ipv6 bool 59 60 db *statedb.DB 61 nodeAddrs statedb.Table[tables.NodeAddress] 62 63 endpointsManager EndpointManager 64 nodeAddressing types.NodeAddressing 65 signalHandler SignalHandler 66 67 perClusterCTMapsRetriever PerClusterCTMapsRetriever 68 controllerManager *controller.Manager 69 } 70 71 func New(params parameters) *GC { 72 gc := &GC{ 73 logger: params.Logger, 74 75 ipv4: params.DaemonConfig.EnableIPv4, 76 ipv6: params.DaemonConfig.EnableIPv6, 77 78 db: params.DB, 79 nodeAddrs: params.NodeAddrs, 80 81 endpointsManager: params.EndpointManager, 82 nodeAddressing: params.Datapath.LocalNodeAddressing(), 83 signalHandler: params.SignalManager, 84 85 controllerManager: controller.NewManager(), 86 } 87 params.Lifecycle.Append(cell.Hook{ 88 // OnStart not yet defined pending further modularization of CT map GC. 89 OnStop: func(cell.HookContext) error { 90 gc.controllerManager.RemoveAllAndWait() 91 return nil 92 }, 93 }) 94 return gc 95 } 96 97 // Enable enables the connection tracking garbage collection. 98 func (gc *GC) Enable() { 99 var ( 100 initialScan = true 101 initialScanComplete = make(chan struct{}) 102 ) 103 104 go func() { 105 ipv4 := gc.ipv4 106 ipv6 := gc.ipv6 107 triggeredBySignal := false 108 ctTimer, ctTimerDone := inctimer.New() 109 var gcPrev time.Time 110 defer ctTimerDone() 111 for { 112 var ( 113 maxDeleteRatio float64 114 115 // epsMap contains an IP -> EP mapping. It is used by EmitCTEntryCB to 116 // avoid doing gc.endpointsManager.LookupIP, which is more expensive. 117 epsMap = make(map[netip.Addr]*endpoint.Endpoint) 118 119 // gcStart and emitEntryCB are used to populate DNSZombieMapping fields 120 // on endpoints. These hold IPs that are deletable in the DNS caches, 121 // but may be in use by connections. Each loop of this GC keeps those 122 // entries alive by touching them in emitEntryCB. We also need to 123 // record the start of each CT GC loop (further below in the 124 // goroutine). In all cases the timestamp used is the start of the GC 125 // loop. This simplifies the logic to determine if a marked connection 126 // was marked in the most recent GC loop or not: if the active 127 // timestamp is before the recorded start of the GC loop then it must 128 // mean the next iteration has completed and it is not in-use. 129 gcStart = time.Now() 130 131 // aliveTime is offset to the future by ToFQDNsIdleConnectionGracePeriod 132 // (default 0), allowing previously active connections to be considerred 133 // alive during idle periods of upto ToFQDNsIdleConnectionGracePeriod. 134 aliveTime = gcStart.Add(option.Config.ToFQDNsIdleConnectionGracePeriod) 135 136 emitEntryCB = func(srcIP, dstIP netip.Addr, srcPort, dstPort uint16, nextHdr, flags uint8, entry *ctmap.CtEntry) { 137 // FQDN related connections can only be outbound 138 if flags != ctmap.TUPLE_F_OUT { 139 return 140 } 141 if ep, exists := epsMap[srcIP]; exists { 142 ep.MarkDNSCTEntry(dstIP, aliveTime) 143 } 144 } 145 146 success = false 147 ) 148 149 gcInterval := gcStart.Sub(gcPrev) 150 if gcPrev.IsZero() { 151 gcInterval = time.Duration(0) 152 } 153 gcPrev = gcStart 154 155 eps := gc.endpointsManager.GetEndpoints() 156 for _, e := range eps { 157 epsMap[e.IPv4Address()] = e 158 epsMap[e.IPv6Address()] = e 159 } 160 161 if len(eps) > 0 || initialScan { 162 gc.logger.Info("Starting initial GC of connection tracking") 163 maxDeleteRatio, success = gc.runGC(nil, ipv4, ipv6, triggeredBySignal, &ctmap.GCFilter{RemoveExpired: true, EmitCTEntryCB: emitEntryCB}) 164 } 165 for _, e := range eps { 166 if !e.ConntrackLocal() { 167 // Skip because GC was handled above. 168 continue 169 } 170 _, epSuccess := gc.runGC(e, ipv4, ipv6, triggeredBySignal, &ctmap.GCFilter{RemoveExpired: true, EmitCTEntryCB: emitEntryCB}) 171 success = success && epSuccess 172 } 173 174 // Mark the CT GC as over in each EP DNSZombies instance, if we did a *full* GC run 175 interval := ctmap.GetInterval(gcInterval, maxDeleteRatio) 176 if success && ipv4 == gc.ipv4 && ipv6 == gc.ipv6 { 177 for _, e := range eps { 178 e.MarkCTGCTime(gcStart, time.Now().Add(interval)) 179 } 180 } 181 182 if initialScan { 183 close(initialScanComplete) 184 initialScan = false 185 } 186 187 triggeredBySignal = false 188 gc.signalHandler.UnmuteSignals() 189 select { 190 case x, ok := <-gc.signalHandler.Signals(): 191 if !ok { 192 gc.logger.Info("Signal handler closed. Stopping conntrack garbage collector") 193 return 194 } 195 // mute before draining so that no more wakeups are queued just 196 // after we have drained 197 gc.signalHandler.MuteSignals() 198 triggeredBySignal = true 199 ipv4 = false 200 ipv6 = false 201 if x == SignalProtoV4 { 202 ipv4 = true 203 } else if x == SignalProtoV6 { 204 ipv6 = true 205 } 206 // Drain current queue since we just woke up anyway. 207 for len(gc.signalHandler.Signals()) > 0 { 208 x := <-gc.signalHandler.Signals() 209 if x == SignalProtoV4 { 210 ipv4 = true 211 } else if x == SignalProtoV6 { 212 ipv6 = true 213 } 214 } 215 case <-ctTimer.After(interval): 216 gc.signalHandler.MuteSignals() 217 ipv4 = gc.ipv4 218 ipv6 = gc.ipv6 219 } 220 } 221 }() 222 223 select { 224 case <-initialScanComplete: 225 gc.logger.Info("Initial scan of connection tracking completed") 226 case <-time.After(30 * time.Second): 227 gc.logger.Fatal("Timeout while waiting for initial conntrack scan") 228 } 229 230 // Not supporting BPF map pressure for local CT maps as of yet. 231 ctmap.CalculateCTMapPressure(gc.controllerManager, ctmap.GlobalMaps(gc.ipv4, gc.ipv6)...) 232 } 233 234 // runGC run CT's garbage collector for the given endpoint. `isLocal` refers if 235 // the CT map is set to local. If `isIPv6` is set specifies that is the IPv6 236 // map. `filter` represents the filter type to be used while looping all CT 237 // entries. 238 // 239 // The provided endpoint is optional; if it is provided, then its map will be 240 // garbage collected and any failures will be logged to the endpoint log. 241 // Otherwise it will garbage-collect the global map and use the global log. 242 func (gc *GC) runGC(e *endpoint.Endpoint, ipv4, ipv6, triggeredBySignal bool, filter *ctmap.GCFilter) (maxDeleteRatio float64, success bool) { 243 var maps []*ctmap.Map 244 success = true 245 246 if e == nil { 247 maps = ctmap.GlobalMaps(ipv4, ipv6) 248 249 // We treat per-cluster CT Maps as global maps. When we don't enable 250 // cluster-aware addressing, perClusterCTMapsRetriever is nil (default). 251 if gc.perClusterCTMapsRetriever != nil { 252 maps = append(maps, gc.perClusterCTMapsRetriever()...) 253 } 254 } else { 255 maps = ctmap.LocalMaps(e, ipv4, ipv6) 256 } 257 for _, m := range maps { 258 path, err := ctmap.OpenCTMap(m) 259 if err != nil { 260 success = false 261 msg := "Skipping CT garbage collection" 262 scopedLog := gc.logger.WithError(err).WithField(logfields.Path, path) 263 if os.IsNotExist(err) { 264 scopedLog.Debug(msg) 265 } else { 266 scopedLog.Warn(msg) 267 } 268 if e != nil { 269 e.LogStatus(endpoint.BPF, endpoint.Warning, fmt.Sprintf("%s: %s", msg, err)) 270 } 271 continue 272 } 273 defer m.Close() 274 275 deleted, err := ctmap.GC(m, filter) 276 if err != nil { 277 gc.logger.WithError(err).Error("failed to perform CT garbage collection") 278 success = false 279 } 280 281 if deleted > 0 { 282 ratio := float64(deleted) / float64(m.MaxEntries()) 283 if ratio > maxDeleteRatio { 284 maxDeleteRatio = ratio 285 } 286 gc.logger.WithFields(logrus.Fields{ 287 logfields.Path: path, 288 "count": deleted, 289 }).Debug("Deleted filtered entries from map") 290 } 291 } 292 293 if e == nil && triggeredBySignal { 294 vsns := []ctmap.CTMapIPVersion{} 295 if ipv4 { 296 vsns = append(vsns, ctmap.CTMapIPv4) 297 } 298 if ipv6 { 299 vsns = append(vsns, ctmap.CTMapIPv6) 300 } 301 302 for _, vsn := range vsns { 303 ctMapTCP, ctMapAny := ctmap.FilterMapsByProto(maps, vsn) 304 stats := ctmap.PurgeOrphanNATEntries(ctMapTCP, ctMapAny) 305 if stats != nil && (stats.EgressDeleted != 0 || stats.IngressDeleted != 0) { 306 gc.logger.WithFields(logrus.Fields{ 307 "ingressDeleted": stats.IngressDeleted, 308 "egressDeleted": stats.EgressDeleted, 309 "ingressAlive": stats.IngressAlive, 310 "egressAlive": stats.EgressAlive, 311 "ctMapIPVersion": vsn, 312 }).Info("Deleted orphan SNAT entries from map") 313 } 314 } 315 } 316 317 return 318 } 319 320 type fakeCTMapGC struct{} 321 322 func NewFake() Enabler { return fakeCTMapGC{} } 323 func (fakeCTMapGC) Enable() {}