github.com/cilium/cilium@v1.16.2/pkg/maps/ctmap/gc/gc.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package gc
     5  
     6  import (
     7  	"fmt"
     8  	"net/netip"
     9  	"os"
    10  
    11  	"github.com/cilium/hive/cell"
    12  	"github.com/cilium/statedb"
    13  	"github.com/sirupsen/logrus"
    14  
    15  	"github.com/cilium/cilium/pkg/controller"
    16  	"github.com/cilium/cilium/pkg/datapath/tables"
    17  	"github.com/cilium/cilium/pkg/datapath/types"
    18  	"github.com/cilium/cilium/pkg/endpoint"
    19  	"github.com/cilium/cilium/pkg/inctimer"
    20  	"github.com/cilium/cilium/pkg/logging/logfields"
    21  	"github.com/cilium/cilium/pkg/maps/ctmap"
    22  	"github.com/cilium/cilium/pkg/option"
    23  	"github.com/cilium/cilium/pkg/time"
    24  )
    25  
    26  type Enabler interface {
    27  	// Enable enables the connection tracking garbage collection.
    28  	Enable()
    29  }
    30  
    31  // EndpointManager is any type which returns the list of Endpoints which are
    32  // globally exposed on the current node.
    33  type EndpointManager interface {
    34  	GetEndpoints() []*endpoint.Endpoint
    35  }
    36  
    37  type PerClusterCTMapsRetriever func() []*ctmap.Map
    38  
    39  type parameters struct {
    40  	cell.In
    41  
    42  	Lifecycle       cell.Lifecycle
    43  	Logger          logrus.FieldLogger
    44  	DB              *statedb.DB
    45  	NodeAddrs       statedb.Table[tables.NodeAddress]
    46  	DaemonConfig    *option.DaemonConfig
    47  	EndpointManager EndpointManager
    48  	Datapath        types.Datapath
    49  	SignalManager   SignalHandler
    50  
    51  	PerClusterCTMapsRetriever PerClusterCTMapsRetriever `optional:"true"`
    52  }
    53  
    54  type GC struct {
    55  	logger logrus.FieldLogger
    56  
    57  	ipv4 bool
    58  	ipv6 bool
    59  
    60  	db        *statedb.DB
    61  	nodeAddrs statedb.Table[tables.NodeAddress]
    62  
    63  	endpointsManager EndpointManager
    64  	nodeAddressing   types.NodeAddressing
    65  	signalHandler    SignalHandler
    66  
    67  	perClusterCTMapsRetriever PerClusterCTMapsRetriever
    68  	controllerManager         *controller.Manager
    69  }
    70  
    71  func New(params parameters) *GC {
    72  	gc := &GC{
    73  		logger: params.Logger,
    74  
    75  		ipv4: params.DaemonConfig.EnableIPv4,
    76  		ipv6: params.DaemonConfig.EnableIPv6,
    77  
    78  		db:        params.DB,
    79  		nodeAddrs: params.NodeAddrs,
    80  
    81  		endpointsManager: params.EndpointManager,
    82  		nodeAddressing:   params.Datapath.LocalNodeAddressing(),
    83  		signalHandler:    params.SignalManager,
    84  
    85  		controllerManager: controller.NewManager(),
    86  	}
    87  	params.Lifecycle.Append(cell.Hook{
    88  		// OnStart not yet defined pending further modularization of CT map GC.
    89  		OnStop: func(cell.HookContext) error {
    90  			gc.controllerManager.RemoveAllAndWait()
    91  			return nil
    92  		},
    93  	})
    94  	return gc
    95  }
    96  
    97  // Enable enables the connection tracking garbage collection.
    98  func (gc *GC) Enable() {
    99  	var (
   100  		initialScan         = true
   101  		initialScanComplete = make(chan struct{})
   102  	)
   103  
   104  	go func() {
   105  		ipv4 := gc.ipv4
   106  		ipv6 := gc.ipv6
   107  		triggeredBySignal := false
   108  		ctTimer, ctTimerDone := inctimer.New()
   109  		var gcPrev time.Time
   110  		defer ctTimerDone()
   111  		for {
   112  			var (
   113  				maxDeleteRatio float64
   114  
   115  				// epsMap contains an IP -> EP mapping. It is used by EmitCTEntryCB to
   116  				// avoid doing gc.endpointsManager.LookupIP, which is more expensive.
   117  				epsMap = make(map[netip.Addr]*endpoint.Endpoint)
   118  
   119  				// gcStart and emitEntryCB are used to populate DNSZombieMapping fields
   120  				// on endpoints. These hold IPs that are deletable in the DNS caches,
   121  				// but may be in use by connections. Each loop of this GC keeps those
   122  				// entries alive by touching them in emitEntryCB. We also need to
   123  				// record the start of each CT GC loop (further below in the
   124  				// goroutine). In all cases the timestamp used is the start of the GC
   125  				// loop. This simplifies the logic to determine if a marked connection
   126  				// was marked in the most recent GC loop or not: if the active
   127  				// timestamp is before the recorded start of the GC loop then it must
   128  				// mean the next iteration has completed and it is not in-use.
   129  				gcStart = time.Now()
   130  
   131  				// aliveTime is offset to the future by ToFQDNsIdleConnectionGracePeriod
   132  				// (default 0), allowing previously active connections to be considerred
   133  				// alive during idle periods of upto ToFQDNsIdleConnectionGracePeriod.
   134  				aliveTime = gcStart.Add(option.Config.ToFQDNsIdleConnectionGracePeriod)
   135  
   136  				emitEntryCB = func(srcIP, dstIP netip.Addr, srcPort, dstPort uint16, nextHdr, flags uint8, entry *ctmap.CtEntry) {
   137  					// FQDN related connections can only be outbound
   138  					if flags != ctmap.TUPLE_F_OUT {
   139  						return
   140  					}
   141  					if ep, exists := epsMap[srcIP]; exists {
   142  						ep.MarkDNSCTEntry(dstIP, aliveTime)
   143  					}
   144  				}
   145  
   146  				success = false
   147  			)
   148  
   149  			gcInterval := gcStart.Sub(gcPrev)
   150  			if gcPrev.IsZero() {
   151  				gcInterval = time.Duration(0)
   152  			}
   153  			gcPrev = gcStart
   154  
   155  			eps := gc.endpointsManager.GetEndpoints()
   156  			for _, e := range eps {
   157  				epsMap[e.IPv4Address()] = e
   158  				epsMap[e.IPv6Address()] = e
   159  			}
   160  
   161  			if len(eps) > 0 || initialScan {
   162  				gc.logger.Info("Starting initial GC of connection tracking")
   163  				maxDeleteRatio, success = gc.runGC(nil, ipv4, ipv6, triggeredBySignal, &ctmap.GCFilter{RemoveExpired: true, EmitCTEntryCB: emitEntryCB})
   164  			}
   165  			for _, e := range eps {
   166  				if !e.ConntrackLocal() {
   167  					// Skip because GC was handled above.
   168  					continue
   169  				}
   170  				_, epSuccess := gc.runGC(e, ipv4, ipv6, triggeredBySignal, &ctmap.GCFilter{RemoveExpired: true, EmitCTEntryCB: emitEntryCB})
   171  				success = success && epSuccess
   172  			}
   173  
   174  			// Mark the CT GC as over in each EP DNSZombies instance, if we did a *full* GC run
   175  			interval := ctmap.GetInterval(gcInterval, maxDeleteRatio)
   176  			if success && ipv4 == gc.ipv4 && ipv6 == gc.ipv6 {
   177  				for _, e := range eps {
   178  					e.MarkCTGCTime(gcStart, time.Now().Add(interval))
   179  				}
   180  			}
   181  
   182  			if initialScan {
   183  				close(initialScanComplete)
   184  				initialScan = false
   185  			}
   186  
   187  			triggeredBySignal = false
   188  			gc.signalHandler.UnmuteSignals()
   189  			select {
   190  			case x, ok := <-gc.signalHandler.Signals():
   191  				if !ok {
   192  					gc.logger.Info("Signal handler closed. Stopping conntrack garbage collector")
   193  					return
   194  				}
   195  				// mute before draining so that no more wakeups are queued just
   196  				// after we have drained
   197  				gc.signalHandler.MuteSignals()
   198  				triggeredBySignal = true
   199  				ipv4 = false
   200  				ipv6 = false
   201  				if x == SignalProtoV4 {
   202  					ipv4 = true
   203  				} else if x == SignalProtoV6 {
   204  					ipv6 = true
   205  				}
   206  				// Drain current queue since we just woke up anyway.
   207  				for len(gc.signalHandler.Signals()) > 0 {
   208  					x := <-gc.signalHandler.Signals()
   209  					if x == SignalProtoV4 {
   210  						ipv4 = true
   211  					} else if x == SignalProtoV6 {
   212  						ipv6 = true
   213  					}
   214  				}
   215  			case <-ctTimer.After(interval):
   216  				gc.signalHandler.MuteSignals()
   217  				ipv4 = gc.ipv4
   218  				ipv6 = gc.ipv6
   219  			}
   220  		}
   221  	}()
   222  
   223  	select {
   224  	case <-initialScanComplete:
   225  		gc.logger.Info("Initial scan of connection tracking completed")
   226  	case <-time.After(30 * time.Second):
   227  		gc.logger.Fatal("Timeout while waiting for initial conntrack scan")
   228  	}
   229  
   230  	// Not supporting BPF map pressure for local CT maps as of yet.
   231  	ctmap.CalculateCTMapPressure(gc.controllerManager, ctmap.GlobalMaps(gc.ipv4, gc.ipv6)...)
   232  }
   233  
   234  // runGC run CT's garbage collector for the given endpoint. `isLocal` refers if
   235  // the CT map is set to local. If `isIPv6` is set specifies that is the IPv6
   236  // map. `filter` represents the filter type to be used while looping all CT
   237  // entries.
   238  //
   239  // The provided endpoint is optional; if it is provided, then its map will be
   240  // garbage collected and any failures will be logged to the endpoint log.
   241  // Otherwise it will garbage-collect the global map and use the global log.
   242  func (gc *GC) runGC(e *endpoint.Endpoint, ipv4, ipv6, triggeredBySignal bool, filter *ctmap.GCFilter) (maxDeleteRatio float64, success bool) {
   243  	var maps []*ctmap.Map
   244  	success = true
   245  
   246  	if e == nil {
   247  		maps = ctmap.GlobalMaps(ipv4, ipv6)
   248  
   249  		// We treat per-cluster CT Maps as global maps. When we don't enable
   250  		// cluster-aware addressing, perClusterCTMapsRetriever is nil (default).
   251  		if gc.perClusterCTMapsRetriever != nil {
   252  			maps = append(maps, gc.perClusterCTMapsRetriever()...)
   253  		}
   254  	} else {
   255  		maps = ctmap.LocalMaps(e, ipv4, ipv6)
   256  	}
   257  	for _, m := range maps {
   258  		path, err := ctmap.OpenCTMap(m)
   259  		if err != nil {
   260  			success = false
   261  			msg := "Skipping CT garbage collection"
   262  			scopedLog := gc.logger.WithError(err).WithField(logfields.Path, path)
   263  			if os.IsNotExist(err) {
   264  				scopedLog.Debug(msg)
   265  			} else {
   266  				scopedLog.Warn(msg)
   267  			}
   268  			if e != nil {
   269  				e.LogStatus(endpoint.BPF, endpoint.Warning, fmt.Sprintf("%s: %s", msg, err))
   270  			}
   271  			continue
   272  		}
   273  		defer m.Close()
   274  
   275  		deleted, err := ctmap.GC(m, filter)
   276  		if err != nil {
   277  			gc.logger.WithError(err).Error("failed to perform CT garbage collection")
   278  			success = false
   279  		}
   280  
   281  		if deleted > 0 {
   282  			ratio := float64(deleted) / float64(m.MaxEntries())
   283  			if ratio > maxDeleteRatio {
   284  				maxDeleteRatio = ratio
   285  			}
   286  			gc.logger.WithFields(logrus.Fields{
   287  				logfields.Path: path,
   288  				"count":        deleted,
   289  			}).Debug("Deleted filtered entries from map")
   290  		}
   291  	}
   292  
   293  	if e == nil && triggeredBySignal {
   294  		vsns := []ctmap.CTMapIPVersion{}
   295  		if ipv4 {
   296  			vsns = append(vsns, ctmap.CTMapIPv4)
   297  		}
   298  		if ipv6 {
   299  			vsns = append(vsns, ctmap.CTMapIPv6)
   300  		}
   301  
   302  		for _, vsn := range vsns {
   303  			ctMapTCP, ctMapAny := ctmap.FilterMapsByProto(maps, vsn)
   304  			stats := ctmap.PurgeOrphanNATEntries(ctMapTCP, ctMapAny)
   305  			if stats != nil && (stats.EgressDeleted != 0 || stats.IngressDeleted != 0) {
   306  				gc.logger.WithFields(logrus.Fields{
   307  					"ingressDeleted": stats.IngressDeleted,
   308  					"egressDeleted":  stats.EgressDeleted,
   309  					"ingressAlive":   stats.IngressAlive,
   310  					"egressAlive":    stats.EgressAlive,
   311  					"ctMapIPVersion": vsn,
   312  				}).Info("Deleted orphan SNAT entries from map")
   313  			}
   314  		}
   315  	}
   316  
   317  	return
   318  }
   319  
   320  type fakeCTMapGC struct{}
   321  
   322  func NewFake() Enabler      { return fakeCTMapGC{} }
   323  func (fakeCTMapGC) Enable() {}