k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/proxy/nftables/proxier.go

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2015 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package nftables
    21  
    22  //
    23  // NOTE: this needs to be tested in e2e since it uses nftables for everything.
    24  //
    25  
    26  import (
    27  	"context"
    28  	"crypto/sha256"
    29  	"encoding/base32"
    30  	"fmt"
    31  	"net"
    32  	"reflect"
    33  	"strconv"
    34  	"strings"
    35  	"sync"
    36  	"sync/atomic"
    37  	"time"
    38  
    39  	v1 "k8s.io/api/core/v1"
    40  	discovery "k8s.io/api/discovery/v1"
    41  	"k8s.io/apimachinery/pkg/types"
    42  	"k8s.io/apimachinery/pkg/util/sets"
    43  	"k8s.io/apimachinery/pkg/util/wait"
    44  	"k8s.io/client-go/tools/events"
    45  	utilsysctl "k8s.io/component-helpers/node/util/sysctl"
    46  	"k8s.io/klog/v2"
    47  	"k8s.io/kubernetes/pkg/proxy"
    48  	"k8s.io/kubernetes/pkg/proxy/conntrack"
    49  	"k8s.io/kubernetes/pkg/proxy/healthcheck"
    50  	"k8s.io/kubernetes/pkg/proxy/metaproxier"
    51  	"k8s.io/kubernetes/pkg/proxy/metrics"
    52  	proxyutil "k8s.io/kubernetes/pkg/proxy/util"
    53  	"k8s.io/kubernetes/pkg/util/async"
    54  	utilexec "k8s.io/utils/exec"
    55  	netutils "k8s.io/utils/net"
    56  	"k8s.io/utils/ptr"
    57  	"sigs.k8s.io/knftables"
    58  )
    59  
    60  const (
    61  	// Our nftables table. All of our chains/sets/maps are created inside this table,
    62  	// so they don't need any "kube-" or "kube-proxy-" prefix of their own.
    63  	kubeProxyTable = "kube-proxy"
    64  
    65  	// base chains
    66  	filterPreroutingChain     = "filter-prerouting"
    67  	filterInputChain          = "filter-input"
    68  	filterForwardChain        = "filter-forward"
    69  	filterOutputChain         = "filter-output"
    70  	filterOutputPostDNATChain = "filter-output-post-dnat"
    71  	natPreroutingChain        = "nat-prerouting"
    72  	natOutputChain            = "nat-output"
    73  	natPostroutingChain       = "nat-postrouting"
    74  
    75  	// service dispatch
    76  	servicesChain       = "services"
    77  	serviceIPsMap       = "service-ips"
    78  	serviceNodePortsMap = "service-nodeports"
    79  
    80  	// set of IPs that accept NodePort traffic
    81  	nodePortIPsSet = "nodeport-ips"
    82  
    83  	// set of active ClusterIPs.
    84  	clusterIPsSet = "cluster-ips"
    85  
    86  	// handling for services with no endpoints
    87  	serviceEndpointsCheckChain  = "service-endpoints-check"
    88  	nodePortEndpointsCheckChain = "nodeport-endpoints-check"
    89  	noEndpointServicesMap       = "no-endpoint-services"
    90  	noEndpointNodePortsMap      = "no-endpoint-nodeports"
    91  	rejectChain                 = "reject-chain"
    92  
    93  	// handling traffic to unallocated ClusterIPs and undefined ports of ClusterIPs
    94  	clusterIPsCheckChain = "cluster-ips-check"
    95  
    96  	// LoadBalancerSourceRanges handling
    97  	firewallIPsMap     = "firewall-ips"
    98  	firewallCheckChain = "firewall-check"
    99  
   100  	// masquerading
   101  	markMasqChain     = "mark-for-masquerade"
   102  	masqueradingChain = "masquerading"
   103  )
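        // For illustration: on an IPv4 host, all of the names above live inside
        // "table ip kube-proxy", so e.g. the service dispatch chain appears in
        // "nft list ruleset" output as "chain services" within that table.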
   104  
   105  // NewDualStackProxier creates a MetaProxier instance, with IPv4 and IPv6 proxies.
   106  func NewDualStackProxier(
   107  	ctx context.Context,
   108  	sysctl utilsysctl.Interface,
   109  	syncPeriod time.Duration,
   110  	minSyncPeriod time.Duration,
   111  	masqueradeAll bool,
   112  	masqueradeBit int,
   113  	localDetectors map[v1.IPFamily]proxyutil.LocalTrafficDetector,
   114  	hostname string,
   115  	nodeIPs map[v1.IPFamily]net.IP,
   116  	recorder events.EventRecorder,
   117  	healthzServer *healthcheck.ProxierHealthServer,
   118  	nodePortAddresses []string,
   119  	initOnly bool,
   120  ) (proxy.Provider, error) {
   121  	// Create an ipv4 instance of the single-stack proxier
   122  	ipv4Proxier, err := NewProxier(ctx, v1.IPv4Protocol, sysctl,
   123  		syncPeriod, minSyncPeriod, masqueradeAll, masqueradeBit,
   124  		localDetectors[v1.IPv4Protocol], hostname, nodeIPs[v1.IPv4Protocol],
   125  		recorder, healthzServer, nodePortAddresses, initOnly)
   126  	if err != nil {
   127  		return nil, fmt.Errorf("unable to create ipv4 proxier: %v", err)
   128  	}
   129  
   130  	ipv6Proxier, err := NewProxier(ctx, v1.IPv6Protocol, sysctl,
   131  		syncPeriod, minSyncPeriod, masqueradeAll, masqueradeBit,
   132  		localDetectors[v1.IPv6Protocol], hostname, nodeIPs[v1.IPv6Protocol],
   133  		recorder, healthzServer, nodePortAddresses, initOnly)
   134  	if err != nil {
   135  		return nil, fmt.Errorf("unable to create ipv6 proxier: %v", err)
   136  	}
   137  	if initOnly {
   138  		return nil, nil
   139  	}
   140  	return metaproxier.NewMetaProxier(ipv4Proxier, ipv6Proxier), nil
   141  }
   142  
   143  // Proxier is an nftables based proxy
   144  type Proxier struct {
   145  	// ipFamily defines the IP family which this proxier is tracking.
   146  	ipFamily v1.IPFamily
   147  
   148  	// endpointsChanges and serviceChanges contain all changes to endpoints and
   149  	// services that have happened since nftables was last synced. For a single
   150  	// object, changes are accumulated, i.e. previous is the state from before all
   151  	// of them, current is the state after applying all of them.
   152  	endpointsChanges *proxy.EndpointsChangeTracker
   153  	serviceChanges   *proxy.ServiceChangeTracker
   154  
   155  	mu           sync.Mutex // protects the following fields
   156  	svcPortMap   proxy.ServicePortMap
   157  	endpointsMap proxy.EndpointsMap
   158  	nodeLabels   map[string]string
   159  	// endpointSlicesSynced and servicesSynced are set to true
   160  	// when the corresponding objects are synced after startup. This is used to
   161  	// avoid updating nftables with partial data after a kube-proxy restart.
   162  	endpointSlicesSynced bool
   163  	servicesSynced       bool
   164  	initialized          int32
   165  	syncRunner           *async.BoundedFrequencyRunner // governs calls to syncProxyRules
   166  	syncPeriod           time.Duration
   167  	flushed              bool
   168  
   169  	// These are effectively const and do not need the mutex to be held.
   170  	nftables       knftables.Interface
   171  	masqueradeAll  bool
   172  	masqueradeMark string
   173  	conntrack      conntrack.Interface
   174  	localDetector  proxyutil.LocalTrafficDetector
   175  	hostname       string
   176  	nodeIP         net.IP
   177  	recorder       events.EventRecorder
   178  
   179  	serviceHealthServer healthcheck.ServiceHealthServer
   180  	healthzServer       *healthcheck.ProxierHealthServer
   181  
   182  	// nodePortAddresses selects the interfaces where nodePort works.
   183  	nodePortAddresses *proxyutil.NodePortAddresses
   184  	// networkInterfacer defines an interface for several net library functions.
   185  	// Inject for test purpose.
   186  	networkInterfacer proxyutil.NetworkInterfacer
   187  
   188  	// staleChains contains information about chains to be deleted later
   189  	staleChains map[string]time.Time
   190  
   191  	// serviceCIDRs is a comma-separated list of the ServiceCIDRs belonging to the
   192  	// IP family this proxier is operating on, in a form knftables can consume directly.
   193  	serviceCIDRs string
   194  
   195  	logger klog.Logger
   196  }
   197  
   198  // Proxier implements proxy.Provider
   199  var _ proxy.Provider = &Proxier{}
   200  
   201  // NewProxier returns a new nftables Proxier. Once a proxier is created, it will keep
   202  // nftables up to date in the background and will not terminate if a particular nftables
   203  // call fails.
   204  func NewProxier(ctx context.Context,
   205  	ipFamily v1.IPFamily,
   206  	sysctl utilsysctl.Interface,
   207  	syncPeriod time.Duration,
   208  	minSyncPeriod time.Duration,
   209  	masqueradeAll bool,
   210  	masqueradeBit int,
   211  	localDetector proxyutil.LocalTrafficDetector,
   212  	hostname string,
   213  	nodeIP net.IP,
   214  	recorder events.EventRecorder,
   215  	healthzServer *healthcheck.ProxierHealthServer,
   216  	nodePortAddressStrings []string,
   217  	initOnly bool,
   218  ) (*Proxier, error) {
   219  	logger := klog.LoggerWithValues(klog.FromContext(ctx), "ipFamily", ipFamily)
   220  
   221  	if initOnly {
   222  		logger.Info("System initialized and --init-only specified")
   223  		return nil, nil
   224  	}
   225  
   226  	// Generate the masquerade mark to use for SNAT rules.
   227  	masqueradeValue := 1 << uint(masqueradeBit)
   228  	masqueradeMark := fmt.Sprintf("%#08x", masqueradeValue)
   229  	logger.V(2).Info("Using nftables mark for masquerade", "mark", masqueradeMark)
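        	// Worked example (values assumed, not taken from this file): with the
        	// default masqueradeBit of 14, masqueradeValue is 1<<14 == 0x4000, and
        	// "%#08x" renders it as "0x004000" (the "0x" prefix counts toward the
        	// zero-padded width).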
   230  
   231  	nodePortAddresses := proxyutil.NewNodePortAddresses(ipFamily, nodePortAddressStrings)
   232  
   233  	serviceHealthServer := healthcheck.NewServiceHealthServer(hostname, recorder, nodePortAddresses, healthzServer)
   234  
   235  	var nftablesFamily knftables.Family
   236  	if ipFamily == v1.IPv4Protocol {
   237  		nftablesFamily = knftables.IPv4Family
   238  	} else {
   239  		nftablesFamily = knftables.IPv6Family
   240  	}
   241  	nft, err := knftables.New(nftablesFamily, kubeProxyTable)
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  
   246  	proxier := &Proxier{
   247  		ipFamily:            ipFamily,
   248  		svcPortMap:          make(proxy.ServicePortMap),
   249  		serviceChanges:      proxy.NewServiceChangeTracker(newServiceInfo, ipFamily, recorder, nil),
   250  		endpointsMap:        make(proxy.EndpointsMap),
   251  		endpointsChanges:    proxy.NewEndpointsChangeTracker(hostname, newEndpointInfo, ipFamily, recorder, nil),
   252  		syncPeriod:          syncPeriod,
   253  		nftables:            nft,
   254  		masqueradeAll:       masqueradeAll,
   255  		masqueradeMark:      masqueradeMark,
   256  		conntrack:           conntrack.NewExec(utilexec.New()),
   257  		localDetector:       localDetector,
   258  		hostname:            hostname,
   259  		nodeIP:              nodeIP,
   260  		recorder:            recorder,
   261  		serviceHealthServer: serviceHealthServer,
   262  		healthzServer:       healthzServer,
   263  		nodePortAddresses:   nodePortAddresses,
   264  		networkInterfacer:   proxyutil.RealNetwork{},
   265  		staleChains:         make(map[string]time.Time),
   266  		logger:              logger,
   267  	}
   268  
   269  	burstSyncs := 2
   270  	logger.V(2).Info("NFTables sync params", "minSyncPeriod", minSyncPeriod, "syncPeriod", syncPeriod, "burstSyncs", burstSyncs)
   271  	proxier.syncRunner = async.NewBoundedFrequencyRunner("sync-runner", proxier.syncProxyRules, minSyncPeriod, syncPeriod, burstSyncs)
   272  
   273  	return proxier, nil
   274  }
   275  
   276  // internal struct for storing service information
   277  type servicePortInfo struct {
   278  	*proxy.BaseServicePortInfo
   279  	// The following fields are computed and stored for performance reasons.
   280  	nameString             string
   281  	clusterPolicyChainName string
   282  	localPolicyChainName   string
   283  	externalChainName      string
   284  	firewallChainName      string
   285  }
   286  
   287  // returns a new proxy.ServicePort which abstracts a servicePortInfo
   288  func newServiceInfo(port *v1.ServicePort, service *v1.Service, bsvcPortInfo *proxy.BaseServicePortInfo) proxy.ServicePort {
   289  	svcPort := &servicePortInfo{BaseServicePortInfo: bsvcPortInfo}
   290  
   291  	// Store the following for performance reasons.
   292  	svcName := types.NamespacedName{Namespace: service.Namespace, Name: service.Name}
   293  	svcPortName := proxy.ServicePortName{NamespacedName: svcName, Port: port.Name}
   294  	svcPort.nameString = svcPortName.String()
   295  
   296  	chainNameBase := servicePortChainNameBase(&svcPortName, strings.ToLower(string(svcPort.Protocol())))
   297  	svcPort.clusterPolicyChainName = servicePortPolicyClusterChainNamePrefix + chainNameBase
   298  	svcPort.localPolicyChainName = servicePortPolicyLocalChainNamePrefix + chainNameBase
   299  	svcPort.externalChainName = serviceExternalChainNamePrefix + chainNameBase
   300  	svcPort.firewallChainName = servicePortFirewallChainNamePrefix + chainNameBase
   301  
   302  	return svcPort
   303  }
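        // For example, for the TCP port "p80" of service "ns1/svc1" (reusing the sample
        // hash from the servicePortChainNameBase docs below), the computed names would be
        // "service-ULMVA6XW-ns1/svc1/tcp/p80", "local-ULMVA6XW-ns1/svc1/tcp/p80",
        // "external-ULMVA6XW-ns1/svc1/tcp/p80", and "firewall-ULMVA6XW-ns1/svc1/tcp/p80".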
   304  
   305  // internal struct for endpoints information
   306  type endpointInfo struct {
   307  	*proxy.BaseEndpointInfo
   308  
   309  	chainName       string
   310  	affinitySetName string
   311  }
   312  
   313  // returns a new proxy.Endpoint which abstracts an endpointInfo
   314  func newEndpointInfo(baseInfo *proxy.BaseEndpointInfo, svcPortName *proxy.ServicePortName) proxy.Endpoint {
   315  	chainNameBase := servicePortEndpointChainNameBase(svcPortName, strings.ToLower(string(svcPortName.Protocol)), baseInfo.String())
   316  	return &endpointInfo{
   317  		BaseEndpointInfo: baseInfo,
   318  		chainName:        servicePortEndpointChainNamePrefix + chainNameBase,
   319  		affinitySetName:  servicePortEndpointAffinityNamePrefix + chainNameBase,
   320  	}
   321  }
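        // For example, for endpoint 10.180.0.1:80 of ns1/svc1's TCP port "p80" (reusing
        // the sample hash from the servicePortEndpointChainNameBase docs below), chainName
        // would be "endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80" and affinitySetName
        // "affinity-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80".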
   322  
   323  // nftablesBaseChains lists our "base chains"; those that are directly connected to the
   324  // netfilter hooks (e.g., "postrouting", "input", etc.), as opposed to "regular" chains,
   325  // which are only run when a rule jumps to them. See
   326  // https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains.
   327  //
   328  // These are set up from setupNFTables() and then not directly referenced by
   329  // syncProxyRules().
   330  //
   331  // All of our base chains have names that are just "${type}-${hook}". e.g., "nat-prerouting".
   332  type nftablesBaseChain struct {
   333  	name      string
   334  	chainType knftables.BaseChainType
   335  	hook      knftables.BaseChainHook
   336  	priority  knftables.BaseChainPriority
   337  }
   338  
   339  var nftablesBaseChains = []nftablesBaseChain{
   340  	// We want our filtering rules to operate on pre-DNAT dest IPs, so our filter
   341  	// chains have to run before DNAT.
   342  	{filterPreroutingChain, knftables.FilterType, knftables.PreroutingHook, knftables.DNATPriority + "-10"},
   343  	{filterInputChain, knftables.FilterType, knftables.InputHook, knftables.DNATPriority + "-10"},
   344  	{filterForwardChain, knftables.FilterType, knftables.ForwardHook, knftables.DNATPriority + "-10"},
   345  	{filterOutputChain, knftables.FilterType, knftables.OutputHook, knftables.DNATPriority + "-10"},
   346  	{filterOutputPostDNATChain, knftables.FilterType, knftables.OutputHook, knftables.DNATPriority + "+10"},
   347  	{natPreroutingChain, knftables.NATType, knftables.PreroutingHook, knftables.DNATPriority},
   348  	{natOutputChain, knftables.NATType, knftables.OutputHook, knftables.DNATPriority},
   349  	{natPostroutingChain, knftables.NATType, knftables.PostroutingHook, knftables.SNATPriority},
   350  }
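        // As a sketch of how the first entry renders: knftables.DNATPriority is the
        // named nftables priority "dstnat" (numerically -100), so "filter-prerouting"
        // becomes, roughly:
        //
        //	chain filter-prerouting { type filter hook prerouting priority -110 ; }
        //
        // i.e., it runs just before any DNAT, as the comment above requires.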
   351  
   352  // nftablesJumpChains lists our top-level "regular chains" that are jumped to directly
   353  // from one of the base chains. These are set up from setupNFTables(), and some of them
   354  // are also referenced in syncProxyRules().
   355  type nftablesJumpChain struct {
   356  	dstChain  string
   357  	srcChain  string
   358  	extraArgs string
   359  }
   360  
   361  var nftablesJumpChains = []nftablesJumpChain{
   362  	// We can't jump to endpointsCheckChain from filter-prerouting the way we do
   363  	// for firewallCheckChain, because on kernels before 5.9 the reject action is
   364  	// only valid in chains using the input, forward, or output hooks.
   365  	{nodePortEndpointsCheckChain, filterInputChain, "ct state new"},
   366  	{serviceEndpointsCheckChain, filterInputChain, "ct state new"},
   367  	{serviceEndpointsCheckChain, filterForwardChain, "ct state new"},
   368  	{serviceEndpointsCheckChain, filterOutputChain, "ct state new"},
   369  
   370  	{firewallCheckChain, filterPreroutingChain, "ct state new"},
   371  	{firewallCheckChain, filterOutputChain, "ct state new"},
   372  
   373  	{servicesChain, natOutputChain, ""},
   374  	{servicesChain, natPreroutingChain, ""},
   375  	{masqueradingChain, natPostroutingChain, ""},
   376  
   377  	{clusterIPsCheckChain, filterForwardChain, "ct state new"},
   378  	{clusterIPsCheckChain, filterOutputPostDNATChain, "ct state new"},
   379  }
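        // Each entry renders as a simple jump rule; e.g., the firewallCheckChain entry
        // for filterPreroutingChain produces, inside "filter-prerouting":
        //
        //	ct state new jump firewall-check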
   380  
   381  // ensureChain adds commands to tx to ensure that chain exists and doesn't contain
   382  // anything from before this transaction (using createdChains to ensure that we
   383  // don't Flush a chain more than once, which would lose *new* rules as well).
   384  func ensureChain(chain string, tx *knftables.Transaction, createdChains sets.Set[string]) {
   385  	if createdChains.Has(chain) {
   386  		return
   387  	}
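        	// Add creates the chain only if it doesn't already exist (it is a no-op
        	// otherwise), and Flush then empties it, so after this point the chain is
        	// guaranteed to exist and to contain only rules added later in this
        	// transaction.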
   388  	tx.Add(&knftables.Chain{
   389  		Name: chain,
   390  	})
   391  	tx.Flush(&knftables.Chain{
   392  		Name: chain,
   393  	})
   394  	createdChains.Insert(chain)
   395  }
   396  
   397  func (proxier *Proxier) setupNFTables(tx *knftables.Transaction) {
   398  	ipX := "ip"
   399  	ipvX_addr := "ipv4_addr" //nolint:stylecheck // var name intentionally resembles value
   400  	noLocalhost := "ip daddr != 127.0.0.0/8"
   401  	if proxier.ipFamily == v1.IPv6Protocol {
   402  		ipX = "ip6"
   403  		ipvX_addr = "ipv6_addr"
   404  		noLocalhost = "ip6 daddr != ::1"
   405  	}
   406  
   407  	tx.Add(&knftables.Table{
   408  		Comment: ptr.To("rules for kube-proxy"),
   409  	})
   410  
   411  	// Do an extra "add+delete" once to ensure all previous base chains in the table
   412  	// will be recreated. Otherwise, altering properties (e.g. priority) of these
   413  	// chains would fail the transaction.
   414  	if !proxier.flushed {
   415  		for _, bc := range nftablesBaseChains {
   416  			chain := &knftables.Chain{
   417  				Name: bc.name,
   418  			}
   419  			tx.Add(chain)
   420  			tx.Delete(chain)
   421  		}
   422  		proxier.flushed = true
   423  	}
   424  
   425  	// Create and flush base chains
   426  	for _, bc := range nftablesBaseChains {
   427  		chain := &knftables.Chain{
   428  			Name:     bc.name,
   429  			Type:     ptr.To(bc.chainType),
   430  			Hook:     ptr.To(bc.hook),
   431  			Priority: ptr.To(bc.priority),
   432  		}
   433  		tx.Add(chain)
   434  		tx.Flush(chain)
   435  	}
   436  
   437  	// Create and flush ordinary chains and add rules jumping to them
   438  	createdChains := sets.New[string]()
   439  	for _, c := range nftablesJumpChains {
   440  		ensureChain(c.dstChain, tx, createdChains)
   441  		tx.Add(&knftables.Rule{
   442  			Chain: c.srcChain,
   443  			Rule: knftables.Concat(
   444  				c.extraArgs,
   445  				"jump", c.dstChain,
   446  			),
   447  		})
   448  	}
   449  
   450  	// Ensure all of our other "top-level" chains exist
   451  	for _, chain := range []string{servicesChain, clusterIPsCheckChain, masqueradingChain, markMasqChain} {
   452  		ensureChain(chain, tx, createdChains)
   453  	}
   454  
   455  	// Add the rules in the mark-for-masquerade and masquerading chains
   456  	tx.Add(&knftables.Rule{
   457  		Chain: markMasqChain,
   458  		Rule: knftables.Concat(
   459  			"mark", "set", "mark", "or", proxier.masqueradeMark,
   460  		),
   461  	})
   462  
   463  	tx.Add(&knftables.Rule{
   464  		Chain: masqueradingChain,
   465  		Rule: knftables.Concat(
   466  			"mark", "and", proxier.masqueradeMark, "==", "0",
   467  			"return",
   468  		),
   469  	})
   470  	tx.Add(&knftables.Rule{
   471  		Chain: masqueradingChain,
   472  		Rule: knftables.Concat(
   473  			"mark", "set", "mark", "xor", proxier.masqueradeMark,
   474  		),
   475  	})
   476  	tx.Add(&knftables.Rule{
   477  		Chain: masqueradingChain,
   478  		Rule:  "masquerade fully-random",
   479  	})
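        	// Taken together, the three masquerading rules above render (for an
        	// illustrative mark of 0x4000) as:
        	//
        	//	mark and 0x4000 == 0 return
        	//	mark set mark xor 0x4000
        	//	masquerade fully-random
        	//
        	// i.e., return unless the packet was marked for masquerade, then clear
        	// the bit and masquerade.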
   480  
   481  	// add cluster-ips set.
   482  	tx.Add(&knftables.Set{
   483  		Name:    clusterIPsSet,
   484  		Type:    ipvX_addr,
   485  		Comment: ptr.To("Active ClusterIPs"),
   486  	})
   487  
   488  	// reject traffic to invalid ports of ClusterIPs.
   489  	tx.Add(&knftables.Rule{
   490  		Chain: clusterIPsCheckChain,
   491  		Rule: knftables.Concat(
   492  			ipX, "daddr", "@", clusterIPsSet, "reject",
   493  		),
   494  		Comment: ptr.To("Reject traffic to invalid ports of ClusterIPs"),
   495  	})
   496  
   497  	// drop traffic to unallocated ClusterIPs.
   498  	if len(proxier.serviceCIDRs) > 0 {
   499  		tx.Add(&knftables.Rule{
   500  			Chain: clusterIPsCheckChain,
   501  			Rule: knftables.Concat(
   502  				ipX, "daddr", "{", proxier.serviceCIDRs, "}",
   503  				"drop",
   504  			),
   505  			Comment: ptr.To("Drop traffic to unallocated ClusterIPs"),
   506  		})
   507  	}
   508  
   509  	// Fill in nodeport-ips set if needed (or delete it if not). (We do "add+delete"
   510  	// rather than just "delete" when we want to ensure the set doesn't exist, because
   511  	// doing just "delete" would return an error if the set didn't exist.)
   512  	tx.Add(&knftables.Set{
   513  		Name:    nodePortIPsSet,
   514  		Type:    ipvX_addr,
   515  		Comment: ptr.To("IPs that accept NodePort traffic"),
   516  	})
   517  	if proxier.nodePortAddresses.MatchAll() {
   518  		tx.Delete(&knftables.Set{
   519  			Name: nodePortIPsSet,
   520  		})
   521  	} else {
   522  		tx.Flush(&knftables.Set{
   523  			Name: nodePortIPsSet,
   524  		})
   525  		nodeIPs, err := proxier.nodePortAddresses.GetNodeIPs(proxier.networkInterfacer)
   526  		if err != nil {
   527  			proxier.logger.Error(err, "Failed to get node ip address matching nodeport cidrs, services with nodeport may not work as intended", "CIDRs", proxier.nodePortAddresses)
   528  		}
   529  		for _, ip := range nodeIPs {
   530  			if ip.IsLoopback() {
   531  				proxier.logger.Error(nil, "--nodeport-addresses includes localhost but localhost NodePorts are not supported", "address", ip.String())
   532  				continue
   533  			}
   534  			tx.Add(&knftables.Element{
   535  				Set: nodePortIPsSet,
   536  				Key: []string{
   537  					ip.String(),
   538  				},
   539  			})
   540  		}
   541  	}
   542  
   543  	// Set up "no endpoints" drop/reject handling
   544  	tx.Add(&knftables.Map{
   545  		Name:    noEndpointServicesMap,
   546  		Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
   547  		Comment: ptr.To("vmap to drop or reject packets to services with no endpoints"),
   548  	})
   549  	tx.Add(&knftables.Map{
   550  		Name:    noEndpointNodePortsMap,
   551  		Type:    "inet_proto . inet_service : verdict",
   552  		Comment: ptr.To("vmap to drop or reject packets to service nodeports with no endpoints"),
   553  	})
   554  
   555  	tx.Add(&knftables.Chain{
   556  		Name:    rejectChain,
   557  		Comment: ptr.To("helper for @no-endpoint-services / @no-endpoint-nodeports"),
   558  	})
   559  	tx.Flush(&knftables.Chain{
   560  		Name: rejectChain,
   561  	})
   562  	tx.Add(&knftables.Rule{
   563  		Chain: rejectChain,
   564  		Rule:  "reject",
   565  	})
   566  
   567  	tx.Add(&knftables.Rule{
   568  		Chain: serviceEndpointsCheckChain,
   569  		Rule: knftables.Concat(
   570  			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
   571  			"vmap", "@", noEndpointServicesMap,
   572  		),
   573  	})
   574  
   575  	if proxier.nodePortAddresses.MatchAll() {
   576  		tx.Add(&knftables.Rule{
   577  			Chain: nodePortEndpointsCheckChain,
   578  			Rule: knftables.Concat(
   579  				noLocalhost,
   580  				"meta l4proto . th dport",
   581  				"vmap", "@", noEndpointNodePortsMap,
   582  			),
   583  		})
   584  	} else {
   585  		tx.Add(&knftables.Rule{
   586  			Chain: nodePortEndpointsCheckChain,
   587  			Rule: knftables.Concat(
   588  				ipX, "daddr", "@", nodePortIPsSet,
   589  				"meta l4proto . th dport",
   590  				"vmap", "@", noEndpointNodePortsMap,
   591  			),
   592  		})
   593  	}
   594  
   595  	// Set up LoadBalancerSourceRanges firewalling
   596  	tx.Add(&knftables.Map{
   597  		Name:    firewallIPsMap,
   598  		Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
   599  		Comment: ptr.To("destinations that are subject to LoadBalancerSourceRanges"),
   600  	})
   601  
   602  	ensureChain(firewallCheckChain, tx, createdChains)
   603  	tx.Add(&knftables.Rule{
   604  		Chain: firewallCheckChain,
   605  		Rule: knftables.Concat(
   606  			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
   607  			"vmap", "@", firewallIPsMap,
   608  		),
   609  	})
   610  
   611  	// Set up service dispatch
   612  	tx.Add(&knftables.Map{
   613  		Name:    serviceIPsMap,
   614  		Type:    ipvX_addr + " . inet_proto . inet_service : verdict",
   615  		Comment: ptr.To("ClusterIP, ExternalIP and LoadBalancer IP traffic"),
   616  	})
   617  	tx.Add(&knftables.Map{
   618  		Name:    serviceNodePortsMap,
   619  		Type:    "inet_proto . inet_service : verdict",
   620  		Comment: ptr.To("NodePort traffic"),
   621  	})
   622  	tx.Add(&knftables.Rule{
   623  		Chain: servicesChain,
   624  		Rule: knftables.Concat(
   625  			ipX, "daddr", ".", "meta l4proto", ".", "th dport",
   626  			"vmap", "@", serviceIPsMap,
   627  		),
   628  	})
   629  	if proxier.nodePortAddresses.MatchAll() {
   630  		tx.Add(&knftables.Rule{
   631  			Chain: servicesChain,
   632  			Rule: knftables.Concat(
   633  				"fib daddr type local",
   634  				noLocalhost,
   635  				"meta l4proto . th dport",
   636  				"vmap", "@", serviceNodePortsMap,
   637  			),
   638  		})
   639  	} else {
   640  		tx.Add(&knftables.Rule{
   641  			Chain: servicesChain,
   642  			Rule: knftables.Concat(
   643  				ipX, "daddr", "@", nodePortIPsSet,
   644  				"meta l4proto . th dport",
   645  				"vmap", "@", serviceNodePortsMap,
   646  			),
   647  		})
   648  	}
   649  }
   650  
   651  // CleanupLeftovers removes all nftables rules and chains created by the Proxier.
   652  // It returns true if an error was encountered. Errors are logged.
   653  func CleanupLeftovers(ctx context.Context) bool {
   654  	logger := klog.FromContext(ctx)
   655  	var encounteredError bool
   656  
   657  	for _, family := range []knftables.Family{knftables.IPv4Family, knftables.IPv6Family} {
   658  		nft, err := knftables.New(family, kubeProxyTable)
   659  		if err == nil {
   660  			tx := nft.NewTransaction()
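        			// Deleting the table (whose name and family are implied by the
        			// knftables.Interface) removes all of its chains, sets, and
        			// maps in a single operation.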
   661  			tx.Delete(&knftables.Table{})
   662  			err = nft.Run(ctx, tx)
   663  		}
   664  		if err != nil && !knftables.IsNotFound(err) {
   665  			logger.Error(err, "Error cleaning up nftables rules")
   666  			encounteredError = true
   667  		}
   668  	}
   669  
   670  	return encounteredError
   671  }
   672  
   673  // Sync is called to synchronize the proxier state to nftables as soon as possible.
   674  func (proxier *Proxier) Sync() {
   675  	if proxier.healthzServer != nil {
   676  		proxier.healthzServer.QueuedUpdate(proxier.ipFamily)
   677  	}
   678  	metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
   679  	proxier.syncRunner.Run()
   680  }
   681  
   682  // SyncLoop runs periodic work.  This is expected to run as a goroutine or as the main loop of the app.  It does not return.
   683  func (proxier *Proxier) SyncLoop() {
   684  	// Update healthz timestamp at beginning in case Sync() never succeeds.
   685  	if proxier.healthzServer != nil {
   686  		proxier.healthzServer.Updated(proxier.ipFamily)
   687  	}
   688  
   689  	// synthesize "last change queued" time as the informers are syncing.
   690  	metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
   691  	proxier.syncRunner.Loop(wait.NeverStop)
   692  }
   693  
   694  func (proxier *Proxier) setInitialized(value bool) {
   695  	var initialized int32
   696  	if value {
   697  		initialized = 1
   698  	}
   699  	atomic.StoreInt32(&proxier.initialized, initialized)
   700  }
   701  
   702  func (proxier *Proxier) isInitialized() bool {
   703  	return atomic.LoadInt32(&proxier.initialized) > 0
   704  }
   705  
   706  // OnServiceAdd is called whenever creation of a new service object
   707  // is observed.
   708  func (proxier *Proxier) OnServiceAdd(service *v1.Service) {
   709  	proxier.OnServiceUpdate(nil, service)
   710  }
   711  
   712  // OnServiceUpdate is called whenever modification of an existing
   713  // service object is observed.
   714  func (proxier *Proxier) OnServiceUpdate(oldService, service *v1.Service) {
   715  	if proxier.serviceChanges.Update(oldService, service) && proxier.isInitialized() {
   716  		proxier.Sync()
   717  	}
   718  }
   719  
   720  // OnServiceDelete is called whenever deletion of an existing service
   721  // object is observed.
   722  func (proxier *Proxier) OnServiceDelete(service *v1.Service) {
   723  	proxier.OnServiceUpdate(service, nil)
   724  
   725  }
   726  
   727  // OnServiceSynced is called once all the initial event handlers were
   728  // called and the state is fully propagated to local cache.
   729  func (proxier *Proxier) OnServiceSynced() {
   730  	proxier.mu.Lock()
   731  	proxier.servicesSynced = true
   732  	proxier.setInitialized(proxier.endpointSlicesSynced)
   733  	proxier.mu.Unlock()
   734  
   735  	// Sync unconditionally - this is called once per lifetime.
   736  	proxier.syncProxyRules()
   737  }
   738  
   739  // OnEndpointSliceAdd is called whenever creation of a new endpoint slice object
   740  // is observed.
   741  func (proxier *Proxier) OnEndpointSliceAdd(endpointSlice *discovery.EndpointSlice) {
   742  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() {
   743  		proxier.Sync()
   744  	}
   745  }
   746  
   747  // OnEndpointSliceUpdate is called whenever modification of an existing endpoint
   748  // slice object is observed.
   749  func (proxier *Proxier) OnEndpointSliceUpdate(_, endpointSlice *discovery.EndpointSlice) {
   750  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() {
   751  		proxier.Sync()
   752  	}
   753  }
   754  
   755  // OnEndpointSliceDelete is called whenever deletion of an existing endpoint slice
   756  // object is observed.
   757  func (proxier *Proxier) OnEndpointSliceDelete(endpointSlice *discovery.EndpointSlice) {
   758  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, true) && proxier.isInitialized() {
   759  		proxier.Sync()
   760  	}
   761  }
   762  
   763  // OnEndpointSlicesSynced is called once all the initial event handlers were
   764  // called and the state is fully propagated to local cache.
   765  func (proxier *Proxier) OnEndpointSlicesSynced() {
   766  	proxier.mu.Lock()
   767  	proxier.endpointSlicesSynced = true
   768  	proxier.setInitialized(proxier.servicesSynced)
   769  	proxier.mu.Unlock()
   770  
   771  	// Sync unconditionally - this is called once per lifetime.
   772  	proxier.syncProxyRules()
   773  }
   774  
   775  // OnNodeAdd is called whenever creation of a new node object
   776  // is observed.
   777  func (proxier *Proxier) OnNodeAdd(node *v1.Node) {
   778  	if node.Name != proxier.hostname {
   779  		proxier.logger.Error(nil, "Received a watch event for a node that doesn't match the current node",
   780  			"eventNode", node.Name, "currentNode", proxier.hostname)
   781  		return
   782  	}
   783  
   784  	if reflect.DeepEqual(proxier.nodeLabels, node.Labels) {
   785  		return
   786  	}
   787  
   788  	proxier.mu.Lock()
   789  	proxier.nodeLabels = map[string]string{}
   790  	for k, v := range node.Labels {
   791  		proxier.nodeLabels[k] = v
   792  	}
   793  	proxier.mu.Unlock()
   794  	proxier.logger.V(4).Info("Updated proxier node labels", "labels", node.Labels)
   795  
   796  	proxier.Sync()
   797  }
   798  
   799  // OnNodeUpdate is called whenever modification of an existing
   800  // node object is observed.
   801  func (proxier *Proxier) OnNodeUpdate(oldNode, node *v1.Node) {
   802  	if node.Name != proxier.hostname {
   803  		proxier.logger.Error(nil, "Received a watch event for a node that doesn't match the current node",
   804  			"eventNode", node.Name, "currentNode", proxier.hostname)
   805  		return
   806  	}
   807  
   808  	if reflect.DeepEqual(proxier.nodeLabels, node.Labels) {
   809  		return
   810  	}
   811  
   812  	proxier.mu.Lock()
   813  	proxier.nodeLabels = map[string]string{}
   814  	for k, v := range node.Labels {
   815  		proxier.nodeLabels[k] = v
   816  	}
   817  	proxier.mu.Unlock()
   818  	proxier.logger.V(4).Info("Updated proxier node labels", "labels", node.Labels)
   819  
   820  	proxier.Sync()
   821  }
   822  
   823  // OnNodeDelete is called whenever deletion of an existing node
   824  // object is observed.
   825  func (proxier *Proxier) OnNodeDelete(node *v1.Node) {
   826  	if node.Name != proxier.hostname {
   827  		proxier.logger.Error(nil, "Received a watch event for a node that doesn't match the current node",
   828  			"eventNode", node.Name, "currentNode", proxier.hostname)
   829  		return
   830  	}
   831  
   832  	proxier.mu.Lock()
   833  	proxier.nodeLabels = nil
   834  	proxier.mu.Unlock()
   835  
   836  	proxier.Sync()
   837  }
   838  
   839  // OnNodeSynced is called once all the initial event handlers were
   840  // called and the state is fully propagated to local cache.
   841  func (proxier *Proxier) OnNodeSynced() {
   842  }
   843  
   844  // OnServiceCIDRsChanged is called whenever a change is observed
   845  // in any of the ServiceCIDRs, and provides the complete list of ServiceCIDRs.
   846  func (proxier *Proxier) OnServiceCIDRsChanged(cidrs []string) {
   847  	proxier.mu.Lock()
   848  	defer proxier.mu.Unlock()
   849  
   850  	cidrsForProxier := make([]string, 0)
   851  	for _, cidr := range cidrs {
   852  		isIPv4CIDR := netutils.IsIPv4CIDRString(cidr)
   853  		if proxier.ipFamily == v1.IPv4Protocol && isIPv4CIDR {
   854  			cidrsForProxier = append(cidrsForProxier, cidr)
   855  		}
   856  
   857  		if proxier.ipFamily == v1.IPv6Protocol && !isIPv4CIDR {
   858  			cidrsForProxier = append(cidrsForProxier, cidr)
   859  		}
   860  	}
   861  	proxier.serviceCIDRs = strings.Join(cidrsForProxier, ",")
   862  }
   863  
   864  const (
   865  	// Maximum length for one of our chain name prefixes, including the trailing
   866  	// hyphen.
   867  	chainNamePrefixLengthMax = 16
   868  
   869  	// Maximum length of the string returned from servicePortChainNameBase or
   870  	// servicePortEndpointChainNameBase.
   871  	chainNameBaseLengthMax = knftables.NameLengthMax - chainNamePrefixLengthMax
   872  )
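        // (knftables.NameLengthMax is 256, nftables' own limit on object names, so
        // chainNameBaseLengthMax works out to 240, the figure cited in the comments
        // below.)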
   873  
   874  const (
   875  	servicePortPolicyClusterChainNamePrefix = "service-"
   876  	servicePortPolicyLocalChainNamePrefix   = "local-"
   877  	serviceExternalChainNamePrefix          = "external-"
   878  	servicePortEndpointChainNamePrefix      = "endpoint-"
   879  	servicePortEndpointAffinityNamePrefix   = "affinity-"
   880  	servicePortFirewallChainNamePrefix      = "firewall-"
   881  )
   882  
   883  // hashAndTruncate prefixes name with a hash of itself and then truncates to
   884  // chainNameBaseLengthMax. The hash ensures that (a) the name is still unique if we have
   885  // to truncate the end, and (b) it's visually distinguishable from other chains that would
   886  // otherwise have nearly identical names (e.g., different endpoint chains for a given
   887  // service that differ in only a single digit).
   888  func hashAndTruncate(name string) string {
   889  	hash := sha256.Sum256([]byte(name))
   890  	encoded := base32.StdEncoding.EncodeToString(hash[:])
   891  	name = encoded[:8] + "-" + name
   892  	if len(name) > chainNameBaseLengthMax {
   893  		name = name[:chainNameBaseLengthMax-3] + "..."
   894  	}
   895  	return name
   896  }
   897  
   898  // servicePortChainNameBase returns the base name for a chain for the given ServicePort.
   899  // This is something like "HASH-namespace/serviceName/protocol/portName", e.g.,
   900  // "ULMVA6XW-ns1/svc1/tcp/p80".
   901  func servicePortChainNameBase(servicePortName *proxy.ServicePortName, protocol string) string {
   902  	// nftables chains can contain the characters [A-Za-z0-9_./-] (but must start with
   903  	// a letter, underscore, or dot).
   904  	//
   905  	// Namespace, Service, and Port names can contain [a-z0-9-] (with some additional
   906  	// restrictions that aren't relevant here).
   907  	//
   908  	// Protocol is /(tcp|udp|sctp)/.
   909  	//
   910  	// Thus, we can safely use all Namespace names, Service names, protocol values,
   911  	// and Port names directly in nftables chain names (though note that this assumes
   912  	// that the chain name won't *start* with any of those strings, since that might
   913  	// be illegal). We use "/" to separate the parts of the name, which is one of the
   914  	// two characters allowed in a chain name that isn't allowed in our input strings.
   915  
   916  	name := fmt.Sprintf("%s/%s/%s/%s",
   917  		servicePortName.NamespacedName.Namespace,
   918  		servicePortName.NamespacedName.Name,
   919  		protocol,
   920  		servicePortName.Port,
   921  	)
   922  
   923  	// The namespace, service, and port name can each be up to 63 characters, protocol
   924  	// can be up to 4, plus 8 for the hash and 4 additional punctuation characters.
   925  	// That's a total of 205, which is less than chainNameBaseLengthMax (240). So this
   926  	// will never actually return a truncated name.
   927  	return hashAndTruncate(name)
   928  }
   929  
   930  // servicePortEndpointChainNameBase returns the base name for chains for the given
   931  // endpoint. This is something like
   932  // "HASH-namespace/serviceName/protocol/portName__endpointIP/endpointport", e.g.,
   933  // "5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80".
   934  func servicePortEndpointChainNameBase(servicePortName *proxy.ServicePortName, protocol, endpoint string) string {
   935  	// As above in servicePortChainNameBase: Namespace, Service, Port, Protocol, and
   936  	// EndpointPort are all safe to copy into the chain name directly. But if
   937  	// EndpointIP is IPv6 then it will contain colons, which aren't allowed in a chain
   938  	// name. IPv6 IPs are also quite long, but we can't safely truncate them (e.g. to
   939  	// only the final segment) because (especially for manually-created external
   940  	// endpoints), we can't know for sure that any part of them is redundant.
   941  
   942  	endpointIP, endpointPort, _ := net.SplitHostPort(endpoint)
   943  	if strings.Contains(endpointIP, ":") {
   944  		endpointIP = strings.ReplaceAll(endpointIP, ":", ".")
   945  	}
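        	// e.g., an IPv6 endpoint IP like "fd00:1::2" becomes "fd00.1..2" here.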
   946  
   947  	// As above, we use "/" to separate parts of the name, and "__" to separate the
   948  	// "service" part from the "endpoint" part.
   949  	name := fmt.Sprintf("%s/%s/%s/%s__%s/%s",
   950  		servicePortName.NamespacedName.Namespace,
   951  		servicePortName.NamespacedName.Name,
   952  		protocol,
   953  		servicePortName.Port,
   954  		endpointIP,
   955  		endpointPort,
   956  	)
   957  
   958  	// The part of name before the "__" can be up to 205 characters (as with
   959  	// servicePortChainNameBase above). An IPv6 address can be up to 39 characters, and
   960  	// a port can be up to 5 digits, plus 3 punctuation characters gives a max total
   961  	// length of 252, well over chainNameBaseLengthMax (240), so truncation is
   962  	// theoretically possible (though incredibly unlikely).
   963  	return hashAndTruncate(name)
   964  }
   965  
   966  func isServiceChainName(chainString string) bool {
   967  	// The chains returned from servicePortChainNameBase and
   968  	// servicePortEndpointChainNameBase will always have at least one "/" in them.
   969  	// Since none of our "stock" chain names use slashes, we can distinguish them this
   970  	// way.
   971  	return strings.Contains(chainString, "/")
   972  }
   973  
   974  func isAffinitySetName(set string) bool {
   975  	return strings.HasPrefix(set, servicePortEndpointAffinityNamePrefix)
   976  }
   977  
   978  // This is where all of the nftables calls happen.
   979  // This assumes proxier.mu is NOT held
   980  func (proxier *Proxier) syncProxyRules() {
   981  	proxier.mu.Lock()
   982  	defer proxier.mu.Unlock()
   983  
   984  	// don't sync rules till we've received services and endpoints
   985  	if !proxier.isInitialized() {
   986  		proxier.logger.V(2).Info("Not syncing nftables until Services and Endpoints have been received from master")
   987  		return
   988  	}
   989  
   990  	//
   991  	// Below this point we will not return until we try to write the nftables rules.
   992  	//
   993  
   994  	// Keep track of how long syncs take.
   995  	start := time.Now()
   996  	defer func() {
   997  		metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
   998  		proxier.logger.V(2).Info("SyncProxyRules complete", "elapsed", time.Since(start))
   999  	}()
  1000  
  1001  	serviceUpdateResult := proxier.svcPortMap.Update(proxier.serviceChanges)
  1002  	endpointUpdateResult := proxier.endpointsMap.Update(proxier.endpointsChanges)
  1003  
  1004  	proxier.logger.V(2).Info("Syncing nftables rules")
  1005  
  1006  	success := false
  1007  	defer func() {
  1008  		if !success {
  1009  			proxier.logger.Info("Sync failed", "retryingTime", proxier.syncPeriod)
  1010  			proxier.syncRunner.RetryAfter(proxier.syncPeriod)
  1011  		}
  1012  	}()
  1013  
  1014  	// If there are sufficiently-stale chains left over from previous transactions,
  1015  	// try to delete them now.
  1016  	if len(proxier.staleChains) > 0 {
  1017  		oneSecondAgo := start.Add(-time.Second)
  1018  		tx := proxier.nftables.NewTransaction()
  1019  		deleted := 0
  1020  		for chain, modtime := range proxier.staleChains {
  1021  			if modtime.Before(oneSecondAgo) {
  1022  				tx.Delete(&knftables.Chain{
  1023  					Name: chain,
  1024  				})
  1025  				delete(proxier.staleChains, chain)
  1026  				deleted++
  1027  			}
  1028  		}
  1029  		if deleted > 0 {
  1030  			proxier.logger.Info("Deleting stale nftables chains", "numChains", deleted)
  1031  			err := proxier.nftables.Run(context.TODO(), tx)
  1032  			if err != nil {
  1033  				// We already deleted the entries from staleChains, but if
  1034  				// the chains still exist, they'll just get added back
  1035  				// (with a later timestamp) at the end of the sync.
  1036  				proxier.logger.Error(err, "Unable to delete stale chains; will retry later")
  1037  				metrics.NFTablesCleanupFailuresTotal.Inc()
  1038  			}
  1039  		}
  1040  	}
  1041  
  1042  	// Now start the actual syncing transaction
  1043  	tx := proxier.nftables.NewTransaction()
  1044  	proxier.setupNFTables(tx)
  1045  
  1046  	// We need to use, e.g., "ip daddr" for IPv4 but "ip6 daddr" for IPv6
  1047  	ipX := "ip"
  1048  	ipvX_addr := "ipv4_addr" //nolint:stylecheck // var name intentionally resembles value
  1049  	if proxier.ipFamily == v1.IPv6Protocol {
  1050  		ipX = "ip6"
  1051  		ipvX_addr = "ipv6_addr"
  1052  	}
  1053  
  1054  	// We currently fully-rebuild our sets and maps on each resync
  1055  	tx.Flush(&knftables.Set{
  1056  		Name: clusterIPsSet,
  1057  	})
  1058  	tx.Flush(&knftables.Map{
  1059  		Name: firewallIPsMap,
  1060  	})
  1061  	tx.Flush(&knftables.Map{
  1062  		Name: noEndpointServicesMap,
  1063  	})
  1064  	tx.Flush(&knftables.Map{
  1065  		Name: noEndpointNodePortsMap,
  1066  	})
  1067  	tx.Flush(&knftables.Map{
  1068  		Name: serviceIPsMap,
  1069  	})
  1070  	tx.Flush(&knftables.Map{
  1071  		Name: serviceNodePortsMap,
  1072  	})
  1073  
  1074  	// Accumulate service/endpoint chains and affinity sets to keep.
  1075  	activeChains := sets.New[string]()
  1076  	activeAffinitySets := sets.New[string]()
  1077  
  1078  	// Compute total number of endpoint chains across all services
  1079  	// to get a sense of how big the cluster is.
  1080  	totalEndpoints := 0
  1081  	for svcName := range proxier.svcPortMap {
  1082  		totalEndpoints += len(proxier.endpointsMap[svcName])
  1083  	}
  1084  
  1085  	// These two variables are used to publish the sync_proxy_rules_no_endpoints_total
  1086  	// metric.
  1087  	serviceNoLocalEndpointsTotalInternal := 0
  1088  	serviceNoLocalEndpointsTotalExternal := 0
  1089  
  1090  	// Build rules for each service-port.
  1091  	for svcName, svc := range proxier.svcPortMap {
  1092  		svcInfo, ok := svc.(*servicePortInfo)
  1093  		if !ok {
  1094  			proxier.logger.Error(nil, "Failed to cast serviceInfo", "serviceName", svcName)
  1095  			continue
  1096  		}
  1097  		protocol := strings.ToLower(string(svcInfo.Protocol()))
  1098  		svcPortNameString := svcInfo.nameString
  1099  
  1100  		// Figure out the endpoints for Cluster and Local traffic policy.
  1101  		// allLocallyReachableEndpoints is the set of all endpoints that can be routed to
  1102  		// from this node, given the service's traffic policies. hasEndpoints is true
  1103  		// if the service has any usable endpoints on any node, not just this one.
  1104  		allEndpoints := proxier.endpointsMap[svcName]
  1105  		clusterEndpoints, localEndpoints, allLocallyReachableEndpoints, hasEndpoints := proxy.CategorizeEndpoints(allEndpoints, svcInfo, proxier.nodeLabels)
  1106  
  1107  		// Note the endpoint chains that will be used
  1108  		for _, ep := range allLocallyReachableEndpoints {
  1109  			if epInfo, ok := ep.(*endpointInfo); ok {
  1110  				ensureChain(epInfo.chainName, tx, activeChains)
  1111  			}
  1112  		}
  1113  
  1114  		// clusterPolicyChain contains the endpoints used with "Cluster" traffic policy
  1115  		clusterPolicyChain := svcInfo.clusterPolicyChainName
  1116  		usesClusterPolicyChain := len(clusterEndpoints) > 0 && svcInfo.UsesClusterEndpoints()
  1117  		if usesClusterPolicyChain {
  1118  			ensureChain(clusterPolicyChain, tx, activeChains)
  1119  		}
  1120  
  1121  		// localPolicyChain contains the endpoints used with "Local" traffic policy
  1122  		localPolicyChain := svcInfo.localPolicyChainName
  1123  		usesLocalPolicyChain := len(localEndpoints) > 0 && svcInfo.UsesLocalEndpoints()
  1124  		if usesLocalPolicyChain {
  1125  			ensureChain(localPolicyChain, tx, activeChains)
  1126  		}
  1127  
  1128  		// internalPolicyChain is the chain containing the endpoints for
  1129  		// "internal" (ClusterIP) traffic. internalTrafficChain is the chain that
  1130  		// internal traffic is routed to (which is always the same as
  1131  		// internalPolicyChain). hasInternalEndpoints is true if we should
  1132  		// generate rules pointing to internalTrafficChain, or false if there are
  1133  		// no available internal endpoints.
  1134  		internalPolicyChain := clusterPolicyChain
  1135  		hasInternalEndpoints := hasEndpoints
  1136  		if svcInfo.InternalPolicyLocal() {
  1137  			internalPolicyChain = localPolicyChain
  1138  			if len(localEndpoints) == 0 {
  1139  				hasInternalEndpoints = false
  1140  			}
  1141  		}
  1142  		internalTrafficChain := internalPolicyChain
  1143  
  1144  		// Similarly, externalPolicyChain is the chain containing the endpoints
  1145  		// for "external" (NodePort, LoadBalancer, and ExternalIP) traffic.
  1146  		// externalTrafficChain is the chain that external traffic is routed to
  1147  		// (which is always the service's "EXT" chain). hasExternalEndpoints is
  1148  		// true if there are endpoints that will be reached by external traffic.
  1149  		// (But we may still have to generate externalTrafficChain even if there
  1150  		// are no external endpoints, to ensure that the short-circuit rules for
  1151  		// local traffic are set up.)
  1152  		externalPolicyChain := clusterPolicyChain
  1153  		hasExternalEndpoints := hasEndpoints
  1154  		if svcInfo.ExternalPolicyLocal() {
  1155  			externalPolicyChain = localPolicyChain
  1156  			if len(localEndpoints) == 0 {
  1157  				hasExternalEndpoints = false
  1158  			}
  1159  		}
  1160  		externalTrafficChain := svcInfo.externalChainName // eventually jumps to externalPolicyChain
  1161  
  1162  		// usesExternalTrafficChain is based on hasEndpoints, not hasExternalEndpoints,
  1163  		// because we need the local-traffic-short-circuiting rules even when there
  1164  		// are no externally-usable endpoints.
  1165  		usesExternalTrafficChain := hasEndpoints && svcInfo.ExternallyAccessible()
  1166  		if usesExternalTrafficChain {
  1167  			ensureChain(externalTrafficChain, tx, activeChains)
  1168  		}
  1169  
  1170  		var internalTrafficFilterVerdict, externalTrafficFilterVerdict string
  1171  		if !hasEndpoints {
  1172  			// The service has no endpoints at all; hasInternalEndpoints and
  1173  			// hasExternalEndpoints will also be false, and we will not
  1174  			// generate any chains in the "nat" table for the service; only
  1175  			// rules in the "filter" table rejecting incoming packets for
  1176  			// the service's IPs.
  1177  			internalTrafficFilterVerdict = fmt.Sprintf("goto %s", rejectChain)
  1178  			externalTrafficFilterVerdict = fmt.Sprintf("goto %s", rejectChain)
  1179  		} else {
  1180  			if !hasInternalEndpoints {
  1181  				// The internalTrafficPolicy is "Local" but there are no local
  1182  				// endpoints. Traffic to the clusterIP will be dropped, but
  1183  				// external traffic may still be accepted.
  1184  				internalTrafficFilterVerdict = "drop"
  1185  				serviceNoLocalEndpointsTotalInternal++
  1186  			}
  1187  			if !hasExternalEndpoints {
  1188  				// The externalTrafficPolicy is "Local" but there are no
  1189  				// local endpoints. Traffic to "external" IPs from outside
  1190  				// the cluster will be dropped, but traffic from inside
  1191  				// the cluster may still be accepted.
  1192  				externalTrafficFilterVerdict = "drop"
  1193  				serviceNoLocalEndpointsTotalExternal++
  1194  			}
  1195  		}
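        		// To summarize: a service with no endpoints at all gets "goto reject-chain"
        		// for both traffic classes, while a service whose internal or external
        		// traffic policy is "Local" but which has no local endpoints gets "drop"
        		// for just that class.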
  1196  
  1197  		// Capture the clusterIP.
  1198  		tx.Add(&knftables.Element{
  1199  			Set: clusterIPsSet,
  1200  			Key: []string{svcInfo.ClusterIP().String()},
  1201  		})
  1202  		if hasInternalEndpoints {
  1203  			tx.Add(&knftables.Element{
  1204  				Map: serviceIPsMap,
  1205  				Key: []string{
  1206  					svcInfo.ClusterIP().String(),
  1207  					protocol,
  1208  					strconv.Itoa(svcInfo.Port()),
  1209  				},
  1210  				Value: []string{
  1211  					fmt.Sprintf("goto %s", internalTrafficChain),
  1212  				},
  1213  			})
  1214  		} else {
  1215  			// No endpoints.
  1216  			tx.Add(&knftables.Element{
  1217  				Map: noEndpointServicesMap,
  1218  				Key: []string{
  1219  					svcInfo.ClusterIP().String(),
  1220  					protocol,
  1221  					strconv.Itoa(svcInfo.Port()),
  1222  				},
  1223  				Value: []string{
  1224  					internalTrafficFilterVerdict,
  1225  				},
  1226  				Comment: &svcPortNameString,
  1227  			})
  1228  		}
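        		// As an illustration (IP, port, and chain name invented for the example),
        		// a ClusterIP element added above renders in nft syntax roughly as:
        		//
        		//	172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80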
  1229  
  1230  		// Capture externalIPs.
  1231  		for _, externalIP := range svcInfo.ExternalIPs() {
  1232  			if hasEndpoints {
  1233  				// Send traffic bound for external IPs to the "external
  1234  				// destinations" chain.
  1235  				tx.Add(&knftables.Element{
  1236  					Map: serviceIPsMap,
  1237  					Key: []string{
  1238  						externalIP.String(),
  1239  						protocol,
  1240  						strconv.Itoa(svcInfo.Port()),
  1241  					},
  1242  					Value: []string{
  1243  						fmt.Sprintf("goto %s", externalTrafficChain),
  1244  					},
  1245  				})
  1246  			}
  1247  			if !hasExternalEndpoints {
  1248  				// Either no endpoints at all (REJECT) or no endpoints for
  1249  				// external traffic (DROP anything that didn't get
  1250  				// short-circuited by the EXT chain.)
  1251  				tx.Add(&knftables.Element{
  1252  					Map: noEndpointServicesMap,
  1253  					Key: []string{
  1254  						externalIP.String(),
  1255  						protocol,
  1256  						strconv.Itoa(svcInfo.Port()),
  1257  					},
  1258  					Value: []string{
  1259  						externalTrafficFilterVerdict,
  1260  					},
  1261  					Comment: &svcPortNameString,
  1262  				})
  1263  			}
  1264  		}
  1265  
  1266  		usesFWChain := len(svcInfo.LoadBalancerVIPs()) > 0 && len(svcInfo.LoadBalancerSourceRanges()) > 0
  1267  		fwChain := svcInfo.firewallChainName
  1268  		if usesFWChain {
  1269  			ensureChain(fwChain, tx, activeChains)
  1270  			var sources []string
  1271  			allowFromNode := false
  1272  			for _, cidr := range svcInfo.LoadBalancerSourceRanges() {
  1273  				if len(sources) > 0 {
  1274  					sources = append(sources, ",")
  1275  				}
  1276  				sources = append(sources, cidr.String())
  1277  				if cidr.Contains(proxier.nodeIP) {
  1278  					allowFromNode = true
  1279  				}
  1280  			}
  1281  			// For VIP-like LBs, the VIP is often added as a local
  1282  			// address (via an IP route rule).  In that case, a request
  1283  			// from a node to the VIP will not hit the loadbalancer but
  1284  			// will loop back with the source IP set to the VIP.  We
  1285  			// need the following rules to allow requests from this node.
  1286  			if allowFromNode {
  1287  				for _, lbip := range svcInfo.LoadBalancerVIPs() {
  1288  					sources = append(sources, ",", lbip.String())
  1289  				}
  1290  			}
  1291  			tx.Add(&knftables.Rule{
  1292  				Chain: fwChain,
  1293  				Rule: knftables.Concat(
  1294  					ipX, "saddr", "!=", "{", sources, "}",
  1295  					"drop",
  1296  				),
  1297  			})
  1298  		}
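        		// For example, with LoadBalancerSourceRanges ["10.0.0.0/8"] (an assumed
        		// value), a node IP inside that range, and LB VIP 192.0.2.10, the rule
        		// above becomes:
        		//
        		//	ip saddr != { 10.0.0.0/8, 192.0.2.10 } drop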
  1299  
  1300  		// Capture load-balancer ingress.
  1301  		for _, lbip := range svcInfo.LoadBalancerVIPs() {
  1302  			if hasEndpoints {
  1303  				tx.Add(&knftables.Element{
  1304  					Map: serviceIPsMap,
  1305  					Key: []string{
  1306  						lbip.String(),
  1307  						protocol,
  1308  						strconv.Itoa(svcInfo.Port()),
  1309  					},
  1310  					Value: []string{
  1311  						fmt.Sprintf("goto %s", externalTrafficChain),
  1312  					},
  1313  				})
  1314  			}
  1315  
  1316  			if usesFWChain {
  1317  				tx.Add(&knftables.Element{
  1318  					Map: firewallIPsMap,
  1319  					Key: []string{
  1320  						lbip.String(),
  1321  						protocol,
  1322  						strconv.Itoa(svcInfo.Port()),
  1323  					},
  1324  					Value: []string{
  1325  						fmt.Sprintf("goto %s", fwChain),
  1326  					},
  1327  					Comment: &svcPortNameString,
  1328  				})
  1329  			}
  1330  		}
  1331  		if !hasExternalEndpoints {
  1332  			// Either no endpoints at all (REJECT) or no endpoints for
  1333  			// external traffic (DROP anything that didn't get short-circuited
  1334  			// by the EXT chain.)
  1335  			for _, lbip := range svcInfo.LoadBalancerVIPs() {
  1336  				tx.Add(&knftables.Element{
  1337  					Map: noEndpointServicesMap,
  1338  					Key: []string{
  1339  						lbip.String(),
  1340  						protocol,
  1341  						strconv.Itoa(svcInfo.Port()),
  1342  					},
  1343  					Value: []string{
  1344  						externalTrafficFilterVerdict,
  1345  					},
  1346  					Comment: &svcPortNameString,
  1347  				})
  1348  			}
  1349  		}
  1350  
  1351  		// Capture nodeports.
  1352  		if svcInfo.NodePort() != 0 {
  1353  			if hasEndpoints {
  1354  				// Jump to the external destination chain.  For better or for
  1355  				// worse, nodeports are not subect to loadBalancerSourceRanges,
  1356  				// worse, nodeports are not subject to loadBalancerSourceRanges,
  1357  				tx.Add(&knftables.Element{
  1358  					Map: serviceNodePortsMap,
  1359  					Key: []string{
  1360  						protocol,
  1361  						strconv.Itoa(svcInfo.NodePort()),
  1362  					},
  1363  					Value: []string{
  1364  						fmt.Sprintf("goto %s", externalTrafficChain),
  1365  					},
  1366  				})
  1367  			}
  1368  			if !hasExternalEndpoints {
  1369  				// Either no endpoints at all (REJECT) or no endpoints for
  1370  				// external traffic (DROP anything that didn't get
  1371  				// short-circuited by the EXT chain.)
  1372  				tx.Add(&knftables.Element{
  1373  					Map: noEndpointNodePortsMap,
  1374  					Key: []string{
  1375  						protocol,
  1376  						strconv.Itoa(svcInfo.NodePort()),
  1377  					},
  1378  					Value: []string{
  1379  						externalTrafficFilterVerdict,
  1380  					},
  1381  					Comment: &svcPortNameString,
  1382  				})
  1383  			}
  1384  		}
  1385  
  1386  		// Set up internal traffic handling.
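        		// With masqueradeAll, the rendered rule is roughly
        		//   ip daddr 172.30.0.44 tcp dport 80 jump <mark-masq chain>
        		// (illustrative ClusterIP/port); the localDetector branch below adds
        		// a "not local" source match so only off-cluster clients are
        		// masqueraded.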
  1387  		if hasInternalEndpoints {
  1388  			if proxier.masqueradeAll {
  1389  				tx.Add(&knftables.Rule{
  1390  					Chain: internalTrafficChain,
  1391  					Rule: knftables.Concat(
  1392  						ipX, "daddr", svcInfo.ClusterIP(),
  1393  						protocol, "dport", svcInfo.Port(),
  1394  						"jump", markMasqChain,
  1395  					),
  1396  				})
  1397  			} else if proxier.localDetector.IsImplemented() {
  1398  				// This masquerades off-cluster traffic to a service VIP. The
  1399  				// idea is that you can establish a static route for your
  1400  				// Service range, routing to any node, and that node will
  1401  				// bridge into the Service for you. Since that might bounce
  1402  				// off-node, we masquerade here.
  1403  				tx.Add(&knftables.Rule{
  1404  					Chain: internalTrafficChain,
  1405  					Rule: knftables.Concat(
  1406  						ipX, "daddr", svcInfo.ClusterIP(),
  1407  						protocol, "dport", svcInfo.Port(),
  1408  						proxier.localDetector.IfNotLocalNFT(),
  1409  						"jump", markMasqChain,
  1410  					),
  1411  				})
  1412  			}
  1413  		}
  1414  
  1415  		// Set up external traffic handling (if any "external" destinations are
  1416  		// enabled). All captured traffic for all external destinations should
  1417  		// jump to externalTrafficChain, which will handle some special cases and
  1418  		// then jump to externalPolicyChain.
  1419  		if usesExternalTrafficChain {
  1420  			if !svcInfo.ExternalPolicyLocal() {
  1421  				// If we are using non-local endpoints we need to masquerade,
  1422  				// in case we cross nodes.
  1423  				tx.Add(&knftables.Rule{
  1424  					Chain: externalTrafficChain,
  1425  					Rule: knftables.Concat(
  1426  						"jump", markMasqChain,
  1427  					),
  1428  				})
  1429  			} else {
  1430  				// If we are only using same-node endpoints, we can retain the
  1431  				// source IP in most cases.
  1432  
  1433  				if proxier.localDetector.IsImplemented() {
  1434  					// Treat all locally-originated pod -> external destination
  1435  					// traffic as a special case.  It is subject to neither
  1436  					// form of traffic policy, which simulates going up-and-out
  1437  					// to an external load-balancer and coming back in.
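        					// IfLocalNFT() renders a pod-traffic match (e.g. a pod-CIDR
        					// test like "ip saddr 10.0.0.0/8", illustrative), so the rule
        					// is roughly "ip saddr 10.0.0.0/8 goto <cluster policy chain>".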
  1438  					tx.Add(&knftables.Rule{
  1439  						Chain: externalTrafficChain,
  1440  						Rule: knftables.Concat(
  1441  							proxier.localDetector.IfLocalNFT(),
  1442  							"goto", clusterPolicyChain,
  1443  						),
  1444  						Comment: ptr.To("short-circuit pod traffic"),
  1445  					})
  1446  				}
  1447  
  1448  				// Locally originated traffic (not a pod, but the host node)
  1449  				// still needs masquerade because the LBIP itself is a local
  1450  				// address, so that will be the chosen source IP.
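        				// "fib saddr type local" matches packets whose source address
        				// is one of this host's own addresses.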
  1451  				tx.Add(&knftables.Rule{
  1452  					Chain: externalTrafficChain,
  1453  					Rule: knftables.Concat(
  1454  						"fib", "saddr", "type", "local",
  1455  						"jump", markMasqChain,
  1456  					),
  1457  					Comment: ptr.To("masquerade local traffic"),
  1458  				})
  1459  
  1460  				// Redirect all src-type=LOCAL -> external destination to the
  1461  				// policy=cluster chain. This allows traffic originating
  1462  				// from the host to be redirected to the service correctly.
  1463  				tx.Add(&knftables.Rule{
  1464  					Chain: externalTrafficChain,
  1465  					Rule: knftables.Concat(
  1466  						"fib", "saddr", "type", "local",
  1467  						"goto", clusterPolicyChain,
  1468  					),
  1469  					Comment: ptr.To("short-circuit local traffic"),
  1470  				})
  1471  			}
  1472  
  1473  			// Anything else falls through to the appropriate policy chain.
  1474  			if hasExternalEndpoints {
  1475  				tx.Add(&knftables.Rule{
  1476  					Chain: externalTrafficChain,
  1477  					Rule: knftables.Concat(
  1478  						"goto", externalPolicyChain,
  1479  					),
  1480  				})
  1481  			}
  1482  		}
  1483  
  1484  		if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1485  			// Generate the per-endpoint affinity sets
  1486  			for _, ep := range allLocallyReachableEndpoints {
  1487  				epInfo, ok := ep.(*endpointInfo)
  1488  				if !ok {
  1489  				proxier.logger.Error(nil, "Failed to cast endpointInfo", "endpointInfo", ep)
  1490  					continue
  1491  				}
  1492  
  1493  				// Create a set to store current affinity mappings. As
  1494  				// with the iptables backend, endpoint affinity is
  1495  				// recorded for connections from a particular source IP
  1496  				// (without regard to source port) to a particular
  1497  				// ServicePort (without regard to which service IP was
  1498  				// used to reach the service). This may be changed in the
  1499  				// future.
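        				// Rendered roughly as
        				//   set affinity-ULMVA6XW... { type ipv4_addr ; flags dynamic,timeout ; timeout 3h ; }
        				// (illustrative name; the timeout comes from the service's
        				// sticky max age, so idle entries expire on their own).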
  1500  				tx.Add(&knftables.Set{
  1501  					Name: epInfo.affinitySetName,
  1502  					Type: ipvX_addr,
  1503  					Flags: []knftables.SetFlag{
  1504  						// The nft docs say "dynamic" is only
  1505  						// needed for sets containing stateful
  1506  						// objects (e.g. counters), but (at least on
  1507  						// RHEL8) if we create the set without
  1508  						// "dynamic", it later gets mutated to
  1509  						// have it, and then the next attempt to
  1510  						// tx.Add() it here fails because it looks
  1511  						// like we're trying to change the flags.
  1512  						knftables.DynamicFlag,
  1513  						knftables.TimeoutFlag,
  1514  					},
  1515  					Timeout: ptr.To(time.Duration(svcInfo.StickyMaxAgeSeconds()) * time.Second),
  1516  				})
  1517  				activeAffinitySets.Insert(epInfo.affinitySetName)
  1518  			}
  1519  		}
  1520  
  1521  		// If Cluster policy is in use, write the rules jumping from
  1522  		// clusterPolicyChain to the clusterEndpoints.
  1523  		if usesClusterPolicyChain {
  1524  			proxier.writeServiceToEndpointRules(tx, svcPortNameString, svcInfo, clusterPolicyChain, clusterEndpoints)
  1525  		}
  1526  
  1527  		// If Local policy is in use, create rules jumping from localPolicyChain
  1528  		// to the localEndpoints
  1529  		if usesLocalPolicyChain {
  1530  			proxier.writeServiceToEndpointRules(tx, svcPortNameString, svcInfo, localPolicyChain, localEndpoints)
  1531  		}
  1532  
  1533  		// Generate the per-endpoint chains
  1534  		for _, ep := range allLocallyReachableEndpoints {
  1535  			epInfo, ok := ep.(*endpointInfo)
  1536  			if !ok {
  1537  				proxier.logger.Error(nil, "Failed to cast endpointInfo", "endpointInfo", ep)
  1538  				continue
  1539  			}
  1540  
  1541  			endpointChain := epInfo.chainName
  1542  
  1543  			// Handle traffic that loops back to the originator with SNAT.
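        			// Rendered roughly as "ip saddr 10.180.0.4 jump <mark-masq chain>"
        			// (illustrative pod IP): an endpoint reaching itself via a service
        			// IP must be masqueraded so replies flow back through the DNAT
        			// (the hairpin case).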
  1544  			tx.Add(&knftables.Rule{
  1545  				Chain: endpointChain,
  1546  				Rule: knftables.Concat(
  1547  					ipX, "saddr", epInfo.IP(),
  1548  					"jump", markMasqChain,
  1549  				),
  1550  			})
  1551  
  1552  			// Handle session affinity
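        			// Refresh the client's affinity entry, rendered roughly as
        			//   update @affinity-ULMVA6XW... { ip saddr }
        			// which (re)adds the source IP and re-arms the set's timeout.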
  1553  			if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1554  				tx.Add(&knftables.Rule{
  1555  					Chain: endpointChain,
  1556  					Rule: knftables.Concat(
  1557  						"update", "@", epInfo.affinitySetName,
  1558  						"{", ipX, "saddr", "}",
  1559  					),
  1560  				})
  1561  			}
  1562  
  1563  			// DNAT to final destination.
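        			// Rendered roughly as "meta l4proto tcp dnat to 10.180.0.4:8080"
        			// (illustrative values; epInfo.String() is "IP:port").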
  1564  			tx.Add(&knftables.Rule{
  1565  				Chain: endpointChain,
  1566  				Rule: knftables.Concat(
  1567  					"meta l4proto", protocol,
  1568  					"dnat to", epInfo.String(),
  1569  				),
  1570  			})
  1571  		}
  1572  	}
  1573  
  1574  	// Figure out which chains are now stale. Unfortunately, we can't delete them
  1575  	// right away, because with kernels before 6.2, if there is a map element pointing
  1576  	// to a chain, and you delete that map element, the kernel doesn't notice until a
  1577  	// short amount of time later that the chain is now unreferenced. So we flush them
  1578  	// now, and record the time that they become stale in staleChains so they can be
  1579  	// deleted later.
  1580  	existingChains, err := proxier.nftables.List(context.TODO(), "chains")
  1581  	if err == nil {
  1582  		for _, chain := range existingChains {
  1583  			if isServiceChainName(chain) {
  1584  				if !activeChains.Has(chain) {
  1585  					tx.Flush(&knftables.Chain{
  1586  						Name: chain,
  1587  					})
  1588  					proxier.staleChains[chain] = start
  1589  				} else {
  1590  					delete(proxier.staleChains, chain)
  1591  				}
  1592  			}
  1593  		}
  1594  	} else if !knftables.IsNotFound(err) {
  1595  		proxier.logger.Error(err, "Failed to list nftables chains: stale chains will not be deleted")
  1596  	}
  1597  
  1598  	// On the other hand, we can immediately delete any stale affinity sets.
  1599  	existingSets, err := proxier.nftables.List(context.TODO(), "sets")
  1600  	if err == nil {
  1601  		for _, set := range existingSets {
  1602  			if isAffinitySetName(set) && !activeAffinitySets.Has(set) {
  1603  				tx.Delete(&knftables.Set{
  1604  					Name: set,
  1605  				})
  1606  			}
  1607  		}
  1608  	} else if !knftables.IsNotFound(err) {
  1609  		proxier.logger.Error(err, "Failed to list nftables sets: stale affinity sets will not be deleted")
  1610  	}
  1611  
  1612  	// Sync rules.
  1613  	proxier.logger.V(2).Info("Reloading service nftables data",
  1614  		"numServices", len(proxier.svcPortMap),
  1615  		"numEndpoints", totalEndpoints,
  1616  	)
  1617  
  1618  	if klogV9 := klog.V(9); klogV9.Enabled() {
  1619  		klogV9.InfoS("Running nftables transaction", "transaction", tx.String())
  1620  	}
  1621  
  1622  	err = proxier.nftables.Run(context.TODO(), tx)
  1623  	if err != nil {
  1624  		proxier.logger.Error(err, "nftables sync failed")
  1625  		metrics.NFTablesSyncFailuresTotal.Inc()
  1626  
  1627  		// staleChains is now incorrect since we didn't actually flush the
  1628  		// chains in it. We can recompute it next time.
  1629  		clear(proxier.staleChains)
  1630  		return
  1631  	}
  1632  	success = true
  1633  
  1634  	for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes {
  1635  		for _, lastChangeTriggerTime := range lastChangeTriggerTimes {
  1636  			latency := metrics.SinceInSeconds(lastChangeTriggerTime)
  1637  			metrics.NetworkProgrammingLatency.Observe(latency)
  1638  			proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency)
  1639  		}
  1640  	}
  1641  
  1642  	metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(serviceNoLocalEndpointsTotalInternal))
  1643  	metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(serviceNoLocalEndpointsTotalExternal))
  1644  	if proxier.healthzServer != nil {
  1645  		proxier.healthzServer.Updated(proxier.ipFamily)
  1646  	}
  1647  	metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
  1648  
  1649  	// Update service healthchecks.  The endpoints list might include services that are
  1650  	// not "OnlyLocal", but the services list will not, and the serviceHealthServer
  1651  	// will just drop those endpoints.
  1652  	if err := proxier.serviceHealthServer.SyncServices(proxier.svcPortMap.HealthCheckNodePorts()); err != nil {
  1653  		proxier.logger.Error(err, "Error syncing healthcheck services")
  1654  	}
  1655  	if err := proxier.serviceHealthServer.SyncEndpoints(proxier.endpointsMap.LocalReadyEndpoints()); err != nil {
  1656  		proxier.logger.Error(err, "Error syncing healthcheck endpoints")
  1657  	}
  1658  
  1659  	// Finish housekeeping: clear stale conntrack entries for UDP Services.
  1660  	conntrack.CleanStaleEntries(proxier.conntrack, proxier.svcPortMap, serviceUpdateResult, endpointUpdateResult)
  1661  }
  1662  
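        // writeServiceToEndpointRules writes the rules for svcChain that dispatch
        // traffic to the given endpoints, honoring ClientIP session affinity when it
        // is configured.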
  1663  func (proxier *Proxier) writeServiceToEndpointRules(tx *knftables.Transaction, svcPortNameString string, svcInfo *servicePortInfo, svcChain string, endpoints []proxy.Endpoint) {
  1664  	// First write session affinity rules, if applicable.
  1665  	if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1666  		ipX := "ip"
  1667  		if proxier.ipFamily == v1.IPv6Protocol {
  1668  			ipX = "ip6"
  1669  		}
  1670  
  1671  		for _, ep := range endpoints {
  1672  			epInfo, ok := ep.(*endpointInfo)
  1673  			if !ok {
  1674  				continue
  1675  			}
  1676  
  1677  			tx.Add(&knftables.Rule{
  1678  				Chain: svcChain,
  1679  				Rule: knftables.Concat(
  1680  					ipX, "saddr", "@", epInfo.affinitySetName,
  1681  					"goto", epInfo.chainName,
  1682  				),
  1683  			})
  1684  		}
  1685  	}
  1686  
  1687  	// Now write the load-balancing rule.
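        	// With two endpoints, the rendered rule is roughly
        	//   numgen random mod 2 vmap { 0 : goto endpoint-A..., 1 : goto endpoint-B... }
        	// (illustrative chain names); each connection is dispatched to a uniformly
        	// random endpoint, since only a connection's first packet reaches this
        	// chain from the nat hooks.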
  1688  	var elements []string
  1689  	for i, ep := range endpoints {
  1690  		epInfo, ok := ep.(*endpointInfo)
  1691  		if !ok {
  1692  			continue
  1693  		}
  1694  
  1695  		elements = append(elements,
  1696  			strconv.Itoa(i), ":", "goto", epInfo.chainName,
  1697  		)
  1698  		if i != len(endpoints)-1 {
  1699  			elements = append(elements, ",")
  1700  		}
  1701  	}
  1702  	tx.Add(&knftables.Rule{
  1703  		Chain: svcChain,
  1704  		Rule: knftables.Concat(
  1705  			"numgen random mod", len(endpoints), "vmap",
  1706  			"{", elements, "}",
  1707  		),
  1708  	})
  1709  }