k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/proxy/ipvs/proxier.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2017 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package ipvs
    21  
    22  import (
    23  	"bytes"
    24  	"context"
    25  	"errors"
    26  	"fmt"
    27  	"io"
    28  	"net"
    29  	"reflect"
    30  	"strconv"
    31  	"strings"
    32  	"sync"
    33  	"sync/atomic"
    34  	"time"
    35  
    36  	"k8s.io/klog/v2"
    37  	utilexec "k8s.io/utils/exec"
    38  	netutils "k8s.io/utils/net"
    39  
    40  	v1 "k8s.io/api/core/v1"
    41  	discovery "k8s.io/api/discovery/v1"
    42  	"k8s.io/apimachinery/pkg/types"
    43  	"k8s.io/apimachinery/pkg/util/sets"
    44  	"k8s.io/apimachinery/pkg/util/version"
    45  	"k8s.io/apimachinery/pkg/util/wait"
    46  	"k8s.io/client-go/tools/events"
    47  	utilsysctl "k8s.io/component-helpers/node/util/sysctl"
    48  	"k8s.io/kubernetes/pkg/proxy"
    49  	"k8s.io/kubernetes/pkg/proxy/conntrack"
    50  	"k8s.io/kubernetes/pkg/proxy/healthcheck"
    51  	utilipset "k8s.io/kubernetes/pkg/proxy/ipvs/ipset"
    52  	utilipvs "k8s.io/kubernetes/pkg/proxy/ipvs/util"
    53  	"k8s.io/kubernetes/pkg/proxy/metaproxier"
    54  	"k8s.io/kubernetes/pkg/proxy/metrics"
    55  	proxyutil "k8s.io/kubernetes/pkg/proxy/util"
    56  	"k8s.io/kubernetes/pkg/util/async"
    57  	utiliptables "k8s.io/kubernetes/pkg/util/iptables"
    58  	utilkernel "k8s.io/kubernetes/pkg/util/kernel"
    59  )
    60  
// Names of the iptables chains used by the ipvs proxier, followed by the
// defaults for the IPVS scheduler and the dummy network device.
const (
	// kubeServicesChain is the services portal chain.
	kubeServicesChain utiliptables.Chain = "KUBE-SERVICES"

	// kubeProxyFirewallChain is the kube-proxy firewall chain.
	kubeProxyFirewallChain utiliptables.Chain = "KUBE-PROXY-FIREWALL"

	// kubeSourceRangesFirewallChain is the firewall subchain for LoadBalancerSourceRanges.
	kubeSourceRangesFirewallChain utiliptables.Chain = "KUBE-SOURCE-RANGES-FIREWALL"

	// kubePostroutingChain is the kubernetes postrouting chain.
	kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"

	// kubeMarkMasqChain is the mark-for-masquerade chain.
	kubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"

	// kubeNodePortChain is the kubernetes node port chain.
	kubeNodePortChain utiliptables.Chain = "KUBE-NODE-PORT"

	// kubeForwardChain is the kubernetes forward chain.
	kubeForwardChain utiliptables.Chain = "KUBE-FORWARD"

	// kubeLoadBalancerChain is the kubernetes chain for services of type LoadBalancer.
	kubeLoadBalancerChain utiliptables.Chain = "KUBE-LOAD-BALANCER"

	// kubeIPVSFilterChain filters external access to the main netns.
	// https://github.com/kubernetes/kubernetes/issues/72236
	kubeIPVSFilterChain utiliptables.Chain = "KUBE-IPVS-FILTER"

	// kubeIPVSOutFilterChain filters access to load balancer services from the node.
	// https://github.com/kubernetes/kubernetes/issues/119656
	kubeIPVSOutFilterChain utiliptables.Chain = "KUBE-IPVS-OUT-FILTER"

	// defaultScheduler is the default ipvs scheduler algorithm - round robin.
	// It is used whenever no scheduler has been configured.
	defaultScheduler = "rr"

	// defaultDummyDevice is the name of the default dummy interface that ipvs
	// service addresses are bound to.
	defaultDummyDevice = "kube-ipvs0"
)
   100  
// In IPVS proxy mode, the following sysctls need to be set.
// NewProxier requests these values at startup:
//   - sysctlVSConnTrack, sysctlExpireNoDestConn, sysctlExpireQuiescentTemplate
//     and sysctlForward are set to 1;
//   - sysctlConnReuse is set to 0, but only on kernels that are at least
//     utilkernel.IPVSConnReuseModeMinSupportedKernelVersion and older than
//     utilkernel.IPVSConnReuseModeFixedKernelVersion;
//   - sysctlArpIgnore (1) and sysctlArpAnnounce (2) are set only when strictARP
//     is enabled.
const (
	sysctlVSConnTrack             = "net/ipv4/vs/conntrack"
	sysctlConnReuse               = "net/ipv4/vs/conn_reuse_mode"
	sysctlExpireNoDestConn        = "net/ipv4/vs/expire_nodest_conn"
	sysctlExpireQuiescentTemplate = "net/ipv4/vs/expire_quiescent_template"
	sysctlForward                 = "net/ipv4/ip_forward"
	sysctlArpIgnore               = "net/ipv4/conf/all/arp_ignore"
	sysctlArpAnnounce             = "net/ipv4/conf/all/arp_announce"
)
   111  
   112  // NewDualStackProxier returns a new Proxier for dual-stack operation
   113  func NewDualStackProxier(
   114  	ctx context.Context,
   115  	ipt [2]utiliptables.Interface,
   116  	ipvs utilipvs.Interface,
   117  	ipset utilipset.Interface,
   118  	sysctl utilsysctl.Interface,
   119  	exec utilexec.Interface,
   120  	syncPeriod time.Duration,
   121  	minSyncPeriod time.Duration,
   122  	excludeCIDRs []string,
   123  	strictARP bool,
   124  	tcpTimeout time.Duration,
   125  	tcpFinTimeout time.Duration,
   126  	udpTimeout time.Duration,
   127  	masqueradeAll bool,
   128  	masqueradeBit int,
   129  	localDetectors map[v1.IPFamily]proxyutil.LocalTrafficDetector,
   130  	hostname string,
   131  	nodeIPs map[v1.IPFamily]net.IP,
   132  	recorder events.EventRecorder,
   133  	healthzServer *healthcheck.ProxierHealthServer,
   134  	scheduler string,
   135  	nodePortAddresses []string,
   136  	initOnly bool,
   137  ) (proxy.Provider, error) {
   138  	// Create an ipv4 instance of the single-stack proxier
   139  	ipv4Proxier, err := NewProxier(ctx, v1.IPv4Protocol, ipt[0], ipvs, ipset, sysctl,
   140  		exec, syncPeriod, minSyncPeriod, filterCIDRs(false, excludeCIDRs), strictARP,
   141  		tcpTimeout, tcpFinTimeout, udpTimeout, masqueradeAll, masqueradeBit,
   142  		localDetectors[v1.IPv4Protocol], hostname, nodeIPs[v1.IPv4Protocol], recorder,
   143  		healthzServer, scheduler, nodePortAddresses, initOnly)
   144  	if err != nil {
   145  		return nil, fmt.Errorf("unable to create ipv4 proxier: %v", err)
   146  	}
   147  
   148  	ipv6Proxier, err := NewProxier(ctx, v1.IPv6Protocol, ipt[1], ipvs, ipset, sysctl,
   149  		exec, syncPeriod, minSyncPeriod, filterCIDRs(true, excludeCIDRs), strictARP,
   150  		tcpTimeout, tcpFinTimeout, udpTimeout, masqueradeAll, masqueradeBit,
   151  		localDetectors[v1.IPv6Protocol], hostname, nodeIPs[v1.IPv6Protocol], recorder,
   152  		healthzServer, scheduler, nodePortAddresses, initOnly)
   153  	if err != nil {
   154  		return nil, fmt.Errorf("unable to create ipv6 proxier: %v", err)
   155  	}
   156  	if initOnly {
   157  		return nil, nil
   158  	}
   159  
   160  	// Return a meta-proxier that dispatch calls between the two
   161  	// single-stack proxier instances
   162  	return metaproxier.NewMetaProxier(ipv4Proxier, ipv6Proxier), nil
   163  }
   164  
// Proxier is an ipvs based proxy for connections between a localhost:lport
// and services that provide the actual backends.
type Proxier struct {
	// ipFamily is the IP family this proxier operates on.
	ipFamily v1.IPFamily
	// endpointsChanges and serviceChanges contain all changes to endpoints and
	// services that happened since the last syncProxyRules call. For a single object,
	// changes are accumulated, i.e. previous is the state from before all of them,
	// current is the state after applying all of those.
	endpointsChanges *proxy.EndpointsChangeTracker
	serviceChanges   *proxy.ServiceChangeTracker

	mu           sync.Mutex // protects the following fields
	svcPortMap   proxy.ServicePortMap
	endpointsMap proxy.EndpointsMap
	nodeLabels   map[string]string
	// initialSync is a bool indicating if the proxier is syncing for the first time.
	// It is set to true when a new proxier is initialized and then set to false on all
	// future syncs.
	// This lets us run specific logic that's required only during proxy startup.
	// For example, it enables us to update weights of existing destinations only on
	// startup, saving us the cost of querying and updating real servers during every sync.
	initialSync bool
	// endpointSlicesSynced and servicesSynced are set to true when the
	// corresponding objects are synced after startup. This is used to avoid updating
	// ipvs rules with some partial data after kube-proxy restart.
	endpointSlicesSynced bool
	servicesSynced       bool
	initialized          int32
	syncRunner           *async.BoundedFrequencyRunner // governs calls to syncProxyRules

	// These are effectively const and do not need the mutex to be held.
	syncPeriod    time.Duration
	minSyncPeriod time.Duration
	// excludeCIDRs holds the CIDRs to exclude when cleaning up IPVS rules.
	excludeCIDRs []*net.IPNet
	// strictARP is set to true to set the sysctls arp_ignore and arp_announce.
	strictARP      bool
	iptables       utiliptables.Interface
	ipvs           utilipvs.Interface
	ipset          utilipset.Interface
	conntrack      conntrack.Interface
	masqueradeAll  bool
	masqueradeMark string
	localDetector  proxyutil.LocalTrafficDetector
	hostname       string
	nodeIP         net.IP
	recorder       events.EventRecorder

	serviceHealthServer healthcheck.ServiceHealthServer
	healthzServer       *healthcheck.ProxierHealthServer

	// ipvsScheduler is the IPVS scheduler name; NewProxier falls back to
	// defaultScheduler when none is given.
	ipvsScheduler string
	// The following buffers are used to reuse memory and avoid allocations
	// that are significantly impacting performance.
	iptablesData     *bytes.Buffer
	filterChainsData *bytes.Buffer
	natChains        proxyutil.LineBuffer
	filterChains     proxyutil.LineBuffer
	natRules         proxyutil.LineBuffer
	filterRules      proxyutil.LineBuffer
	// netlinkHandle is a struct member to allow injection for testing.
	netlinkHandle NetLinkHandle
	// ipsetList is the set of ipsets used by the ipvs proxier, keyed by set name.
	ipsetList map[string]*IPSet
	// nodePortAddresses selects the interfaces where nodePort works.
	nodePortAddresses *proxyutil.NodePortAddresses
	// networkInterfacer defines an interface for several net library functions.
	// It is a struct member so that tests can inject a fake.
	networkInterfacer     proxyutil.NetworkInterfacer
	gracefuldeleteManager *GracefulTerminationManager
	// serviceNoLocalEndpointsInternal represents the set of services that couldn't be applied
	// due to the absence of local endpoints when the internal traffic policy is "Local".
	// It is used to publish the sync_proxy_rules_no_endpoints_total
	// metric with the traffic_policy label set to "internal".
	// A Set is used here since we end up calculating endpoint topology multiple times for the same Service
	// if it has multiple ports but each Service should only be counted once.
	serviceNoLocalEndpointsInternal sets.Set[string]
	// serviceNoLocalEndpointsExternal represents the set of services that couldn't be applied
	// due to the absence of any endpoints when the external traffic policy is "Local".
	// It is used to publish the sync_proxy_rules_no_endpoints_total
	// metric with the traffic_policy label set to "external".
	// A Set is used here since we end up calculating endpoint topology multiple times for the same Service
	// if it has multiple ports but each Service should only be counted once.
	serviceNoLocalEndpointsExternal sets.Set[string]
	// lbNoNodeAccessIPPortProtocolEntries represents the set of load balancer
	// IP + Port + Protocol entries that should not be accessible from K8s nodes.
	// We cannot directly restrict LB access from a node using LoadBalancerSourceRanges,
	// so additional iptables rules need to be installed.
	// (ref: https://github.com/kubernetes/kubernetes/issues/119656)
	lbNoNodeAccessIPPortProtocolEntries []*utilipset.Entry

	// logger is derived from the context passed to NewProxier and carries the
	// "ipFamily" key/value pair.
	logger klog.Logger
}
   258  
   259  // Proxier implements proxy.Provider
   260  var _ proxy.Provider = &Proxier{}
   261  
   262  // NewProxier returns a new Proxier given an iptables and ipvs Interface instance.
   263  // Because of the iptables and ipvs logic, it is assumed that there is only a single Proxier active on a machine.
   264  // An error will be returned if it fails to update or acquire the initial lock.
   265  // Once a proxier is created, it will keep iptables and ipvs rules up to date in the background and
   266  // will not terminate if a particular iptables or ipvs call fails.
   267  func NewProxier(
   268  	ctx context.Context,
   269  	ipFamily v1.IPFamily,
   270  	ipt utiliptables.Interface,
   271  	ipvs utilipvs.Interface,
   272  	ipset utilipset.Interface,
   273  	sysctl utilsysctl.Interface,
   274  	exec utilexec.Interface,
   275  	syncPeriod time.Duration,
   276  	minSyncPeriod time.Duration,
   277  	excludeCIDRs []string,
   278  	strictARP bool,
   279  	tcpTimeout time.Duration,
   280  	tcpFinTimeout time.Duration,
   281  	udpTimeout time.Duration,
   282  	masqueradeAll bool,
   283  	masqueradeBit int,
   284  	localDetector proxyutil.LocalTrafficDetector,
   285  	hostname string,
   286  	nodeIP net.IP,
   287  	recorder events.EventRecorder,
   288  	healthzServer *healthcheck.ProxierHealthServer,
   289  	scheduler string,
   290  	nodePortAddressStrings []string,
   291  	initOnly bool,
   292  ) (*Proxier, error) {
   293  	logger := klog.LoggerWithValues(klog.FromContext(ctx), "ipFamily", ipFamily)
   294  	// Set the conntrack sysctl we need for
   295  	if err := proxyutil.EnsureSysctl(sysctl, sysctlVSConnTrack, 1); err != nil {
   296  		return nil, err
   297  	}
   298  
   299  	kernelVersion, err := utilkernel.GetVersion()
   300  	if err != nil {
   301  		return nil, fmt.Errorf("failed to get kernel version: %w", err)
   302  	}
   303  
   304  	if kernelVersion.LessThan(version.MustParseGeneric(utilkernel.IPVSConnReuseModeMinSupportedKernelVersion)) {
   305  		logger.Error(nil, "Can't set sysctl, kernel version doesn't satisfy minimum version requirements", "sysctl", sysctlConnReuse, "minimumKernelVersion", utilkernel.IPVSConnReuseModeMinSupportedKernelVersion)
   306  	} else if kernelVersion.AtLeast(version.MustParseGeneric(utilkernel.IPVSConnReuseModeFixedKernelVersion)) {
   307  		// https://github.com/kubernetes/kubernetes/issues/93297
   308  		logger.V(2).Info("Left as-is", "sysctl", sysctlConnReuse)
   309  	} else {
   310  		// Set the connection reuse mode
   311  		if err := proxyutil.EnsureSysctl(sysctl, sysctlConnReuse, 0); err != nil {
   312  			return nil, err
   313  		}
   314  	}
   315  
   316  	// Set the expire_nodest_conn sysctl we need for
   317  	if err := proxyutil.EnsureSysctl(sysctl, sysctlExpireNoDestConn, 1); err != nil {
   318  		return nil, err
   319  	}
   320  
   321  	// Set the expire_quiescent_template sysctl we need for
   322  	if err := proxyutil.EnsureSysctl(sysctl, sysctlExpireQuiescentTemplate, 1); err != nil {
   323  		return nil, err
   324  	}
   325  
   326  	// Set the ip_forward sysctl we need for
   327  	if err := proxyutil.EnsureSysctl(sysctl, sysctlForward, 1); err != nil {
   328  		return nil, err
   329  	}
   330  
   331  	if strictARP {
   332  		// Set the arp_ignore sysctl we need for
   333  		if err := proxyutil.EnsureSysctl(sysctl, sysctlArpIgnore, 1); err != nil {
   334  			return nil, err
   335  		}
   336  
   337  		// Set the arp_announce sysctl we need for
   338  		if err := proxyutil.EnsureSysctl(sysctl, sysctlArpAnnounce, 2); err != nil {
   339  			return nil, err
   340  		}
   341  	}
   342  
   343  	// Configure IPVS timeouts if any one of the timeout parameters have been set.
   344  	// This is the equivalent to running ipvsadm --set, a value of 0 indicates the
   345  	// current system timeout should be preserved
   346  	if tcpTimeout > 0 || tcpFinTimeout > 0 || udpTimeout > 0 {
   347  		if err := ipvs.ConfigureTimeouts(tcpTimeout, tcpFinTimeout, udpTimeout); err != nil {
   348  			logger.Error(err, "Failed to configure IPVS timeouts")
   349  		}
   350  	}
   351  
   352  	if initOnly {
   353  		logger.Info("System initialized and --init-only specified")
   354  		return nil, nil
   355  	}
   356  
   357  	// Generate the masquerade mark to use for SNAT rules.
   358  	masqueradeValue := 1 << uint(masqueradeBit)
   359  	masqueradeMark := fmt.Sprintf("%#08x", masqueradeValue)
   360  
   361  	logger.V(2).Info("Record nodeIP and family", "nodeIP", nodeIP, "family", ipFamily)
   362  
   363  	if len(scheduler) == 0 {
   364  		logger.Info("IPVS scheduler not specified, use rr by default")
   365  		scheduler = defaultScheduler
   366  	}
   367  
   368  	nodePortAddresses := proxyutil.NewNodePortAddresses(ipFamily, nodePortAddressStrings)
   369  
   370  	serviceHealthServer := healthcheck.NewServiceHealthServer(hostname, recorder, nodePortAddresses, healthzServer)
   371  
   372  	// excludeCIDRs has been validated before, here we just parse it to IPNet list
   373  	parsedExcludeCIDRs, _ := netutils.ParseCIDRs(excludeCIDRs)
   374  
   375  	proxier := &Proxier{
   376  		ipFamily:              ipFamily,
   377  		svcPortMap:            make(proxy.ServicePortMap),
   378  		serviceChanges:        proxy.NewServiceChangeTracker(newServiceInfo, ipFamily, recorder, nil),
   379  		endpointsMap:          make(proxy.EndpointsMap),
   380  		endpointsChanges:      proxy.NewEndpointsChangeTracker(hostname, nil, ipFamily, recorder, nil),
   381  		initialSync:           true,
   382  		syncPeriod:            syncPeriod,
   383  		minSyncPeriod:         minSyncPeriod,
   384  		excludeCIDRs:          parsedExcludeCIDRs,
   385  		iptables:              ipt,
   386  		masqueradeAll:         masqueradeAll,
   387  		masqueradeMark:        masqueradeMark,
   388  		conntrack:             conntrack.NewExec(exec),
   389  		localDetector:         localDetector,
   390  		hostname:              hostname,
   391  		nodeIP:                nodeIP,
   392  		recorder:              recorder,
   393  		serviceHealthServer:   serviceHealthServer,
   394  		healthzServer:         healthzServer,
   395  		ipvs:                  ipvs,
   396  		ipvsScheduler:         scheduler,
   397  		iptablesData:          bytes.NewBuffer(nil),
   398  		filterChainsData:      bytes.NewBuffer(nil),
   399  		natChains:             proxyutil.NewLineBuffer(),
   400  		natRules:              proxyutil.NewLineBuffer(),
   401  		filterChains:          proxyutil.NewLineBuffer(),
   402  		filterRules:           proxyutil.NewLineBuffer(),
   403  		netlinkHandle:         NewNetLinkHandle(ipFamily == v1.IPv6Protocol),
   404  		ipset:                 ipset,
   405  		nodePortAddresses:     nodePortAddresses,
   406  		networkInterfacer:     proxyutil.RealNetwork{},
   407  		gracefuldeleteManager: NewGracefulTerminationManager(ipvs),
   408  		logger:                logger,
   409  	}
   410  	// initialize ipsetList with all sets we needed
   411  	proxier.ipsetList = make(map[string]*IPSet)
   412  	for _, is := range ipsetInfo {
   413  		proxier.ipsetList[is.name] = NewIPSet(ipset, is.name, is.setType, (ipFamily == v1.IPv6Protocol), is.comment)
   414  	}
   415  	burstSyncs := 2
   416  	logger.V(2).Info("ipvs sync params", "minSyncPeriod", minSyncPeriod, "syncPeriod", syncPeriod, "burstSyncs", burstSyncs)
   417  	proxier.syncRunner = async.NewBoundedFrequencyRunner("sync-runner", proxier.syncProxyRules, minSyncPeriod, syncPeriod, burstSyncs)
   418  	proxier.gracefuldeleteManager.Run()
   419  	return proxier, nil
   420  }
   421  
   422  func filterCIDRs(wantIPv6 bool, cidrs []string) []string {
   423  	var filteredCIDRs []string
   424  	for _, cidr := range cidrs {
   425  		if netutils.IsIPv6CIDRString(cidr) == wantIPv6 {
   426  			filteredCIDRs = append(filteredCIDRs, cidr)
   427  		}
   428  	}
   429  	return filteredCIDRs
   430  }
   431  
// iptablesJumpChain is the table of jump rules that the ipvs proxier installs
// into, or cleans up from, the built-in iptables chains.
// `table` is the iptables table that holds the rule.
// `from` is the source iptables chain that contains the jump.
// `to` is the iptables chain we want to operate on, i.e. the jump target.
// `comment` is the text attached to the rule via `-m comment --comment`.
var iptablesJumpChain = []struct {
	table   utiliptables.Table
	from    utiliptables.Chain
	to      utiliptables.Chain
	comment string
}{
	{utiliptables.TableNAT, utiliptables.ChainOutput, kubeServicesChain, "kubernetes service portals"},
	{utiliptables.TableNAT, utiliptables.ChainPrerouting, kubeServicesChain, "kubernetes service portals"},
	{utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, "kubernetes postrouting rules"},
	{utiliptables.TableFilter, utiliptables.ChainForward, kubeForwardChain, "kubernetes forwarding rules"},
	{utiliptables.TableFilter, utiliptables.ChainInput, kubeNodePortChain, "kubernetes health check rules"},
	{utiliptables.TableFilter, utiliptables.ChainInput, kubeProxyFirewallChain, "kube-proxy firewall rules"},
	{utiliptables.TableFilter, utiliptables.ChainForward, kubeProxyFirewallChain, "kube-proxy firewall rules"},
	{utiliptables.TableFilter, utiliptables.ChainInput, kubeIPVSFilterChain, "kubernetes ipvs access filter"},
	{utiliptables.TableFilter, utiliptables.ChainOutput, kubeIPVSOutFilterChain, "kubernetes ipvs access filter"},
}
   451  
// iptablesChains lists, as (table, chain) pairs, the kube-proxy owned iptables
// chains in the nat and filter tables.
// NOTE(review): presumably these are the chains created/refreshed on every
// sync; the consumer is not visible here - confirm against syncProxyRules.
var iptablesChains = []struct {
	table utiliptables.Table
	chain utiliptables.Chain
}{
	{utiliptables.TableNAT, kubeServicesChain},
	{utiliptables.TableNAT, kubePostroutingChain},
	{utiliptables.TableNAT, kubeNodePortChain},
	{utiliptables.TableNAT, kubeLoadBalancerChain},
	{utiliptables.TableNAT, kubeMarkMasqChain},
	{utiliptables.TableFilter, kubeForwardChain},
	{utiliptables.TableFilter, kubeNodePortChain},
	{utiliptables.TableFilter, kubeProxyFirewallChain},
	{utiliptables.TableFilter, kubeSourceRangesFirewallChain},
	{utiliptables.TableFilter, kubeIPVSFilterChain},
	{utiliptables.TableFilter, kubeIPVSOutFilterChain},
}
   468  
// iptablesCleanupChains lists, as (table, chain) pairs, the iptables chains
// handled during cleanup. It contains the same entries as iptablesChains except
// for the nat-table kubeMarkMasqChain, which is deliberately absent.
// NOTE(review): presumably KUBE-MARK-MASQ is left in place because it may be
// used by other components - confirm before changing this list.
var iptablesCleanupChains = []struct {
	table utiliptables.Table
	chain utiliptables.Chain
}{
	{utiliptables.TableNAT, kubeServicesChain},
	{utiliptables.TableNAT, kubePostroutingChain},
	{utiliptables.TableNAT, kubeNodePortChain},
	{utiliptables.TableNAT, kubeLoadBalancerChain},
	{utiliptables.TableFilter, kubeForwardChain},
	{utiliptables.TableFilter, kubeNodePortChain},
	{utiliptables.TableFilter, kubeProxyFirewallChain},
	{utiliptables.TableFilter, kubeSourceRangesFirewallChain},
	{utiliptables.TableFilter, kubeIPVSFilterChain},
	{utiliptables.TableFilter, kubeIPVSOutFilterChain},
}
   484  
// ipsetInfo describes every ipset the ipvs proxier needs: its name, its ipset
// type and the comment attached to it. NewProxier creates one IPSet per entry
// and stores it in Proxier.ipsetList under the entry's name.
var ipsetInfo = []struct {
	name    string
	setType utilipset.Type
	comment string
}{
	{kubeLoopBackIPSet, utilipset.HashIPPortIP, kubeLoopBackIPSetComment},
	{kubeClusterIPSet, utilipset.HashIPPort, kubeClusterIPSetComment},
	{kubeExternalIPSet, utilipset.HashIPPort, kubeExternalIPSetComment},
	{kubeExternalIPLocalSet, utilipset.HashIPPort, kubeExternalIPLocalSetComment},
	{kubeLoadBalancerSet, utilipset.HashIPPort, kubeLoadBalancerSetComment},
	{kubeLoadBalancerFWSet, utilipset.HashIPPort, kubeLoadBalancerFWSetComment},
	{kubeLoadBalancerLocalSet, utilipset.HashIPPort, kubeLoadBalancerLocalSetComment},
	{kubeLoadBalancerSourceIPSet, utilipset.HashIPPortIP, kubeLoadBalancerSourceIPSetComment},
	{kubeLoadBalancerSourceCIDRSet, utilipset.HashIPPortNet, kubeLoadBalancerSourceCIDRSetComment},
	{kubeNodePortSetTCP, utilipset.BitmapPort, kubeNodePortSetTCPComment},
	{kubeNodePortLocalSetTCP, utilipset.BitmapPort, kubeNodePortLocalSetTCPComment},
	{kubeNodePortSetUDP, utilipset.BitmapPort, kubeNodePortSetUDPComment},
	{kubeNodePortLocalSetUDP, utilipset.BitmapPort, kubeNodePortLocalSetUDPComment},
	{kubeNodePortSetSCTP, utilipset.HashIPPort, kubeNodePortSetSCTPComment},
	{kubeNodePortLocalSetSCTP, utilipset.HashIPPort, kubeNodePortLocalSetSCTPComment},
	{kubeHealthCheckNodePortSet, utilipset.BitmapPort, kubeHealthCheckNodePortSetComment},
	{kubeIPVSSet, utilipset.HashIP, kubeIPVSSetComment},
}
   509  
// ipsetWithIptablesChain is the list of ipsets together with the iptables source
// chain and the target that matching packets jump to. Each entry corresponds to
// a rule of the form
// `iptables -t <table> -A <from> -m set --match-set <name> <matchType> -j <to>`
// example: iptables -t nat -A KUBE-SERVICES -m set --match-set KUBE-NODE-PORT-TCP dst -j KUBE-NODE-PORT
// `protocolMatch`, when non-empty, names the protocol the rule is restricted to.
// ipsets with other match rules will be created individually.
// Note: kubeNodePortLocalSetTCP must come before kubeNodePortSetTCP; the same
// holds for the UDP and SCTP pairs.
var ipsetWithIptablesChain = []struct {
	name          string
	table         utiliptables.Table
	from          string
	to            string
	matchType     string
	protocolMatch string
}{
	{kubeLoopBackIPSet, utiliptables.TableNAT, string(kubePostroutingChain), "MASQUERADE", "dst,dst,src", ""},
	{kubeLoadBalancerSet, utiliptables.TableNAT, string(kubeServicesChain), string(kubeLoadBalancerChain), "dst,dst", ""},
	{kubeLoadBalancerLocalSet, utiliptables.TableNAT, string(kubeLoadBalancerChain), "RETURN", "dst,dst", ""},
	{kubeNodePortLocalSetTCP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst", utilipset.ProtocolTCP},
	{kubeNodePortSetTCP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst", utilipset.ProtocolTCP},
	{kubeNodePortLocalSetUDP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst", utilipset.ProtocolUDP},
	{kubeNodePortSetUDP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst", utilipset.ProtocolUDP},
	{kubeNodePortLocalSetSCTP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst,dst", utilipset.ProtocolSCTP},
	{kubeNodePortSetSCTP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst,dst", utilipset.ProtocolSCTP},

	{kubeLoadBalancerFWSet, utiliptables.TableFilter, string(kubeProxyFirewallChain), string(kubeSourceRangesFirewallChain), "dst,dst", ""},
	{kubeLoadBalancerSourceCIDRSet, utiliptables.TableFilter, string(kubeSourceRangesFirewallChain), "RETURN", "dst,dst,src", ""},
	{kubeLoadBalancerSourceIPSet, utiliptables.TableFilter, string(kubeSourceRangesFirewallChain), "RETURN", "dst,dst,src", ""},
}
   537  
// servicePortInfo is the ipvs proxier's internal representation of a service
// port. It embeds the generic *proxy.BaseServicePortInfo and adds a cached
// string form of the service port name.
type servicePortInfo struct {
	*proxy.BaseServicePortInfo
	// The following fields are computed and stored for performance reasons.
	// nameString is filled in by newServiceInfo from the ServicePortName's String().
	nameString string
}
   544  
   545  // returns a new proxy.ServicePort which abstracts a serviceInfo
   546  func newServiceInfo(port *v1.ServicePort, service *v1.Service, bsvcPortInfo *proxy.BaseServicePortInfo) proxy.ServicePort {
   547  	svcPort := &servicePortInfo{BaseServicePortInfo: bsvcPortInfo}
   548  
   549  	// Store the following for performance reasons.
   550  	svcName := types.NamespacedName{Namespace: service.Namespace, Name: service.Name}
   551  	svcPortName := proxy.ServicePortName{NamespacedName: svcName, Port: port.Name}
   552  	svcPort.nameString = svcPortName.String()
   553  
   554  	return svcPort
   555  }
   556  
   557  // getFirstColumn reads all the content from r into memory and return a
   558  // slice which consists of the first word from each line.
   559  func getFirstColumn(r io.Reader) ([]string, error) {
   560  	b, err := io.ReadAll(r)
   561  	if err != nil {
   562  		return nil, err
   563  	}
   564  
   565  	lines := strings.Split(string(b), "\n")
   566  	words := make([]string, 0, len(lines))
   567  	for i := range lines {
   568  		fields := strings.Fields(lines[i])
   569  		if len(fields) > 0 {
   570  			words = append(words, fields[0])
   571  		}
   572  	}
   573  	return words, nil
   574  }
   575  
   576  // CanUseIPVSProxier checks if we can use the ipvs Proxier.
   577  // The ipset version and the scheduler are checked. If any virtual servers (VS)
   578  // already exist with the configured scheduler, we just return. Otherwise
   579  // we check if a dummy VS can be configured with the configured scheduler.
   580  // Kernel modules will be loaded automatically if necessary.
   581  func CanUseIPVSProxier(ctx context.Context, ipvs utilipvs.Interface, ipsetver IPSetVersioner, scheduler string) error {
   582  	logger := klog.FromContext(ctx)
   583  	// BUG: https://github.com/moby/ipvs/issues/27
   584  	// If ipvs is not compiled into the kernel no error is returned and handle==nil.
   585  	// This in turn causes ipvs.GetVirtualServers and ipvs.AddVirtualServer
   586  	// to return ok (err==nil). If/when this bug is fixed parameter "ipvs" will be nil
   587  	// if ipvs is not supported by the kernel. Until then a re-read work-around is used.
   588  	if ipvs == nil {
   589  		return fmt.Errorf("Ipvs not supported by the kernel")
   590  	}
   591  
   592  	// Check ipset version
   593  	versionString, err := ipsetver.GetVersion()
   594  	if err != nil {
   595  		return fmt.Errorf("error getting ipset version, error: %v", err)
   596  	}
   597  	if !checkMinVersion(versionString) {
   598  		return fmt.Errorf("ipset version: %s is less than min required version: %s", versionString, MinIPSetCheckVersion)
   599  	}
   600  
   601  	if scheduler == "" {
   602  		scheduler = defaultScheduler
   603  	}
   604  
   605  	// If any virtual server (VS) using the scheduler exist we skip the checks.
   606  	vservers, err := ipvs.GetVirtualServers()
   607  	if err != nil {
   608  		logger.Error(err, "Can't read the ipvs")
   609  		return err
   610  	}
   611  	logger.V(5).Info("Virtual Servers", "count", len(vservers))
   612  	if len(vservers) > 0 {
   613  		// This is most likely a kube-proxy re-start. We know that ipvs works
   614  		// and if any VS uses the configured scheduler, we are done.
   615  		for _, vs := range vservers {
   616  			if vs.Scheduler == scheduler {
   617  				logger.V(5).Info("VS exist, Skipping checks")
   618  				return nil
   619  			}
   620  		}
   621  		logger.V(5).Info("No existing VS uses the configured scheduler", "scheduler", scheduler)
   622  	}
   623  
	// Try to insert a dummy VS with the passed scheduler.
	// We should use a VIP address that is not used on the node.
	// An address "198.51.100.0" from the TEST-NET-2 range in https://datatracker.ietf.org/doc/html/rfc5737
	// is used. These addresses are reserved for documentation. If the user is using
	// this address for a VS anyway we *will* mess up, but that would be an invalid configuration.
	// If the user has configured the address on an interface on the node (but not a VS)
	// then traffic will temporarily be routed to ipvs during the probe and dropped.
	// The latter case is also an invalid configuration, but the traffic impact will be minor.
	// This should not be a problem if users honor reserved addresses, but cut/paste
	// from documentation is not unheard of, so the restriction to not use the TEST-NET-2 range
	// must be documented.
   635  	vs := utilipvs.VirtualServer{
   636  		Address:   netutils.ParseIPSloppy("198.51.100.0"),
   637  		Protocol:  "TCP",
   638  		Port:      20000,
   639  		Scheduler: scheduler,
   640  	}
   641  	if err := ipvs.AddVirtualServer(&vs); err != nil {
   642  		logger.Error(err, "Could not create dummy VS", "scheduler", scheduler)
   643  		return err
   644  	}
   645  
   646  	// To overcome the BUG described above we check that the VS is *really* added.
   647  	vservers, err = ipvs.GetVirtualServers()
   648  	if err != nil {
   649  		logger.Error(err, "ipvs.GetVirtualServers")
   650  		return err
   651  	}
   652  	logger.V(5).Info("Virtual Servers after adding dummy", "count", len(vservers))
   653  	if len(vservers) == 0 {
   654  		logger.Info("Dummy VS not created", "scheduler", scheduler)
   655  		return fmt.Errorf("Ipvs not supported") // This is a BUG work-around
   656  	}
   657  	logger.V(5).Info("Dummy VS created", "vs", vs)
   658  
   659  	if err := ipvs.DeleteVirtualServer(&vs); err != nil {
   660  		logger.Error(err, "Could not delete dummy VS")
   661  		return err
   662  	}
   663  
   664  	return nil
   665  }
   666  
   667  // CleanupIptablesLeftovers removes all iptables rules and chains created by the Proxier
   668  // It returns true if an error was encountered. Errors are logged.
   669  func cleanupIptablesLeftovers(ctx context.Context, ipt utiliptables.Interface) (encounteredError bool) {
   670  	logger := klog.FromContext(ctx)
   671  	// Unlink the iptables chains created by ipvs Proxier
   672  	for _, jc := range iptablesJumpChain {
   673  		args := []string{
   674  			"-m", "comment", "--comment", jc.comment,
   675  			"-j", string(jc.to),
   676  		}
   677  		if err := ipt.DeleteRule(jc.table, jc.from, args...); err != nil {
   678  			if !utiliptables.IsNotFoundError(err) {
   679  				logger.Error(err, "Error removing iptables rules in ipvs proxier")
   680  				encounteredError = true
   681  			}
   682  		}
   683  	}
   684  
   685  	// Flush and remove all of our chains. Flushing all chains before removing them also removes all links between chains first.
   686  	for _, ch := range iptablesCleanupChains {
   687  		if err := ipt.FlushChain(ch.table, ch.chain); err != nil {
   688  			if !utiliptables.IsNotFoundError(err) {
   689  				logger.Error(err, "Error removing iptables rules in ipvs proxier")
   690  				encounteredError = true
   691  			}
   692  		}
   693  	}
   694  
   695  	// Remove all of our chains.
   696  	for _, ch := range iptablesCleanupChains {
   697  		if err := ipt.DeleteChain(ch.table, ch.chain); err != nil {
   698  			if !utiliptables.IsNotFoundError(err) {
   699  				logger.Error(err, "Error removing iptables rules in ipvs proxier")
   700  				encounteredError = true
   701  			}
   702  		}
   703  	}
   704  
   705  	return encounteredError
   706  }
   707  
   708  // CleanupLeftovers clean up all ipvs and iptables rules created by ipvs Proxier.
   709  func CleanupLeftovers(ctx context.Context, ipvs utilipvs.Interface, ipt utiliptables.Interface, ipset utilipset.Interface) (encounteredError bool) {
   710  	logger := klog.FromContext(ctx)
   711  	// Clear all ipvs rules
   712  	if ipvs != nil {
   713  		err := ipvs.Flush()
   714  		if err != nil {
   715  			logger.Error(err, "Error flushing ipvs rules")
   716  			encounteredError = true
   717  		}
   718  	}
   719  	// Delete dummy interface created by ipvs Proxier.
   720  	nl := NewNetLinkHandle(false)
   721  	err := nl.DeleteDummyDevice(defaultDummyDevice)
   722  	if err != nil {
   723  		logger.Error(err, "Error deleting dummy device created by ipvs proxier", "device", defaultDummyDevice)
   724  		encounteredError = true
   725  	}
   726  	// Clear iptables created by ipvs Proxier.
   727  	encounteredError = cleanupIptablesLeftovers(ctx, ipt) || encounteredError
   728  	// Destroy ip sets created by ipvs Proxier.  We should call it after cleaning up
   729  	// iptables since we can NOT delete ip set which is still referenced by iptables.
   730  	for _, set := range ipsetInfo {
   731  		err = ipset.DestroySet(set.name)
   732  		if err != nil {
   733  			if !utilipset.IsNotFoundError(err) {
   734  				logger.Error(err, "Error removing ipset", "ipset", set.name)
   735  				encounteredError = true
   736  			}
   737  		}
   738  	}
   739  	return encounteredError
   740  }
   741  
   742  // Sync is called to synchronize the proxier state to iptables and ipvs as soon as possible.
   743  func (proxier *Proxier) Sync() {
   744  	if proxier.healthzServer != nil {
   745  		proxier.healthzServer.QueuedUpdate(proxier.ipFamily)
   746  	}
   747  	metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
   748  	proxier.syncRunner.Run()
   749  }
   750  
   751  // SyncLoop runs periodic work.  This is expected to run as a goroutine or as the main loop of the app.  It does not return.
   752  func (proxier *Proxier) SyncLoop() {
   753  	// Update healthz timestamp at beginning in case Sync() never succeeds.
   754  	if proxier.healthzServer != nil {
   755  		proxier.healthzServer.Updated(proxier.ipFamily)
   756  	}
   757  	// synthesize "last change queued" time as the informers are syncing.
   758  	metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
   759  	proxier.syncRunner.Loop(wait.NeverStop)
   760  }
   761  
   762  func (proxier *Proxier) setInitialized(value bool) {
   763  	var initialized int32
   764  	if value {
   765  		initialized = 1
   766  	}
   767  	atomic.StoreInt32(&proxier.initialized, initialized)
   768  }
   769  
   770  func (proxier *Proxier) isInitialized() bool {
   771  	return atomic.LoadInt32(&proxier.initialized) > 0
   772  }
   773  
   774  // OnServiceAdd is called whenever creation of new service object is observed.
   775  func (proxier *Proxier) OnServiceAdd(service *v1.Service) {
   776  	proxier.OnServiceUpdate(nil, service)
   777  }
   778  
   779  // OnServiceUpdate is called whenever modification of an existing service object is observed.
   780  func (proxier *Proxier) OnServiceUpdate(oldService, service *v1.Service) {
   781  	if proxier.serviceChanges.Update(oldService, service) && proxier.isInitialized() {
   782  		proxier.Sync()
   783  	}
   784  }
   785  
   786  // OnServiceDelete is called whenever deletion of an existing service object is observed.
   787  func (proxier *Proxier) OnServiceDelete(service *v1.Service) {
   788  	proxier.OnServiceUpdate(service, nil)
   789  }
   790  
   791  // OnServiceSynced is called once all the initial event handlers were called and the state is fully propagated to local cache.
   792  func (proxier *Proxier) OnServiceSynced() {
   793  	proxier.mu.Lock()
   794  	proxier.servicesSynced = true
   795  	proxier.setInitialized(proxier.endpointSlicesSynced)
   796  	proxier.mu.Unlock()
   797  
   798  	// Sync unconditionally - this is called once per lifetime.
   799  	proxier.syncProxyRules()
   800  }
   801  
   802  // OnEndpointSliceAdd is called whenever creation of a new endpoint slice object
   803  // is observed.
   804  func (proxier *Proxier) OnEndpointSliceAdd(endpointSlice *discovery.EndpointSlice) {
   805  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() {
   806  		proxier.Sync()
   807  	}
   808  }
   809  
   810  // OnEndpointSliceUpdate is called whenever modification of an existing endpoint
   811  // slice object is observed.
   812  func (proxier *Proxier) OnEndpointSliceUpdate(_, endpointSlice *discovery.EndpointSlice) {
   813  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() {
   814  		proxier.Sync()
   815  	}
   816  }
   817  
   818  // OnEndpointSliceDelete is called whenever deletion of an existing endpoint slice
   819  // object is observed.
   820  func (proxier *Proxier) OnEndpointSliceDelete(endpointSlice *discovery.EndpointSlice) {
   821  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, true) && proxier.isInitialized() {
   822  		proxier.Sync()
   823  	}
   824  }
   825  
   826  // OnEndpointSlicesSynced is called once all the initial event handlers were
   827  // called and the state is fully propagated to local cache.
   828  func (proxier *Proxier) OnEndpointSlicesSynced() {
   829  	proxier.mu.Lock()
   830  	proxier.endpointSlicesSynced = true
   831  	proxier.setInitialized(proxier.servicesSynced)
   832  	proxier.mu.Unlock()
   833  
   834  	// Sync unconditionally - this is called once per lifetime.
   835  	proxier.syncProxyRules()
   836  }
   837  
   838  // OnNodeAdd is called whenever creation of new node object
   839  // is observed.
   840  func (proxier *Proxier) OnNodeAdd(node *v1.Node) {
   841  	if node.Name != proxier.hostname {
   842  		proxier.logger.Error(nil, "Received a watch event for a node that doesn't match the current node", "eventNode", node.Name, "currentNode", proxier.hostname)
   843  		return
   844  	}
   845  
   846  	if reflect.DeepEqual(proxier.nodeLabels, node.Labels) {
   847  		return
   848  	}
   849  
   850  	proxier.mu.Lock()
   851  	proxier.nodeLabels = map[string]string{}
   852  	for k, v := range node.Labels {
   853  		proxier.nodeLabels[k] = v
   854  	}
   855  	proxier.mu.Unlock()
   856  	proxier.logger.V(4).Info("Updated proxier node labels", "labels", node.Labels)
   857  
   858  	proxier.Sync()
   859  }
   860  
   861  // OnNodeUpdate is called whenever modification of an existing
   862  // node object is observed.
   863  func (proxier *Proxier) OnNodeUpdate(oldNode, node *v1.Node) {
   864  	if node.Name != proxier.hostname {
   865  		proxier.logger.Error(nil, "Received a watch event for a node that doesn't match the current node", "eventNode", node.Name, "currentNode", proxier.hostname)
   866  		return
   867  	}
   868  
   869  	if reflect.DeepEqual(proxier.nodeLabels, node.Labels) {
   870  		return
   871  	}
   872  
   873  	proxier.mu.Lock()
   874  	proxier.nodeLabels = map[string]string{}
   875  	for k, v := range node.Labels {
   876  		proxier.nodeLabels[k] = v
   877  	}
   878  	proxier.mu.Unlock()
   879  	proxier.logger.V(4).Info("Updated proxier node labels", "labels", node.Labels)
   880  
   881  	proxier.Sync()
   882  }
   883  
   884  // OnNodeDelete is called whenever deletion of an existing node
   885  // object is observed.
   886  func (proxier *Proxier) OnNodeDelete(node *v1.Node) {
   887  	if node.Name != proxier.hostname {
   888  		proxier.logger.Error(nil, "Received a watch event for a node that doesn't match the current node", "eventNode", node.Name, "currentNode", proxier.hostname)
   889  		return
   890  	}
   891  
   892  	proxier.mu.Lock()
   893  	proxier.nodeLabels = nil
   894  	proxier.mu.Unlock()
   895  
   896  	proxier.Sync()
   897  }
   898  
   899  // OnNodeSynced is called once all the initial event handlers were
   900  // called and the state is fully propagated to local cache.
   901  func (proxier *Proxier) OnNodeSynced() {
   902  }
   903  
   904  // OnServiceCIDRsChanged is called whenever a change is observed
   905  // in any of the ServiceCIDRs, and provides complete list of service cidrs.
   906  func (proxier *Proxier) OnServiceCIDRsChanged(_ []string) {}
   907  
   908  // This is where all of the ipvs calls happen.
   909  func (proxier *Proxier) syncProxyRules() {
   910  	proxier.mu.Lock()
   911  	defer proxier.mu.Unlock()
   912  
   913  	// don't sync rules till we've received services and endpoints
   914  	if !proxier.isInitialized() {
   915  		proxier.logger.V(2).Info("Not syncing ipvs rules until Services and Endpoints have been received from master")
   916  		return
   917  	}
   918  
   919  	// its safe to set initialSync to false as it acts as a flag for startup actions
   920  	// and the mutex is held.
   921  	defer func() {
   922  		proxier.initialSync = false
   923  	}()
   924  
   925  	// Keep track of how long syncs take.
   926  	start := time.Now()
   927  	defer func() {
   928  		metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
   929  		proxier.logger.V(4).Info("syncProxyRules complete", "elapsed", time.Since(start))
   930  	}()
   931  
   932  	// We assume that if this was called, we really want to sync them,
   933  	// even if nothing changed in the meantime. In other words, callers are
   934  	// responsible for detecting no-op changes and not calling this function.
   935  	serviceUpdateResult := proxier.svcPortMap.Update(proxier.serviceChanges)
   936  	endpointUpdateResult := proxier.endpointsMap.Update(proxier.endpointsChanges)
   937  
   938  	proxier.logger.V(3).Info("Syncing ipvs proxier rules")
   939  
   940  	proxier.serviceNoLocalEndpointsInternal = sets.New[string]()
   941  	proxier.serviceNoLocalEndpointsExternal = sets.New[string]()
   942  
   943  	proxier.lbNoNodeAccessIPPortProtocolEntries = make([]*utilipset.Entry, 0)
   944  
   945  	// Begin install iptables
   946  
   947  	// Reset all buffers used later.
   948  	// This is to avoid memory reallocations and thus improve performance.
   949  	proxier.natChains.Reset()
   950  	proxier.natRules.Reset()
   951  	proxier.filterChains.Reset()
   952  	proxier.filterRules.Reset()
   953  
   954  	// Write table headers.
   955  	proxier.filterChains.Write("*filter")
   956  	proxier.natChains.Write("*nat")
   957  
   958  	proxier.createAndLinkKubeChain()
   959  
   960  	// make sure dummy interface exists in the system where ipvs Proxier will bind service address on it
   961  	_, err := proxier.netlinkHandle.EnsureDummyDevice(defaultDummyDevice)
   962  	if err != nil {
   963  		proxier.logger.Error(err, "Failed to create dummy interface", "interface", defaultDummyDevice)
   964  		return
   965  	}
   966  
   967  	// make sure ip sets exists in the system.
   968  	for _, set := range proxier.ipsetList {
   969  		if err := ensureIPSet(set); err != nil {
   970  			return
   971  		}
   972  		set.resetEntries()
   973  	}
   974  
   975  	// activeIPVSServices represents IPVS service successfully created in this round of sync
   976  	activeIPVSServices := sets.New[string]()
   977  	// activeBindAddrs Represents addresses we want on the defaultDummyDevice after this round of sync
   978  	activeBindAddrs := sets.New[string]()
   979  	// alreadyBoundAddrs Represents addresses currently assigned to the dummy interface
   980  	alreadyBoundAddrs, err := proxier.netlinkHandle.GetLocalAddresses(defaultDummyDevice)
   981  	if err != nil {
   982  		proxier.logger.Error(err, "Error listing addresses binded to dummy interface")
   983  	}
   984  	// nodeAddressSet All addresses *except* those on the dummy interface
   985  	nodeAddressSet, err := proxier.netlinkHandle.GetAllLocalAddressesExcept(defaultDummyDevice)
   986  	if err != nil {
   987  		proxier.logger.Error(err, "Error listing node addresses")
   988  	}
   989  
   990  	hasNodePort := false
   991  	for _, svc := range proxier.svcPortMap {
   992  		svcInfo, ok := svc.(*servicePortInfo)
   993  		if ok && svcInfo.NodePort() != 0 {
   994  			hasNodePort = true
   995  			break
   996  		}
   997  	}
   998  
   999  	// List of node IP addresses to be used as IPVS services if nodePort is set. This
  1000  	// can be reused for all nodePort services.
  1001  	var nodeIPs []net.IP
  1002  	if hasNodePort {
  1003  		if proxier.nodePortAddresses.MatchAll() {
  1004  			for _, ipStr := range nodeAddressSet.UnsortedList() {
  1005  				nodeIPs = append(nodeIPs, netutils.ParseIPSloppy(ipStr))
  1006  			}
  1007  		} else {
  1008  			allNodeIPs, err := proxier.nodePortAddresses.GetNodeIPs(proxier.networkInterfacer)
  1009  			if err != nil {
  1010  				proxier.logger.Error(err, "Failed to get node IP address matching nodeport cidr")
  1011  			} else {
  1012  				for _, ip := range allNodeIPs {
  1013  					if !ip.IsLoopback() {
  1014  						nodeIPs = append(nodeIPs, ip)
  1015  					}
  1016  				}
  1017  			}
  1018  		}
  1019  	}
  1020  
  1021  	// Build IPVS rules for each service.
  1022  	for svcPortName, svcPort := range proxier.svcPortMap {
  1023  		svcInfo, ok := svcPort.(*servicePortInfo)
  1024  		if !ok {
  1025  			proxier.logger.Error(nil, "Failed to cast serviceInfo", "servicePortName", svcPortName)
  1026  			continue
  1027  		}
  1028  
  1029  		protocol := strings.ToLower(string(svcInfo.Protocol()))
  1030  		// Precompute svcNameString; with many services the many calls
  1031  		// to ServicePortName.String() show up in CPU profiles.
  1032  		svcPortNameString := svcPortName.String()
  1033  
  1034  		// Handle traffic that loops back to the originator with SNAT.
  1035  		for _, e := range proxier.endpointsMap[svcPortName] {
  1036  			ep, ok := e.(*proxy.BaseEndpointInfo)
  1037  			if !ok {
  1038  				proxier.logger.Error(nil, "Failed to cast BaseEndpointInfo", "endpoint", e)
  1039  				continue
  1040  			}
  1041  			if !ep.IsLocal() {
  1042  				continue
  1043  			}
  1044  			epIP := ep.IP()
  1045  			epPort := ep.Port()
  1046  			// Error parsing this endpoint has been logged. Skip to next endpoint.
  1047  			if epIP == "" || epPort == 0 {
  1048  				continue
  1049  			}
  1050  			entry := &utilipset.Entry{
  1051  				IP:       epIP,
  1052  				Port:     epPort,
  1053  				Protocol: protocol,
  1054  				IP2:      epIP,
  1055  				SetType:  utilipset.HashIPPortIP,
  1056  			}
  1057  			if valid := proxier.ipsetList[kubeLoopBackIPSet].validateEntry(entry); !valid {
  1058  				proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoopBackIPSet].Name)
  1059  				continue
  1060  			}
  1061  			proxier.ipsetList[kubeLoopBackIPSet].activeEntries.Insert(entry.String())
  1062  		}
  1063  
  1064  		// Capture the clusterIP.
  1065  		// ipset call
  1066  		entry := &utilipset.Entry{
  1067  			IP:       svcInfo.ClusterIP().String(),
  1068  			Port:     svcInfo.Port(),
  1069  			Protocol: protocol,
  1070  			SetType:  utilipset.HashIPPort,
  1071  		}
  1072  		// add service Cluster IP:Port to kubeServiceAccess ip set for the purpose of solving hairpin.
  1073  		// proxier.kubeServiceAccessSet.activeEntries.Insert(entry.String())
  1074  		if valid := proxier.ipsetList[kubeClusterIPSet].validateEntry(entry); !valid {
  1075  			proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeClusterIPSet].Name)
  1076  			continue
  1077  		}
  1078  		proxier.ipsetList[kubeClusterIPSet].activeEntries.Insert(entry.String())
  1079  		// ipvs call
  1080  		serv := &utilipvs.VirtualServer{
  1081  			Address:   svcInfo.ClusterIP(),
  1082  			Port:      uint16(svcInfo.Port()),
  1083  			Protocol:  string(svcInfo.Protocol()),
  1084  			Scheduler: proxier.ipvsScheduler,
  1085  		}
  1086  		// Set session affinity flag and timeout for IPVS service
  1087  		if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1088  			serv.Flags |= utilipvs.FlagPersistent
  1089  			serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1090  		}
  1091  		// Set the source hash flag needed for the distribution method "mh"
  1092  		if proxier.ipvsScheduler == "mh" {
  1093  			serv.Flags |= utilipvs.FlagSourceHash
  1094  		}
  1095  		// We need to bind ClusterIP to dummy interface, so set `bindAddr` parameter to `true` in syncService()
  1096  		if err := proxier.syncService(svcPortNameString, serv, true, alreadyBoundAddrs); err == nil {
  1097  			activeIPVSServices.Insert(serv.String())
  1098  			activeBindAddrs.Insert(serv.Address.String())
  1099  			// ExternalTrafficPolicy only works for NodePort and external LB traffic, does not affect ClusterIP
  1100  			// So we still need clusterIP rules in onlyNodeLocalEndpoints mode.
  1101  			internalNodeLocal := false
  1102  			if svcInfo.InternalPolicyLocal() {
  1103  				internalNodeLocal = true
  1104  			}
  1105  			if err := proxier.syncEndpoint(svcPortName, internalNodeLocal, serv); err != nil {
  1106  				proxier.logger.Error(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1107  			}
  1108  		} else {
  1109  			proxier.logger.Error(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1110  		}
  1111  
  1112  		// Capture externalIPs.
  1113  		for _, externalIP := range svcInfo.ExternalIPs() {
  1114  			// ipset call
  1115  			entry := &utilipset.Entry{
  1116  				IP:       externalIP.String(),
  1117  				Port:     svcInfo.Port(),
  1118  				Protocol: protocol,
  1119  				SetType:  utilipset.HashIPPort,
  1120  			}
  1121  
  1122  			if svcInfo.ExternalPolicyLocal() {
  1123  				if valid := proxier.ipsetList[kubeExternalIPLocalSet].validateEntry(entry); !valid {
  1124  					proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeExternalIPLocalSet].Name)
  1125  					continue
  1126  				}
  1127  				proxier.ipsetList[kubeExternalIPLocalSet].activeEntries.Insert(entry.String())
  1128  			} else {
  1129  				// We have to SNAT packets to external IPs.
  1130  				if valid := proxier.ipsetList[kubeExternalIPSet].validateEntry(entry); !valid {
  1131  					proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeExternalIPSet].Name)
  1132  					continue
  1133  				}
  1134  				proxier.ipsetList[kubeExternalIPSet].activeEntries.Insert(entry.String())
  1135  			}
  1136  
  1137  			// ipvs call
  1138  			serv := &utilipvs.VirtualServer{
  1139  				Address:   externalIP,
  1140  				Port:      uint16(svcInfo.Port()),
  1141  				Protocol:  string(svcInfo.Protocol()),
  1142  				Scheduler: proxier.ipvsScheduler,
  1143  			}
  1144  			if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1145  				serv.Flags |= utilipvs.FlagPersistent
  1146  				serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1147  			}
  1148  			// Set the source hash flag needed for the distribution method "mh"
  1149  			if proxier.ipvsScheduler == "mh" {
  1150  				serv.Flags |= utilipvs.FlagSourceHash
  1151  			}
  1152  			// We must not add the address to the dummy device if it exist on another interface
  1153  			shouldBind := !nodeAddressSet.Has(serv.Address.String())
  1154  			if err := proxier.syncService(svcPortNameString, serv, shouldBind, alreadyBoundAddrs); err == nil {
  1155  				activeIPVSServices.Insert(serv.String())
  1156  				if shouldBind {
  1157  					activeBindAddrs.Insert(serv.Address.String())
  1158  				}
  1159  				if err := proxier.syncEndpoint(svcPortName, svcInfo.ExternalPolicyLocal(), serv); err != nil {
  1160  					proxier.logger.Error(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1161  				}
  1162  			} else {
  1163  				proxier.logger.Error(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1164  			}
  1165  		}
  1166  
  1167  		// Capture load-balancer ingress.
  1168  		for _, ingress := range svcInfo.LoadBalancerVIPs() {
  1169  			// ipset call
  1170  			entry = &utilipset.Entry{
  1171  				IP:       ingress.String(),
  1172  				Port:     svcInfo.Port(),
  1173  				Protocol: protocol,
  1174  				SetType:  utilipset.HashIPPort,
  1175  			}
  1176  			// add service load balancer ingressIP:Port to kubeServiceAccess ip set for the purpose of solving hairpin.
  1177  			// proxier.kubeServiceAccessSet.activeEntries.Insert(entry.String())
  1178  			// If we are proxying globally, we need to masquerade in case we cross nodes.
  1179  			// If we are proxying only locally, we can retain the source IP.
  1180  			if valid := proxier.ipsetList[kubeLoadBalancerSet].validateEntry(entry); !valid {
  1181  				proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerSet].Name)
  1182  				continue
  1183  			}
  1184  			proxier.ipsetList[kubeLoadBalancerSet].activeEntries.Insert(entry.String())
  1185  			// insert loadbalancer entry to lbIngressLocalSet if service externaltrafficpolicy=local
  1186  			if svcInfo.ExternalPolicyLocal() {
  1187  				if valid := proxier.ipsetList[kubeLoadBalancerLocalSet].validateEntry(entry); !valid {
  1188  					proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerLocalSet].Name)
  1189  					continue
  1190  				}
  1191  				proxier.ipsetList[kubeLoadBalancerLocalSet].activeEntries.Insert(entry.String())
  1192  			}
  1193  			if len(svcInfo.LoadBalancerSourceRanges()) != 0 {
  1194  				// The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
  1195  				// This currently works for loadbalancers that preserves source ips.
  1196  				// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
  1197  				if valid := proxier.ipsetList[kubeLoadBalancerFWSet].validateEntry(entry); !valid {
  1198  					proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerFWSet].Name)
  1199  					continue
  1200  				}
  1201  				proxier.ipsetList[kubeLoadBalancerFWSet].activeEntries.Insert(entry.String())
  1202  				allowFromNode := false
  1203  				for _, cidr := range svcInfo.LoadBalancerSourceRanges() {
  1204  					// ipset call
  1205  					entry = &utilipset.Entry{
  1206  						IP:       ingress.String(),
  1207  						Port:     svcInfo.Port(),
  1208  						Protocol: protocol,
  1209  						Net:      cidr.String(),
  1210  						SetType:  utilipset.HashIPPortNet,
  1211  					}
  1212  					// enumerate all white list source cidr
  1213  					if valid := proxier.ipsetList[kubeLoadBalancerSourceCIDRSet].validateEntry(entry); !valid {
  1214  						proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerSourceCIDRSet].Name)
  1215  						continue
  1216  					}
  1217  					proxier.ipsetList[kubeLoadBalancerSourceCIDRSet].activeEntries.Insert(entry.String())
  1218  
  1219  					if cidr.Contains(proxier.nodeIP) {
  1220  						allowFromNode = true
  1221  					}
  1222  				}
  1223  				// generally, ip route rule was added to intercept request to loadbalancer vip from the
  1224  				// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
  1225  				// Need to add the following rule to allow request on host.
  1226  				if allowFromNode {
  1227  					entry = &utilipset.Entry{
  1228  						IP:       ingress.String(),
  1229  						Port:     svcInfo.Port(),
  1230  						Protocol: protocol,
  1231  						IP2:      ingress.String(),
  1232  						SetType:  utilipset.HashIPPortIP,
  1233  					}
  1234  					// enumerate all white list source ip
  1235  					if valid := proxier.ipsetList[kubeLoadBalancerSourceIPSet].validateEntry(entry); !valid {
  1236  						proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerSourceIPSet].Name)
  1237  						continue
  1238  					}
  1239  					proxier.ipsetList[kubeLoadBalancerSourceIPSet].activeEntries.Insert(entry.String())
  1240  				} else {
  1241  					// since nodeIP is not covered in any of SourceRange we need to explicitly block the lbIP access from k8s nodes.
  1242  					proxier.lbNoNodeAccessIPPortProtocolEntries = append(proxier.lbNoNodeAccessIPPortProtocolEntries, entry)
  1243  
  1244  				}
  1245  			}
  1246  			// ipvs call
  1247  			serv := &utilipvs.VirtualServer{
  1248  				Address:   ingress,
  1249  				Port:      uint16(svcInfo.Port()),
  1250  				Protocol:  string(svcInfo.Protocol()),
  1251  				Scheduler: proxier.ipvsScheduler,
  1252  			}
  1253  			if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1254  				serv.Flags |= utilipvs.FlagPersistent
  1255  				serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1256  			}
  1257  			// Set the source hash flag needed for the distribution method "mh"
  1258  			if proxier.ipvsScheduler == "mh" {
  1259  				serv.Flags |= utilipvs.FlagSourceHash
  1260  			}
  1261  			// We must not add the address to the dummy device if it exist on another interface
  1262  			shouldBind := !nodeAddressSet.Has(serv.Address.String())
  1263  			if err := proxier.syncService(svcPortNameString, serv, shouldBind, alreadyBoundAddrs); err == nil {
  1264  				activeIPVSServices.Insert(serv.String())
  1265  				if shouldBind {
  1266  					activeBindAddrs.Insert(serv.Address.String())
  1267  				}
  1268  				if err := proxier.syncEndpoint(svcPortName, svcInfo.ExternalPolicyLocal(), serv); err != nil {
  1269  					proxier.logger.Error(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1270  				}
  1271  			} else {
  1272  				proxier.logger.Error(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1273  			}
  1274  		}
  1275  
  1276  		if svcInfo.NodePort() != 0 {
  1277  			if len(nodeIPs) == 0 {
  1278  				// Skip nodePort configuration since an error occurred when
  1279  				// computing nodeAddresses or nodeIPs.
  1280  				continue
  1281  			}
  1282  
  1283  			// Nodeports need SNAT, unless they're local.
  1284  			// ipset call
  1285  
  1286  			var (
  1287  				nodePortSet *IPSet
  1288  				entries     []*utilipset.Entry
  1289  			)
  1290  
  1291  			switch protocol {
  1292  			case utilipset.ProtocolTCP:
  1293  				nodePortSet = proxier.ipsetList[kubeNodePortSetTCP]
  1294  				entries = []*utilipset.Entry{{
  1295  					// No need to provide ip info
  1296  					Port:     svcInfo.NodePort(),
  1297  					Protocol: protocol,
  1298  					SetType:  utilipset.BitmapPort,
  1299  				}}
  1300  			case utilipset.ProtocolUDP:
  1301  				nodePortSet = proxier.ipsetList[kubeNodePortSetUDP]
  1302  				entries = []*utilipset.Entry{{
  1303  					// No need to provide ip info
  1304  					Port:     svcInfo.NodePort(),
  1305  					Protocol: protocol,
  1306  					SetType:  utilipset.BitmapPort,
  1307  				}}
  1308  			case utilipset.ProtocolSCTP:
  1309  				nodePortSet = proxier.ipsetList[kubeNodePortSetSCTP]
  1310  				// Since hash ip:port is used for SCTP, all the nodeIPs to be used in the SCTP ipset entries.
  1311  				entries = []*utilipset.Entry{}
  1312  				for _, nodeIP := range nodeIPs {
  1313  					entries = append(entries, &utilipset.Entry{
  1314  						IP:       nodeIP.String(),
  1315  						Port:     svcInfo.NodePort(),
  1316  						Protocol: protocol,
  1317  						SetType:  utilipset.HashIPPort,
  1318  					})
  1319  				}
  1320  			default:
  1321  				// It should never hit
  1322  				proxier.logger.Error(nil, "Unsupported protocol type", "protocol", protocol)
  1323  			}
  1324  			if nodePortSet != nil {
  1325  				entryInvalidErr := false
  1326  				for _, entry := range entries {
  1327  					if valid := nodePortSet.validateEntry(entry); !valid {
  1328  						proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", nodePortSet.Name)
  1329  						entryInvalidErr = true
  1330  						break
  1331  					}
  1332  					nodePortSet.activeEntries.Insert(entry.String())
  1333  				}
  1334  				if entryInvalidErr {
  1335  					continue
  1336  				}
  1337  			}
  1338  
  1339  			// Add externaltrafficpolicy=local type nodeport entry
  1340  			if svcInfo.ExternalPolicyLocal() {
  1341  				var nodePortLocalSet *IPSet
  1342  				switch protocol {
  1343  				case utilipset.ProtocolTCP:
  1344  					nodePortLocalSet = proxier.ipsetList[kubeNodePortLocalSetTCP]
  1345  				case utilipset.ProtocolUDP:
  1346  					nodePortLocalSet = proxier.ipsetList[kubeNodePortLocalSetUDP]
  1347  				case utilipset.ProtocolSCTP:
  1348  					nodePortLocalSet = proxier.ipsetList[kubeNodePortLocalSetSCTP]
  1349  				default:
  1350  					// It should never hit
  1351  					proxier.logger.Error(nil, "Unsupported protocol type", "protocol", protocol)
  1352  				}
  1353  				if nodePortLocalSet != nil {
  1354  					entryInvalidErr := false
  1355  					for _, entry := range entries {
  1356  						if valid := nodePortLocalSet.validateEntry(entry); !valid {
  1357  							proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", nodePortLocalSet.Name)
  1358  							entryInvalidErr = true
  1359  							break
  1360  						}
  1361  						nodePortLocalSet.activeEntries.Insert(entry.String())
  1362  					}
  1363  					if entryInvalidErr {
  1364  						continue
  1365  					}
  1366  				}
  1367  			}
  1368  
  1369  			// Build ipvs kernel routes for each node ip address
  1370  			for _, nodeIP := range nodeIPs {
  1371  				// ipvs call
  1372  				serv := &utilipvs.VirtualServer{
  1373  					Address:   nodeIP,
  1374  					Port:      uint16(svcInfo.NodePort()),
  1375  					Protocol:  string(svcInfo.Protocol()),
  1376  					Scheduler: proxier.ipvsScheduler,
  1377  				}
  1378  				if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1379  					serv.Flags |= utilipvs.FlagPersistent
  1380  					serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1381  				}
  1382  				// Set the source hash flag needed for the distribution method "mh"
  1383  				if proxier.ipvsScheduler == "mh" {
  1384  					serv.Flags |= utilipvs.FlagSourceHash
  1385  				}
  1386  				// There is no need to bind Node IP to dummy interface, so set parameter `bindAddr` to `false`.
  1387  				if err := proxier.syncService(svcPortNameString, serv, false, alreadyBoundAddrs); err == nil {
  1388  					activeIPVSServices.Insert(serv.String())
  1389  					if err := proxier.syncEndpoint(svcPortName, svcInfo.ExternalPolicyLocal(), serv); err != nil {
  1390  						proxier.logger.Error(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1391  					}
  1392  				} else {
  1393  					proxier.logger.Error(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1394  				}
  1395  			}
  1396  		}
  1397  
  1398  		if svcInfo.HealthCheckNodePort() != 0 {
  1399  			nodePortSet := proxier.ipsetList[kubeHealthCheckNodePortSet]
  1400  			entry := &utilipset.Entry{
  1401  				// No need to provide ip info
  1402  				Port:     svcInfo.HealthCheckNodePort(),
  1403  				Protocol: "tcp",
  1404  				SetType:  utilipset.BitmapPort,
  1405  			}
  1406  
  1407  			if valid := nodePortSet.validateEntry(entry); !valid {
  1408  				proxier.logger.Error(nil, "Error adding entry to ipset", "entry", entry, "ipset", nodePortSet.Name)
  1409  				continue
  1410  			}
  1411  			nodePortSet.activeEntries.Insert(entry.String())
  1412  		}
  1413  	}
  1414  
  1415  	// Set the KUBE-IPVS-IPS set to the "activeBindAddrs"
  1416  	proxier.ipsetList[kubeIPVSSet].activeEntries = activeBindAddrs
  1417  
  1418  	// sync ipset entries
  1419  	for _, set := range proxier.ipsetList {
  1420  		set.syncIPSetEntries()
  1421  	}
  1422  
  1423  	// Tail call iptables rules for ipset, make sure only call iptables once
  1424  	// in a single loop per ip set.
  1425  	proxier.writeIptablesRules()
  1426  
  1427  	// Sync iptables rules.
  1428  	// NOTE: NoFlushTables is used so we don't flush non-kubernetes chains in the table.
  1429  	proxier.iptablesData.Reset()
  1430  	proxier.iptablesData.Write(proxier.natChains.Bytes())
  1431  	proxier.iptablesData.Write(proxier.natRules.Bytes())
  1432  	proxier.iptablesData.Write(proxier.filterChains.Bytes())
  1433  	proxier.iptablesData.Write(proxier.filterRules.Bytes())
  1434  
  1435  	proxier.logger.V(5).Info(
  1436  		"Restoring iptables", "natChains", proxier.natChains,
  1437  		"natRules", proxier.natRules, "filterChains", proxier.filterChains,
  1438  		"filterRules", proxier.filterRules)
  1439  	err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
  1440  	if err != nil {
  1441  		if pErr, ok := err.(utiliptables.ParseError); ok {
  1442  			lines := utiliptables.ExtractLines(proxier.iptablesData.Bytes(), pErr.Line(), 3)
  1443  			proxier.logger.Error(pErr, "Failed to execute iptables-restore", "rules", lines)
  1444  		} else {
  1445  			proxier.logger.Error(err, "Failed to execute iptables-restore", "rules", proxier.iptablesData.Bytes())
  1446  		}
  1447  		metrics.IPTablesRestoreFailuresTotal.Inc()
  1448  		return
  1449  	}
  1450  	for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes {
  1451  		for _, lastChangeTriggerTime := range lastChangeTriggerTimes {
  1452  			latency := metrics.SinceInSeconds(lastChangeTriggerTime)
  1453  			metrics.NetworkProgrammingLatency.Observe(latency)
  1454  			proxier.logger.V(4).Info("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency)
  1455  		}
  1456  	}
  1457  
  1458  	// Remove superfluous addresses from the dummy device
  1459  	superfluousAddresses := alreadyBoundAddrs.Difference(activeBindAddrs)
  1460  	if superfluousAddresses.Len() > 0 {
  1461  		proxier.logger.V(2).Info("Removing addresses", "interface", defaultDummyDevice, "addresses", superfluousAddresses)
  1462  		for adr := range superfluousAddresses {
  1463  			if err := proxier.netlinkHandle.UnbindAddress(adr, defaultDummyDevice); err != nil {
  1464  				proxier.logger.Error(err, "UnbindAddress", "interface", defaultDummyDevice, "address", adr)
  1465  			}
  1466  		}
  1467  	}
  1468  
  1469  	// currentIPVSServices represent IPVS services listed from the system
  1470  	// (including any we have created in this sync)
  1471  	currentIPVSServices := make(map[string]*utilipvs.VirtualServer)
  1472  	appliedSvcs, err := proxier.ipvs.GetVirtualServers()
  1473  	if err == nil {
  1474  		for _, appliedSvc := range appliedSvcs {
  1475  			currentIPVSServices[appliedSvc.String()] = appliedSvc
  1476  		}
  1477  	} else {
  1478  		proxier.logger.Error(err, "Failed to get ipvs service")
  1479  	}
  1480  	proxier.cleanLegacyService(activeIPVSServices, currentIPVSServices)
  1481  
  1482  	if proxier.healthzServer != nil {
  1483  		proxier.healthzServer.Updated(proxier.ipFamily)
  1484  	}
  1485  	metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
  1486  
  1487  	// Update service healthchecks.  The endpoints list might include services that are
  1488  	// not "OnlyLocal", but the services list will not, and the serviceHealthServer
  1489  	// will just drop those endpoints.
  1490  	if err := proxier.serviceHealthServer.SyncServices(proxier.svcPortMap.HealthCheckNodePorts()); err != nil {
  1491  		proxier.logger.Error(err, "Error syncing healthcheck services")
  1492  	}
  1493  	if err := proxier.serviceHealthServer.SyncEndpoints(proxier.endpointsMap.LocalReadyEndpoints()); err != nil {
  1494  		proxier.logger.Error(err, "Error syncing healthcheck endpoints")
  1495  	}
  1496  
  1497  	metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(proxier.serviceNoLocalEndpointsInternal.Len()))
  1498  	metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(proxier.serviceNoLocalEndpointsExternal.Len()))
  1499  
  1500  	// Finish housekeeping, clear stale conntrack entries for UDP Services
  1501  	conntrack.CleanStaleEntries(proxier.conntrack, proxier.svcPortMap, serviceUpdateResult, endpointUpdateResult)
  1502  }
  1503  
  1504  // writeIptablesRules write all iptables rules to proxier.natRules or proxier.FilterRules that ipvs proxier needed
  1505  // according to proxier.ipsetList information and the ipset match relationship that `ipsetWithIptablesChain` specified.
  1506  // some ipset(kubeClusterIPSet for example) have particular match rules and iptables jump relation should be sync separately.
  1507  func (proxier *Proxier) writeIptablesRules() {
  1508  
  1509  	// Dismiss connects to localhost early in the service chain
  1510  	loAddr := "127.0.0.0/8"
  1511  	if proxier.ipFamily == v1.IPv6Protocol {
  1512  		loAddr = "::1/128"
  1513  	}
  1514  	proxier.natRules.Write("-A", string(kubeServicesChain), "-s", loAddr, "-j", "RETURN")
  1515  
  1516  	// We are creating those slices ones here to avoid memory reallocations
  1517  	// in every loop. Note that reuse the memory, instead of doing:
  1518  	//   slice = <some new slice>
  1519  	// you should always do one of the below:
  1520  	//   slice = slice[:0] // and then append to it
  1521  	//   slice = append(slice[:0], ...)
  1522  	// To avoid growing this slice, we arbitrarily set its size to 64,
  1523  	// there is never more than that many arguments for a single line.
  1524  	// Note that even if we go over 64, it will still be correct - it
  1525  	// is just for efficiency, not correctness.
  1526  	args := make([]string, 64)
  1527  
  1528  	for _, set := range ipsetWithIptablesChain {
  1529  		if _, find := proxier.ipsetList[set.name]; find && !proxier.ipsetList[set.name].isEmpty() {
  1530  			args = append(args[:0], "-A", set.from)
  1531  			if set.protocolMatch != "" {
  1532  				args = append(args, "-p", set.protocolMatch)
  1533  			}
  1534  			args = append(args,
  1535  				"-m", "comment", "--comment", proxier.ipsetList[set.name].getComment(),
  1536  				"-m", "set", "--match-set", proxier.ipsetList[set.name].Name,
  1537  				set.matchType,
  1538  			)
  1539  			if set.table == utiliptables.TableFilter {
  1540  				proxier.filterRules.Write(args, "-j", set.to)
  1541  			} else {
  1542  				proxier.natRules.Write(args, "-j", set.to)
  1543  			}
  1544  		}
  1545  	}
  1546  
  1547  	if !proxier.ipsetList[kubeClusterIPSet].isEmpty() {
  1548  		args = append(args[:0],
  1549  			"-A", string(kubeServicesChain),
  1550  			"-m", "comment", "--comment", proxier.ipsetList[kubeClusterIPSet].getComment(),
  1551  			"-m", "set", "--match-set", proxier.ipsetList[kubeClusterIPSet].Name,
  1552  		)
  1553  		if proxier.masqueradeAll {
  1554  			proxier.natRules.Write(
  1555  				args, "dst,dst",
  1556  				"-j", string(kubeMarkMasqChain))
  1557  		} else if proxier.localDetector.IsImplemented() {
  1558  			// This masquerades off-cluster traffic to a service VIP.  The idea
  1559  			// is that you can establish a static route for your Service range,
  1560  			// routing to any node, and that node will bridge into the Service
  1561  			// for you.  Since that might bounce off-node, we masquerade here.
  1562  			// If/when we support "Local" policy for VIPs, we should update this.
  1563  			proxier.natRules.Write(
  1564  				args, "dst,dst",
  1565  				proxier.localDetector.IfNotLocal(),
  1566  				"-j", string(kubeMarkMasqChain))
  1567  		} else {
  1568  			// Masquerade all OUTPUT traffic coming from a service ip.
  1569  			// The kube dummy interface has all service VIPs assigned which
  1570  			// results in the service VIP being picked as the source IP to reach
  1571  			// a VIP. This leads to a connection from VIP:<random port> to
  1572  			// VIP:<service port>.
  1573  			// Always masquerading OUTPUT (node-originating) traffic with a VIP
  1574  			// source ip and service port destination fixes the outgoing connections.
  1575  			proxier.natRules.Write(
  1576  				args, "src,dst",
  1577  				"-j", string(kubeMarkMasqChain))
  1578  		}
  1579  	}
  1580  
  1581  	// externalIPRules adds iptables rules applies to Service ExternalIPs
  1582  	externalIPRules := func(args []string) {
  1583  		// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
  1584  		// nor from a local process to be forwarded to the service.
  1585  		// This rule roughly translates to "all traffic from off-machine".
  1586  		// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
  1587  		externalTrafficOnlyArgs := append(args,
  1588  			"-m", "physdev", "!", "--physdev-is-in",
  1589  			"-m", "addrtype", "!", "--src-type", "LOCAL")
  1590  		proxier.natRules.Write(externalTrafficOnlyArgs, "-j", "ACCEPT")
  1591  		dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
  1592  		// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
  1593  		// This covers cases like GCE load-balancers which get added to the local routing table.
  1594  		proxier.natRules.Write(dstLocalOnlyArgs, "-j", "ACCEPT")
  1595  	}
  1596  
  1597  	if !proxier.ipsetList[kubeExternalIPSet].isEmpty() {
  1598  		// Build masquerade rules for packets to external IPs.
  1599  		args = append(args[:0],
  1600  			"-A", string(kubeServicesChain),
  1601  			"-m", "comment", "--comment", proxier.ipsetList[kubeExternalIPSet].getComment(),
  1602  			"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPSet].Name,
  1603  			"dst,dst",
  1604  		)
  1605  		proxier.natRules.Write(args, "-j", string(kubeMarkMasqChain))
  1606  		externalIPRules(args)
  1607  	}
  1608  
  1609  	if !proxier.ipsetList[kubeExternalIPLocalSet].isEmpty() {
  1610  		args = append(args[:0],
  1611  			"-A", string(kubeServicesChain),
  1612  			"-m", "comment", "--comment", proxier.ipsetList[kubeExternalIPLocalSet].getComment(),
  1613  			"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPLocalSet].Name,
  1614  			"dst,dst",
  1615  		)
  1616  		externalIPRules(args)
  1617  	}
  1618  
  1619  	// -A KUBE-SERVICES  -m addrtype  --dst-type LOCAL -j KUBE-NODE-PORT
  1620  	args = append(args[:0],
  1621  		"-A", string(kubeServicesChain),
  1622  		"-m", "addrtype", "--dst-type", "LOCAL",
  1623  	)
  1624  	proxier.natRules.Write(args, "-j", string(kubeNodePortChain))
  1625  
  1626  	// mark for masquerading for KUBE-LOAD-BALANCER
  1627  	proxier.natRules.Write(
  1628  		"-A", string(kubeLoadBalancerChain),
  1629  		"-j", string(kubeMarkMasqChain),
  1630  	)
  1631  
  1632  	// drop packets filtered by KUBE-SOURCE-RANGES-FIREWALL
  1633  	proxier.filterRules.Write(
  1634  		"-A", string(kubeSourceRangesFirewallChain),
  1635  		"-j", "DROP",
  1636  	)
  1637  
  1638  	// disable LB access from node
  1639  	// for IPVS src and dst both would be lbIP
  1640  	for _, entry := range proxier.lbNoNodeAccessIPPortProtocolEntries {
  1641  		proxier.filterRules.Write(
  1642  			"-A", string(kubeIPVSOutFilterChain),
  1643  			"-s", entry.IP,
  1644  			"-m", "ipvs", "--vaddr", entry.IP, "--vproto", entry.Protocol, "--vport", strconv.Itoa(entry.Port),
  1645  			"-j", "DROP",
  1646  		)
  1647  	}
  1648  
  1649  	// Accept all traffic with destination of ipvs virtual service, in case other iptables rules
  1650  	// block the traffic, that may result in ipvs rules invalid.
  1651  	// Those rules must be in the end of KUBE-SERVICE chain
  1652  	proxier.acceptIPVSTraffic()
  1653  
  1654  	// If the masqueradeMark has been added then we want to forward that same
  1655  	// traffic, this allows NodePort traffic to be forwarded even if the default
  1656  	// FORWARD policy is not accept.
  1657  	proxier.filterRules.Write(
  1658  		"-A", string(kubeForwardChain),
  1659  		"-m", "comment", "--comment", `"kubernetes forwarding rules"`,
  1660  		"-m", "mark", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
  1661  		"-j", "ACCEPT",
  1662  	)
  1663  
  1664  	// The following rule ensures the traffic after the initial packet accepted
  1665  	// by the "kubernetes forwarding rules" rule above will be accepted.
  1666  	proxier.filterRules.Write(
  1667  		"-A", string(kubeForwardChain),
  1668  		"-m", "comment", "--comment", `"kubernetes forwarding conntrack rule"`,
  1669  		"-m", "conntrack",
  1670  		"--ctstate", "RELATED,ESTABLISHED",
  1671  		"-j", "ACCEPT",
  1672  	)
  1673  
  1674  	// Add rule to accept traffic towards health check node port
  1675  	proxier.filterRules.Write(
  1676  		"-A", string(kubeNodePortChain),
  1677  		"-m", "comment", "--comment", proxier.ipsetList[kubeHealthCheckNodePortSet].getComment(),
  1678  		"-m", "set", "--match-set", proxier.ipsetList[kubeHealthCheckNodePortSet].Name, "dst",
  1679  		"-j", "ACCEPT",
  1680  	)
  1681  
  1682  	// Add rules to the filter/KUBE-IPVS-FILTER chain to prevent access to ports on the host through VIP addresses.
  1683  	// https://github.com/kubernetes/kubernetes/issues/72236
  1684  	proxier.filterRules.Write(
  1685  		"-A", string(kubeIPVSFilterChain),
  1686  		"-m", "set", "--match-set", proxier.ipsetList[kubeLoadBalancerSet].Name, "dst,dst", "-j", "RETURN")
  1687  	proxier.filterRules.Write(
  1688  		"-A", string(kubeIPVSFilterChain),
  1689  		"-m", "set", "--match-set", proxier.ipsetList[kubeClusterIPSet].Name, "dst,dst", "-j", "RETURN")
  1690  	proxier.filterRules.Write(
  1691  		"-A", string(kubeIPVSFilterChain),
  1692  		"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPSet].Name, "dst,dst", "-j", "RETURN")
  1693  	proxier.filterRules.Write(
  1694  		"-A", string(kubeIPVSFilterChain),
  1695  		"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPLocalSet].Name, "dst,dst", "-j", "RETURN")
  1696  	proxier.filterRules.Write(
  1697  		"-A", string(kubeIPVSFilterChain),
  1698  		"-m", "set", "--match-set", proxier.ipsetList[kubeHealthCheckNodePortSet].Name, "dst", "-j", "RETURN")
  1699  	proxier.filterRules.Write(
  1700  		"-A", string(kubeIPVSFilterChain),
  1701  		"-m", "conntrack", "--ctstate", "NEW",
  1702  		"-m", "set", "--match-set", proxier.ipsetList[kubeIPVSSet].Name, "dst", "-j", "REJECT")
  1703  
  1704  	// Install the kubernetes-specific postrouting rules. We use a whole chain for
  1705  	// this so that it is easier to flush and change, for example if the mark
  1706  	// value should ever change.
  1707  
  1708  	proxier.natRules.Write(
  1709  		"-A", string(kubePostroutingChain),
  1710  		"-m", "mark", "!", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
  1711  		"-j", "RETURN",
  1712  	)
  1713  	// Clear the mark to avoid re-masquerading if the packet re-traverses the network stack.
  1714  	proxier.natRules.Write(
  1715  		"-A", string(kubePostroutingChain),
  1716  		// XOR proxier.masqueradeMark to unset it
  1717  		"-j", "MARK", "--xor-mark", proxier.masqueradeMark,
  1718  	)
  1719  	masqRule := []string{
  1720  		"-A", string(kubePostroutingChain),
  1721  		"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
  1722  		"-j", "MASQUERADE",
  1723  	}
  1724  	if proxier.iptables.HasRandomFully() {
  1725  		masqRule = append(masqRule, "--random-fully")
  1726  	}
  1727  	proxier.natRules.Write(masqRule)
  1728  
  1729  	// Install the kubernetes-specific masquerade mark rule. We use a whole chain for
  1730  	// this so that it is easier to flush and change, for example if the mark
  1731  	// value should ever change.
  1732  	proxier.natRules.Write(
  1733  		"-A", string(kubeMarkMasqChain),
  1734  		"-j", "MARK", "--or-mark", proxier.masqueradeMark,
  1735  	)
  1736  
  1737  	// Write the end-of-table markers.
  1738  	proxier.filterRules.Write("COMMIT")
  1739  	proxier.natRules.Write("COMMIT")
  1740  }
  1741  
  1742  func (proxier *Proxier) acceptIPVSTraffic() {
  1743  	sets := []string{kubeClusterIPSet, kubeLoadBalancerSet}
  1744  	for _, set := range sets {
  1745  		var matchType string
  1746  		if !proxier.ipsetList[set].isEmpty() {
  1747  			switch proxier.ipsetList[set].SetType {
  1748  			case utilipset.BitmapPort:
  1749  				matchType = "dst"
  1750  			default:
  1751  				matchType = "dst,dst"
  1752  			}
  1753  			proxier.natRules.Write(
  1754  				"-A", string(kubeServicesChain),
  1755  				"-m", "set", "--match-set", proxier.ipsetList[set].Name, matchType,
  1756  				"-j", "ACCEPT",
  1757  			)
  1758  		}
  1759  	}
  1760  }
  1761  
  1762  // createAndLinkKubeChain create all kube chains that ipvs proxier need and write basic link.
  1763  func (proxier *Proxier) createAndLinkKubeChain() {
  1764  	for _, ch := range iptablesChains {
  1765  		if _, err := proxier.iptables.EnsureChain(ch.table, ch.chain); err != nil {
  1766  			proxier.logger.Error(err, "Failed to ensure chain exists", "table", ch.table, "chain", ch.chain)
  1767  			return
  1768  		}
  1769  		if ch.table == utiliptables.TableNAT {
  1770  			proxier.natChains.Write(utiliptables.MakeChainLine(ch.chain))
  1771  		} else {
  1772  			proxier.filterChains.Write(utiliptables.MakeChainLine(ch.chain))
  1773  		}
  1774  	}
  1775  
  1776  	for _, jc := range iptablesJumpChain {
  1777  		args := []string{"-m", "comment", "--comment", jc.comment, "-j", string(jc.to)}
  1778  		if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, jc.table, jc.from, args...); err != nil {
  1779  			proxier.logger.Error(err, "Failed to ensure chain jumps", "table", jc.table, "srcChain", jc.from, "dstChain", jc.to)
  1780  		}
  1781  	}
  1782  
  1783  }
  1784  
  1785  func (proxier *Proxier) syncService(svcName string, vs *utilipvs.VirtualServer, bindAddr bool, alreadyBoundAddrs sets.Set[string]) error {
  1786  	appliedVirtualServer, _ := proxier.ipvs.GetVirtualServer(vs)
  1787  	if appliedVirtualServer == nil || !appliedVirtualServer.Equal(vs) {
  1788  		if appliedVirtualServer == nil {
  1789  			// IPVS service is not found, create a new service
  1790  			proxier.logger.V(3).Info("Adding new service", "serviceName", svcName, "virtualServer", vs)
  1791  			if err := proxier.ipvs.AddVirtualServer(vs); err != nil {
  1792  				proxier.logger.Error(err, "Failed to add IPVS service", "serviceName", svcName)
  1793  				return err
  1794  			}
  1795  		} else {
  1796  			// IPVS service was changed, update the existing one
  1797  			// During updates, service VIP will not go down
  1798  			proxier.logger.V(3).Info("IPVS service was changed", "serviceName", svcName)
  1799  			if err := proxier.ipvs.UpdateVirtualServer(vs); err != nil {
  1800  				proxier.logger.Error(err, "Failed to update IPVS service")
  1801  				return err
  1802  			}
  1803  		}
  1804  	}
  1805  
  1806  	// bind service address to dummy interface
  1807  	if bindAddr {
  1808  		// always attempt to bind if alreadyBoundAddrs is nil,
  1809  		// otherwise check if it's already binded and return early
  1810  		if alreadyBoundAddrs != nil && alreadyBoundAddrs.Has(vs.Address.String()) {
  1811  			return nil
  1812  		}
  1813  
  1814  		proxier.logger.V(4).Info("Bind address", "address", vs.Address)
  1815  		_, err := proxier.netlinkHandle.EnsureAddressBind(vs.Address.String(), defaultDummyDevice)
  1816  		if err != nil {
  1817  			proxier.logger.Error(err, "Failed to bind service address to dummy device", "serviceName", svcName)
  1818  			return err
  1819  		}
  1820  	}
  1821  
  1822  	return nil
  1823  }
  1824  
  1825  func (proxier *Proxier) syncEndpoint(svcPortName proxy.ServicePortName, onlyNodeLocalEndpoints bool, vs *utilipvs.VirtualServer) error {
  1826  	appliedVirtualServer, err := proxier.ipvs.GetVirtualServer(vs)
  1827  	if err != nil {
  1828  		proxier.logger.Error(err, "Failed to get IPVS service")
  1829  		return err
  1830  	}
  1831  	if appliedVirtualServer == nil {
  1832  		return errors.New("IPVS virtual service does not exist")
  1833  	}
  1834  
  1835  	// curEndpoints represents IPVS destinations listed from current system.
  1836  	curEndpoints := sets.New[string]()
  1837  	curDests, err := proxier.ipvs.GetRealServers(appliedVirtualServer)
  1838  	if err != nil {
  1839  		proxier.logger.Error(err, "Failed to list IPVS destinations")
  1840  		return err
  1841  	}
  1842  	for _, des := range curDests {
  1843  		curEndpoints.Insert(des.String())
  1844  	}
  1845  
  1846  	endpoints := proxier.endpointsMap[svcPortName]
  1847  
  1848  	// Filtering for topology aware endpoints. This function will only
  1849  	// filter endpoints if appropriate feature gates are enabled and the
  1850  	// Service does not have conflicting configuration such as
  1851  	// externalTrafficPolicy=Local.
  1852  	svcInfo, ok := proxier.svcPortMap[svcPortName]
  1853  	if !ok {
  1854  		proxier.logger.Info("Unable to filter endpoints due to missing service info", "servicePortName", svcPortName)
  1855  	} else {
  1856  		clusterEndpoints, localEndpoints, _, hasAnyEndpoints := proxy.CategorizeEndpoints(endpoints, svcInfo, proxier.nodeLabels)
  1857  		if onlyNodeLocalEndpoints {
  1858  			if len(localEndpoints) > 0 {
  1859  				endpoints = localEndpoints
  1860  			} else {
  1861  				// https://github.com/kubernetes/kubernetes/pull/97081
  1862  				// Allow access from local PODs even if no local endpoints exist.
  1863  				// Traffic from an external source will be routed but the reply
  1864  				// will have the POD address and will be discarded.
  1865  				endpoints = clusterEndpoints
  1866  
  1867  				if hasAnyEndpoints && svcInfo.InternalPolicyLocal() {
  1868  					proxier.serviceNoLocalEndpointsInternal.Insert(svcPortName.NamespacedName.String())
  1869  				}
  1870  
  1871  				if hasAnyEndpoints && svcInfo.ExternalPolicyLocal() {
  1872  					proxier.serviceNoLocalEndpointsExternal.Insert(svcPortName.NamespacedName.String())
  1873  				}
  1874  			}
  1875  		} else {
  1876  			endpoints = clusterEndpoints
  1877  		}
  1878  	}
  1879  
  1880  	newEndpoints := sets.New[string]()
  1881  	for _, epInfo := range endpoints {
  1882  		newEndpoints.Insert(epInfo.String())
  1883  	}
  1884  
  1885  	// Create new endpoints
  1886  	for _, ep := range newEndpoints.UnsortedList() {
  1887  		ip, port, err := net.SplitHostPort(ep)
  1888  		if err != nil {
  1889  			proxier.logger.Error(err, "Failed to parse endpoint", "endpoint", ep)
  1890  			continue
  1891  		}
  1892  		portNum, err := strconv.Atoi(port)
  1893  		if err != nil {
  1894  			proxier.logger.Error(err, "Failed to parse endpoint port", "port", port)
  1895  			continue
  1896  		}
  1897  
  1898  		newDest := &utilipvs.RealServer{
  1899  			Address: netutils.ParseIPSloppy(ip),
  1900  			Port:    uint16(portNum),
  1901  			Weight:  1,
  1902  		}
  1903  
  1904  		if curEndpoints.Has(ep) {
  1905  			// if we are syncing for the first time, loop through all current destinations and
  1906  			// reset their weight.
  1907  			if proxier.initialSync {
  1908  				for _, dest := range curDests {
  1909  					if dest.Weight != newDest.Weight {
  1910  						err = proxier.ipvs.UpdateRealServer(appliedVirtualServer, newDest)
  1911  						if err != nil {
  1912  							proxier.logger.Error(err, "Failed to update destination", "newDest", newDest)
  1913  							continue
  1914  						}
  1915  					}
  1916  				}
  1917  			}
  1918  			// check if newEndpoint is in gracefulDelete list, if true, delete this ep immediately
  1919  			uniqueRS := GetUniqueRSName(vs, newDest)
  1920  			if !proxier.gracefuldeleteManager.InTerminationList(uniqueRS) {
  1921  				continue
  1922  			}
  1923  			proxier.logger.V(5).Info("new ep is in graceful delete list", "uniqueRealServer", uniqueRS)
  1924  			err := proxier.gracefuldeleteManager.MoveRSOutofGracefulDeleteList(uniqueRS)
  1925  			if err != nil {
  1926  				proxier.logger.Error(err, "Failed to delete endpoint in gracefulDeleteQueue", "endpoint", ep)
  1927  				continue
  1928  			}
  1929  		}
  1930  		err = proxier.ipvs.AddRealServer(appliedVirtualServer, newDest)
  1931  		if err != nil {
  1932  			proxier.logger.Error(err, "Failed to add destination", "newDest", newDest)
  1933  			continue
  1934  		}
  1935  	}
  1936  
  1937  	// Delete old endpoints
  1938  	for _, ep := range curEndpoints.Difference(newEndpoints).UnsortedList() {
  1939  		// if curEndpoint is in gracefulDelete, skip
  1940  		uniqueRS := vs.String() + "/" + ep
  1941  		if proxier.gracefuldeleteManager.InTerminationList(uniqueRS) {
  1942  			continue
  1943  		}
  1944  		ip, port, err := net.SplitHostPort(ep)
  1945  		if err != nil {
  1946  			proxier.logger.Error(err, "Failed to parse endpoint", "endpoint", ep)
  1947  			continue
  1948  		}
  1949  		portNum, err := strconv.Atoi(port)
  1950  		if err != nil {
  1951  			proxier.logger.Error(err, "Failed to parse endpoint port", "port", port)
  1952  			continue
  1953  		}
  1954  
  1955  		delDest := &utilipvs.RealServer{
  1956  			Address: netutils.ParseIPSloppy(ip),
  1957  			Port:    uint16(portNum),
  1958  		}
  1959  
  1960  		proxier.logger.V(5).Info("Using graceful delete", "uniqueRealServer", uniqueRS)
  1961  		err = proxier.gracefuldeleteManager.GracefulDeleteRS(appliedVirtualServer, delDest)
  1962  		if err != nil {
  1963  			proxier.logger.Error(err, "Failed to delete destination", "uniqueRealServer", uniqueRS)
  1964  			continue
  1965  		}
  1966  	}
  1967  	return nil
  1968  }
  1969  
  1970  func (proxier *Proxier) cleanLegacyService(activeServices sets.Set[string], currentServices map[string]*utilipvs.VirtualServer) {
  1971  	for cs, svc := range currentServices {
  1972  		if proxier.isIPInExcludeCIDRs(svc.Address) {
  1973  			continue
  1974  		}
  1975  		if getIPFamily(svc.Address) != proxier.ipFamily {
  1976  			// Not our family
  1977  			continue
  1978  		}
  1979  		if !activeServices.Has(cs) {
  1980  			proxier.logger.V(4).Info("Delete service", "virtualServer", svc)
  1981  			if err := proxier.ipvs.DeleteVirtualServer(svc); err != nil {
  1982  				proxier.logger.Error(err, "Failed to delete service", "virtualServer", svc)
  1983  			}
  1984  		}
  1985  	}
  1986  }
  1987  
  1988  func (proxier *Proxier) isIPInExcludeCIDRs(ip net.IP) bool {
  1989  	// make sure it does not fall within an excluded CIDR range.
  1990  	for _, excludedCIDR := range proxier.excludeCIDRs {
  1991  		if excludedCIDR.Contains(ip) {
  1992  			return true
  1993  		}
  1994  	}
  1995  	return false
  1996  }
  1997  
  1998  func getIPFamily(ip net.IP) v1.IPFamily {
  1999  	if netutils.IsIPv4(ip) {
  2000  		return v1.IPv4Protocol
  2001  	}
  2002  	return v1.IPv6Protocol
  2003  }
  2004  
// ipvs Proxier falls back on iptables when it needs to do SNAT for egress packets.
// It will only operate on the iptables *nat table.
// Create and link the kube postrouting chain for SNAT packets.
// Chain POSTROUTING (policy ACCEPT)
// target     prot opt source               destination
// KUBE-POSTROUTING  all  --  0.0.0.0/0            0.0.0.0/0            /* kubernetes postrouting rules */
// Maintained by kubelet network sync loop
  2012  
  2013  // *nat
  2014  // :KUBE-POSTROUTING - [0:0]
  2015  // Chain KUBE-POSTROUTING (1 references)
  2016  // target     prot opt source               destination
  2017  // MASQUERADE  all  --  0.0.0.0/0            0.0.0.0/0            /* kubernetes service traffic requiring SNAT */ mark match 0x4000/0x4000
  2018  
  2019  // :KUBE-MARK-MASQ - [0:0]
  2020  // Chain KUBE-MARK-MASQ (0 references)
  2021  // target     prot opt source               destination
  2022  // MARK       all  --  0.0.0.0/0            0.0.0.0/0            MARK or 0x4000