k8s.io/kubernetes@v1.29.3/pkg/proxy/ipvs/proxier.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package ipvs
    18  
    19  import (
    20  	"bytes"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"net"
    25  	"reflect"
    26  	"strconv"
    27  	"strings"
    28  	"sync"
    29  	"sync/atomic"
    30  	"time"
    31  
    32  	"k8s.io/klog/v2"
    33  	utilexec "k8s.io/utils/exec"
    34  	netutils "k8s.io/utils/net"
    35  
    36  	v1 "k8s.io/api/core/v1"
    37  	discovery "k8s.io/api/discovery/v1"
    38  	"k8s.io/apimachinery/pkg/types"
    39  	"k8s.io/apimachinery/pkg/util/sets"
    40  	"k8s.io/apimachinery/pkg/util/version"
    41  	"k8s.io/apimachinery/pkg/util/wait"
    42  	"k8s.io/client-go/tools/events"
    43  	utilsysctl "k8s.io/component-helpers/node/util/sysctl"
    44  	"k8s.io/kubernetes/pkg/proxy"
    45  	"k8s.io/kubernetes/pkg/proxy/conntrack"
    46  	"k8s.io/kubernetes/pkg/proxy/healthcheck"
    47  	utilipset "k8s.io/kubernetes/pkg/proxy/ipvs/ipset"
    48  	utilipvs "k8s.io/kubernetes/pkg/proxy/ipvs/util"
    49  	"k8s.io/kubernetes/pkg/proxy/metaproxier"
    50  	"k8s.io/kubernetes/pkg/proxy/metrics"
    51  	proxyutil "k8s.io/kubernetes/pkg/proxy/util"
    52  	proxyutiliptables "k8s.io/kubernetes/pkg/proxy/util/iptables"
    53  	"k8s.io/kubernetes/pkg/util/async"
    54  	utiliptables "k8s.io/kubernetes/pkg/util/iptables"
    55  	utilkernel "k8s.io/kubernetes/pkg/util/kernel"
    56  )
    57  
const (
	// kubeServicesChain is the services portal chain.
	kubeServicesChain utiliptables.Chain = "KUBE-SERVICES"

	// kubeProxyFirewallChain is the kube-proxy firewall chain.
	kubeProxyFirewallChain utiliptables.Chain = "KUBE-PROXY-FIREWALL"

	// kubeSourceRangesFirewallChain is the firewall subchain for LoadBalancerSourceRanges.
	kubeSourceRangesFirewallChain utiliptables.Chain = "KUBE-SOURCE-RANGES-FIREWALL"

	// kubePostroutingChain is the kubernetes postrouting chain.
	kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"

	// kubeMarkMasqChain is the mark-for-masquerade chain.
	kubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"

	// kubeNodePortChain is the kubernetes node port chain.
	kubeNodePortChain utiliptables.Chain = "KUBE-NODE-PORT"

	// kubeForwardChain is the kubernetes forward chain.
	kubeForwardChain utiliptables.Chain = "KUBE-FORWARD"

	// kubeLoadBalancerChain is the kubernetes chain for services of type LoadBalancer.
	kubeLoadBalancerChain utiliptables.Chain = "KUBE-LOAD-BALANCER"

	// kubeIPVSFilterChain filters external access to the main netns.
	// https://github.com/kubernetes/kubernetes/issues/72236
	kubeIPVSFilterChain utiliptables.Chain = "KUBE-IPVS-FILTER"

	// kubeIPVSOutFilterChain filters access to load balancer services from the node.
	// https://github.com/kubernetes/kubernetes/issues/119656
	kubeIPVSOutFilterChain utiliptables.Chain = "KUBE-IPVS-OUT-FILTER"

	// defaultScheduler is the default ipvs scheduler algorithm - round robin.
	// It is used whenever the caller passes an empty scheduler name.
	defaultScheduler = "rr"

	// defaultDummyDevice is the default dummy interface to which ipvs service addresses are bound.
	defaultDummyDevice = "kube-ipvs0"
)
    97  
// iptablesJumpChain is the table of jump rules that the ipvs proxier installs
// into, or removes from, the built-in iptables chains.
//
// Each entry describes one rule:
//   - `table` is the iptables table that holds the rule.
//   - `from` is the source chain in which the jump rule lives.
//   - `to` is the kube-proxy chain the rule jumps to, i.e. the chain we want to operate on.
//   - `comment` is the text attached to the rule through `-m comment --comment`.
var iptablesJumpChain = []struct {
	table   utiliptables.Table
	from    utiliptables.Chain
	to      utiliptables.Chain
	comment string
}{
	{utiliptables.TableNAT, utiliptables.ChainOutput, kubeServicesChain, "kubernetes service portals"},
	{utiliptables.TableNAT, utiliptables.ChainPrerouting, kubeServicesChain, "kubernetes service portals"},
	{utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, "kubernetes postrouting rules"},
	{utiliptables.TableFilter, utiliptables.ChainForward, kubeForwardChain, "kubernetes forwarding rules"},
	{utiliptables.TableFilter, utiliptables.ChainInput, kubeNodePortChain, "kubernetes health check rules"},
	{utiliptables.TableFilter, utiliptables.ChainInput, kubeProxyFirewallChain, "kube-proxy firewall rules"},
	{utiliptables.TableFilter, utiliptables.ChainForward, kubeProxyFirewallChain, "kube-proxy firewall rules"},
	{utiliptables.TableFilter, utiliptables.ChainInput, kubeIPVSFilterChain, "kubernetes ipvs access filter"},
	{utiliptables.TableFilter, utiliptables.ChainOutput, kubeIPVSOutFilterChain, "kubernetes ipvs access filter"},
}
   117  
// iptablesChains lists the kube-proxy chains, keyed by the iptables table they
// belong to, in both the NAT and the filter table.
// NOTE(review): presumably these are the chains the proxier creates on every
// sync; the consumer of this list is not visible here - confirm.
var iptablesChains = []struct {
	table utiliptables.Table
	chain utiliptables.Chain
}{
	{utiliptables.TableNAT, kubeServicesChain},
	{utiliptables.TableNAT, kubePostroutingChain},
	{utiliptables.TableNAT, kubeNodePortChain},
	{utiliptables.TableNAT, kubeLoadBalancerChain},
	{utiliptables.TableNAT, kubeMarkMasqChain},
	{utiliptables.TableFilter, kubeForwardChain},
	{utiliptables.TableFilter, kubeNodePortChain},
	{utiliptables.TableFilter, kubeProxyFirewallChain},
	{utiliptables.TableFilter, kubeSourceRangesFirewallChain},
	{utiliptables.TableFilter, kubeIPVSFilterChain},
	{utiliptables.TableFilter, kubeIPVSOutFilterChain},
}
   134  
// iptablesCleanupChains lists the kube-proxy chains, keyed by iptables table,
// that are subject to cleanup. It contains the same entries as iptablesChains
// except that the NAT KUBE-MARK-MASQ chain is left out.
// NOTE(review): presumably KUBE-MARK-MASQ is omitted because it is not owned
// exclusively by this proxier - confirm against the cleanup code.
var iptablesCleanupChains = []struct {
	table utiliptables.Table
	chain utiliptables.Chain
}{
	{utiliptables.TableNAT, kubeServicesChain},
	{utiliptables.TableNAT, kubePostroutingChain},
	{utiliptables.TableNAT, kubeNodePortChain},
	{utiliptables.TableNAT, kubeLoadBalancerChain},
	{utiliptables.TableFilter, kubeForwardChain},
	{utiliptables.TableFilter, kubeNodePortChain},
	{utiliptables.TableFilter, kubeProxyFirewallChain},
	{utiliptables.TableFilter, kubeSourceRangesFirewallChain},
	{utiliptables.TableFilter, kubeIPVSFilterChain},
	{utiliptables.TableFilter, kubeIPVSOutFilterChain},
}
   150  
// ipsetInfo describes every ipset the ipvs proxier needs: the set name, its
// ipset type and the comment attached to it. NewProxier walks this list and
// creates one IPSet per entry, stored in Proxier.ipsetList under the set name.
var ipsetInfo = []struct {
	name    string
	setType utilipset.Type
	comment string
}{
	{kubeLoopBackIPSet, utilipset.HashIPPortIP, kubeLoopBackIPSetComment},
	{kubeClusterIPSet, utilipset.HashIPPort, kubeClusterIPSetComment},
	{kubeExternalIPSet, utilipset.HashIPPort, kubeExternalIPSetComment},
	{kubeExternalIPLocalSet, utilipset.HashIPPort, kubeExternalIPLocalSetComment},
	{kubeLoadBalancerSet, utilipset.HashIPPort, kubeLoadBalancerSetComment},
	{kubeLoadBalancerFWSet, utilipset.HashIPPort, kubeLoadBalancerFWSetComment},
	{kubeLoadBalancerLocalSet, utilipset.HashIPPort, kubeLoadBalancerLocalSetComment},
	{kubeLoadBalancerSourceIPSet, utilipset.HashIPPortIP, kubeLoadBalancerSourceIPSetComment},
	{kubeLoadBalancerSourceCIDRSet, utilipset.HashIPPortNet, kubeLoadBalancerSourceCIDRSetComment},
	{kubeNodePortSetTCP, utilipset.BitmapPort, kubeNodePortSetTCPComment},
	{kubeNodePortLocalSetTCP, utilipset.BitmapPort, kubeNodePortLocalSetTCPComment},
	{kubeNodePortSetUDP, utilipset.BitmapPort, kubeNodePortSetUDPComment},
	{kubeNodePortLocalSetUDP, utilipset.BitmapPort, kubeNodePortLocalSetUDPComment},
	{kubeNodePortSetSCTP, utilipset.HashIPPort, kubeNodePortSetSCTPComment},
	{kubeNodePortLocalSetSCTP, utilipset.HashIPPort, kubeNodePortLocalSetSCTPComment},
	{kubeHealthCheckNodePortSet, utilipset.BitmapPort, kubeHealthCheckNodePortSetComment},
	{kubeIPVSSet, utilipset.HashIP, kubeIPVSSetComment},
}
   175  
// ipsetWithIptablesChain is the list of ipsets together with the iptables chain
// the match rule lives in (`from`) and the chain or target it jumps to (`to`):
// `iptables -t <table> -A <from> -m set --match-set <name> <matchType> -j <to>`
// example: iptables -t nat -A KUBE-SERVICES -m set --match-set KUBE-NODE-PORT-TCP dst -j KUBE-NODE-PORT
// `protocolMatch` is empty for entries that are not restricted to one protocol.
// ipsets that need other match rules are created individually.
// Note: the order of entries matters. kubeNodePortLocalSetTCP must come before
// kubeNodePortSetTCP, and the same holds for UDP.
var ipsetWithIptablesChain = []struct {
	name          string
	table         utiliptables.Table
	from          string
	to            string
	matchType     string
	protocolMatch string
}{
	{kubeLoopBackIPSet, utiliptables.TableNAT, string(kubePostroutingChain), "MASQUERADE", "dst,dst,src", ""},
	{kubeLoadBalancerSet, utiliptables.TableNAT, string(kubeServicesChain), string(kubeLoadBalancerChain), "dst,dst", ""},
	{kubeLoadBalancerLocalSet, utiliptables.TableNAT, string(kubeLoadBalancerChain), "RETURN", "dst,dst", ""},
	{kubeNodePortLocalSetTCP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst", utilipset.ProtocolTCP},
	{kubeNodePortSetTCP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst", utilipset.ProtocolTCP},
	{kubeNodePortLocalSetUDP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst", utilipset.ProtocolUDP},
	{kubeNodePortSetUDP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst", utilipset.ProtocolUDP},
	{kubeNodePortLocalSetSCTP, utiliptables.TableNAT, string(kubeNodePortChain), "RETURN", "dst,dst", utilipset.ProtocolSCTP},
	{kubeNodePortSetSCTP, utiliptables.TableNAT, string(kubeNodePortChain), string(kubeMarkMasqChain), "dst,dst", utilipset.ProtocolSCTP},

	{kubeLoadBalancerFWSet, utiliptables.TableFilter, string(kubeProxyFirewallChain), string(kubeSourceRangesFirewallChain), "dst,dst", ""},
	{kubeLoadBalancerSourceCIDRSet, utiliptables.TableFilter, string(kubeSourceRangesFirewallChain), "RETURN", "dst,dst,src", ""},
	{kubeLoadBalancerSourceIPSet, utiliptables.TableFilter, string(kubeSourceRangesFirewallChain), "RETURN", "dst,dst,src", ""},
}
   203  
// In IPVS proxy mode, the following sysctls need to be set. NewProxier applies them:
//   - sysctlVSConnTrack, sysctlExpireNoDestConn, sysctlExpireQuiescentTemplate
//     and sysctlForward are always set to 1.
//   - sysctlConnReuse is set to 0 only on kernels inside the version window
//     checked by NewProxier; otherwise it is left untouched.
//   - sysctlArpIgnore (1) and sysctlArpAnnounce (2) are set only when strictARP is enabled.
const (
	sysctlVSConnTrack             = "net/ipv4/vs/conntrack"
	sysctlConnReuse               = "net/ipv4/vs/conn_reuse_mode"
	sysctlExpireNoDestConn        = "net/ipv4/vs/expire_nodest_conn"
	sysctlExpireQuiescentTemplate = "net/ipv4/vs/expire_quiescent_template"
	sysctlForward                 = "net/ipv4/ip_forward"
	sysctlArpIgnore               = "net/ipv4/conf/all/arp_ignore"
	sysctlArpAnnounce             = "net/ipv4/conf/all/arp_announce"
)
   214  
// Proxier is an ipvs based proxy for connections between a localhost:lport
// and services that provide the actual backends. Instances are created by
// NewProxier, one per IP family.
type Proxier struct {
	// ipFamily is the IP family this proxier operates on.
	ipFamily v1.IPFamily
	// endpointsChanges and serviceChanges contain all changes to endpoints and
	// services that happened since the last syncProxyRules call. For a single
	// object, changes are accumulated, i.e. previous is the state from before
	// all of them, current is the state after applying all of those.
	endpointsChanges *proxy.EndpointsChangeTracker
	serviceChanges   *proxy.ServiceChangeTracker

	mu           sync.Mutex // protects the following fields
	svcPortMap   proxy.ServicePortMap
	endpointsMap proxy.EndpointsMap
	nodeLabels   map[string]string
	// initialSync is a bool indicating if the proxier is syncing for the first time.
	// It is set to true when a new proxier is initialized and then set to false on all
	// future syncs.
	// This lets us run specific logic that's required only during proxy startup.
	// E.g. it enables us to update weights of existing destinations only on startup,
	// saving us the cost of querying and updating real servers during every sync.
	initialSync bool
	// endpointSlicesSynced and servicesSynced are set to true when the
	// corresponding objects are synced after startup. This is used to avoid updating
	// ipvs rules with some partial data after kube-proxy restart.
	endpointSlicesSynced bool
	servicesSynced       bool
	initialized          int32
	syncRunner           *async.BoundedFrequencyRunner // governs calls to syncProxyRules

	// These are effectively const and do not need the mutex to be held.
	syncPeriod    time.Duration
	minSyncPeriod time.Duration
	// Values are CIDRs to exclude when cleaning up IPVS rules.
	excludeCIDRs []*net.IPNet
	// Set to true to set the sysctls arp_ignore and arp_announce.
	// masqueradeMark (further down in this group) is the hexadecimal string form
	// of 1<<masqueradeBit as computed by NewProxier.
	strictARP      bool
	iptables       utiliptables.Interface
	ipvs           utilipvs.Interface
	ipset          utilipset.Interface
	exec           utilexec.Interface
	masqueradeAll  bool
	masqueradeMark string
	localDetector  proxyutiliptables.LocalTrafficDetector
	hostname       string
	nodeIP         net.IP
	recorder       events.EventRecorder

	// Health check servers; both are handed over by NewProxier.
	serviceHealthServer healthcheck.ServiceHealthServer
	healthzServer       *healthcheck.ProxierHealthServer

	// ipvsScheduler is the ipvs scheduler name; NewProxier falls back to
	// defaultScheduler when none is given.
	ipvsScheduler string
	// The following buffers are used to reuse memory and avoid allocations
	// that are significantly impacting performance.
	iptablesData     *bytes.Buffer
	filterChainsData *bytes.Buffer
	natChains        proxyutil.LineBuffer
	filterChains     proxyutil.LineBuffer
	natRules         proxyutil.LineBuffer
	filterRules      proxyutil.LineBuffer
	// Added as a member to the struct to allow injection for testing.
	netlinkHandle NetLinkHandle
	// ipsetList is the list of ipsets that the ipvs proxier uses, keyed by set name.
	ipsetList map[string]*IPSet
	// nodePortAddresses selects the interfaces where nodePort works.
	nodePortAddresses *proxyutil.NodePortAddresses
	// networkInterfacer defines an interface for several net library functions.
	// Injected for test purposes.
	networkInterfacer     proxyutil.NetworkInterfacer
	gracefuldeleteManager *GracefulTerminationManager
	// serviceNoLocalEndpointsInternal represents the set of services that couldn't be applied
	// due to the absence of local endpoints when the internal traffic policy is "Local".
	// It is used to publish the sync_proxy_rules_no_endpoints_total
	// metric with the traffic_policy label set to "internal".
	// A Set is used here since we end up calculating endpoint topology multiple times for the same Service
	// if it has multiple ports but each Service should only be counted once.
	serviceNoLocalEndpointsInternal sets.Set[string]
	// serviceNoLocalEndpointsExternal represents the set of services that couldn't be applied
	// due to the absence of any endpoints when the external traffic policy is "Local".
	// It is used to publish the sync_proxy_rules_no_endpoints_total
	// metric with the traffic_policy label set to "external".
	// A Set is used here since we end up calculating endpoint topology multiple times for the same Service
	// if it has multiple ports but each Service should only be counted once.
	serviceNoLocalEndpointsExternal sets.Set[string]
	// lbNoNodeAccessIPPortProtocolEntries represents the set of load balancer IP + Port + Protocol
	// entries that should not be accessible from K8s nodes.
	// We cannot directly restrict LB access from a node using LoadBalancerSourceRanges; we need to
	// install additional iptables rules.
	// (ref: https://github.com/kubernetes/kubernetes/issues/119656)
	lbNoNodeAccessIPPortProtocolEntries []*utilipset.Entry
}
   306  
// Compile-time assertion that *Proxier implements proxy.Provider.
var _ proxy.Provider = &Proxier{}
   309  
   310  // NewProxier returns a new Proxier given an iptables and ipvs Interface instance.
   311  // Because of the iptables and ipvs logic, it is assumed that there is only a single Proxier active on a machine.
   312  // An error will be returned if it fails to update or acquire the initial lock.
   313  // Once a proxier is created, it will keep iptables and ipvs rules up to date in the background and
   314  // will not terminate if a particular iptables or ipvs call fails.
   315  func NewProxier(ipFamily v1.IPFamily,
   316  	ipt utiliptables.Interface,
   317  	ipvs utilipvs.Interface,
   318  	ipset utilipset.Interface,
   319  	sysctl utilsysctl.Interface,
   320  	exec utilexec.Interface,
   321  	syncPeriod time.Duration,
   322  	minSyncPeriod time.Duration,
   323  	excludeCIDRs []string,
   324  	strictARP bool,
   325  	tcpTimeout time.Duration,
   326  	tcpFinTimeout time.Duration,
   327  	udpTimeout time.Duration,
   328  	masqueradeAll bool,
   329  	masqueradeBit int,
   330  	localDetector proxyutiliptables.LocalTrafficDetector,
   331  	hostname string,
   332  	nodeIP net.IP,
   333  	recorder events.EventRecorder,
   334  	healthzServer *healthcheck.ProxierHealthServer,
   335  	scheduler string,
   336  	nodePortAddressStrings []string,
   337  	initOnly bool,
   338  ) (*Proxier, error) {
   339  	// Set the conntrack sysctl we need for
   340  	if err := proxyutil.EnsureSysctl(sysctl, sysctlVSConnTrack, 1); err != nil {
   341  		return nil, err
   342  	}
   343  
   344  	kernelVersion, err := utilkernel.GetVersion()
   345  	if err != nil {
   346  		return nil, fmt.Errorf("failed to get kernel version: %w", err)
   347  	}
   348  
   349  	if kernelVersion.LessThan(version.MustParseGeneric(utilkernel.IPVSConnReuseModeMinSupportedKernelVersion)) {
   350  		klog.ErrorS(nil, "Can't set sysctl, kernel version doesn't satisfy minimum version requirements", "sysctl", sysctlConnReuse, "minimumKernelVersion", utilkernel.IPVSConnReuseModeMinSupportedKernelVersion)
   351  	} else if kernelVersion.AtLeast(version.MustParseGeneric(utilkernel.IPVSConnReuseModeFixedKernelVersion)) {
   352  		// https://github.com/kubernetes/kubernetes/issues/93297
   353  		klog.V(2).InfoS("Left as-is", "sysctl", sysctlConnReuse)
   354  	} else {
   355  		// Set the connection reuse mode
   356  		if err := proxyutil.EnsureSysctl(sysctl, sysctlConnReuse, 0); err != nil {
   357  			return nil, err
   358  		}
   359  	}
   360  
   361  	// Set the expire_nodest_conn sysctl we need for
   362  	if err := proxyutil.EnsureSysctl(sysctl, sysctlExpireNoDestConn, 1); err != nil {
   363  		return nil, err
   364  	}
   365  
   366  	// Set the expire_quiescent_template sysctl we need for
   367  	if err := proxyutil.EnsureSysctl(sysctl, sysctlExpireQuiescentTemplate, 1); err != nil {
   368  		return nil, err
   369  	}
   370  
   371  	// Set the ip_forward sysctl we need for
   372  	if err := proxyutil.EnsureSysctl(sysctl, sysctlForward, 1); err != nil {
   373  		return nil, err
   374  	}
   375  
   376  	if strictARP {
   377  		// Set the arp_ignore sysctl we need for
   378  		if err := proxyutil.EnsureSysctl(sysctl, sysctlArpIgnore, 1); err != nil {
   379  			return nil, err
   380  		}
   381  
   382  		// Set the arp_announce sysctl we need for
   383  		if err := proxyutil.EnsureSysctl(sysctl, sysctlArpAnnounce, 2); err != nil {
   384  			return nil, err
   385  		}
   386  	}
   387  
   388  	// Configure IPVS timeouts if any one of the timeout parameters have been set.
   389  	// This is the equivalent to running ipvsadm --set, a value of 0 indicates the
   390  	// current system timeout should be preserved
   391  	if tcpTimeout > 0 || tcpFinTimeout > 0 || udpTimeout > 0 {
   392  		if err := ipvs.ConfigureTimeouts(tcpTimeout, tcpFinTimeout, udpTimeout); err != nil {
   393  			klog.ErrorS(err, "Failed to configure IPVS timeouts")
   394  		}
   395  	}
   396  
   397  	if initOnly {
   398  		klog.InfoS("System initialized and --init-only specified")
   399  		return nil, nil
   400  	}
   401  
   402  	// Generate the masquerade mark to use for SNAT rules.
   403  	masqueradeValue := 1 << uint(masqueradeBit)
   404  	masqueradeMark := fmt.Sprintf("%#08x", masqueradeValue)
   405  
   406  	klog.V(2).InfoS("Record nodeIP and family", "nodeIP", nodeIP, "family", ipFamily)
   407  
   408  	if len(scheduler) == 0 {
   409  		klog.InfoS("IPVS scheduler not specified, use rr by default")
   410  		scheduler = defaultScheduler
   411  	}
   412  
   413  	nodePortAddresses := proxyutil.NewNodePortAddresses(ipFamily, nodePortAddressStrings)
   414  
   415  	serviceHealthServer := healthcheck.NewServiceHealthServer(hostname, recorder, nodePortAddresses, healthzServer)
   416  
   417  	// excludeCIDRs has been validated before, here we just parse it to IPNet list
   418  	parsedExcludeCIDRs, _ := netutils.ParseCIDRs(excludeCIDRs)
   419  
   420  	proxier := &Proxier{
   421  		ipFamily:              ipFamily,
   422  		svcPortMap:            make(proxy.ServicePortMap),
   423  		serviceChanges:        proxy.NewServiceChangeTracker(newServiceInfo, ipFamily, recorder, nil),
   424  		endpointsMap:          make(proxy.EndpointsMap),
   425  		endpointsChanges:      proxy.NewEndpointsChangeTracker(hostname, nil, ipFamily, recorder, nil),
   426  		initialSync:           true,
   427  		syncPeriod:            syncPeriod,
   428  		minSyncPeriod:         minSyncPeriod,
   429  		excludeCIDRs:          parsedExcludeCIDRs,
   430  		iptables:              ipt,
   431  		masqueradeAll:         masqueradeAll,
   432  		masqueradeMark:        masqueradeMark,
   433  		exec:                  exec,
   434  		localDetector:         localDetector,
   435  		hostname:              hostname,
   436  		nodeIP:                nodeIP,
   437  		recorder:              recorder,
   438  		serviceHealthServer:   serviceHealthServer,
   439  		healthzServer:         healthzServer,
   440  		ipvs:                  ipvs,
   441  		ipvsScheduler:         scheduler,
   442  		iptablesData:          bytes.NewBuffer(nil),
   443  		filterChainsData:      bytes.NewBuffer(nil),
   444  		natChains:             proxyutil.NewLineBuffer(),
   445  		natRules:              proxyutil.NewLineBuffer(),
   446  		filterChains:          proxyutil.NewLineBuffer(),
   447  		filterRules:           proxyutil.NewLineBuffer(),
   448  		netlinkHandle:         NewNetLinkHandle(ipFamily == v1.IPv6Protocol),
   449  		ipset:                 ipset,
   450  		nodePortAddresses:     nodePortAddresses,
   451  		networkInterfacer:     proxyutil.RealNetwork{},
   452  		gracefuldeleteManager: NewGracefulTerminationManager(ipvs),
   453  	}
   454  	// initialize ipsetList with all sets we needed
   455  	proxier.ipsetList = make(map[string]*IPSet)
   456  	for _, is := range ipsetInfo {
   457  		proxier.ipsetList[is.name] = NewIPSet(ipset, is.name, is.setType, (ipFamily == v1.IPv6Protocol), is.comment)
   458  	}
   459  	burstSyncs := 2
   460  	klog.V(2).InfoS("ipvs sync params", "ipFamily", ipt.Protocol(), "minSyncPeriod", minSyncPeriod, "syncPeriod", syncPeriod, "burstSyncs", burstSyncs)
   461  	proxier.syncRunner = async.NewBoundedFrequencyRunner("sync-runner", proxier.syncProxyRules, minSyncPeriod, syncPeriod, burstSyncs)
   462  	proxier.gracefuldeleteManager.Run()
   463  	return proxier, nil
   464  }
   465  
   466  // NewDualStackProxier returns a new Proxier for dual-stack operation
   467  func NewDualStackProxier(
   468  	ipt [2]utiliptables.Interface,
   469  	ipvs utilipvs.Interface,
   470  	ipset utilipset.Interface,
   471  	sysctl utilsysctl.Interface,
   472  	exec utilexec.Interface,
   473  	syncPeriod time.Duration,
   474  	minSyncPeriod time.Duration,
   475  	excludeCIDRs []string,
   476  	strictARP bool,
   477  	tcpTimeout time.Duration,
   478  	tcpFinTimeout time.Duration,
   479  	udpTimeout time.Duration,
   480  	masqueradeAll bool,
   481  	masqueradeBit int,
   482  	localDetectors [2]proxyutiliptables.LocalTrafficDetector,
   483  	hostname string,
   484  	nodeIPs map[v1.IPFamily]net.IP,
   485  	recorder events.EventRecorder,
   486  	healthzServer *healthcheck.ProxierHealthServer,
   487  	scheduler string,
   488  	nodePortAddresses []string,
   489  	initOnly bool,
   490  ) (proxy.Provider, error) {
   491  
   492  	safeIpset := newSafeIpset(ipset)
   493  
   494  	// Create an ipv4 instance of the single-stack proxier
   495  	ipv4Proxier, err := NewProxier(v1.IPv4Protocol, ipt[0], ipvs, safeIpset, sysctl,
   496  		exec, syncPeriod, minSyncPeriod, filterCIDRs(false, excludeCIDRs), strictARP,
   497  		tcpTimeout, tcpFinTimeout, udpTimeout, masqueradeAll, masqueradeBit,
   498  		localDetectors[0], hostname, nodeIPs[v1.IPv4Protocol], recorder,
   499  		healthzServer, scheduler, nodePortAddresses, initOnly)
   500  	if err != nil {
   501  		return nil, fmt.Errorf("unable to create ipv4 proxier: %v", err)
   502  	}
   503  
   504  	ipv6Proxier, err := NewProxier(v1.IPv6Protocol, ipt[1], ipvs, safeIpset, sysctl,
   505  		exec, syncPeriod, minSyncPeriod, filterCIDRs(true, excludeCIDRs), strictARP,
   506  		tcpTimeout, tcpFinTimeout, udpTimeout, masqueradeAll, masqueradeBit,
   507  		localDetectors[1], hostname, nodeIPs[v1.IPv6Protocol], recorder,
   508  		healthzServer, scheduler, nodePortAddresses, initOnly)
   509  	if err != nil {
   510  		return nil, fmt.Errorf("unable to create ipv6 proxier: %v", err)
   511  	}
   512  	if initOnly {
   513  		return nil, nil
   514  	}
   515  
   516  	// Return a meta-proxier that dispatch calls between the two
   517  	// single-stack proxier instances
   518  	return metaproxier.NewMetaProxier(ipv4Proxier, ipv6Proxier), nil
   519  }
   520  
   521  func filterCIDRs(wantIPv6 bool, cidrs []string) []string {
   522  	var filteredCIDRs []string
   523  	for _, cidr := range cidrs {
   524  		if netutils.IsIPv6CIDRString(cidr) == wantIPv6 {
   525  			filteredCIDRs = append(filteredCIDRs, cidr)
   526  		}
   527  	}
   528  	return filteredCIDRs
   529  }
   530  
// servicePortInfo is the ipvs proxier's per-service-port record. It embeds the
// shared proxy.BaseServicePortInfo and adds a cached string form of the
// service port name, which newServiceInfo fills in.
type servicePortInfo struct {
	*proxy.BaseServicePortInfo
	// The following fields are computed and stored for performance reasons.
	nameString string
}
   537  
   538  // returns a new proxy.ServicePort which abstracts a serviceInfo
   539  func newServiceInfo(port *v1.ServicePort, service *v1.Service, bsvcPortInfo *proxy.BaseServicePortInfo) proxy.ServicePort {
   540  	svcPort := &servicePortInfo{BaseServicePortInfo: bsvcPortInfo}
   541  
   542  	// Store the following for performance reasons.
   543  	svcName := types.NamespacedName{Namespace: service.Namespace, Name: service.Name}
   544  	svcPortName := proxy.ServicePortName{NamespacedName: svcName, Port: port.Name}
   545  	svcPort.nameString = svcPortName.String()
   546  
   547  	return svcPort
   548  }
   549  
   550  // getFirstColumn reads all the content from r into memory and return a
   551  // slice which consists of the first word from each line.
   552  func getFirstColumn(r io.Reader) ([]string, error) {
   553  	b, err := io.ReadAll(r)
   554  	if err != nil {
   555  		return nil, err
   556  	}
   557  
   558  	lines := strings.Split(string(b), "\n")
   559  	words := make([]string, 0, len(lines))
   560  	for i := range lines {
   561  		fields := strings.Fields(lines[i])
   562  		if len(fields) > 0 {
   563  			words = append(words, fields[0])
   564  		}
   565  	}
   566  	return words, nil
   567  }
   568  
   569  // CanUseIPVSProxier checks if we can use the ipvs Proxier.
   570  // The ipset version and the scheduler are checked. If any virtual servers (VS)
   571  // already exist with the configured scheduler, we just return. Otherwise
   572  // we check if a dummy VS can be configured with the configured scheduler.
   573  // Kernel modules will be loaded automatically if necessary.
   574  func CanUseIPVSProxier(ipvs utilipvs.Interface, ipsetver IPSetVersioner, scheduler string) error {
   575  	// BUG: https://github.com/moby/ipvs/issues/27
   576  	// If ipvs is not compiled into the kernel no error is returned and handle==nil.
   577  	// This in turn causes ipvs.GetVirtualServers and ipvs.AddVirtualServer
   578  	// to return ok (err==nil). If/when this bug is fixed parameter "ipvs" will be nil
   579  	// if ipvs is not supported by the kernel. Until then a re-read work-around is used.
   580  	if ipvs == nil {
   581  		return fmt.Errorf("Ipvs not supported by the kernel")
   582  	}
   583  
   584  	// Check ipset version
   585  	versionString, err := ipsetver.GetVersion()
   586  	if err != nil {
   587  		return fmt.Errorf("error getting ipset version, error: %v", err)
   588  	}
   589  	if !checkMinVersion(versionString) {
   590  		return fmt.Errorf("ipset version: %s is less than min required version: %s", versionString, MinIPSetCheckVersion)
   591  	}
   592  
   593  	if scheduler == "" {
   594  		scheduler = defaultScheduler
   595  	}
   596  
   597  	// If any virtual server (VS) using the scheduler exist we skip the checks.
   598  	vservers, err := ipvs.GetVirtualServers()
   599  	if err != nil {
   600  		klog.ErrorS(err, "Can't read the ipvs")
   601  		return err
   602  	}
   603  	klog.V(5).InfoS("Virtual Servers", "count", len(vservers))
   604  	if len(vservers) > 0 {
   605  		// This is most likely a kube-proxy re-start. We know that ipvs works
   606  		// and if any VS uses the configured scheduler, we are done.
   607  		for _, vs := range vservers {
   608  			if vs.Scheduler == scheduler {
   609  				klog.V(5).InfoS("VS exist, Skipping checks")
   610  				return nil
   611  			}
   612  		}
   613  		klog.V(5).InfoS("No existing VS uses the configured scheduler", "scheduler", scheduler)
   614  	}
   615  
	// Try to insert a dummy VS with the passed scheduler.
	// We should use a VIP address that is not used on the node.
	// An address "198.51.100.0" from the TEST-NET-2 range in https://datatracker.ietf.org/doc/html/rfc5737
	// is used. These addresses are reserved for documentation. If the user is using
	// this address for a VS anyway we *will* mess up, but that would be an invalid configuration.
	// If the user has configured the address on an interface on the node (but not a VS)
	// then traffic will temporarily be routed to ipvs during the probe and dropped.
	// The latter case is also an invalid configuration, but the traffic impact will be minor.
	// This should not be a problem if users honor reserved addresses, but cut/paste
	// from documentation is not unheard of, so the restriction to not use the TEST-NET-2 range
	// must be documented.
   627  	vs := utilipvs.VirtualServer{
   628  		Address:   netutils.ParseIPSloppy("198.51.100.0"),
   629  		Protocol:  "TCP",
   630  		Port:      20000,
   631  		Scheduler: scheduler,
   632  	}
   633  	if err := ipvs.AddVirtualServer(&vs); err != nil {
   634  		klog.ErrorS(err, "Could not create dummy VS", "scheduler", scheduler)
   635  		return err
   636  	}
   637  
   638  	// To overcome the BUG described above we check that the VS is *really* added.
   639  	vservers, err = ipvs.GetVirtualServers()
   640  	if err != nil {
   641  		klog.ErrorS(err, "ipvs.GetVirtualServers")
   642  		return err
   643  	}
   644  	klog.V(5).InfoS("Virtual Servers after adding dummy", "count", len(vservers))
   645  	if len(vservers) == 0 {
   646  		klog.InfoS("Dummy VS not created", "scheduler", scheduler)
   647  		return fmt.Errorf("Ipvs not supported") // This is a BUG work-around
   648  	}
   649  	klog.V(5).InfoS("Dummy VS created", "vs", vs)
   650  
   651  	if err := ipvs.DeleteVirtualServer(&vs); err != nil {
   652  		klog.ErrorS(err, "Could not delete dummy VS")
   653  		return err
   654  	}
   655  
   656  	return nil
   657  }
   658  
   659  // CleanupIptablesLeftovers removes all iptables rules and chains created by the Proxier
   660  // It returns true if an error was encountered. Errors are logged.
   661  func cleanupIptablesLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
   662  	// Unlink the iptables chains created by ipvs Proxier
   663  	for _, jc := range iptablesJumpChain {
   664  		args := []string{
   665  			"-m", "comment", "--comment", jc.comment,
   666  			"-j", string(jc.to),
   667  		}
   668  		if err := ipt.DeleteRule(jc.table, jc.from, args...); err != nil {
   669  			if !utiliptables.IsNotFoundError(err) {
   670  				klog.ErrorS(err, "Error removing iptables rules in ipvs proxier")
   671  				encounteredError = true
   672  			}
   673  		}
   674  	}
   675  
   676  	// Flush and remove all of our chains. Flushing all chains before removing them also removes all links between chains first.
   677  	for _, ch := range iptablesCleanupChains {
   678  		if err := ipt.FlushChain(ch.table, ch.chain); err != nil {
   679  			if !utiliptables.IsNotFoundError(err) {
   680  				klog.ErrorS(err, "Error removing iptables rules in ipvs proxier")
   681  				encounteredError = true
   682  			}
   683  		}
   684  	}
   685  
   686  	// Remove all of our chains.
   687  	for _, ch := range iptablesCleanupChains {
   688  		if err := ipt.DeleteChain(ch.table, ch.chain); err != nil {
   689  			if !utiliptables.IsNotFoundError(err) {
   690  				klog.ErrorS(err, "Error removing iptables rules in ipvs proxier")
   691  				encounteredError = true
   692  			}
   693  		}
   694  	}
   695  
   696  	return encounteredError
   697  }
   698  
   699  // CleanupLeftovers clean up all ipvs and iptables rules created by ipvs Proxier.
   700  func CleanupLeftovers(ipvs utilipvs.Interface, ipt utiliptables.Interface, ipset utilipset.Interface) (encounteredError bool) {
   701  	// Clear all ipvs rules
   702  	if ipvs != nil {
   703  		err := ipvs.Flush()
   704  		if err != nil {
   705  			klog.ErrorS(err, "Error flushing ipvs rules")
   706  			encounteredError = true
   707  		}
   708  	}
   709  	// Delete dummy interface created by ipvs Proxier.
   710  	nl := NewNetLinkHandle(false)
   711  	err := nl.DeleteDummyDevice(defaultDummyDevice)
   712  	if err != nil {
   713  		klog.ErrorS(err, "Error deleting dummy device created by ipvs proxier", "device", defaultDummyDevice)
   714  		encounteredError = true
   715  	}
   716  	// Clear iptables created by ipvs Proxier.
   717  	encounteredError = cleanupIptablesLeftovers(ipt) || encounteredError
   718  	// Destroy ip sets created by ipvs Proxier.  We should call it after cleaning up
   719  	// iptables since we can NOT delete ip set which is still referenced by iptables.
   720  	for _, set := range ipsetInfo {
   721  		err = ipset.DestroySet(set.name)
   722  		if err != nil {
   723  			if !utilipset.IsNotFoundError(err) {
   724  				klog.ErrorS(err, "Error removing ipset", "ipset", set.name)
   725  				encounteredError = true
   726  			}
   727  		}
   728  	}
   729  	return encounteredError
   730  }
   731  
   732  // Sync is called to synchronize the proxier state to iptables and ipvs as soon as possible.
   733  func (proxier *Proxier) Sync() {
   734  	if proxier.healthzServer != nil {
   735  		proxier.healthzServer.QueuedUpdate(proxier.ipFamily)
   736  	}
   737  	metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
   738  	proxier.syncRunner.Run()
   739  }
   740  
   741  // SyncLoop runs periodic work.  This is expected to run as a goroutine or as the main loop of the app.  It does not return.
   742  func (proxier *Proxier) SyncLoop() {
   743  	// Update healthz timestamp at beginning in case Sync() never succeeds.
   744  	if proxier.healthzServer != nil {
   745  		proxier.healthzServer.Updated(proxier.ipFamily)
   746  	}
   747  	// synthesize "last change queued" time as the informers are syncing.
   748  	metrics.SyncProxyRulesLastQueuedTimestamp.SetToCurrentTime()
   749  	proxier.syncRunner.Loop(wait.NeverStop)
   750  }
   751  
   752  func (proxier *Proxier) setInitialized(value bool) {
   753  	var initialized int32
   754  	if value {
   755  		initialized = 1
   756  	}
   757  	atomic.StoreInt32(&proxier.initialized, initialized)
   758  }
   759  
   760  func (proxier *Proxier) isInitialized() bool {
   761  	return atomic.LoadInt32(&proxier.initialized) > 0
   762  }
   763  
   764  // OnServiceAdd is called whenever creation of new service object is observed.
   765  func (proxier *Proxier) OnServiceAdd(service *v1.Service) {
   766  	proxier.OnServiceUpdate(nil, service)
   767  }
   768  
   769  // OnServiceUpdate is called whenever modification of an existing service object is observed.
   770  func (proxier *Proxier) OnServiceUpdate(oldService, service *v1.Service) {
   771  	if proxier.serviceChanges.Update(oldService, service) && proxier.isInitialized() {
   772  		proxier.Sync()
   773  	}
   774  }
   775  
   776  // OnServiceDelete is called whenever deletion of an existing service object is observed.
   777  func (proxier *Proxier) OnServiceDelete(service *v1.Service) {
   778  	proxier.OnServiceUpdate(service, nil)
   779  }
   780  
   781  // OnServiceSynced is called once all the initial event handlers were called and the state is fully propagated to local cache.
   782  func (proxier *Proxier) OnServiceSynced() {
   783  	proxier.mu.Lock()
   784  	proxier.servicesSynced = true
   785  	proxier.setInitialized(proxier.endpointSlicesSynced)
   786  	proxier.mu.Unlock()
   787  
   788  	// Sync unconditionally - this is called once per lifetime.
   789  	proxier.syncProxyRules()
   790  }
   791  
   792  // OnEndpointSliceAdd is called whenever creation of a new endpoint slice object
   793  // is observed.
   794  func (proxier *Proxier) OnEndpointSliceAdd(endpointSlice *discovery.EndpointSlice) {
   795  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() {
   796  		proxier.Sync()
   797  	}
   798  }
   799  
   800  // OnEndpointSliceUpdate is called whenever modification of an existing endpoint
   801  // slice object is observed.
   802  func (proxier *Proxier) OnEndpointSliceUpdate(_, endpointSlice *discovery.EndpointSlice) {
   803  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, false) && proxier.isInitialized() {
   804  		proxier.Sync()
   805  	}
   806  }
   807  
   808  // OnEndpointSliceDelete is called whenever deletion of an existing endpoint slice
   809  // object is observed.
   810  func (proxier *Proxier) OnEndpointSliceDelete(endpointSlice *discovery.EndpointSlice) {
   811  	if proxier.endpointsChanges.EndpointSliceUpdate(endpointSlice, true) && proxier.isInitialized() {
   812  		proxier.Sync()
   813  	}
   814  }
   815  
   816  // OnEndpointSlicesSynced is called once all the initial event handlers were
   817  // called and the state is fully propagated to local cache.
   818  func (proxier *Proxier) OnEndpointSlicesSynced() {
   819  	proxier.mu.Lock()
   820  	proxier.endpointSlicesSynced = true
   821  	proxier.setInitialized(proxier.servicesSynced)
   822  	proxier.mu.Unlock()
   823  
   824  	// Sync unconditionally - this is called once per lifetime.
   825  	proxier.syncProxyRules()
   826  }
   827  
   828  // OnNodeAdd is called whenever creation of new node object
   829  // is observed.
   830  func (proxier *Proxier) OnNodeAdd(node *v1.Node) {
   831  	if node.Name != proxier.hostname {
   832  		klog.ErrorS(nil, "Received a watch event for a node that doesn't match the current node", "eventNode", node.Name, "currentNode", proxier.hostname)
   833  		return
   834  	}
   835  
   836  	if reflect.DeepEqual(proxier.nodeLabels, node.Labels) {
   837  		return
   838  	}
   839  
   840  	proxier.mu.Lock()
   841  	proxier.nodeLabels = map[string]string{}
   842  	for k, v := range node.Labels {
   843  		proxier.nodeLabels[k] = v
   844  	}
   845  	proxier.mu.Unlock()
   846  	klog.V(4).InfoS("Updated proxier node labels", "labels", node.Labels)
   847  
   848  	proxier.Sync()
   849  }
   850  
   851  // OnNodeUpdate is called whenever modification of an existing
   852  // node object is observed.
   853  func (proxier *Proxier) OnNodeUpdate(oldNode, node *v1.Node) {
   854  	if node.Name != proxier.hostname {
   855  		klog.ErrorS(nil, "Received a watch event for a node that doesn't match the current node", "eventNode", node.Name, "currentNode", proxier.hostname)
   856  		return
   857  	}
   858  
   859  	if reflect.DeepEqual(proxier.nodeLabels, node.Labels) {
   860  		return
   861  	}
   862  
   863  	proxier.mu.Lock()
   864  	proxier.nodeLabels = map[string]string{}
   865  	for k, v := range node.Labels {
   866  		proxier.nodeLabels[k] = v
   867  	}
   868  	proxier.mu.Unlock()
   869  	klog.V(4).InfoS("Updated proxier node labels", "labels", node.Labels)
   870  
   871  	proxier.Sync()
   872  }
   873  
   874  // OnNodeDelete is called whenever deletion of an existing node
   875  // object is observed.
   876  func (proxier *Proxier) OnNodeDelete(node *v1.Node) {
   877  	if node.Name != proxier.hostname {
   878  		klog.ErrorS(nil, "Received a watch event for a node that doesn't match the current node", "eventNode", node.Name, "currentNode", proxier.hostname)
   879  		return
   880  	}
   881  
   882  	proxier.mu.Lock()
   883  	proxier.nodeLabels = nil
   884  	proxier.mu.Unlock()
   885  
   886  	proxier.Sync()
   887  }
   888  
   889  // OnNodeSynced is called once all the initial event handlers were
   890  // called and the state is fully propagated to local cache.
   891  func (proxier *Proxier) OnNodeSynced() {
   892  }
   893  
   894  // This is where all of the ipvs calls happen.
   895  func (proxier *Proxier) syncProxyRules() {
   896  	proxier.mu.Lock()
   897  	defer proxier.mu.Unlock()
   898  
   899  	// don't sync rules till we've received services and endpoints
   900  	if !proxier.isInitialized() {
   901  		klog.V(2).InfoS("Not syncing ipvs rules until Services and Endpoints have been received from master")
   902  		return
   903  	}
   904  
   905  	// its safe to set initialSync to false as it acts as a flag for startup actions
   906  	// and the mutex is held.
   907  	defer func() {
   908  		proxier.initialSync = false
   909  	}()
   910  
   911  	// Keep track of how long syncs take.
   912  	start := time.Now()
   913  	defer func() {
   914  		metrics.SyncProxyRulesLatency.Observe(metrics.SinceInSeconds(start))
   915  		klog.V(4).InfoS("syncProxyRules complete", "elapsed", time.Since(start))
   916  	}()
   917  
   918  	// We assume that if this was called, we really want to sync them,
   919  	// even if nothing changed in the meantime. In other words, callers are
   920  	// responsible for detecting no-op changes and not calling this function.
   921  	serviceUpdateResult := proxier.svcPortMap.Update(proxier.serviceChanges)
   922  	endpointUpdateResult := proxier.endpointsMap.Update(proxier.endpointsChanges)
   923  
   924  	klog.V(3).InfoS("Syncing ipvs proxier rules")
   925  
   926  	proxier.serviceNoLocalEndpointsInternal = sets.New[string]()
   927  	proxier.serviceNoLocalEndpointsExternal = sets.New[string]()
   928  
   929  	proxier.lbNoNodeAccessIPPortProtocolEntries = make([]*utilipset.Entry, 0)
   930  
   931  	// Begin install iptables
   932  
   933  	// Reset all buffers used later.
   934  	// This is to avoid memory reallocations and thus improve performance.
   935  	proxier.natChains.Reset()
   936  	proxier.natRules.Reset()
   937  	proxier.filterChains.Reset()
   938  	proxier.filterRules.Reset()
   939  
   940  	// Write table headers.
   941  	proxier.filterChains.Write("*filter")
   942  	proxier.natChains.Write("*nat")
   943  
   944  	proxier.createAndLinkKubeChain()
   945  
   946  	// make sure dummy interface exists in the system where ipvs Proxier will bind service address on it
   947  	_, err := proxier.netlinkHandle.EnsureDummyDevice(defaultDummyDevice)
   948  	if err != nil {
   949  		klog.ErrorS(err, "Failed to create dummy interface", "interface", defaultDummyDevice)
   950  		return
   951  	}
   952  
   953  	// make sure ip sets exists in the system.
   954  	for _, set := range proxier.ipsetList {
   955  		if err := ensureIPSet(set); err != nil {
   956  			return
   957  		}
   958  		set.resetEntries()
   959  	}
   960  
   961  	// activeIPVSServices represents IPVS service successfully created in this round of sync
   962  	activeIPVSServices := sets.New[string]()
   963  	// activeBindAddrs Represents addresses we want on the defaultDummyDevice after this round of sync
   964  	activeBindAddrs := sets.New[string]()
   965  	// alreadyBoundAddrs Represents addresses currently assigned to the dummy interface
   966  	alreadyBoundAddrs, err := proxier.netlinkHandle.GetLocalAddresses(defaultDummyDevice)
   967  	if err != nil {
   968  		klog.ErrorS(err, "Error listing addresses binded to dummy interface")
   969  	}
   970  	// nodeAddressSet All addresses *except* those on the dummy interface
   971  	nodeAddressSet, err := proxier.netlinkHandle.GetAllLocalAddressesExcept(defaultDummyDevice)
   972  	if err != nil {
   973  		klog.ErrorS(err, "Error listing node addresses")
   974  	}
   975  
   976  	hasNodePort := false
   977  	for _, svc := range proxier.svcPortMap {
   978  		svcInfo, ok := svc.(*servicePortInfo)
   979  		if ok && svcInfo.NodePort() != 0 {
   980  			hasNodePort = true
   981  			break
   982  		}
   983  	}
   984  
   985  	// List of node IP addresses to be used as IPVS services if nodePort is set. This
   986  	// can be reused for all nodePort services.
   987  	var nodeIPs []net.IP
   988  	if hasNodePort {
   989  		if proxier.nodePortAddresses.MatchAll() {
   990  			for _, ipStr := range nodeAddressSet.UnsortedList() {
   991  				nodeIPs = append(nodeIPs, netutils.ParseIPSloppy(ipStr))
   992  			}
   993  		} else {
   994  			allNodeIPs, err := proxier.nodePortAddresses.GetNodeIPs(proxier.networkInterfacer)
   995  			if err != nil {
   996  				klog.ErrorS(err, "Failed to get node IP address matching nodeport cidr")
   997  			} else {
   998  				for _, ip := range allNodeIPs {
   999  					if !ip.IsLoopback() {
  1000  						nodeIPs = append(nodeIPs, ip)
  1001  					}
  1002  				}
  1003  			}
  1004  		}
  1005  	}
  1006  
  1007  	// Build IPVS rules for each service.
  1008  	for svcPortName, svcPort := range proxier.svcPortMap {
  1009  		svcInfo, ok := svcPort.(*servicePortInfo)
  1010  		if !ok {
  1011  			klog.ErrorS(nil, "Failed to cast serviceInfo", "servicePortName", svcPortName)
  1012  			continue
  1013  		}
  1014  
  1015  		protocol := strings.ToLower(string(svcInfo.Protocol()))
  1016  		// Precompute svcNameString; with many services the many calls
  1017  		// to ServicePortName.String() show up in CPU profiles.
  1018  		svcPortNameString := svcPortName.String()
  1019  
  1020  		// Handle traffic that loops back to the originator with SNAT.
  1021  		for _, e := range proxier.endpointsMap[svcPortName] {
  1022  			ep, ok := e.(*proxy.BaseEndpointInfo)
  1023  			if !ok {
  1024  				klog.ErrorS(nil, "Failed to cast BaseEndpointInfo", "endpoint", e)
  1025  				continue
  1026  			}
  1027  			if !ep.IsLocal() {
  1028  				continue
  1029  			}
  1030  			epIP := ep.IP()
  1031  			epPort := ep.Port()
  1032  			// Error parsing this endpoint has been logged. Skip to next endpoint.
  1033  			if epIP == "" || epPort == 0 {
  1034  				continue
  1035  			}
  1036  			entry := &utilipset.Entry{
  1037  				IP:       epIP,
  1038  				Port:     epPort,
  1039  				Protocol: protocol,
  1040  				IP2:      epIP,
  1041  				SetType:  utilipset.HashIPPortIP,
  1042  			}
  1043  			if valid := proxier.ipsetList[kubeLoopBackIPSet].validateEntry(entry); !valid {
  1044  				klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoopBackIPSet].Name)
  1045  				continue
  1046  			}
  1047  			proxier.ipsetList[kubeLoopBackIPSet].activeEntries.Insert(entry.String())
  1048  		}
  1049  
  1050  		// Capture the clusterIP.
  1051  		// ipset call
  1052  		entry := &utilipset.Entry{
  1053  			IP:       svcInfo.ClusterIP().String(),
  1054  			Port:     svcInfo.Port(),
  1055  			Protocol: protocol,
  1056  			SetType:  utilipset.HashIPPort,
  1057  		}
  1058  		// add service Cluster IP:Port to kubeServiceAccess ip set for the purpose of solving hairpin.
  1059  		// proxier.kubeServiceAccessSet.activeEntries.Insert(entry.String())
  1060  		if valid := proxier.ipsetList[kubeClusterIPSet].validateEntry(entry); !valid {
  1061  			klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeClusterIPSet].Name)
  1062  			continue
  1063  		}
  1064  		proxier.ipsetList[kubeClusterIPSet].activeEntries.Insert(entry.String())
  1065  		// ipvs call
  1066  		serv := &utilipvs.VirtualServer{
  1067  			Address:   svcInfo.ClusterIP(),
  1068  			Port:      uint16(svcInfo.Port()),
  1069  			Protocol:  string(svcInfo.Protocol()),
  1070  			Scheduler: proxier.ipvsScheduler,
  1071  		}
  1072  		// Set session affinity flag and timeout for IPVS service
  1073  		if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1074  			serv.Flags |= utilipvs.FlagPersistent
  1075  			serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1076  		}
  1077  		// Set the source hash flag needed for the distribution method "mh"
  1078  		if proxier.ipvsScheduler == "mh" {
  1079  			serv.Flags |= utilipvs.FlagSourceHash
  1080  		}
  1081  		// We need to bind ClusterIP to dummy interface, so set `bindAddr` parameter to `true` in syncService()
  1082  		if err := proxier.syncService(svcPortNameString, serv, true, alreadyBoundAddrs); err == nil {
  1083  			activeIPVSServices.Insert(serv.String())
  1084  			activeBindAddrs.Insert(serv.Address.String())
  1085  			// ExternalTrafficPolicy only works for NodePort and external LB traffic, does not affect ClusterIP
  1086  			// So we still need clusterIP rules in onlyNodeLocalEndpoints mode.
  1087  			internalNodeLocal := false
  1088  			if svcInfo.InternalPolicyLocal() {
  1089  				internalNodeLocal = true
  1090  			}
  1091  			if err := proxier.syncEndpoint(svcPortName, internalNodeLocal, serv); err != nil {
  1092  				klog.ErrorS(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1093  			}
  1094  		} else {
  1095  			klog.ErrorS(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1096  		}
  1097  
  1098  		// Capture externalIPs.
  1099  		for _, externalIP := range svcInfo.ExternalIPStrings() {
  1100  			// ipset call
  1101  			entry := &utilipset.Entry{
  1102  				IP:       externalIP,
  1103  				Port:     svcInfo.Port(),
  1104  				Protocol: protocol,
  1105  				SetType:  utilipset.HashIPPort,
  1106  			}
  1107  
  1108  			if svcInfo.ExternalPolicyLocal() {
  1109  				if valid := proxier.ipsetList[kubeExternalIPLocalSet].validateEntry(entry); !valid {
  1110  					klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeExternalIPLocalSet].Name)
  1111  					continue
  1112  				}
  1113  				proxier.ipsetList[kubeExternalIPLocalSet].activeEntries.Insert(entry.String())
  1114  			} else {
  1115  				// We have to SNAT packets to external IPs.
  1116  				if valid := proxier.ipsetList[kubeExternalIPSet].validateEntry(entry); !valid {
  1117  					klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeExternalIPSet].Name)
  1118  					continue
  1119  				}
  1120  				proxier.ipsetList[kubeExternalIPSet].activeEntries.Insert(entry.String())
  1121  			}
  1122  
  1123  			// ipvs call
  1124  			serv := &utilipvs.VirtualServer{
  1125  				Address:   netutils.ParseIPSloppy(externalIP),
  1126  				Port:      uint16(svcInfo.Port()),
  1127  				Protocol:  string(svcInfo.Protocol()),
  1128  				Scheduler: proxier.ipvsScheduler,
  1129  			}
  1130  			if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1131  				serv.Flags |= utilipvs.FlagPersistent
  1132  				serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1133  			}
  1134  			// Set the source hash flag needed for the distribution method "mh"
  1135  			if proxier.ipvsScheduler == "mh" {
  1136  				serv.Flags |= utilipvs.FlagSourceHash
  1137  			}
  1138  			// We must not add the address to the dummy device if it exist on another interface
  1139  			shouldBind := !nodeAddressSet.Has(serv.Address.String())
  1140  			if err := proxier.syncService(svcPortNameString, serv, shouldBind, alreadyBoundAddrs); err == nil {
  1141  				activeIPVSServices.Insert(serv.String())
  1142  				if shouldBind {
  1143  					activeBindAddrs.Insert(serv.Address.String())
  1144  				}
  1145  				if err := proxier.syncEndpoint(svcPortName, svcInfo.ExternalPolicyLocal(), serv); err != nil {
  1146  					klog.ErrorS(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1147  				}
  1148  			} else {
  1149  				klog.ErrorS(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1150  			}
  1151  		}
  1152  
  1153  		// Capture load-balancer ingress.
  1154  		for _, ingress := range svcInfo.LoadBalancerVIPStrings() {
  1155  			// ipset call
  1156  			entry = &utilipset.Entry{
  1157  				IP:       ingress,
  1158  				Port:     svcInfo.Port(),
  1159  				Protocol: protocol,
  1160  				SetType:  utilipset.HashIPPort,
  1161  			}
  1162  			// add service load balancer ingressIP:Port to kubeServiceAccess ip set for the purpose of solving hairpin.
  1163  			// proxier.kubeServiceAccessSet.activeEntries.Insert(entry.String())
  1164  			// If we are proxying globally, we need to masquerade in case we cross nodes.
  1165  			// If we are proxying only locally, we can retain the source IP.
  1166  			if valid := proxier.ipsetList[kubeLoadBalancerSet].validateEntry(entry); !valid {
  1167  				klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerSet].Name)
  1168  				continue
  1169  			}
  1170  			proxier.ipsetList[kubeLoadBalancerSet].activeEntries.Insert(entry.String())
  1171  			// insert loadbalancer entry to lbIngressLocalSet if service externaltrafficpolicy=local
  1172  			if svcInfo.ExternalPolicyLocal() {
  1173  				if valid := proxier.ipsetList[kubeLoadBalancerLocalSet].validateEntry(entry); !valid {
  1174  					klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerLocalSet].Name)
  1175  					continue
  1176  				}
  1177  				proxier.ipsetList[kubeLoadBalancerLocalSet].activeEntries.Insert(entry.String())
  1178  			}
  1179  			if len(svcInfo.LoadBalancerSourceRanges()) != 0 {
  1180  				// The service firewall rules are created based on ServiceSpec.loadBalancerSourceRanges field.
  1181  				// This currently works for loadbalancers that preserves source ips.
  1182  				// For loadbalancers which direct traffic to service NodePort, the firewall rules will not apply.
  1183  				if valid := proxier.ipsetList[kubeLoadBalancerFWSet].validateEntry(entry); !valid {
  1184  					klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerFWSet].Name)
  1185  					continue
  1186  				}
  1187  				proxier.ipsetList[kubeLoadBalancerFWSet].activeEntries.Insert(entry.String())
  1188  				allowFromNode := false
  1189  				for _, src := range svcInfo.LoadBalancerSourceRanges() {
  1190  					// ipset call
  1191  					entry = &utilipset.Entry{
  1192  						IP:       ingress,
  1193  						Port:     svcInfo.Port(),
  1194  						Protocol: protocol,
  1195  						Net:      src,
  1196  						SetType:  utilipset.HashIPPortNet,
  1197  					}
  1198  					// enumerate all white list source cidr
  1199  					if valid := proxier.ipsetList[kubeLoadBalancerSourceCIDRSet].validateEntry(entry); !valid {
  1200  						klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerSourceCIDRSet].Name)
  1201  						continue
  1202  					}
  1203  					proxier.ipsetList[kubeLoadBalancerSourceCIDRSet].activeEntries.Insert(entry.String())
  1204  
  1205  					// ignore error because it has been validated
  1206  					_, cidr, _ := netutils.ParseCIDRSloppy(src)
  1207  					if cidr.Contains(proxier.nodeIP) {
  1208  						allowFromNode = true
  1209  					}
  1210  				}
  1211  				// generally, ip route rule was added to intercept request to loadbalancer vip from the
  1212  				// loadbalancer's backend hosts. In this case, request will not hit the loadbalancer but loop back directly.
  1213  				// Need to add the following rule to allow request on host.
  1214  				if allowFromNode {
  1215  					entry = &utilipset.Entry{
  1216  						IP:       ingress,
  1217  						Port:     svcInfo.Port(),
  1218  						Protocol: protocol,
  1219  						IP2:      ingress,
  1220  						SetType:  utilipset.HashIPPortIP,
  1221  					}
  1222  					// enumerate all white list source ip
  1223  					if valid := proxier.ipsetList[kubeLoadBalancerSourceIPSet].validateEntry(entry); !valid {
  1224  						klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", proxier.ipsetList[kubeLoadBalancerSourceIPSet].Name)
  1225  						continue
  1226  					}
  1227  					proxier.ipsetList[kubeLoadBalancerSourceIPSet].activeEntries.Insert(entry.String())
  1228  				} else {
  1229  					// since nodeIP is not covered in any of SourceRange we need to explicitly block the lbIP access from k8s nodes.
  1230  					proxier.lbNoNodeAccessIPPortProtocolEntries = append(proxier.lbNoNodeAccessIPPortProtocolEntries, entry)
  1231  
  1232  				}
  1233  			}
  1234  			// ipvs call
  1235  			serv := &utilipvs.VirtualServer{
  1236  				Address:   netutils.ParseIPSloppy(ingress),
  1237  				Port:      uint16(svcInfo.Port()),
  1238  				Protocol:  string(svcInfo.Protocol()),
  1239  				Scheduler: proxier.ipvsScheduler,
  1240  			}
  1241  			if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1242  				serv.Flags |= utilipvs.FlagPersistent
  1243  				serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1244  			}
  1245  			// Set the source hash flag needed for the distribution method "mh"
  1246  			if proxier.ipvsScheduler == "mh" {
  1247  				serv.Flags |= utilipvs.FlagSourceHash
  1248  			}
  1249  			// We must not add the address to the dummy device if it exist on another interface
  1250  			shouldBind := !nodeAddressSet.Has(serv.Address.String())
  1251  			if err := proxier.syncService(svcPortNameString, serv, shouldBind, alreadyBoundAddrs); err == nil {
  1252  				activeIPVSServices.Insert(serv.String())
  1253  				if shouldBind {
  1254  					activeBindAddrs.Insert(serv.Address.String())
  1255  				}
  1256  				if err := proxier.syncEndpoint(svcPortName, svcInfo.ExternalPolicyLocal(), serv); err != nil {
  1257  					klog.ErrorS(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1258  				}
  1259  			} else {
  1260  				klog.ErrorS(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1261  			}
  1262  		}
  1263  
  1264  		if svcInfo.NodePort() != 0 {
  1265  			if len(nodeIPs) == 0 {
  1266  				// Skip nodePort configuration since an error occurred when
  1267  				// computing nodeAddresses or nodeIPs.
  1268  				continue
  1269  			}
  1270  
  1271  			// Nodeports need SNAT, unless they're local.
  1272  			// ipset call
  1273  
  1274  			var (
  1275  				nodePortSet *IPSet
  1276  				entries     []*utilipset.Entry
  1277  			)
  1278  
  1279  			switch protocol {
  1280  			case utilipset.ProtocolTCP:
  1281  				nodePortSet = proxier.ipsetList[kubeNodePortSetTCP]
  1282  				entries = []*utilipset.Entry{{
  1283  					// No need to provide ip info
  1284  					Port:     svcInfo.NodePort(),
  1285  					Protocol: protocol,
  1286  					SetType:  utilipset.BitmapPort,
  1287  				}}
  1288  			case utilipset.ProtocolUDP:
  1289  				nodePortSet = proxier.ipsetList[kubeNodePortSetUDP]
  1290  				entries = []*utilipset.Entry{{
  1291  					// No need to provide ip info
  1292  					Port:     svcInfo.NodePort(),
  1293  					Protocol: protocol,
  1294  					SetType:  utilipset.BitmapPort,
  1295  				}}
  1296  			case utilipset.ProtocolSCTP:
  1297  				nodePortSet = proxier.ipsetList[kubeNodePortSetSCTP]
  1298  				// Since hash ip:port is used for SCTP, all the nodeIPs to be used in the SCTP ipset entries.
  1299  				entries = []*utilipset.Entry{}
  1300  				for _, nodeIP := range nodeIPs {
  1301  					entries = append(entries, &utilipset.Entry{
  1302  						IP:       nodeIP.String(),
  1303  						Port:     svcInfo.NodePort(),
  1304  						Protocol: protocol,
  1305  						SetType:  utilipset.HashIPPort,
  1306  					})
  1307  				}
  1308  			default:
  1309  				// It should never hit
  1310  				klog.ErrorS(nil, "Unsupported protocol type", "protocol", protocol)
  1311  			}
  1312  			if nodePortSet != nil {
  1313  				entryInvalidErr := false
  1314  				for _, entry := range entries {
  1315  					if valid := nodePortSet.validateEntry(entry); !valid {
  1316  						klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", nodePortSet.Name)
  1317  						entryInvalidErr = true
  1318  						break
  1319  					}
  1320  					nodePortSet.activeEntries.Insert(entry.String())
  1321  				}
  1322  				if entryInvalidErr {
  1323  					continue
  1324  				}
  1325  			}
  1326  
  1327  			// Add externaltrafficpolicy=local type nodeport entry
  1328  			if svcInfo.ExternalPolicyLocal() {
  1329  				var nodePortLocalSet *IPSet
  1330  				switch protocol {
  1331  				case utilipset.ProtocolTCP:
  1332  					nodePortLocalSet = proxier.ipsetList[kubeNodePortLocalSetTCP]
  1333  				case utilipset.ProtocolUDP:
  1334  					nodePortLocalSet = proxier.ipsetList[kubeNodePortLocalSetUDP]
  1335  				case utilipset.ProtocolSCTP:
  1336  					nodePortLocalSet = proxier.ipsetList[kubeNodePortLocalSetSCTP]
  1337  				default:
  1338  					// It should never hit
  1339  					klog.ErrorS(nil, "Unsupported protocol type", "protocol", protocol)
  1340  				}
  1341  				if nodePortLocalSet != nil {
  1342  					entryInvalidErr := false
  1343  					for _, entry := range entries {
  1344  						if valid := nodePortLocalSet.validateEntry(entry); !valid {
  1345  							klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", nodePortLocalSet.Name)
  1346  							entryInvalidErr = true
  1347  							break
  1348  						}
  1349  						nodePortLocalSet.activeEntries.Insert(entry.String())
  1350  					}
  1351  					if entryInvalidErr {
  1352  						continue
  1353  					}
  1354  				}
  1355  			}
  1356  
  1357  			// Build ipvs kernel routes for each node ip address
  1358  			for _, nodeIP := range nodeIPs {
  1359  				// ipvs call
  1360  				serv := &utilipvs.VirtualServer{
  1361  					Address:   nodeIP,
  1362  					Port:      uint16(svcInfo.NodePort()),
  1363  					Protocol:  string(svcInfo.Protocol()),
  1364  					Scheduler: proxier.ipvsScheduler,
  1365  				}
  1366  				if svcInfo.SessionAffinityType() == v1.ServiceAffinityClientIP {
  1367  					serv.Flags |= utilipvs.FlagPersistent
  1368  					serv.Timeout = uint32(svcInfo.StickyMaxAgeSeconds())
  1369  				}
  1370  				// Set the source hash flag needed for the distribution method "mh"
  1371  				if proxier.ipvsScheduler == "mh" {
  1372  					serv.Flags |= utilipvs.FlagSourceHash
  1373  				}
  1374  				// There is no need to bind Node IP to dummy interface, so set parameter `bindAddr` to `false`.
  1375  				if err := proxier.syncService(svcPortNameString, serv, false, alreadyBoundAddrs); err == nil {
  1376  					activeIPVSServices.Insert(serv.String())
  1377  					if err := proxier.syncEndpoint(svcPortName, svcInfo.ExternalPolicyLocal(), serv); err != nil {
  1378  						klog.ErrorS(err, "Failed to sync endpoint for service", "servicePortName", svcPortName, "virtualServer", serv)
  1379  					}
  1380  				} else {
  1381  					klog.ErrorS(err, "Failed to sync service", "servicePortName", svcPortName, "virtualServer", serv)
  1382  				}
  1383  			}
  1384  		}
  1385  
  1386  		if svcInfo.HealthCheckNodePort() != 0 {
  1387  			nodePortSet := proxier.ipsetList[kubeHealthCheckNodePortSet]
  1388  			entry := &utilipset.Entry{
  1389  				// No need to provide ip info
  1390  				Port:     svcInfo.HealthCheckNodePort(),
  1391  				Protocol: "tcp",
  1392  				SetType:  utilipset.BitmapPort,
  1393  			}
  1394  
  1395  			if valid := nodePortSet.validateEntry(entry); !valid {
  1396  				klog.ErrorS(nil, "Error adding entry to ipset", "entry", entry, "ipset", nodePortSet.Name)
  1397  				continue
  1398  			}
  1399  			nodePortSet.activeEntries.Insert(entry.String())
  1400  		}
  1401  	}
  1402  
  1403  	// Set the KUBE-IPVS-IPS set to the "activeBindAddrs"
  1404  	proxier.ipsetList[kubeIPVSSet].activeEntries = activeBindAddrs
  1405  
  1406  	// sync ipset entries
  1407  	for _, set := range proxier.ipsetList {
  1408  		set.syncIPSetEntries()
  1409  	}
  1410  
  1411  	// Tail call iptables rules for ipset, make sure only call iptables once
  1412  	// in a single loop per ip set.
  1413  	proxier.writeIptablesRules()
  1414  
  1415  	// Sync iptables rules.
  1416  	// NOTE: NoFlushTables is used so we don't flush non-kubernetes chains in the table.
  1417  	proxier.iptablesData.Reset()
  1418  	proxier.iptablesData.Write(proxier.natChains.Bytes())
  1419  	proxier.iptablesData.Write(proxier.natRules.Bytes())
  1420  	proxier.iptablesData.Write(proxier.filterChains.Bytes())
  1421  	proxier.iptablesData.Write(proxier.filterRules.Bytes())
  1422  
  1423  	klog.V(5).InfoS("Restoring iptables", "rules", proxier.iptablesData.Bytes())
  1424  	err = proxier.iptables.RestoreAll(proxier.iptablesData.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters)
  1425  	if err != nil {
  1426  		if pErr, ok := err.(utiliptables.ParseError); ok {
  1427  			lines := utiliptables.ExtractLines(proxier.iptablesData.Bytes(), pErr.Line(), 3)
  1428  			klog.ErrorS(pErr, "Failed to execute iptables-restore", "rules", lines)
  1429  		} else {
  1430  			klog.ErrorS(err, "Failed to execute iptables-restore", "rules", proxier.iptablesData.Bytes())
  1431  		}
  1432  		metrics.IptablesRestoreFailuresTotal.Inc()
  1433  		return
  1434  	}
  1435  	for name, lastChangeTriggerTimes := range endpointUpdateResult.LastChangeTriggerTimes {
  1436  		for _, lastChangeTriggerTime := range lastChangeTriggerTimes {
  1437  			latency := metrics.SinceInSeconds(lastChangeTriggerTime)
  1438  			metrics.NetworkProgrammingLatency.Observe(latency)
  1439  			klog.V(4).InfoS("Network programming", "endpoint", klog.KRef(name.Namespace, name.Name), "elapsed", latency)
  1440  		}
  1441  	}
  1442  
  1443  	// Remove superfluous addresses from the dummy device
  1444  	superfluousAddresses := alreadyBoundAddrs.Difference(activeBindAddrs)
  1445  	if superfluousAddresses.Len() > 0 {
  1446  		klog.V(2).InfoS("Removing addresses", "interface", defaultDummyDevice, "addresses", superfluousAddresses)
  1447  		for adr := range superfluousAddresses {
  1448  			if err := proxier.netlinkHandle.UnbindAddress(adr, defaultDummyDevice); err != nil {
  1449  				klog.ErrorS(err, "UnbindAddress", "interface", defaultDummyDevice, "address", adr)
  1450  			}
  1451  		}
  1452  	}
  1453  
  1454  	// currentIPVSServices represent IPVS services listed from the system
  1455  	// (including any we have created in this sync)
  1456  	currentIPVSServices := make(map[string]*utilipvs.VirtualServer)
  1457  	appliedSvcs, err := proxier.ipvs.GetVirtualServers()
  1458  	if err == nil {
  1459  		for _, appliedSvc := range appliedSvcs {
  1460  			currentIPVSServices[appliedSvc.String()] = appliedSvc
  1461  		}
  1462  	} else {
  1463  		klog.ErrorS(err, "Failed to get ipvs service")
  1464  	}
  1465  	proxier.cleanLegacyService(activeIPVSServices, currentIPVSServices)
  1466  
  1467  	if proxier.healthzServer != nil {
  1468  		proxier.healthzServer.Updated(proxier.ipFamily)
  1469  	}
  1470  	metrics.SyncProxyRulesLastTimestamp.SetToCurrentTime()
  1471  
  1472  	// Update service healthchecks.  The endpoints list might include services that are
  1473  	// not "OnlyLocal", but the services list will not, and the serviceHealthServer
  1474  	// will just drop those endpoints.
  1475  	if err := proxier.serviceHealthServer.SyncServices(proxier.svcPortMap.HealthCheckNodePorts()); err != nil {
  1476  		klog.ErrorS(err, "Error syncing healthcheck services")
  1477  	}
  1478  	if err := proxier.serviceHealthServer.SyncEndpoints(proxier.endpointsMap.LocalReadyEndpoints()); err != nil {
  1479  		klog.ErrorS(err, "Error syncing healthcheck endpoints")
  1480  	}
  1481  
  1482  	metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("internal").Set(float64(proxier.serviceNoLocalEndpointsInternal.Len()))
  1483  	metrics.SyncProxyRulesNoLocalEndpointsTotal.WithLabelValues("external").Set(float64(proxier.serviceNoLocalEndpointsExternal.Len()))
  1484  
  1485  	// Finish housekeeping, clear stale conntrack entries for UDP Services
  1486  	conntrack.CleanStaleEntries(proxier.ipFamily == v1.IPv6Protocol, proxier.exec, proxier.svcPortMap, serviceUpdateResult, endpointUpdateResult)
  1487  }
  1488  
  1489  // writeIptablesRules write all iptables rules to proxier.natRules or proxier.FilterRules that ipvs proxier needed
  1490  // according to proxier.ipsetList information and the ipset match relationship that `ipsetWithIptablesChain` specified.
  1491  // some ipset(kubeClusterIPSet for example) have particular match rules and iptables jump relation should be sync separately.
  1492  func (proxier *Proxier) writeIptablesRules() {
  1493  
  1494  	// Dismiss connects to localhost early in the service chain
  1495  	loAddr := "127.0.0.0/8"
  1496  	if proxier.ipFamily == v1.IPv6Protocol {
  1497  		loAddr = "::1/128"
  1498  	}
  1499  	proxier.natRules.Write("-A", string(kubeServicesChain), "-s", loAddr, "-j", "RETURN")
  1500  
  1501  	// We are creating those slices ones here to avoid memory reallocations
  1502  	// in every loop. Note that reuse the memory, instead of doing:
  1503  	//   slice = <some new slice>
  1504  	// you should always do one of the below:
  1505  	//   slice = slice[:0] // and then append to it
  1506  	//   slice = append(slice[:0], ...)
  1507  	// To avoid growing this slice, we arbitrarily set its size to 64,
  1508  	// there is never more than that many arguments for a single line.
  1509  	// Note that even if we go over 64, it will still be correct - it
  1510  	// is just for efficiency, not correctness.
  1511  	args := make([]string, 64)
  1512  
  1513  	for _, set := range ipsetWithIptablesChain {
  1514  		if _, find := proxier.ipsetList[set.name]; find && !proxier.ipsetList[set.name].isEmpty() {
  1515  			args = append(args[:0], "-A", set.from)
  1516  			if set.protocolMatch != "" {
  1517  				args = append(args, "-p", set.protocolMatch)
  1518  			}
  1519  			args = append(args,
  1520  				"-m", "comment", "--comment", proxier.ipsetList[set.name].getComment(),
  1521  				"-m", "set", "--match-set", proxier.ipsetList[set.name].Name,
  1522  				set.matchType,
  1523  			)
  1524  			if set.table == utiliptables.TableFilter {
  1525  				proxier.filterRules.Write(args, "-j", set.to)
  1526  			} else {
  1527  				proxier.natRules.Write(args, "-j", set.to)
  1528  			}
  1529  		}
  1530  	}
  1531  
  1532  	if !proxier.ipsetList[kubeClusterIPSet].isEmpty() {
  1533  		args = append(args[:0],
  1534  			"-A", string(kubeServicesChain),
  1535  			"-m", "comment", "--comment", proxier.ipsetList[kubeClusterIPSet].getComment(),
  1536  			"-m", "set", "--match-set", proxier.ipsetList[kubeClusterIPSet].Name,
  1537  		)
  1538  		if proxier.masqueradeAll {
  1539  			proxier.natRules.Write(
  1540  				args, "dst,dst",
  1541  				"-j", string(kubeMarkMasqChain))
  1542  		} else if proxier.localDetector.IsImplemented() {
  1543  			// This masquerades off-cluster traffic to a service VIP.  The idea
  1544  			// is that you can establish a static route for your Service range,
  1545  			// routing to any node, and that node will bridge into the Service
  1546  			// for you.  Since that might bounce off-node, we masquerade here.
  1547  			// If/when we support "Local" policy for VIPs, we should update this.
  1548  			proxier.natRules.Write(
  1549  				args, "dst,dst",
  1550  				proxier.localDetector.IfNotLocal(),
  1551  				"-j", string(kubeMarkMasqChain))
  1552  		} else {
  1553  			// Masquerade all OUTPUT traffic coming from a service ip.
  1554  			// The kube dummy interface has all service VIPs assigned which
  1555  			// results in the service VIP being picked as the source IP to reach
  1556  			// a VIP. This leads to a connection from VIP:<random port> to
  1557  			// VIP:<service port>.
  1558  			// Always masquerading OUTPUT (node-originating) traffic with a VIP
  1559  			// source ip and service port destination fixes the outgoing connections.
  1560  			proxier.natRules.Write(
  1561  				args, "src,dst",
  1562  				"-j", string(kubeMarkMasqChain))
  1563  		}
  1564  	}
  1565  
  1566  	// externalIPRules adds iptables rules applies to Service ExternalIPs
  1567  	externalIPRules := func(args []string) {
  1568  		// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
  1569  		// nor from a local process to be forwarded to the service.
  1570  		// This rule roughly translates to "all traffic from off-machine".
  1571  		// This is imperfect in the face of network plugins that might not use a bridge, but we can revisit that later.
  1572  		externalTrafficOnlyArgs := append(args,
  1573  			"-m", "physdev", "!", "--physdev-is-in",
  1574  			"-m", "addrtype", "!", "--src-type", "LOCAL")
  1575  		proxier.natRules.Write(externalTrafficOnlyArgs, "-j", "ACCEPT")
  1576  		dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
  1577  		// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
  1578  		// This covers cases like GCE load-balancers which get added to the local routing table.
  1579  		proxier.natRules.Write(dstLocalOnlyArgs, "-j", "ACCEPT")
  1580  	}
  1581  
  1582  	if !proxier.ipsetList[kubeExternalIPSet].isEmpty() {
  1583  		// Build masquerade rules for packets to external IPs.
  1584  		args = append(args[:0],
  1585  			"-A", string(kubeServicesChain),
  1586  			"-m", "comment", "--comment", proxier.ipsetList[kubeExternalIPSet].getComment(),
  1587  			"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPSet].Name,
  1588  			"dst,dst",
  1589  		)
  1590  		proxier.natRules.Write(args, "-j", string(kubeMarkMasqChain))
  1591  		externalIPRules(args)
  1592  	}
  1593  
  1594  	if !proxier.ipsetList[kubeExternalIPLocalSet].isEmpty() {
  1595  		args = append(args[:0],
  1596  			"-A", string(kubeServicesChain),
  1597  			"-m", "comment", "--comment", proxier.ipsetList[kubeExternalIPLocalSet].getComment(),
  1598  			"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPLocalSet].Name,
  1599  			"dst,dst",
  1600  		)
  1601  		externalIPRules(args)
  1602  	}
  1603  
  1604  	// -A KUBE-SERVICES  -m addrtype  --dst-type LOCAL -j KUBE-NODE-PORT
  1605  	args = append(args[:0],
  1606  		"-A", string(kubeServicesChain),
  1607  		"-m", "addrtype", "--dst-type", "LOCAL",
  1608  	)
  1609  	proxier.natRules.Write(args, "-j", string(kubeNodePortChain))
  1610  
  1611  	// mark for masquerading for KUBE-LOAD-BALANCER
  1612  	proxier.natRules.Write(
  1613  		"-A", string(kubeLoadBalancerChain),
  1614  		"-j", string(kubeMarkMasqChain),
  1615  	)
  1616  
  1617  	// drop packets filtered by KUBE-SOURCE-RANGES-FIREWALL
  1618  	proxier.filterRules.Write(
  1619  		"-A", string(kubeSourceRangesFirewallChain),
  1620  		"-j", "DROP",
  1621  	)
  1622  
  1623  	// disable LB access from node
  1624  	// for IPVS src and dst both would be lbIP
  1625  	for _, entry := range proxier.lbNoNodeAccessIPPortProtocolEntries {
  1626  		proxier.filterRules.Write(
  1627  			"-A", string(kubeIPVSOutFilterChain),
  1628  			"-s", entry.IP,
  1629  			"-m", "ipvs", "--vaddr", entry.IP, "--vproto", entry.Protocol, "--vport", strconv.Itoa(entry.Port),
  1630  			"-j", "DROP",
  1631  		)
  1632  	}
  1633  
  1634  	// Accept all traffic with destination of ipvs virtual service, in case other iptables rules
  1635  	// block the traffic, that may result in ipvs rules invalid.
  1636  	// Those rules must be in the end of KUBE-SERVICE chain
  1637  	proxier.acceptIPVSTraffic()
  1638  
  1639  	// If the masqueradeMark has been added then we want to forward that same
  1640  	// traffic, this allows NodePort traffic to be forwarded even if the default
  1641  	// FORWARD policy is not accept.
  1642  	proxier.filterRules.Write(
  1643  		"-A", string(kubeForwardChain),
  1644  		"-m", "comment", "--comment", `"kubernetes forwarding rules"`,
  1645  		"-m", "mark", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
  1646  		"-j", "ACCEPT",
  1647  	)
  1648  
  1649  	// The following rule ensures the traffic after the initial packet accepted
  1650  	// by the "kubernetes forwarding rules" rule above will be accepted.
  1651  	proxier.filterRules.Write(
  1652  		"-A", string(kubeForwardChain),
  1653  		"-m", "comment", "--comment", `"kubernetes forwarding conntrack rule"`,
  1654  		"-m", "conntrack",
  1655  		"--ctstate", "RELATED,ESTABLISHED",
  1656  		"-j", "ACCEPT",
  1657  	)
  1658  
  1659  	// Add rule to accept traffic towards health check node port
  1660  	proxier.filterRules.Write(
  1661  		"-A", string(kubeNodePortChain),
  1662  		"-m", "comment", "--comment", proxier.ipsetList[kubeHealthCheckNodePortSet].getComment(),
  1663  		"-m", "set", "--match-set", proxier.ipsetList[kubeHealthCheckNodePortSet].Name, "dst",
  1664  		"-j", "ACCEPT",
  1665  	)
  1666  
  1667  	// Add rules to the filter/KUBE-IPVS-FILTER chain to prevent access to ports on the host through VIP addresses.
  1668  	// https://github.com/kubernetes/kubernetes/issues/72236
  1669  	proxier.filterRules.Write(
  1670  		"-A", string(kubeIPVSFilterChain),
  1671  		"-m", "set", "--match-set", proxier.ipsetList[kubeLoadBalancerSet].Name, "dst,dst", "-j", "RETURN")
  1672  	proxier.filterRules.Write(
  1673  		"-A", string(kubeIPVSFilterChain),
  1674  		"-m", "set", "--match-set", proxier.ipsetList[kubeClusterIPSet].Name, "dst,dst", "-j", "RETURN")
  1675  	proxier.filterRules.Write(
  1676  		"-A", string(kubeIPVSFilterChain),
  1677  		"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPSet].Name, "dst,dst", "-j", "RETURN")
  1678  	proxier.filterRules.Write(
  1679  		"-A", string(kubeIPVSFilterChain),
  1680  		"-m", "set", "--match-set", proxier.ipsetList[kubeExternalIPLocalSet].Name, "dst,dst", "-j", "RETURN")
  1681  	proxier.filterRules.Write(
  1682  		"-A", string(kubeIPVSFilterChain),
  1683  		"-m", "set", "--match-set", proxier.ipsetList[kubeHealthCheckNodePortSet].Name, "dst", "-j", "RETURN")
  1684  	proxier.filterRules.Write(
  1685  		"-A", string(kubeIPVSFilterChain),
  1686  		"-m", "conntrack", "--ctstate", "NEW",
  1687  		"-m", "set", "--match-set", proxier.ipsetList[kubeIPVSSet].Name, "dst", "-j", "REJECT")
  1688  
  1689  	// Install the kubernetes-specific postrouting rules. We use a whole chain for
  1690  	// this so that it is easier to flush and change, for example if the mark
  1691  	// value should ever change.
  1692  
  1693  	proxier.natRules.Write(
  1694  		"-A", string(kubePostroutingChain),
  1695  		"-m", "mark", "!", "--mark", fmt.Sprintf("%s/%s", proxier.masqueradeMark, proxier.masqueradeMark),
  1696  		"-j", "RETURN",
  1697  	)
  1698  	// Clear the mark to avoid re-masquerading if the packet re-traverses the network stack.
  1699  	proxier.natRules.Write(
  1700  		"-A", string(kubePostroutingChain),
  1701  		// XOR proxier.masqueradeMark to unset it
  1702  		"-j", "MARK", "--xor-mark", proxier.masqueradeMark,
  1703  	)
  1704  	masqRule := []string{
  1705  		"-A", string(kubePostroutingChain),
  1706  		"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
  1707  		"-j", "MASQUERADE",
  1708  	}
  1709  	if proxier.iptables.HasRandomFully() {
  1710  		masqRule = append(masqRule, "--random-fully")
  1711  	}
  1712  	proxier.natRules.Write(masqRule)
  1713  
  1714  	// Install the kubernetes-specific masquerade mark rule. We use a whole chain for
  1715  	// this so that it is easier to flush and change, for example if the mark
  1716  	// value should ever change.
  1717  	proxier.natRules.Write(
  1718  		"-A", string(kubeMarkMasqChain),
  1719  		"-j", "MARK", "--or-mark", proxier.masqueradeMark,
  1720  	)
  1721  
  1722  	// Write the end-of-table markers.
  1723  	proxier.filterRules.Write("COMMIT")
  1724  	proxier.natRules.Write("COMMIT")
  1725  }
  1726  
  1727  func (proxier *Proxier) acceptIPVSTraffic() {
  1728  	sets := []string{kubeClusterIPSet, kubeLoadBalancerSet}
  1729  	for _, set := range sets {
  1730  		var matchType string
  1731  		if !proxier.ipsetList[set].isEmpty() {
  1732  			switch proxier.ipsetList[set].SetType {
  1733  			case utilipset.BitmapPort:
  1734  				matchType = "dst"
  1735  			default:
  1736  				matchType = "dst,dst"
  1737  			}
  1738  			proxier.natRules.Write(
  1739  				"-A", string(kubeServicesChain),
  1740  				"-m", "set", "--match-set", proxier.ipsetList[set].Name, matchType,
  1741  				"-j", "ACCEPT",
  1742  			)
  1743  		}
  1744  	}
  1745  }
  1746  
  1747  // createAndLinkKubeChain create all kube chains that ipvs proxier need and write basic link.
  1748  func (proxier *Proxier) createAndLinkKubeChain() {
  1749  	for _, ch := range iptablesChains {
  1750  		if _, err := proxier.iptables.EnsureChain(ch.table, ch.chain); err != nil {
  1751  			klog.ErrorS(err, "Failed to ensure chain exists", "table", ch.table, "chain", ch.chain)
  1752  			return
  1753  		}
  1754  		if ch.table == utiliptables.TableNAT {
  1755  			proxier.natChains.Write(utiliptables.MakeChainLine(ch.chain))
  1756  		} else {
  1757  			proxier.filterChains.Write(utiliptables.MakeChainLine(ch.chain))
  1758  		}
  1759  	}
  1760  
  1761  	for _, jc := range iptablesJumpChain {
  1762  		args := []string{"-m", "comment", "--comment", jc.comment, "-j", string(jc.to)}
  1763  		if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, jc.table, jc.from, args...); err != nil {
  1764  			klog.ErrorS(err, "Failed to ensure chain jumps", "table", jc.table, "srcChain", jc.from, "dstChain", jc.to)
  1765  		}
  1766  	}
  1767  
  1768  }
  1769  
  1770  func (proxier *Proxier) syncService(svcName string, vs *utilipvs.VirtualServer, bindAddr bool, alreadyBoundAddrs sets.Set[string]) error {
  1771  	appliedVirtualServer, _ := proxier.ipvs.GetVirtualServer(vs)
  1772  	if appliedVirtualServer == nil || !appliedVirtualServer.Equal(vs) {
  1773  		if appliedVirtualServer == nil {
  1774  			// IPVS service is not found, create a new service
  1775  			klog.V(3).InfoS("Adding new service", "serviceName", svcName, "virtualServer", vs)
  1776  			if err := proxier.ipvs.AddVirtualServer(vs); err != nil {
  1777  				klog.ErrorS(err, "Failed to add IPVS service", "serviceName", svcName)
  1778  				return err
  1779  			}
  1780  		} else {
  1781  			// IPVS service was changed, update the existing one
  1782  			// During updates, service VIP will not go down
  1783  			klog.V(3).InfoS("IPVS service was changed", "serviceName", svcName)
  1784  			if err := proxier.ipvs.UpdateVirtualServer(vs); err != nil {
  1785  				klog.ErrorS(err, "Failed to update IPVS service")
  1786  				return err
  1787  			}
  1788  		}
  1789  	}
  1790  
  1791  	// bind service address to dummy interface
  1792  	if bindAddr {
  1793  		// always attempt to bind if alreadyBoundAddrs is nil,
  1794  		// otherwise check if it's already binded and return early
  1795  		if alreadyBoundAddrs != nil && alreadyBoundAddrs.Has(vs.Address.String()) {
  1796  			return nil
  1797  		}
  1798  
  1799  		klog.V(4).InfoS("Bind address", "address", vs.Address)
  1800  		_, err := proxier.netlinkHandle.EnsureAddressBind(vs.Address.String(), defaultDummyDevice)
  1801  		if err != nil {
  1802  			klog.ErrorS(err, "Failed to bind service address to dummy device", "serviceName", svcName)
  1803  			return err
  1804  		}
  1805  	}
  1806  
  1807  	return nil
  1808  }
  1809  
  1810  func (proxier *Proxier) syncEndpoint(svcPortName proxy.ServicePortName, onlyNodeLocalEndpoints bool, vs *utilipvs.VirtualServer) error {
  1811  	appliedVirtualServer, err := proxier.ipvs.GetVirtualServer(vs)
  1812  	if err != nil {
  1813  		klog.ErrorS(err, "Failed to get IPVS service")
  1814  		return err
  1815  	}
  1816  	if appliedVirtualServer == nil {
  1817  		return errors.New("IPVS virtual service does not exist")
  1818  	}
  1819  
  1820  	// curEndpoints represents IPVS destinations listed from current system.
  1821  	curEndpoints := sets.New[string]()
  1822  	curDests, err := proxier.ipvs.GetRealServers(appliedVirtualServer)
  1823  	if err != nil {
  1824  		klog.ErrorS(err, "Failed to list IPVS destinations")
  1825  		return err
  1826  	}
  1827  	for _, des := range curDests {
  1828  		curEndpoints.Insert(des.String())
  1829  	}
  1830  
  1831  	endpoints := proxier.endpointsMap[svcPortName]
  1832  
  1833  	// Filtering for topology aware endpoints. This function will only
  1834  	// filter endpoints if appropriate feature gates are enabled and the
  1835  	// Service does not have conflicting configuration such as
  1836  	// externalTrafficPolicy=Local.
  1837  	svcInfo, ok := proxier.svcPortMap[svcPortName]
  1838  	if !ok {
  1839  		klog.InfoS("Unable to filter endpoints due to missing service info", "servicePortName", svcPortName)
  1840  	} else {
  1841  		clusterEndpoints, localEndpoints, _, hasAnyEndpoints := proxy.CategorizeEndpoints(endpoints, svcInfo, proxier.nodeLabels)
  1842  		if onlyNodeLocalEndpoints {
  1843  			if len(localEndpoints) > 0 {
  1844  				endpoints = localEndpoints
  1845  			} else {
  1846  				// https://github.com/kubernetes/kubernetes/pull/97081
  1847  				// Allow access from local PODs even if no local endpoints exist.
  1848  				// Traffic from an external source will be routed but the reply
  1849  				// will have the POD address and will be discarded.
  1850  				endpoints = clusterEndpoints
  1851  
  1852  				if hasAnyEndpoints && svcInfo.InternalPolicyLocal() {
  1853  					proxier.serviceNoLocalEndpointsInternal.Insert(svcPortName.NamespacedName.String())
  1854  				}
  1855  
  1856  				if hasAnyEndpoints && svcInfo.ExternalPolicyLocal() {
  1857  					proxier.serviceNoLocalEndpointsExternal.Insert(svcPortName.NamespacedName.String())
  1858  				}
  1859  			}
  1860  		} else {
  1861  			endpoints = clusterEndpoints
  1862  		}
  1863  	}
  1864  
  1865  	newEndpoints := sets.New[string]()
  1866  	for _, epInfo := range endpoints {
  1867  		newEndpoints.Insert(epInfo.String())
  1868  	}
  1869  
  1870  	// Create new endpoints
  1871  	for _, ep := range sets.List(newEndpoints) {
  1872  		ip, port, err := net.SplitHostPort(ep)
  1873  		if err != nil {
  1874  			klog.ErrorS(err, "Failed to parse endpoint", "endpoint", ep)
  1875  			continue
  1876  		}
  1877  		portNum, err := strconv.Atoi(port)
  1878  		if err != nil {
  1879  			klog.ErrorS(err, "Failed to parse endpoint port", "port", port)
  1880  			continue
  1881  		}
  1882  
  1883  		newDest := &utilipvs.RealServer{
  1884  			Address: netutils.ParseIPSloppy(ip),
  1885  			Port:    uint16(portNum),
  1886  			Weight:  1,
  1887  		}
  1888  
  1889  		if curEndpoints.Has(ep) {
  1890  			// if we are syncing for the first time, loop through all current destinations and
  1891  			// reset their weight.
  1892  			if proxier.initialSync {
  1893  				for _, dest := range curDests {
  1894  					if dest.Weight != newDest.Weight {
  1895  						err = proxier.ipvs.UpdateRealServer(appliedVirtualServer, newDest)
  1896  						if err != nil {
  1897  							klog.ErrorS(err, "Failed to update destination", "newDest", newDest)
  1898  							continue
  1899  						}
  1900  					}
  1901  				}
  1902  			}
  1903  			// check if newEndpoint is in gracefulDelete list, if true, delete this ep immediately
  1904  			uniqueRS := GetUniqueRSName(vs, newDest)
  1905  			if !proxier.gracefuldeleteManager.InTerminationList(uniqueRS) {
  1906  				continue
  1907  			}
  1908  			klog.V(5).InfoS("new ep is in graceful delete list", "uniqueRealServer", uniqueRS)
  1909  			err := proxier.gracefuldeleteManager.MoveRSOutofGracefulDeleteList(uniqueRS)
  1910  			if err != nil {
  1911  				klog.ErrorS(err, "Failed to delete endpoint in gracefulDeleteQueue", "endpoint", ep)
  1912  				continue
  1913  			}
  1914  		}
  1915  		err = proxier.ipvs.AddRealServer(appliedVirtualServer, newDest)
  1916  		if err != nil {
  1917  			klog.ErrorS(err, "Failed to add destination", "newDest", newDest)
  1918  			continue
  1919  		}
  1920  	}
  1921  
  1922  	// Delete old endpoints
  1923  	for _, ep := range curEndpoints.Difference(newEndpoints).UnsortedList() {
  1924  		// if curEndpoint is in gracefulDelete, skip
  1925  		uniqueRS := vs.String() + "/" + ep
  1926  		if proxier.gracefuldeleteManager.InTerminationList(uniqueRS) {
  1927  			continue
  1928  		}
  1929  		ip, port, err := net.SplitHostPort(ep)
  1930  		if err != nil {
  1931  			klog.ErrorS(err, "Failed to parse endpoint", "endpoint", ep)
  1932  			continue
  1933  		}
  1934  		portNum, err := strconv.Atoi(port)
  1935  		if err != nil {
  1936  			klog.ErrorS(err, "Failed to parse endpoint port", "port", port)
  1937  			continue
  1938  		}
  1939  
  1940  		delDest := &utilipvs.RealServer{
  1941  			Address: netutils.ParseIPSloppy(ip),
  1942  			Port:    uint16(portNum),
  1943  		}
  1944  
  1945  		klog.V(5).InfoS("Using graceful delete", "uniqueRealServer", uniqueRS)
  1946  		err = proxier.gracefuldeleteManager.GracefulDeleteRS(appliedVirtualServer, delDest)
  1947  		if err != nil {
  1948  			klog.ErrorS(err, "Failed to delete destination", "uniqueRealServer", uniqueRS)
  1949  			continue
  1950  		}
  1951  	}
  1952  	return nil
  1953  }
  1954  
  1955  func (proxier *Proxier) cleanLegacyService(activeServices sets.Set[string], currentServices map[string]*utilipvs.VirtualServer) {
  1956  	for cs, svc := range currentServices {
  1957  		if proxier.isIPInExcludeCIDRs(svc.Address) {
  1958  			continue
  1959  		}
  1960  		if getIPFamily(svc.Address) != proxier.ipFamily {
  1961  			// Not our family
  1962  			continue
  1963  		}
  1964  		if !activeServices.Has(cs) {
  1965  			klog.V(4).InfoS("Delete service", "virtualServer", svc)
  1966  			if err := proxier.ipvs.DeleteVirtualServer(svc); err != nil {
  1967  				klog.ErrorS(err, "Failed to delete service", "virtualServer", svc)
  1968  			}
  1969  		}
  1970  	}
  1971  }
  1972  
  1973  func (proxier *Proxier) isIPInExcludeCIDRs(ip net.IP) bool {
  1974  	// make sure it does not fall within an excluded CIDR range.
  1975  	for _, excludedCIDR := range proxier.excludeCIDRs {
  1976  		if excludedCIDR.Contains(ip) {
  1977  			return true
  1978  		}
  1979  	}
  1980  	return false
  1981  }
  1982  
  1983  func getIPFamily(ip net.IP) v1.IPFamily {
  1984  	if netutils.IsIPv4(ip) {
  1985  		return v1.IPv4Protocol
  1986  	}
  1987  	return v1.IPv6Protocol
  1988  }
  1989  
// The ipvs Proxier falls back on iptables when it needs to do SNAT for egress packets.
// It only operates on the iptables *nat table.
// Create and link the kube postrouting chain for SNAT packets.
// Chain POSTROUTING (policy ACCEPT)
// target     prot opt source               destination
// KUBE-POSTROUTING  all  --  0.0.0.0/0            0.0.0.0/0            /* kubernetes postrouting rules */
// Maintained by the kubelet network sync loop
  1997  
  1998  // *nat
  1999  // :KUBE-POSTROUTING - [0:0]
  2000  // Chain KUBE-POSTROUTING (1 references)
  2001  // target     prot opt source               destination
  2002  // MASQUERADE  all  --  0.0.0.0/0            0.0.0.0/0            /* kubernetes service traffic requiring SNAT */ mark match 0x4000/0x4000
  2003  
  2004  // :KUBE-MARK-MASQ - [0:0]
  2005  // Chain KUBE-MARK-MASQ (0 references)
  2006  // target     prot opt source               destination
  2007  // MARK       all  --  0.0.0.0/0            0.0.0.0/0            MARK or 0x4000