istio.io/istio@v0.0.0-20240520182934-d79c90f27776/cni/pkg/iptables/iptables.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package iptables
    16  
    17  import (
    18  	"errors"
    19  	"fmt"
    20  	"net/netip"
    21  	"strings"
    22  
    23  	"istio.io/istio/cni/pkg/ipset"
    24  	istiolog "istio.io/istio/pkg/log"
    25  	"istio.io/istio/tools/istio-iptables/pkg/builder"
    26  	iptablesconfig "istio.io/istio/tools/istio-iptables/pkg/config"
    27  	iptablesconstants "istio.io/istio/tools/istio-iptables/pkg/constants"
    28  	dep "istio.io/istio/tools/istio-iptables/pkg/dependencies"
    29  	iptableslog "istio.io/istio/tools/istio-iptables/pkg/log"
    30  )
    31  
// Marks, masks, chain names and ports shared by the iptables rules built in
// this package.
//
// InpodMark/InpodMask form the packet mark that the in-pod rules match on
// (rendered as 0x539/0xfff); InpodTProxyMark/InpodTProxyMask form the value
// those rules write as the connmark and use as the TPROXY mark.
// InpodRestoreMask is used as both --nfmask and --ctmask when the connmark is
// restored on output. ProbeIPSet is the base name that is expanded into the
// per-family ipset names matched by the host-side probe SNAT rules.
// RouteTableInbound is not referenced by the rules in this file; presumably it
// is consumed by the netlink route/rule helpers — verify there.
const (
	// INPOD marks/masks
	InpodTProxyMark      = 0x111
	InpodTProxyMask      = 0xfff
	InpodMark            = 1337 // this needs to match the inpod config mark in ztunnel.
	InpodMask            = 0xfff
	InpodRestoreMask     = 0xffffffff
	ChainInpodOutput     = "ISTIO_OUTPUT"
	ChainInpodPrerouting = "ISTIO_PRERT"
	ChainHostPostrouting = "ISTIO_POSTRT"
	RouteTableInbound    = 100

	DNSCapturePort              = 15053
	ZtunnelInboundPort          = 15008
	ZtunnelOutboundPort         = 15001
	ZtunnelInboundPlaintextPort = 15006
	ProbeIPSet                  = "istio-inpod-probes"
)
    50  
// log is the package-level logger, registered under the "iptables" scope.
var log = istiolog.RegisterScope("iptables", "iptables helper")
    52  
// Config holds the options that control how the rules in this package are
// generated and applied. Every field is copied verbatim into the shared
// istio-iptables builder config by ipbuildConfig.
type Config struct {
	// RestoreFormat selects feeding the rules to iptables-restore instead of
	// running one iptables command per rule.
	RestoreFormat bool `json:"RESTORE_FORMAT"`
	// TraceLogging is passed through to the rule builder; its effect is
	// defined there and is not visible in this file.
	TraceLogging bool `json:"IPTABLES_TRACE_LOGGING"`
	// EnableIPv6 additionally applies (and deletes) the IPv6 variant of every
	// rule set using the detected IPv6 iptables binary.
	EnableIPv6 bool `json:"ENABLE_INBOUND_IPV6"`
	// RedirectDNS adds the in-pod rule that redirects UDP port 53 traffic to
	// DNSCapturePort.
	RedirectDNS bool `json:"REDIRECT_DNS"`
}
    59  
// IptablesConfigurator builds and applies the in-pod and host-side iptables
// rules. Construct it with NewIptablesConfigurator, which detects the iptables
// binaries once and stores the result for all later invocations.
type IptablesConfigurator struct {
	ext    dep.Dependencies    // runs iptables commands and detects the available binaries
	nlDeps NetlinkDependencies // adds/removes the loopback routes and the inpod mark IP rule
	cfg    *Config             // never nil when built via NewIptablesConfigurator
	iptV   dep.IptablesVersion // IPv4 iptables binary detected at construction time
	ipt6V  dep.IptablesVersion // IPv6 iptables binary detected at construction time
}
    67  
    68  func ipbuildConfig(c *Config) *iptablesconfig.Config {
    69  	return &iptablesconfig.Config{
    70  		RestoreFormat: c.RestoreFormat,
    71  		TraceLogging:  c.TraceLogging,
    72  		EnableIPv6:    c.EnableIPv6,
    73  		RedirectDNS:   c.RedirectDNS,
    74  	}
    75  }
    76  
    77  func NewIptablesConfigurator(cfg *Config, ext dep.Dependencies, nlDeps NetlinkDependencies) (*IptablesConfigurator, error) {
    78  	if cfg == nil {
    79  		cfg = &Config{
    80  			RestoreFormat: true,
    81  		}
    82  	}
    83  
    84  	configurator := &IptablesConfigurator{
    85  		ext:    ext,
    86  		nlDeps: nlDeps,
    87  		cfg:    cfg,
    88  	}
    89  
    90  	// By detecting iptables versions *here* once-for-all we are
    91  	// committing to using the same binary/variant (legacy or nft)
    92  	// within all pods as we do on the host.
    93  	//
    94  	// This should be fine, as the host binaries are all we have to work with here anyway,
    95  	// as we are running within a privileged container - and we don't want to take the time to
    96  	// redetect for each pod anyway.
    97  	//
    98  	// Extreme corner case:
    99  	// If for some reason your host had both binaries, and you were injecting out-of-band
   100  	// iptables rules within a pod context into `legacy` tables, but your host context preferred
   101  	// `nft`, we would still inject our rules in-pod into nft tables, which is a bit wonky.
   102  	//
   103  	// But that's stunningly unlikely (and would still work either way)
   104  	iptVer, err := ext.DetectIptablesVersion(false)
   105  	if err != nil {
   106  		return nil, err
   107  	}
   108  
   109  	log.Debugf("found iptables binary: %+v", iptVer)
   110  
   111  	configurator.iptV = iptVer
   112  
   113  	ipt6Ver, err := ext.DetectIptablesVersion(true)
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	log.Debugf("found iptables v6 binary: %+v", iptVer)
   119  
   120  	configurator.ipt6V = ipt6Ver
   121  
   122  	return configurator, nil
   123  }
   124  
   125  func (cfg *IptablesConfigurator) DeleteInpodRules() error {
   126  	var inpodErrs []error
   127  
   128  	log.Debug("Deleting iptables rules")
   129  
   130  	inpodErrs = append(inpodErrs, cfg.executeDeleteCommands(), cfg.delInpodMarkIPRule(), cfg.delLoopbackRoute())
   131  	return errors.Join(inpodErrs...)
   132  }
   133  
   134  func (cfg *IptablesConfigurator) executeDeleteCommands() error {
   135  	deleteCmds := [][]string{
   136  		{"-t", iptablesconstants.MANGLE, "-D", iptablesconstants.PREROUTING, "-j", ChainInpodPrerouting},
   137  		{"-t", iptablesconstants.MANGLE, "-D", iptablesconstants.OUTPUT, "-j", ChainInpodOutput},
   138  		{"-t", iptablesconstants.NAT, "-D", iptablesconstants.OUTPUT, "-j", ChainInpodOutput},
   139  	}
   140  
   141  	// these sometimes fail due to "Device or resource busy"
   142  	optionalDeleteCmds := [][]string{
   143  		// flush-then-delete our created chains
   144  		{"-t", iptablesconstants.MANGLE, "-F", ChainInpodPrerouting},
   145  		{"-t", iptablesconstants.MANGLE, "-F", ChainInpodOutput},
   146  		{"-t", iptablesconstants.NAT, "-F", ChainInpodOutput},
   147  		{"-t", iptablesconstants.MANGLE, "-X", ChainInpodPrerouting},
   148  		{"-t", iptablesconstants.MANGLE, "-X", ChainInpodOutput},
   149  		{"-t", iptablesconstants.NAT, "-X", ChainInpodOutput},
   150  	}
   151  
   152  	var delErrs []error
   153  
   154  	iptablesVariant := []dep.IptablesVersion{}
   155  	iptablesVariant = append(iptablesVariant, cfg.iptV)
   156  
   157  	if cfg.cfg.EnableIPv6 {
   158  		iptablesVariant = append(iptablesVariant, cfg.ipt6V)
   159  	}
   160  
   161  	for _, iptVer := range iptablesVariant {
   162  		for _, cmd := range deleteCmds {
   163  			delErrs = append(delErrs, cfg.ext.Run(iptablesconstants.IPTables, &iptVer, nil, cmd...))
   164  		}
   165  
   166  		for _, cmd := range optionalDeleteCmds {
   167  			err := cfg.ext.Run(iptablesconstants.IPTables, &iptVer, nil, cmd...)
   168  			if err != nil {
   169  				log.Debugf("ignoring error deleting optional iptables rule: %v", err)
   170  			}
   171  		}
   172  	}
   173  	return errors.Join(delErrs...)
   174  }
   175  
   176  // Setup iptables rules for in-pod mode. Ideally this should be an idempotent function.
   177  // NOTE that this expects to be run from within the pod network namespace!
   178  func (cfg *IptablesConfigurator) CreateInpodRules(hostProbeSNAT *netip.Addr) error {
   179  	// Append our rules here
   180  	builder := cfg.appendInpodRules(hostProbeSNAT)
   181  
   182  	if err := cfg.addLoopbackRoute(); err != nil {
   183  		return err
   184  	}
   185  
   186  	if err := cfg.addInpodMarkIPRule(); err != nil {
   187  		return err
   188  	}
   189  
   190  	log.Debug("Adding iptables rules")
   191  	if err := cfg.executeCommands(builder); err != nil {
   192  		log.Errorf("failed to restore iptables rules: %v", err)
   193  		return err
   194  	}
   195  
   196  	return nil
   197  }
   198  
   199  func (cfg *IptablesConfigurator) appendInpodRules(hostProbeSNAT *netip.Addr) *builder.IptablesRuleBuilder {
   200  	redirectDNS := cfg.cfg.RedirectDNS
   201  
   202  	inpodMark := fmt.Sprintf("0x%x", InpodMark) + "/" + fmt.Sprintf("0x%x", InpodMask)
   203  	inpodTproxyMark := fmt.Sprintf("0x%x", InpodTProxyMark) + "/" + fmt.Sprintf("0x%x", InpodTProxyMask)
   204  
   205  	iptablesBuilder := builder.NewIptablesRuleBuilder(ipbuildConfig(cfg.cfg))
   206  
   207  	// Insert jumps to our custom chains
   208  	// This is mostly just for visual tidiness and cleanup, as we can delete the secondary chains and jumps
   209  	// without polluting the main table too much.
   210  
   211  	// -t mangle -A PREROUTING -j ISTIO_PRERT
   212  	iptablesBuilder.AppendRule(
   213  		iptableslog.UndefinedCommand, iptablesconstants.PREROUTING, iptablesconstants.MANGLE,
   214  		"-j", ChainInpodPrerouting,
   215  	)
   216  
   217  	// -t mangle -A OUTPUT -p tcp -j ISTIO_OUTPUT
   218  	iptablesBuilder.AppendRule(
   219  		iptableslog.UndefinedCommand, iptablesconstants.OUTPUT, iptablesconstants.MANGLE,
   220  		"-j", ChainInpodOutput,
   221  	)
   222  
   223  	// -t nat -A OUTPUT -p tcp -j ISTIO_OUTPUT
   224  	iptablesBuilder.AppendRule(
   225  		iptableslog.UndefinedCommand, iptablesconstants.OUTPUT, iptablesconstants.NAT,
   226  		"-j", ChainInpodOutput,
   227  	)
   228  
   229  	// From here on, we should be only inserting rules into our custom chains.
   230  
   231  	// CLI: -A ISTIO_PRERT -m mark --mark 0x539/0xfff -j CONNMARK --set-xmark 0x111/0xfff
   232  	//
   233  	// DESC: If we have a packet mark, set a connmark.
   234  	iptablesBuilder.AppendRule(iptableslog.UndefinedCommand, ChainInpodPrerouting, iptablesconstants.MANGLE, "-m", "mark",
   235  		"--mark", inpodMark,
   236  		"-j", "CONNMARK",
   237  		"--set-xmark", inpodTproxyMark)
   238  
   239  	// Handle healthcheck probes from the host node. In the host netns, before the packet enters the pod, we SNAT
   240  	// the healthcheck packet to a fixed IP if the packet is coming from a node-local process with a socket.
   241  	//
   242  	// We do this so we can exempt this traffic from ztunnel capture/proxy - otherwise both kube-proxy (legit)
   243  	// and kubelet (skippable) traffic would have the same srcip once they got to the pod, and would be indistinguishable.
   244  	//
   245  	// Note that SortedList is used here because the istio sets class has no order guarantees,
   246  	// and our unit tests will flake if rules have a nondeterministic ordering.
   247  	// CLI: -t mangle -A ISTIO_PRERT -s 169.254.7.127 -p tcp -m tcp --dport <PROBEPORT> -j ACCEPT
   248  	//
   249  	// DESC: If this is one of our node-probe ports and is from our SNAT-ed/"special" hostside IP, short-circuit out here
   250  	iptablesBuilder.AppendRule(iptableslog.UndefinedCommand, ChainInpodPrerouting, iptablesconstants.MANGLE,
   251  		"-s", hostProbeSNAT.String(),
   252  		"-p", "tcp",
   253  		"-m", "tcp",
   254  		"-j", "ACCEPT",
   255  	)
   256  
   257  	// CLI: -t NAT -A ISTIO_OUTPUT -d 169.254.7.127 -p tcp -m tcp -j ACCEPT
   258  	//
   259  	// DESC: Anything coming BACK from the pod healthcheck port with a dest of our SNAT-ed hostside IP
   260  	// we also short-circuit.
   261  	iptablesBuilder.AppendRule(
   262  		iptableslog.UndefinedCommand, ChainInpodOutput, iptablesconstants.NAT,
   263  		"-d", hostProbeSNAT.String(),
   264  		"-p", "tcp",
   265  		"-m", "tcp",
   266  		"-j", "ACCEPT",
   267  	)
   268  
   269  	// prevent intercept traffic from app ==> app by pod ip
   270  	iptablesBuilder.AppendVersionedRule("127.0.0.1/32", "::1/128",
   271  		iptableslog.UndefinedCommand, ChainInpodPrerouting, iptablesconstants.MANGLE,
   272  		"!", "-d", iptablesconstants.IPVersionSpecific, // ignore traffic to localhost ip, as this rule means to catch traffic to pod ip.
   273  		"-p", iptablesconstants.TCP,
   274  		"-i", "lo",
   275  		"-j", "ACCEPT")
   276  
   277  	// CLI: -A ISTIO_PRERT -p tcp -m tcp --dport <INPORT> -m mark ! --mark 0x539/0xfff -j TPROXY --on-port <INPORT> --on-ip 127.0.0.1 --tproxy-mark 0x111/0xfff
   278  	//
   279  	// DESC: Anything heading to <INPORT> that does not have the mark, TPROXY to ztunnel inbound port <INPORT>
   280  	iptablesBuilder.AppendRule(
   281  		iptableslog.UndefinedCommand, ChainInpodPrerouting, iptablesconstants.MANGLE,
   282  		"-p", "tcp",
   283  		"-m", "tcp",
   284  		"--dport", fmt.Sprintf("%d", ZtunnelInboundPort),
   285  		"-m", "mark", "!",
   286  		"--mark", inpodMark,
   287  		"-j", "TPROXY",
   288  		"--on-port", fmt.Sprintf("%d", ZtunnelInboundPort),
   289  		// "--on-ip", "127.0.0.1",
   290  		"--tproxy-mark", inpodTproxyMark,
   291  	)
   292  
   293  	// CLI: -A ISTIO_PRERT -p tcp -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
   294  	//
   295  	// DESC: Anything that's already in conntrack as an established connection, accept
   296  	iptablesBuilder.AppendRule(
   297  		iptableslog.UndefinedCommand, ChainInpodPrerouting, iptablesconstants.MANGLE,
   298  		"-p", "tcp",
   299  		"-m", "conntrack",
   300  		"--ctstate", "RELATED,ESTABLISHED",
   301  		"-j", "ACCEPT",
   302  	)
   303  
   304  	// CLI: -A ISTIO_PRERT ! -d 127.0.0.1/32 -p tcp -m mark ! --mark 0x539/0xfff -j TPROXY --on-port <INPLAINPORT> --on-ip 127.0.0.1 --tproxy-mark 0x111/0xfff
   305  	//
   306  	// DESC: Anything that is not bound for localhost and does not have the mark, TPROXY to ztunnel inbound plaintext port <INPLAINPORT>
   307  	iptablesBuilder.AppendVersionedRule("127.0.0.1/32", "::1/128",
   308  		iptableslog.UndefinedCommand, ChainInpodPrerouting, iptablesconstants.MANGLE,
   309  		"!", "-d", iptablesconstants.IPVersionSpecific,
   310  		"-p", "tcp",
   311  		"-m", "mark", "!",
   312  		"--mark", inpodMark,
   313  		"-j", "TPROXY",
   314  		"--on-port", fmt.Sprintf("%d", ZtunnelInboundPlaintextPort),
   315  		// "--on-ip", "127.0.0.1",
   316  		"--tproxy-mark", inpodTproxyMark,
   317  	)
   318  
   319  	// CLI: -A ISTIO_OUTPUT -m connmark --mark 0x111/0xfff -j CONNMARK --restore-mark --nfmask 0xffffffff --ctmask 0xffffffff
   320  	//
   321  	// DESC: Propagate/restore connmark (if we had one) for outbound
   322  	iptablesBuilder.AppendRule(
   323  		iptableslog.UndefinedCommand, ChainInpodOutput, iptablesconstants.MANGLE,
   324  		"-m", "connmark",
   325  		"--mark", inpodTproxyMark,
   326  		"-j", "CONNMARK",
   327  		"--restore-mark",
   328  		"--nfmask", fmt.Sprintf("0x%x", InpodRestoreMask),
   329  		"--ctmask", fmt.Sprintf("0x%x", InpodRestoreMask),
   330  	)
   331  
   332  	// CLI: -A ISTIO_OUTPUT ! -o lo -p udp -m udp --dport 53 -j REDIRECT --to-port 15053
   333  	//
   334  	// DESC: If this is a UDP DNS request to a non-localhost resolver, send it to ztunnel DNS proxy port
   335  	if redirectDNS {
   336  		iptablesBuilder.AppendRule(
   337  			iptableslog.UndefinedCommand, ChainInpodOutput, iptablesconstants.NAT,
   338  			"!", "-o", "lo",
   339  			"-p", "udp",
   340  			"-m", "udp",
   341  			"--dport", "53",
   342  			"-j", "REDIRECT",
   343  			"--to-port", fmt.Sprintf("%d", DNSCapturePort),
   344  		)
   345  	}
   346  
   347  	// CLI: -A ISTIO_OUTPUT -p tcp -m mark --mark 0x111/0xfff -j ACCEPT
   348  	//
   349  	// DESC: If this is outbound and has our mark, let it go.
   350  	iptablesBuilder.AppendRule(
   351  		iptableslog.UndefinedCommand, ChainInpodOutput, iptablesconstants.NAT,
   352  		"-p", "tcp",
   353  		"-m", "mark",
   354  		"--mark", inpodTproxyMark,
   355  		"-j", "ACCEPT",
   356  	)
   357  
   358  	// Do not redirect app calls to back itself via Ztunnel when using the endpoint address
   359  	// e.g. appN => appN by lo
   360  	iptablesBuilder.AppendVersionedRule("127.0.0.1/32", "::1/128",
   361  		iptableslog.UndefinedCommand, ChainInpodOutput, iptablesconstants.NAT,
   362  		"!", "-d", iptablesconstants.IPVersionSpecific,
   363  		"-o", "lo",
   364  		"-j", "ACCEPT",
   365  	)
   366  
   367  	// CLI: -A ISTIO_OUTPUT ! -d 127.0.0.1/32 -p tcp -m mark ! --mark 0x539/0xfff -j REDIRECT --to-ports <OUTPORT>
   368  	//
   369  	// DESC: If this is outbound, not bound for localhost, and does not have our packet mark, redirect to ztunnel proxy <OUTPORT>
   370  	iptablesBuilder.AppendVersionedRule("127.0.0.1/32", "::1/128",
   371  		iptableslog.UndefinedCommand, ChainInpodOutput, iptablesconstants.NAT,
   372  		"!", "-d", iptablesconstants.IPVersionSpecific,
   373  		"-p", "tcp",
   374  		"-m", "mark", "!",
   375  		"--mark", inpodMark,
   376  		"-j", "REDIRECT",
   377  		"--to-ports", fmt.Sprintf("%d", ZtunnelOutboundPort),
   378  	)
   379  	return iptablesBuilder
   380  }
   381  
   382  func (cfg *IptablesConfigurator) executeCommands(iptablesBuilder *builder.IptablesRuleBuilder) error {
   383  	var execErrs []error
   384  
   385  	if cfg.cfg.RestoreFormat {
   386  		// Execute iptables-restore
   387  		execErrs = append(execErrs, cfg.executeIptablesRestoreCommand(iptablesBuilder, &cfg.iptV, true))
   388  		// Execute ip6tables-restore
   389  		if cfg.cfg.EnableIPv6 {
   390  			execErrs = append(execErrs, cfg.executeIptablesRestoreCommand(iptablesBuilder, &cfg.ipt6V, false))
   391  		}
   392  	} else {
   393  		// Execute iptables commands
   394  		execErrs = append(execErrs,
   395  			cfg.executeIptablesCommands(&cfg.iptV, iptablesBuilder.BuildV4()))
   396  		// Execute ip6tables commands
   397  		if cfg.cfg.EnableIPv6 {
   398  			execErrs = append(execErrs,
   399  				cfg.executeIptablesCommands(&cfg.ipt6V, iptablesBuilder.BuildV6()))
   400  		}
   401  	}
   402  	return errors.Join(execErrs...)
   403  }
   404  
   405  func (cfg *IptablesConfigurator) executeIptablesCommands(iptVer *dep.IptablesVersion, args [][]string) error {
   406  	var iptErrs []error
   407  	for _, argSet := range args {
   408  		iptErrs = append(iptErrs, cfg.ext.Run(iptablesconstants.IPTables, iptVer, nil, argSet...))
   409  	}
   410  	return errors.Join(iptErrs...)
   411  }
   412  
   413  func (cfg *IptablesConfigurator) executeIptablesRestoreCommand(iptablesBuilder *builder.IptablesRuleBuilder, iptVer *dep.IptablesVersion, isIpv4 bool) error {
   414  	cmd := iptablesconstants.IPTablesRestore
   415  	var data string
   416  
   417  	if isIpv4 {
   418  		data = iptablesBuilder.BuildV4Restore()
   419  	} else {
   420  		data = iptablesBuilder.BuildV6Restore()
   421  	}
   422  
   423  	log.Infof("Running %s with the following input:\n%v", iptVer.CmdToString(cmd), strings.TrimSpace(data))
   424  	// --noflush to prevent flushing/deleting previous contents from table
   425  	return cfg.ext.Run(cmd, iptVer, strings.NewReader(data), "--noflush", "-v")
   426  }
   427  
// addLoopbackRoute delegates to the netlink dependencies to add the loopback
// routes for the current config, returning whatever error they report.
func (cfg *IptablesConfigurator) addLoopbackRoute() error {
	return cfg.nlDeps.AddLoopbackRoutes(cfg.cfg)
}
   431  
// delLoopbackRoute delegates to the netlink dependencies to remove the loopback
// routes for the current config, returning whatever error they report.
func (cfg *IptablesConfigurator) delLoopbackRoute() error {
	return cfg.nlDeps.DelLoopbackRoutes(cfg.cfg)
}
   435  
// addInpodMarkIPRule delegates to the netlink dependencies to add the inpod
// mark IP rule for the current config, returning whatever error they report.
func (cfg *IptablesConfigurator) addInpodMarkIPRule() error {
	return cfg.nlDeps.AddInpodMarkIPRule(cfg.cfg)
}
   439  
// delInpodMarkIPRule delegates to the netlink dependencies to remove the inpod
// mark IP rule for the current config, returning whatever error they report.
func (cfg *IptablesConfigurator) delInpodMarkIPRule() error {
	return cfg.nlDeps.DelInpodMarkIPRule(cfg.cfg)
}
   443  
   444  // Setup iptables rules for HOST netnamespace. Ideally this should be an idempotent function.
   445  // NOTE that this expects to be run from within the HOST network namespace!
   446  //
   447  // We need to do this specifically to be able to distinguish between traffic coming from different node-level processes
   448  // via the nodeIP
   449  // - kubelet (node-local healthchecks, which we do not capture)
   450  // - kube-proxy (fowarded/proxied traffic from LoadBalancer-backed services, potentially with public IPs, which we must capture)
   451  func (cfg *IptablesConfigurator) CreateHostRulesForHealthChecks(hostSNATIP, hostSNATIPV6 *netip.Addr) error {
   452  	// Append our rules here
   453  	builder := cfg.appendHostRules(hostSNATIP, hostSNATIPV6)
   454  
   455  	log.Info("Adding host netnamespace iptables rules")
   456  
   457  	if err := cfg.executeCommands(builder); err != nil {
   458  		log.Errorf("failed to add host netnamespace iptables rules: %v", err)
   459  		return err
   460  	}
   461  	return nil
   462  }
   463  
// DeleteHostRules removes the host-side rules on a best-effort basis: it
// returns nothing, and individual deletion failures are only logged by
// executeHostDeleteCommands.
func (cfg *IptablesConfigurator) DeleteHostRules() {
	log.Debug("Attempting to delete hostside iptables rules (if they exist)")

	cfg.executeHostDeleteCommands()
}
   469  
   470  func (cfg *IptablesConfigurator) executeHostDeleteCommands() {
   471  	optionalDeleteCmds := [][]string{
   472  		// delete our main jump in the host ruleset. If it's not there, NBD.
   473  		{"-t", iptablesconstants.NAT, "-D", iptablesconstants.POSTROUTING, "-j", ChainHostPostrouting},
   474  		// flush-then-delete our created chains
   475  		// these sometimes fail due to "Device or resource busy" - again NBD.
   476  		{"-t", iptablesconstants.NAT, "-F", ChainHostPostrouting},
   477  		{"-t", iptablesconstants.NAT, "-X", ChainHostPostrouting},
   478  	}
   479  
   480  	// iptablei seems like a reasonable pluralization of iptables
   481  	iptablesVariant := []dep.IptablesVersion{}
   482  	iptablesVariant = append(iptablesVariant, cfg.iptV)
   483  
   484  	if cfg.cfg.EnableIPv6 {
   485  		iptablesVariant = append(iptablesVariant, cfg.ipt6V)
   486  	}
   487  	for _, iptVer := range iptablesVariant {
   488  		for _, cmd := range optionalDeleteCmds {
   489  			err := cfg.ext.Run(iptablesconstants.IPTables, &iptVer, nil, cmd...)
   490  			if err != nil {
   491  				log.Debugf("ignoring error deleting optional iptables rule: %v", err)
   492  			}
   493  		}
   494  	}
   495  }
   496  
   497  func (cfg *IptablesConfigurator) appendHostRules(hostSNATIP, hostSNATIPV6 *netip.Addr) *builder.IptablesRuleBuilder {
   498  	log.Info("configuring host-level iptables rules (healthchecks, etc)")
   499  
   500  	iptablesBuilder := builder.NewIptablesRuleBuilder(ipbuildConfig(cfg.cfg))
   501  
   502  	// For easier cleanup, insert a jump into an owned chain
   503  	// -A POSTROUTING -p tcp -j ISTIO_POSTRT
   504  	iptablesBuilder.AppendRule(
   505  		iptableslog.UndefinedCommand, iptablesconstants.POSTROUTING, iptablesconstants.NAT,
   506  		"-j", ChainHostPostrouting,
   507  	)
   508  
   509  	// TODO BML I don't think we need UDP? TCP healthcheck redir should catch everything.
   510  
   511  	// This is effectively an analog for Istio's old-style podSpec-based health check rewrites.
   512  	// Before Istio would update the pod manifest to rewrite healthchecks to go to sidecar Envoy port 15021,
   513  	// so that it could distinguish things that can be unauthenticated (healthchecks) from other kinds of node traffic
   514  	// (e.g. LoadBalanced Service packets, etc) that need to be authenticated/captured/proxied.
   515  	//
   516  	// We want to do the same thing in ambient but can't rely on podSpec injection. So, do effectively the same thing,
   517  	// but with iptables rules - use `--socket-exists` as a proxy for "is this a forwarded packet" vs "is this originating from
   518  	// a local node socket". If the latter, outside the pod in the host netns, redirect that traffic to a hardcoded/custom proxy
   519  	// healthcheck port, just like we used to. Otherwise, we can't assume it's local-node privileged traffic, and will capture and process it normally.
   520  	//
   521  	// All this is necessary because quite often apps use the same port for healthchecks as they do for reg. traffic, and
   522  	// we cannot make assumptions there.
   523  
   524  	// -A OUTPUT -m owner --socket-exists -p tcp -m set --match-set istio-inpod-probes dst,dst -j SNAT --to-source 169.254.7.127
   525  	iptablesBuilder.AppendRuleV4(
   526  		iptableslog.UndefinedCommand, ChainHostPostrouting, iptablesconstants.NAT,
   527  		"-m", "owner",
   528  		"--socket-exists",
   529  		"-p", "tcp",
   530  		"-m", "set",
   531  		"--match-set", fmt.Sprintf(ipset.V4Name, ProbeIPSet),
   532  		"dst",
   533  		"-j", "SNAT",
   534  		"--to-source", hostSNATIP.String(),
   535  	)
   536  
   537  	// For V6 we have to use a different set and a different SNAT IP
   538  	if cfg.cfg.EnableIPv6 {
   539  		iptablesBuilder.AppendRuleV6(
   540  			iptableslog.UndefinedCommand, ChainHostPostrouting, iptablesconstants.NAT,
   541  			"-m", "owner",
   542  			"--socket-exists",
   543  			"-p", "tcp",
   544  			"-m", "set",
   545  			"--match-set", fmt.Sprintf(ipset.V6Name, ProbeIPSet),
   546  			"dst",
   547  			"-j", "SNAT",
   548  			"--to-source", hostSNATIPV6.String(),
   549  		)
   550  	}
   551  
   552  	return iptablesBuilder
   553  }