github.com/cilium/cilium@v1.16.2/pkg/datapath/iptables/iptables.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package iptables 5 6 import ( 7 "bufio" 8 "context" 9 "fmt" 10 "net" 11 "net/netip" 12 "os" 13 "regexp" 14 "strconv" 15 "strings" 16 17 "github.com/blang/semver/v4" 18 "github.com/cilium/hive/cell" 19 "github.com/cilium/hive/job" 20 "github.com/cilium/statedb" 21 "github.com/mattn/go-shellwords" 22 "github.com/sirupsen/logrus" 23 "github.com/vishvananda/netlink" 24 25 "github.com/cilium/cilium/daemon/cmd/cni" 26 "github.com/cilium/cilium/pkg/byteorder" 27 "github.com/cilium/cilium/pkg/cidr" 28 "github.com/cilium/cilium/pkg/command/exec" 29 "github.com/cilium/cilium/pkg/datapath/iptables/ipset" 30 "github.com/cilium/cilium/pkg/datapath/linux/linux_defaults" 31 "github.com/cilium/cilium/pkg/datapath/linux/modules" 32 "github.com/cilium/cilium/pkg/datapath/linux/route" 33 "github.com/cilium/cilium/pkg/datapath/linux/sysctl" 34 "github.com/cilium/cilium/pkg/datapath/tables" 35 "github.com/cilium/cilium/pkg/defaults" 36 "github.com/cilium/cilium/pkg/fqdn/proxy/ipfamily" 37 ipamOption "github.com/cilium/cilium/pkg/ipam/option" 38 lb "github.com/cilium/cilium/pkg/loadbalancer" 39 "github.com/cilium/cilium/pkg/lock" 40 "github.com/cilium/cilium/pkg/logging/logfields" 41 "github.com/cilium/cilium/pkg/node" 42 "github.com/cilium/cilium/pkg/option" 43 "github.com/cilium/cilium/pkg/time" 44 "github.com/cilium/cilium/pkg/versioncheck" 45 ) 46 47 const ( 48 oldCiliumPrefix = "OLD_" 49 ciliumInputChain = "CILIUM_INPUT" 50 ciliumOutputChain = "CILIUM_OUTPUT" 51 ciliumOutputRawChain = "CILIUM_OUTPUT_raw" 52 ciliumPostNatChain = "CILIUM_POST_nat" 53 ciliumOutputNatChain = "CILIUM_OUTPUT_nat" 54 ciliumPreNatChain = "CILIUM_PRE_nat" 55 ciliumPostMangleChain = "CILIUM_POST_mangle" 56 ciliumPreMangleChain = "CILIUM_PRE_mangle" 57 ciliumPreRawChain = "CILIUM_PRE_raw" 58 ciliumForwardChain = "CILIUM_FORWARD" 59 feederDescription = "cilium-feeder:" 60 xfrmDescription = "cilium-xfrm-notrack:" 61 ) 62 63 // Minimum iptables versions supporting the -w and -w<seconds> flags 64 var ( 65 isWaitMinVersion = versioncheck.MustCompile(">=1.4.20") 66 isWaitSecondsMinVersion = versioncheck.MustCompile(">=1.4.22") 67 noTrackPorts = func(port uint16) []*lb.L4Addr { 68 return []*lb.L4Addr{ 69 { 70 Protocol: lb.TCP, 71 Port: port, 72 }, 73 { 74 Protocol: lb.UDP, 75 Port: port, 76 }, 77 } 78 } 79 ) 80 81 const ( 82 waitString = "-w" 83 ) 84 85 type runnable interface { 86 runProgOutput(args []string) (string, error) 87 runProg(args []string) error 88 } 89 90 type iptablesInterface interface { 91 runnable 92 93 getProg() string 94 getIpset() string 95 } 96 97 type ipt struct { 98 prog string 99 ipset string 100 waitArgs []string 101 } 102 103 func (ipt *ipt) initArgs(ctx context.Context, waitSeconds int) { 104 v, err := ipt.getVersion(ctx) 105 if err == nil { 106 switch { 107 case isWaitSecondsMinVersion(v): 108 ipt.waitArgs = []string{waitString, fmt.Sprintf("%d", waitSeconds)} 109 case isWaitMinVersion(v): 110 ipt.waitArgs = []string{waitString} 111 } 112 } 113 } 114 115 // package name is iptables so we use ip4tables internally for "iptables" 116 var ( 117 ip4tables = &ipt{prog: "iptables", ipset: ipset.CiliumNodeIPSetV4} 118 ip6tables = &ipt{prog: "ip6tables", ipset: ipset.CiliumNodeIPSetV6} 119 ) 120 121 func (ipt *ipt) getProg() string { 122 return ipt.prog 123 } 124 125 func (ipt *ipt) getIpset() string { 126 return ipt.ipset 127 } 128 129 func (ipt *ipt) getVersion(ctx context.Context) (semver.Version, error) { 130 b, err := exec.CommandContext(ctx, ipt.prog, "--version").CombinedOutput(log, false) 131 if err != nil { 132 return semver.Version{}, err 133 } 134 v := regexp.MustCompile(`v([0-9]+(\.[0-9]+)+)`) 135 vString := v.FindStringSubmatch(string(b)) 136 if vString == nil { 137 return semver.Version{}, fmt.Errorf("no iptables version found in string: %s", string(b)) 138 } 139 return versioncheck.Version(vString[1]) 140 } 141 142 func (ipt *ipt) runProgOutput(args []string) (string, error) { 143 fullCommand := fmt.Sprintf("%s %s", ipt.getProg(), strings.Join(args, " ")) 144 145 log.Debugf("Running '%s' command", fullCommand) 146 147 // Add wait argument to deal with concurrent calls that would fail otherwise 148 iptArgs := make([]string, 0, len(ipt.waitArgs)+len(args)) 149 iptArgs = append(iptArgs, ipt.waitArgs...) 150 iptArgs = append(iptArgs, args...) 151 out, err := exec.WithTimeout(defaults.ExecTimeout, ipt.prog, iptArgs...).Output(log, false) 152 153 if err != nil { 154 return "", fmt.Errorf("unable to run '%s' iptables command: %w", fullCommand, err) 155 } 156 return string(out), nil 157 } 158 159 func (ipt *ipt) runProg(args []string) error { 160 _, err := ipt.runProgOutput(args) 161 return err 162 } 163 164 func reverseRule(rule string) ([]string, error) { 165 if strings.HasPrefix(rule, "-A") { 166 // From: -A POSTROUTING -m comment [...] 167 // To: -D POSTROUTING -m comment [...] 168 return shellwords.Parse(strings.Replace(rule, "-A", "-D", 1)) 169 } 170 171 if strings.HasPrefix(rule, "-I") { 172 // From: -I POSTROUTING -m comment [...] 173 // To: -D POSTROUTING -m comment [...] 174 return shellwords.Parse(strings.Replace(rule, "-I", "-D", 1)) 175 } 176 177 return []string{}, nil 178 } 179 180 func ruleReferencesDisabledChain(disableIptablesFeederRules []string, rule string) (bool, string) { 181 for _, disabledChain := range disableIptablesFeederRules { 182 if strings.Contains(rule, " "+strings.ToUpper(disabledChain)+" ") { 183 return true, disabledChain 184 } 185 } 186 187 return false, "" 188 } 189 190 func isDisabledChain(disableIptablesFeederRules []string, chain string) bool { 191 for _, disabledChain := range disableIptablesFeederRules { 192 if strings.EqualFold(chain, disabledChain) { 193 return true 194 } 195 } 196 197 return false 198 } 199 200 func (m *Manager) removeCiliumRules(table string, prog runnable, match string) error { 201 rules, err := prog.runProgOutput([]string{"-t", table, "-S"}) 202 if err != nil { 203 return err 204 } 205 206 scanner := bufio.NewScanner(strings.NewReader(rules)) 207 for scanner.Scan() { 208 rule := scanner.Text() 209 210 // All rules installed by cilium either belong to a chain with 211 // the name CILIUM_ or call a chain with the name CILIUM_: 212 // -A CILIUM_FORWARD -o cilium_host -m comment --comment "cilium: any->cluster on cilium_host forward accept" -j ACCEPT 213 // -A POSTROUTING -m comment --comment "cilium-feeder: CILIUM_POST" -j CILIUM_POST 214 if !strings.Contains(rule, match) { 215 continue 216 } 217 218 // Temporary fix while Iptables is upgraded to >= 1.8.5 219 // (See GH-20884). 220 // 221 // The version currently shipped with Cilium (1.8.4) does not 222 // support the deletion of NOTRACK rules, so we will just ignore 223 // them here and let the agent remove them when it deletes the 224 // entire chain. 225 if strings.Contains(rule, "-j NOTRACK") { 226 continue 227 } 228 229 // do not remove feeder for chains that are set to be disabled 230 // ie catch the beginning of the rule like -A POSTROUTING to match it against 231 // disabled chains 232 if skip, disabledChain := ruleReferencesDisabledChain(m.cfg.DisableIptablesFeederRules, rule); skip { 233 log.WithField(logfields.Chain, disabledChain).Info("Skipping the removal of feeder chain") 234 continue 235 } 236 237 reversedRule, err := reverseRule(rule) 238 if err != nil { 239 log.WithError(err).WithField(logfields.Object, rule).Warnf("Unable to parse %s rule into slice. Leaving rule behind.", prog) 240 continue 241 } 242 243 if len(reversedRule) > 0 { 244 deleteRule := append([]string{"-t", table}, reversedRule...) 245 if err := prog.runProg(deleteRule); err != nil { 246 return err 247 } 248 } 249 } 250 251 return nil 252 } 253 254 // Manager manages the iptables-related configuration for Cilium. 255 type Manager struct { 256 // This lock ensures there are no concurrent executions of the doInstallRules() and 257 // GetProxyPort() methods. 258 lock lock.Mutex 259 260 logger logrus.FieldLogger 261 modulesMgr *modules.Manager 262 sysctl sysctl.Sysctl 263 264 cfg Config 265 sharedCfg SharedConfig 266 267 // anything that can trigger a reconciliation 268 reconcilerParams reconcilerParams 269 270 haveIp6tables bool 271 haveSocketMatch bool 272 haveBPFSocketAssign bool 273 ipEarlyDemuxDisabled bool 274 cniConfigManager cni.CNIConfigManager 275 } 276 277 type reconcilerParams struct { 278 localNodeStore *node.LocalNodeStore 279 db *statedb.DB 280 devices statedb.Table[*tables.Device] 281 proxies chan reconciliationRequest[proxyInfo] 282 addNoTrackPod chan reconciliationRequest[noTrackPodInfo] 283 delNoTrackPod chan reconciliationRequest[noTrackPodInfo] 284 } 285 286 type params struct { 287 cell.In 288 289 Logger logrus.FieldLogger 290 Lifecycle cell.Lifecycle 291 292 ModulesMgr *modules.Manager 293 Sysctl sysctl.Sysctl 294 CNIConfigManager cni.CNIConfigManager 295 LocalNodeStore *node.LocalNodeStore 296 297 Cfg Config 298 SharedCfg SharedConfig 299 300 JobGroup job.Group 301 DB *statedb.DB 302 Devices statedb.Table[*tables.Device] 303 } 304 305 func newIptablesManager(p params) *Manager { 306 iptMgr := &Manager{ 307 logger: p.Logger, 308 modulesMgr: p.ModulesMgr, 309 sysctl: p.Sysctl, 310 cfg: p.Cfg, 311 sharedCfg: p.SharedCfg, 312 reconcilerParams: reconcilerParams{ 313 localNodeStore: p.LocalNodeStore, 314 db: p.DB, 315 devices: p.Devices, 316 proxies: make(chan reconciliationRequest[proxyInfo]), 317 addNoTrackPod: make(chan reconciliationRequest[noTrackPodInfo]), 318 delNoTrackPod: make(chan reconciliationRequest[noTrackPodInfo]), 319 }, 320 haveIp6tables: true, 321 cniConfigManager: p.CNIConfigManager, 322 } 323 324 argsInit := make(chan struct{}) 325 326 // init iptables/ip6tables wait arguments before using them in the reconciler or in the manager (e.g: GetProxyPorts) 327 p.Lifecycle.Append(cell.Hook{ 328 OnStart: func(ctx cell.HookContext) error { 329 defer close(argsInit) 330 ip4tables.initArgs(ctx, int(p.Cfg.IPTablesLockTimeout/time.Second)) 331 if p.SharedCfg.EnableIPv6 { 332 ip6tables.initArgs(ctx, int(p.Cfg.IPTablesLockTimeout/time.Second)) 333 } 334 return nil 335 }, 336 }) 337 338 p.Lifecycle.Append(iptMgr) 339 340 p.JobGroup.Add( 341 job.OneShot("iptables-reconciliation-loop", func(ctx context.Context, health cell.Health) error { 342 // each job runs in an independent goroutine, so we need to explicitly wait for 343 // iptables arguments initialization before starting the reconciler. 344 <-argsInit 345 return reconciliationLoop( 346 ctx, p.Logger, health, 347 iptMgr.sharedCfg.InstallIptRules, &iptMgr.reconcilerParams, 348 iptMgr.doInstallRules, 349 iptMgr.doInstallProxyRules, 350 iptMgr.installNoTrackRules, 351 iptMgr.removeNoTrackRules, 352 ) 353 }), 354 ) 355 356 return iptMgr 357 } 358 359 // Start initializes the iptables manager and checks for iptables kernel modules availability. 360 func (m *Manager) Start(ctx cell.HookContext) error { 361 if os.Getenv("CILIUM_PREPEND_IPTABLES_CHAIN") != "" { 362 m.logger.Warning("CILIUM_PREPEND_IPTABLES_CHAIN env var has been deprecated. Please use 'CILIUM_PREPEND_IPTABLES_CHAINS' " + 363 "env var or '--prepend-iptables-chains' command line flag instead") 364 } 365 366 if err := enableIPForwarding(m.sysctl, m.sharedCfg.EnableIPv6); err != nil { 367 m.logger.WithError(err).Warning("enabling IP forwarding via sysctl failed") 368 } 369 370 if m.sharedCfg.EnableIPSec && m.sharedCfg.EnableL7Proxy { 371 m.disableIPEarlyDemux() 372 } 373 374 if err := m.modulesMgr.FindOrLoadModules( 375 "ip_tables", "iptable_nat", "iptable_mangle", "iptable_raw", "iptable_filter", 376 ); err != nil { 377 m.logger.WithError(err).Warning( 378 "iptables modules could not be initialized. It probably means that iptables is not available on this system") 379 } 380 381 if err := m.modulesMgr.FindOrLoadModules( 382 "ip6_tables", "ip6table_mangle", "ip6table_raw", "ip6table_filter", 383 ); err != nil { 384 if m.sharedCfg.EnableIPv6 { 385 return fmt.Errorf( 386 "IPv6 is enabled and ip6tables modules initialization failed: %w "+ 387 "(try disabling IPv6 in Cilium or loading ip6_tables, ip6table_mangle, ip6table_raw and ip6table_filter kernel modules)", err) 388 } 389 m.logger.WithError(err).Debug( 390 "ip6tables kernel modules could not be loaded, so IPv6 cannot be used") 391 m.haveIp6tables = false 392 } else { 393 ipv6Disabled, err := os.ReadFile("/sys/module/ipv6/parameters/disable") 394 if err != nil { 395 if m.sharedCfg.EnableIPv6 { 396 return fmt.Errorf( 397 "IPv6 is enabled but IPv6 kernel support probing failed with: %w", err) 398 } 399 m.logger.WithError(err).Warning( 400 "Unable to read /sys/module/ipv6/parameters/disable, disabling IPv6 iptables support") 401 m.haveIp6tables = false 402 } else if strings.TrimSuffix(string(ipv6Disabled), "\n") == "1" { 403 m.logger.Debug( 404 "Kernel does not support IPv6, disabling IPv6 iptables support") 405 m.haveIp6tables = false 406 } 407 } 408 409 if err := m.modulesMgr.FindOrLoadModules("xt_socket"); err != nil { 410 if !m.sharedCfg.TunnelingEnabled { 411 // xt_socket module is needed to circumvent an explicit drop in ip_forward() 412 // logic for packets for which a local socket is found by ip early 413 // demux. xt_socket performs a local socket match and sets an skb mark on 414 // match, which will divert the packet to the local stack using our policy 415 // routing rule, thus avoiding being processed by ip_forward() at all. 416 // 417 // If xt_socket module does not exist we can disable ip early demux to to 418 // avoid the explicit drop in ip_forward(). This is not needed in tunneling 419 // modes, as then we'll set the skb mark in the bpf logic before the policy 420 // routing stage so that the packet is routed locally instead of being 421 // forwarded by ip_forward(). 422 // 423 // We would not need the xt_socket at all if the datapath universally would 424 // set the "to proxy" skb mark bits on before the packet hits policy routing 425 // stage. Currently this is not true for endpoint routing modes. 426 m.logger.WithError(err).Warning("xt_socket kernel module could not be loaded") 427 428 if m.sharedCfg.EnableXTSocketFallback { 429 m.disableIPEarlyDemux() 430 } 431 } 432 } else { 433 m.haveSocketMatch = true 434 } 435 m.haveBPFSocketAssign = m.sharedCfg.EnableBPFTProxy 436 437 return nil 438 } 439 440 func (m *Manager) Stop(ctx cell.HookContext) error { 441 close(m.reconcilerParams.proxies) 442 close(m.reconcilerParams.addNoTrackPod) 443 close(m.reconcilerParams.delNoTrackPod) 444 return nil 445 } 446 447 func (m *Manager) disableIPEarlyDemux() { 448 if m.ipEarlyDemuxDisabled { 449 return 450 } 451 452 disabled := m.sysctl.Disable([]string{"net", "ipv4", "ip_early_demux"}) == nil 453 if disabled { 454 m.ipEarlyDemuxDisabled = true 455 m.logger.Info("Disabled ip_early_demux to allow proxy redirection with original source/destination address without xt_socket support also in non-tunneled datapath modes.") 456 } else { 457 m.logger.Warning("Could not disable ip_early_demux, traffic redirected due to an HTTP policy or visibility may be dropped unexpectedly") 458 } 459 } 460 461 // SupportsOriginalSourceAddr tells if an L7 proxy can use POD's original source address and port in 462 // the upstream connection to allow the destination to properly derive the source security ID from 463 // the source IP address. 464 func (m *Manager) SupportsOriginalSourceAddr() bool { 465 // Original source address use works if xt_socket match is supported, or if ip early demux 466 // is disabled 467 return m.haveSocketMatch || m.ipEarlyDemuxDisabled 468 } 469 470 // removeRules removes iptables rules installed by Cilium. 471 func (m *Manager) removeRules(prefix string) error { 472 // Set of tables that have had iptables rules in any Cilium version 473 tables := []string{"nat", "mangle", "raw", "filter"} 474 for _, t := range tables { 475 if err := m.removeCiliumRules(t, ip4tables, prefix+"CILIUM_"); err != nil { 476 return err 477 } 478 479 if m.haveIp6tables { 480 if err := m.removeCiliumRules(t, ip6tables, prefix+"CILIUM_"); err != nil { 481 return err 482 } 483 } 484 } 485 486 for _, c := range ciliumChains { 487 c.name = prefix + c.name 488 if err := c.remove(true, m.haveIp6tables); err != nil { 489 return err 490 } 491 } 492 493 return nil 494 } 495 496 // renameChains renames iptables chains installed by Cilium. 497 func (m *Manager) renameChains(prefix string) error { 498 for _, c := range ciliumChains { 499 if err := c.rename(true, m.haveIp6tables, prefix+c.name); err != nil { 500 return err 501 } 502 } 503 504 return nil 505 } 506 507 func (m *Manager) inboundProxyRedirectRule(cmd string) []string { 508 // Mark host proxy transparent connections to be routed to the local stack. 509 // This comes before the TPROXY rules in the chain, and setting the mark 510 // without the proxy port number will make the TPROXY rule to not match, 511 // as we do not want to try to tproxy packets that are going to the stack 512 // already. 513 // This rule is needed for couple of reasons: 514 // 1. route return traffic to the proxy 515 // 2. route original direction traffic that would otherwise be intercepted 516 // by ip_early_demux 517 // Explicitly support chaining Envoy listeners via the loopback device by 518 // excluding traffic for the loopback device. 519 toProxyMark := fmt.Sprintf("%#08x", linux_defaults.MagicMarkIsToProxy) 520 matchFromIPSecEncrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkEncrypt, linux_defaults.RouteMarkMask) 521 matchProxyToWorld := fmt.Sprintf("%#08x/%#08x", linux_defaults.MarkProxyToWorld, linux_defaults.RouteMarkMask) 522 return []string{ 523 "-t", "mangle", 524 cmd, ciliumPreMangleChain, 525 "-m", "socket", "--transparent", 526 "!", "-o", "lo", 527 "-m", "mark", "!", "--mark", matchFromIPSecEncrypt, 528 "-m", "mark", "!", "--mark", matchProxyToWorld, 529 "-m", "comment", "--comment", "cilium: any->pod redirect proxied traffic to host proxy", 530 "-j", "MARK", 531 "--set-mark", toProxyMark} 532 } 533 534 func (m *Manager) iptProxyRule(rules string, prog runnable, l4proto, ip string, proxyPort uint16, name string) error { 535 // Match 536 port := uint32(byteorder.HostToNetwork16(proxyPort)) << 16 537 markMatch := fmt.Sprintf("%#x", linux_defaults.MagicMarkIsToProxy|port) 538 // TPROXY params 539 tProxyMark := fmt.Sprintf("%#x", linux_defaults.MagicMarkIsToProxy) 540 tProxyPort := fmt.Sprintf("%d", proxyPort) 541 542 existingRuleRegex := regexp.MustCompile(fmt.Sprintf("-A CILIUM_PRE_mangle -p %s -m mark --mark %s.*--on-ip %s", l4proto, markMatch, ip)) 543 if existingRuleRegex.MatchString(rules) { 544 return nil 545 } 546 547 rule := []string{ 548 "-t", "mangle", 549 "-A", ciliumPreMangleChain, 550 "-p", l4proto, 551 "-m", "mark", "--mark", markMatch, 552 "-m", "comment", "--comment", "cilium: TPROXY to host " + name + " proxy", 553 "-j", "TPROXY", 554 "--tproxy-mark", tProxyMark, 555 "--on-ip", ip, 556 "--on-port", tProxyPort, 557 } 558 return prog.runProg(rule) 559 } 560 561 func (m *Manager) installStaticProxyRules() error { 562 // match traffic to a proxy (upper 16 bits has the proxy port, which is masked out) 563 matchToProxy := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIsToProxy, linux_defaults.MagicMarkHostMask) 564 // proxy return traffic has 0 ID in the mask 565 matchProxyReply := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIsProxy, linux_defaults.MagicMarkProxyNoIDMask) 566 // proxy forward traffic 567 matchProxyForward := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkEgress, linux_defaults.MagicMarkHostMask) 568 // L7 proxy upstream return traffic has Endpoint ID in the mask 569 matchL7ProxyUpstream := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIsProxyEPID, linux_defaults.MagicMarkProxyMask) 570 // match traffic from a proxy (either in forward or in return direction) 571 matchFromProxy := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIsProxy, linux_defaults.MagicMarkProxyMask) 572 573 if m.sharedCfg.EnableIPv4 { 574 // No conntrack for traffic to proxy 575 if err := ip4tables.runProg([]string{ 576 "-t", "raw", 577 "-A", ciliumPreRawChain, 578 "-m", "mark", "--mark", matchToProxy, 579 "-m", "comment", "--comment", "cilium: NOTRACK for proxy traffic", 580 "-j", "CT", "--notrack"}); err != nil { 581 return err 582 } 583 584 // Explicit ACCEPT for the proxy traffic. Needed when the INPUT defaults to DROP. 585 // Matching needs to be the same as for the NOTRACK rule above. 586 if err := ip4tables.runProg([]string{ 587 "-t", "filter", 588 "-A", ciliumInputChain, 589 "-m", "mark", "--mark", matchToProxy, 590 "-m", "comment", "--comment", "cilium: ACCEPT for proxy traffic", 591 "-j", "ACCEPT"}); err != nil { 592 return err 593 } 594 595 // No conntrack for proxy return traffic that is heading to lxc+ 596 if err := ip4tables.runProg([]string{ 597 "-t", "raw", 598 "-A", ciliumOutputRawChain, 599 "-o", "lxc+", 600 "-m", "mark", "--mark", matchProxyReply, 601 "-m", "comment", "--comment", "cilium: NOTRACK for proxy return traffic", 602 "-j", "CT", "--notrack"}); err != nil { 603 return err 604 } 605 606 // No conntrack for proxy return traffic that is heading to cilium_host 607 if err := ip4tables.runProg([]string{ 608 "-t", "raw", 609 "-A", ciliumOutputRawChain, 610 "-o", defaults.HostDevice, 611 "-m", "mark", "--mark", matchProxyReply, 612 "-m", "comment", "--comment", "cilium: NOTRACK for proxy return traffic", 613 "-j", "CT", "--notrack"}); err != nil { 614 return err 615 } 616 617 // No conntrack for proxy forward traffic that is heading to cilium_host 618 if option.Config.EnableIPSec { 619 if err := ip4tables.runProg([]string{ 620 "-t", "raw", 621 "-A", ciliumOutputRawChain, 622 "-o", defaults.HostDevice, 623 "-m", "mark", "--mark", matchProxyForward, 624 "-m", "comment", "--comment", "cilium: NOTRACK for proxy forward traffic", 625 "-j", "CT", "--notrack"}); err != nil { 626 return err 627 } 628 } 629 630 // No conntrack for proxy upstream traffic that is heading to lxc+ 631 if err := ip4tables.runProg([]string{ 632 "-t", "raw", 633 "-A", ciliumOutputRawChain, 634 "-o", "lxc+", 635 "-m", "mark", "--mark", matchL7ProxyUpstream, 636 "-m", "comment", "--comment", "cilium: NOTRACK for L7 proxy upstream traffic", 637 "-j", "CT", "--notrack"}); err != nil { 638 return err 639 } 640 641 // No conntrack for proxy upstream traffic that is heading to cilium_host 642 if err := ip4tables.runProg([]string{ 643 "-t", "raw", 644 "-A", ciliumOutputRawChain, 645 "-o", defaults.HostDevice, 646 "-m", "mark", "--mark", matchL7ProxyUpstream, 647 "-m", "comment", "--comment", "cilium: NOTRACK for L7 proxy upstream traffic", 648 "-j", "CT", "--notrack"}); err != nil { 649 return err 650 } 651 652 // Explicit ACCEPT for the proxy return traffic. Needed when the OUTPUT defaults to DROP. 653 // Matching needs to be the same as for the NOTRACK rule above. 654 if err := ip4tables.runProg([]string{ 655 "-t", "filter", 656 "-A", ciliumOutputChain, 657 "-m", "mark", "--mark", matchFromProxy, 658 "-m", "comment", "--comment", "cilium: ACCEPT for proxy traffic", 659 "-j", "ACCEPT"}); err != nil { 660 return err 661 } 662 663 // Explicit ACCEPT for the l7 proxy upstream traffic. Needed when the OUTPUT defaults to DROP. 664 // TODO: See if this is really needed. We do not have an ACCEPT for normal proxy upstream traffic. 665 if err := ip4tables.runProg([]string{ 666 "-t", "filter", 667 "-A", ciliumOutputChain, 668 "-m", "mark", "--mark", matchL7ProxyUpstream, 669 "-m", "comment", "--comment", "cilium: ACCEPT for l7 proxy upstream traffic", 670 "-j", "ACCEPT"}); err != nil { 671 return err 672 } 673 674 if m.haveSocketMatch { 675 // Direct inbound TPROXYed traffic towards the socket 676 if err := ip4tables.runProg(m.inboundProxyRedirectRule("-A")); err != nil { 677 return err 678 } 679 } 680 } 681 682 if m.sharedCfg.EnableIPv6 { 683 // No conntrack for traffic to ingress proxy 684 if err := ip6tables.runProg([]string{ 685 "-t", "raw", 686 "-A", ciliumPreRawChain, 687 "-m", "mark", "--mark", matchToProxy, 688 "-m", "comment", "--comment", "cilium: NOTRACK for proxy traffic", 689 "-j", "CT", "--notrack"}); err != nil { 690 return err 691 } 692 693 // Explicit ACCEPT for the proxy traffic. Needed when the INPUT defaults to DROP. 694 // Matching needs to be the same as for the NOTRACK rule above. 695 if err := ip6tables.runProg([]string{ 696 "-t", "filter", 697 "-A", ciliumInputChain, 698 "-m", "mark", "--mark", matchToProxy, 699 "-m", "comment", "--comment", "cilium: ACCEPT for proxy traffic", 700 "-j", "ACCEPT"}); err != nil { 701 return err 702 } 703 704 // No conntrack for proxy return traffic that is heading to cilium_host 705 if err := ip6tables.runProg([]string{ 706 "-t", "raw", 707 "-A", ciliumOutputRawChain, 708 "-o", defaults.HostDevice, 709 "-m", "mark", "--mark", matchProxyReply, 710 "-m", "comment", "--comment", "cilium: NOTRACK for proxy return traffic", 711 "-j", "CT", "--notrack"}); err != nil { 712 return err 713 } 714 715 // No conntrack for proxy upstream traffic that is heading to lxc+ 716 if err := ip6tables.runProg([]string{ 717 "-t", "raw", 718 "-A", ciliumOutputRawChain, 719 "-o", "lxc+", 720 "-m", "mark", "--mark", matchProxyReply, 721 "-m", "comment", "--comment", "cilium: NOTRACK for proxy return traffic", 722 "-j", "CT", "--notrack"}); err != nil { 723 return err 724 } 725 726 // Explicit ACCEPT for the proxy return traffic. Needed when the OUTPUT defaults to DROP. 727 // Matching needs to be the same as for the NOTRACK rule above. 728 if err := ip6tables.runProg([]string{ 729 "-t", "filter", 730 "-A", ciliumOutputChain, 731 "-m", "mark", "--mark", matchFromProxy, 732 "-m", "comment", "--comment", "cilium: ACCEPT for proxy traffic", 733 "-j", "ACCEPT"}); err != nil { 734 return err 735 } 736 737 if m.haveSocketMatch { 738 // Direct inbound TPROXYed traffic towards the socket 739 if err := ip6tables.runProg(m.inboundProxyRedirectRule("-A")); err != nil { 740 return err 741 } 742 } 743 } 744 745 return nil 746 } 747 748 func (m *Manager) doCopyProxyRules(prog iptablesInterface, table string, re *regexp.Regexp, match, oldChain, newChain string) error { 749 rules, err := prog.runProgOutput([]string{"-t", table, "-S"}) 750 if err != nil { 751 return err 752 } 753 754 scanner := bufio.NewScanner(strings.NewReader(rules)) 755 for scanner.Scan() { 756 rule := scanner.Text() 757 if !re.MatchString(rule) || !strings.Contains(rule, match) { 758 continue 759 } 760 761 args, err := shellwords.Parse(strings.Replace(rule, oldChain, newChain, 1)) 762 if err != nil { 763 log.WithFields(logrus.Fields{ 764 "table": table, 765 "prog": prog.getProg(), 766 logfields.Object: rule, 767 }).WithError(err).Warn("Unable to parse TPROXY rule, disruption to traffic selected by L7 policy possible") 768 continue 769 } 770 771 copyRule := append([]string{"-t", table}, args...) 772 if err := prog.runProg(copyRule); err != nil { 773 return err 774 } 775 } 776 777 return nil 778 } 779 780 var tproxyMatch = regexp.MustCompile("CILIUM_PRE_mangle .*cilium: TPROXY") 781 782 // copies old proxy rules 783 func (m *Manager) copyProxyRules(oldChain string, match string) error { 784 if m.sharedCfg.EnableIPv4 { 785 if err := m.doCopyProxyRules(ip4tables, "mangle", tproxyMatch, match, oldChain, ciliumPreMangleChain); err != nil { 786 return err 787 } 788 } 789 790 if m.sharedCfg.EnableIPv6 { 791 if err := m.doCopyProxyRules(ip6tables, "mangle", tproxyMatch, match, oldChain, ciliumPreMangleChain); err != nil { 792 return err 793 } 794 } 795 796 return nil 797 } 798 799 // Redirect packets to the host proxy via TPROXY, as directed by the Cilium 800 // datapath bpf programs via skb marks. 801 func (m *Manager) addProxyRules(prog runnable, ip string, proxyPort uint16, name string) error { 802 rules, err := prog.runProgOutput([]string{"-t", "mangle", "-S"}) 803 if err != nil { 804 return err 805 } 806 807 for _, proto := range []string{"tcp", "udp"} { 808 if err := m.iptProxyRule(rules, prog, proto, ip, proxyPort, name); err != nil { 809 return err 810 } 811 } 812 813 // Delete all other rules for this same proxy name 814 // These may accumulate if there is a bind failure on a previously used port 815 portAndIPMatch := fmt.Sprintf("TPROXY --on-port %d --on-ip %s ", proxyPort, ip) 816 scanner := bufio.NewScanner(strings.NewReader(rules)) 817 for scanner.Scan() { 818 rule := scanner.Text() 819 if !strings.Contains(rule, "-A CILIUM_PRE_mangle ") || !strings.Contains(rule, "cilium: TPROXY to host "+name) || strings.Contains(rule, portAndIPMatch) { 820 continue 821 } 822 823 args, err := shellwords.Parse(strings.Replace(rule, "-A", "-D", 1)) 824 if err != nil { 825 log.WithError(err).WithField(logfields.Object, rule).Warnf("Unable to parse %s TPROXY rule", prog) 826 continue 827 } 828 829 deleteRule := append([]string{"-t", "mangle"}, args...) 830 if err := prog.runProg(deleteRule); err != nil { 831 return err 832 } 833 } 834 835 return nil 836 } 837 838 func (m *Manager) endpointNoTrackRules(prog runnable, cmd string, IP string, port *lb.L4Addr) error { 839 var err error 840 841 protocol := strings.ToLower(port.Protocol) 842 p := strconv.FormatUint(uint64(port.Port), 10) 843 844 // currently the only use case for this is node-local-dns 845 // with LRP, node-local-dns should be deployed as a non-host-namespaced 846 // pod and we want to skip kernel conntrack for any traffic between the 847 // application pod and the node-local-dns pod 848 // There are 4 types of packets that we want to skip conntrack: 849 // 1. From a non-host pod to the node-local-dns pod 850 // 2. From the node-local-dns pod to a non-host pod 851 // 3. From a hostNetwork pod to the node-local-dns pod 852 // 4. From the node-local-dns pod to a hostNetwork pod 853 854 // 1. The following 2 rules cover packets from non-host pod to node-local-dns 855 if err = prog.runProg([]string{ 856 "-t", "raw", 857 cmd, ciliumPreRawChain, 858 "-p", protocol, 859 "-d", IP, 860 "--dport", p, 861 "-j", "CT", 862 "--notrack"}); err != nil { 863 log.WithError(err).Warning("Failed to enforce endpoint notrack") 864 } 865 if err = prog.runProg([]string{ 866 "-t", "filter", 867 cmd, ciliumForwardChain, 868 "-p", protocol, 869 "-d", IP, 870 "--dport", 871 p, "-j", 872 "ACCEPT"}); err != nil { 873 log.WithError(err).Warning("Failed to enforce endpoint notrack") 874 } 875 876 // 2. The following 2 rules cover packets from node-local-dns to 877 // non-host pod 878 if err = prog.runProg([]string{ 879 "-t", "raw", 880 cmd, ciliumPreRawChain, 881 "-p", protocol, 882 "-s", IP, 883 "--sport", p, 884 "-j", "CT", 885 "--notrack"}); err != nil { 886 log.WithError(err).Warning("Failed to enforce endpoint notrack") 887 } 888 if err = prog.runProg([]string{ 889 "-t", "filter", 890 cmd, ciliumForwardChain, 891 "-p", protocol, 892 "-s", IP, 893 "--sport", 894 p, "-j", 895 "ACCEPT"}); err != nil { 896 log.WithError(err).Warning("Failed to enforce endpoint notrack") 897 } 898 899 // 3. The following 2 rules cover packets from host namespaced pod to 900 // node-local-dns 901 if err = prog.runProg([]string{ 902 "-t", "raw", 903 cmd, ciliumOutputRawChain, 904 "-p", protocol, 905 "-d", IP, 906 "--dport", p, 907 "-j", "CT", 908 "--notrack"}); err != nil { 909 log.WithError(err).Warning("Failed to enforce endpoint notrack") 910 } 911 if err = prog.runProg([]string{ 912 "-t", "filter", 913 cmd, ciliumOutputChain, 914 "-p", protocol, 915 "-d", IP, 916 "--dport", p, 917 "-j", "ACCEPT"}); err != nil { 918 log.WithError(err).Warning("Failed to enforce endpoint notrack") 919 } 920 921 // 4. The following rule (and the prerouting rule in case 2) 922 // covers packets from node-local-dns to host namespaced pod 923 if err = prog.runProg([]string{ 924 "-t", "filter", 925 cmd, ciliumInputChain, 926 "-p", protocol, 927 "-s", IP, 928 "--sport", 929 p, "-j", 930 "ACCEPT"}); err != nil { 931 log.WithError(err).Warning("Failed to enforce endpoint notrack") 932 } 933 934 // The following rules are kept for compatibility with host-namespaced 935 // node-local-dns if user already deploys in the legacy mode without 936 // LRP. 937 if err = prog.runProg([]string{ 938 "-t", "raw", 939 cmd, ciliumOutputRawChain, 940 "-p", protocol, 941 "-s", IP, 942 "--sport", p, 943 "-j", "CT", 944 "--notrack"}); err != nil { 945 log.WithError(err).Warning("Failed to enforce endpoint notrack") 946 } 947 if err = prog.runProg([]string{ 948 "-t", "filter", 949 cmd, ciliumOutputChain, 950 "-p", protocol, 951 "-s", IP, 952 "--sport", p, 953 "-j", "ACCEPT"}); err != nil { 954 log.WithError(err).Warning("Failed to enforce endpoint notrack") 955 } 956 if err = prog.runProg([]string{ 957 "-t", "filter", 958 cmd, ciliumInputChain, 959 "-p", protocol, 960 "-d", IP, 961 "--dport", 962 p, "-j", 963 "ACCEPT"}); err != nil { 964 log.WithError(err).Warning("Failed to enforce endpoint notrack") 965 } 966 return err 967 } 968 969 // InstallNoTrackRules is explicitly called when a pod has valid "policy.cilium.io/no-track-port" annotation. 970 // When InstallNoConntrackIptRules flag is set, a super set of v4 NOTRACK rules will be automatically 971 // installed upon agent bootstrap (via function addNoTrackPodTrafficRules) and this function will be skipped. 972 // When InstallNoConntrackIptRules is not set, this function will be executed to install NOTRACK rules. 973 // The rules installed by this function is very specific, for now, the only user is node-local-dns pods. 974 func (m *Manager) InstallNoTrackRules(ip netip.Addr, port uint16) { 975 if m.skipPodTrafficConntrack(ip) { 976 return 977 } 978 979 reconciled := make(chan struct{}) 980 m.reconcilerParams.addNoTrackPod <- reconciliationRequest[noTrackPodInfo]{noTrackPodInfo{ip, port}, reconciled} 981 <-reconciled 982 } 983 984 // See comments for InstallNoTrackRules. 985 func (m *Manager) RemoveNoTrackRules(ip netip.Addr, port uint16) { 986 if m.skipPodTrafficConntrack(ip) { 987 return 988 } 989 990 reconciled := make(chan struct{}) 991 m.reconcilerParams.delNoTrackPod <- reconciliationRequest[noTrackPodInfo]{noTrackPodInfo{ip, port}, reconciled} 992 <-reconciled 993 } 994 995 func (m *Manager) InstallProxyRules(proxyPort uint16, name string) { 996 reconciled := make(chan struct{}) 997 m.reconcilerParams.proxies <- reconciliationRequest[proxyInfo]{proxyInfo{name, proxyPort}, reconciled} 998 <-reconciled 999 } 1000 1001 func (m *Manager) doInstallProxyRules(proxyPort uint16, name string) error { 1002 if m.haveBPFSocketAssign { 1003 log.WithField("port", proxyPort). 1004 Debug("Skipping proxy rule install due to BPF support") 1005 return nil 1006 } 1007 1008 if m.sharedCfg.EnableIPv4 { 1009 if err := m.addProxyRules(ip4tables, "127.0.0.1", proxyPort, name); err != nil { 1010 return err 1011 } 1012 } 1013 if m.sharedCfg.EnableIPv6 { 1014 if err := m.addProxyRules(ip6tables, "::1", proxyPort, name); err != nil { 1015 return err 1016 } 1017 } 1018 1019 return nil 1020 } 1021 1022 // GetProxyPorts enumerates all existing TPROXY rules in the datapath installed earlier with 1023 // InstallProxyRules and returns all proxy ports found. 1024 func (m *Manager) GetProxyPorts() map[string]uint16 { 1025 prog := ip4tables 1026 if !m.sharedCfg.EnableIPv4 { 1027 prog = ip6tables 1028 } 1029 1030 return m.doGetProxyPorts(prog) 1031 } 1032 1033 func (m *Manager) doGetProxyPorts(prog iptablesInterface) map[string]uint16 { 1034 portMap := make(map[string]uint16) 1035 1036 m.lock.Lock() 1037 defer m.lock.Unlock() 1038 1039 rules, err := prog.runProgOutput([]string{"-t", "mangle", "-n", "-L", ciliumPreMangleChain}) 1040 if err != nil { 1041 return portMap 1042 } 1043 1044 re := regexp.MustCompile( 1045 "(cilium-[^ ]*) proxy.*TPROXY redirect " + 1046 "(0.0.0.0|" + ipfamily.IPv4().Localhost + 1047 "|::|" + ipfamily.IPv6().Localhost + ")" + 1048 ":([1-9][0-9]*) mark", 1049 ) 1050 strs := re.FindAllString(rules, -1) 1051 for _, str := range strs { 1052 // Pick the name and port number from each match 1053 name := re.ReplaceAllString(str, "$1") 1054 portStr := re.ReplaceAllString(str, "$3") 1055 portUInt64, err := strconv.ParseUint(portStr, 10, 16) 1056 if err == nil { 1057 portMap[name] = uint16(portUInt64) 1058 } 1059 } 1060 return portMap 1061 } 1062 1063 func (m *Manager) getDeliveryInterface(ifName string) string { 1064 switch { 1065 case m.sharedCfg.EnableEndpointRoutes: 1066 // aws-cni creates container interfaces with names like eni621c0fc8425. 1067 if m.cniConfigManager.GetChainingMode() == "aws-cni" { 1068 return "eni+" 1069 } 1070 return "lxc+" 1071 1072 case m.sharedCfg.IPAM == ipamOption.IPAMENI || 1073 m.sharedCfg.IPAM == ipamOption.IPAMAlibabaCloud: 1074 return "lxc+" 1075 1076 default: 1077 return ifName 1078 } 1079 } 1080 1081 func (m *Manager) installForwardChainRules(ifName, localDeliveryInterface, forwardChain string) error { 1082 if m.sharedCfg.EnableIPv4 { 1083 if err := m.installForwardChainRulesIpX(ip4tables, ifName, localDeliveryInterface, forwardChain); err != nil { 1084 return err 1085 } 1086 } 1087 if m.sharedCfg.EnableIPv6 { 1088 return m.installForwardChainRulesIpX(ip6tables, ifName, localDeliveryInterface, forwardChain) 1089 } 1090 1091 return nil 1092 } 1093 1094 func (m *Manager) installForwardChainRulesIpX(prog runnable, ifName, localDeliveryInterface, forwardChain string) error { 1095 // While kube-proxy does change the policy of the iptables FORWARD chain 1096 // it doesn't seem to handle all cases, e.g. host network pods that use 1097 // the node IP which would still end up in default DENY. Similarly, for 1098 // plain Docker setup, we would otherwise hit default DENY in FORWARD chain. 1099 // Also, k8s 1.15 introduced "-m conntrack --ctstate INVALID -j DROP" which 1100 // in the direct routing case can drop EP replies. 1101 // 1102 // Therefore, add the rules below to avoid having a user to manually opt-in. 1103 // See also: https://github.com/kubernetes/kubernetes/issues/39823 1104 // In here can only be basic ACCEPT rules, nothing more complicated. 1105 // 1106 // The 2nd and 3rd rule are for the case of nodeport traffic where the backend is 1107 // remote. The traffic flow in FORWARD is as follows: 1108 // 1109 // - Node serving nodeport request: 1110 // IN=eno1 OUT=cilium_host 1111 // IN=cilium_host OUT=eno1 1112 // 1113 // - Node running backend: 1114 // IN=eno1 OUT=cilium_host 1115 // IN=lxc... OUT=eno1 1116 if err := prog.runProg([]string{ 1117 "-A", forwardChain, 1118 "-o", ifName, 1119 "-m", "comment", "--comment", "cilium: any->cluster on " + ifName + " forward accept", 1120 "-j", "ACCEPT"}); err != nil { 1121 return err 1122 } 1123 if err := prog.runProg([]string{ 1124 "-A", forwardChain, 1125 "-i", ifName, 1126 "-m", "comment", "--comment", "cilium: cluster->any on " + ifName + " forward accept (nodeport)", 1127 "-j", "ACCEPT"}); err != nil { 1128 return err 1129 } 1130 if err := prog.runProg([]string{ 1131 "-A", forwardChain, 1132 "-i", "lxc+", 1133 "-m", "comment", "--comment", "cilium: cluster->any on lxc+ forward accept", 1134 "-j", "ACCEPT"}); err != nil { 1135 return err 1136 } 1137 // Proxy return traffic to a remote source needs '-i cilium_net'. 1138 if ifName == defaults.HostDevice { 1139 ifPeerName := defaults.SecondHostDevice 1140 if err := prog.runProg([]string{ 1141 "-A", forwardChain, 1142 "-i", ifPeerName, 1143 "-m", "comment", "--comment", "cilium: cluster->any on " + ifPeerName + " forward accept (nodeport)", 1144 "-j", "ACCEPT"}); err != nil { 1145 return err 1146 } 1147 } 1148 // In case the delivery interface and the host interface are not the 1149 // same (enable-endpoint-routes), a separate set of rules to allow 1150 // from/to delivery interface is required. 1151 if localDeliveryInterface != ifName { 1152 if err := prog.runProg([]string{ 1153 "-A", forwardChain, 1154 "-o", localDeliveryInterface, 1155 "-m", "comment", "--comment", "cilium: any->cluster on " + localDeliveryInterface + " forward accept", 1156 "-j", "ACCEPT"}); err != nil { 1157 return err 1158 } 1159 if err := prog.runProg([]string{ 1160 "-A", forwardChain, 1161 "-i", localDeliveryInterface, 1162 "-m", "comment", "--comment", "cilium: cluster->any on " + localDeliveryInterface + " forward accept (nodeport)", 1163 "-j", "ACCEPT"}); err != nil { 1164 return err 1165 } 1166 } 1167 return nil 1168 } 1169 1170 func (m *Manager) installMasqueradeRules( 1171 prog iptablesInterface, nativeDevices []string, 1172 localDeliveryInterface, snatDstExclusionCIDR, allocRange, hostMasqueradeIP string, 1173 ) error { 1174 devices := nativeDevices 1175 1176 if m.sharedCfg.NodeIpsetNeeded { 1177 // Exclude traffic to nodes from masquerade. 1178 progArgs := []string{ 1179 "-t", "nat", 1180 "-A", ciliumPostNatChain, 1181 } 1182 1183 // If MasqueradeInterfaces is set, we need to mirror base condition of the 1184 // "cilium masquerade non-cluster" rule below, as the allocRange might not 1185 // be valid in such setups (e.g. in ENI mode). 1186 if len(m.sharedCfg.MasqueradeInterfaces) > 0 { 1187 progArgs = append(progArgs, "-o", strings.Join(m.sharedCfg.MasqueradeInterfaces, ",")) 1188 } else { 1189 progArgs = append(progArgs, "-s", allocRange) 1190 } 1191 1192 progArgs = append(progArgs, 1193 "-m", "set", "--match-set", prog.getIpset(), "dst", 1194 "-m", "comment", "--comment", "exclude traffic to cluster nodes from masquerade", 1195 "-j", "ACCEPT", 1196 ) 1197 if err := prog.runProg(progArgs); err != nil { 1198 return err 1199 } 1200 } 1201 1202 // Masquerade egress traffic leaving the node based on source routing 1203 // 1204 // If this option is enabled, then it takes precedence over the catch-all 1205 // MASQUERADE further below. 1206 if m.sharedCfg.EnableMasqueradeRouteSource { 1207 var defaultRoutes []netlink.Route 1208 1209 if len(m.sharedCfg.MasqueradeInterfaces) > 0 { 1210 devices = m.sharedCfg.MasqueradeInterfaces 1211 } 1212 family := netlink.FAMILY_V4 1213 if prog == ip6tables { 1214 family = netlink.FAMILY_V6 1215 } 1216 initialPass := true 1217 if routes, err := netlink.RouteList(nil, family); err == nil { 1218 nextPass: 1219 for _, r := range routes { 1220 var link netlink.Link 1221 match := false 1222 if r.LinkIndex > 0 { 1223 link, err = netlink.LinkByIndex(r.LinkIndex) 1224 if err != nil { 1225 continue 1226 } 1227 // Routes are dedicated to the specific interface, so we 1228 // need to install the SNAT rules also for that interface 1229 // via -o. If we cannot correlate to anything because no 1230 // devices were specified, we need to bail out. 1231 if len(devices) == 0 { 1232 return fmt.Errorf("cannot correlate source route device for generating masquerading rules") 1233 } 1234 for _, device := range devices { 1235 if device == link.Attrs().Name { 1236 match = true 1237 break 1238 } 1239 } 1240 } else { 1241 // There might be next hop groups where ifindex is zero 1242 // and the underlying next hop devices might not be known 1243 // to Cilium. In this case, assume match and don't encode 1244 // -o device. 1245 match = true 1246 } 1247 _, exclusionCIDR, err := net.ParseCIDR(snatDstExclusionCIDR) 1248 if !match || r.Src == nil || (err == nil && cidr.Equal(r.Dst, exclusionCIDR)) { 1249 continue 1250 } 1251 if initialPass && cidr.Equal(r.Dst, cidr.ZeroNet(r.Family)) { 1252 defaultRoutes = append(defaultRoutes, r) 1253 continue 1254 } 1255 progArgs := []string{ 1256 "-t", "nat", 1257 "-A", ciliumPostNatChain, 1258 "-s", allocRange, 1259 } 1260 if cidr.Equal(r.Dst, cidr.ZeroNet(r.Family)) { 1261 progArgs = append( 1262 progArgs, 1263 "!", "-d", snatDstExclusionCIDR) 1264 } else { 1265 progArgs = append( 1266 progArgs, 1267 "-d", r.Dst.String()) 1268 } 1269 if link != nil { 1270 progArgs = append( 1271 progArgs, 1272 "-o", link.Attrs().Name) 1273 } else { 1274 progArgs = append( 1275 progArgs, 1276 "!", "-o", "cilium_+") 1277 } 1278 progArgs = append( 1279 progArgs, 1280 "-m", "comment", "--comment", "cilium snat non-cluster via source route", 1281 "-j", "SNAT", 1282 "--to-source", r.Src.String()) 1283 if m.cfg.IPTablesRandomFully { 1284 progArgs = append(progArgs, "--random-fully") 1285 } 1286 if err := prog.runProg(progArgs); err != nil { 1287 return err 1288 } 1289 } 1290 if initialPass { 1291 initialPass = false 1292 routes = defaultRoutes 1293 goto nextPass 1294 } 1295 } 1296 } else { 1297 // Masquerade all egress traffic leaving the node (catch-all) 1298 // 1299 // This rule must be first as the node ipset rule as it has different 1300 // exclusion criteria than the other rules in this table. 1301 // 1302 // The following conditions must be met: 1303 // * May not leave on a cilium_ interface, this excludes all 1304 // tunnel traffic 1305 // * Must originate from an IP in the local allocation range 1306 // * Must not be reply if BPF NodePort is enabled 1307 // * Tunnel mode: 1308 // * May not be targeted to an IP in the local allocation 1309 // range 1310 // * Non-tunnel mode: 1311 // * May not be targeted to an IP in the cluster range 1312 progArgs := []string{ 1313 "-t", "nat", 1314 "-A", ciliumPostNatChain, 1315 "!", "-d", snatDstExclusionCIDR, 1316 } 1317 if len(m.sharedCfg.MasqueradeInterfaces) > 0 { 1318 progArgs = append( 1319 progArgs, 1320 "-o", strings.Join(m.sharedCfg.MasqueradeInterfaces, ",")) 1321 } else { 1322 progArgs = append( 1323 progArgs, 1324 "-s", allocRange, 1325 "!", "-o", "cilium_+") 1326 } 1327 progArgs = append( 1328 progArgs, 1329 "-m", "comment", "--comment", "cilium masquerade non-cluster", 1330 "-j", "MASQUERADE") 1331 if m.cfg.IPTablesRandomFully { 1332 progArgs = append(progArgs, "--random-fully") 1333 } 1334 if err := prog.runProg(progArgs); err != nil { 1335 return err 1336 } 1337 } 1338 1339 // The following rule exclude traffic from the remaining rules in this chain. 1340 // If this rule matches, none of the remaining rules in this chain 1341 // are considered. 1342 1343 // Exclude proxy return traffic from the masquarade rules. 1344 if err := prog.runProg([]string{ 1345 "-t", "nat", 1346 "-A", ciliumPostNatChain, 1347 // Don't match proxy (return) traffic 1348 "-m", "mark", "--mark", fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIsProxy, linux_defaults.MagicMarkProxyMask), 1349 "-m", "comment", "--comment", "exclude proxy return traffic from masquerade", 1350 "-j", "ACCEPT"}); err != nil { 1351 return err 1352 } 1353 1354 if m.sharedCfg.TunnelingEnabled { 1355 // Masquerade all traffic from the host into the ifName 1356 // interface if the source is not in the node's pod CIDR. 1357 // 1358 // The following conditions must be met: 1359 // * Must be targeted for the ifName interface 1360 // * Must be targeted to an IP that is not local 1361 // * May not already be originating from the node's pod CIDR. 1362 if err := prog.runProg([]string{ 1363 "-t", "nat", 1364 "-A", ciliumPostNatChain, 1365 "!", "-s", allocRange, 1366 "!", "-d", allocRange, 1367 "-o", defaults.HostDevice, 1368 "-m", "comment", "--comment", "cilium host->cluster masquerade", 1369 "-j", "SNAT", "--to-source", hostMasqueradeIP}); err != nil { 1370 return err 1371 } 1372 } 1373 1374 loopbackAddr := "127.0.0.1" 1375 if prog == ip6tables { 1376 loopbackAddr = "::1" 1377 } 1378 1379 // Masquerade all traffic from the host into local 1380 // endpoints if the source is 127.0.0.1. This is 1381 // required to force replies out of the endpoint's 1382 // network namespace. 1383 // 1384 // The following conditions must be met: 1385 // * Must be targeted for local endpoint 1386 // * Must be from 127.0.0.1 1387 if err := prog.runProg([]string{ 1388 "-t", "nat", 1389 "-A", ciliumPostNatChain, 1390 "-s", loopbackAddr, 1391 "-o", localDeliveryInterface, 1392 "-m", "comment", "--comment", "cilium host->cluster from " + loopbackAddr + " masquerade", 1393 "-j", "SNAT", "--to-source", hostMasqueradeIP}); err != nil { 1394 return err 1395 } 1396 1397 // Masquerade all traffic that originated from a local 1398 // pod and thus carries a security identity and that 1399 // was also DNAT'ed. It must be masqueraded to ensure 1400 // that reverse NAT can be performed. Otherwise the 1401 // reply traffic would be sent directly to the pod 1402 // without traversing the Linux stack again. 1403 // 1404 // This is only done if EnableEndpointRoutes is 1405 // disabled, if EnableEndpointRoutes is enabled, then 1406 // all traffic always passes through the stack anyway. 1407 // 1408 // This is required for: 1409 // - portmap/host if both source and destination are 1410 // on the same node 1411 // - kiam if source and server are on the same node 1412 if !m.sharedCfg.EnableEndpointRoutes { 1413 if err := prog.runProg([]string{ 1414 "-t", "nat", 1415 "-A", ciliumPostNatChain, 1416 "-m", "mark", "--mark", fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIdentity, linux_defaults.MagicMarkHostMask), 1417 "-o", localDeliveryInterface, 1418 "-m", "conntrack", "--ctstate", "DNAT", 1419 "-m", "comment", "--comment", "hairpin traffic that originated from a local pod", 1420 "-j", "SNAT", "--to-source", hostMasqueradeIP}); err != nil { 1421 return err 1422 } 1423 } 1424 1425 return nil 1426 } 1427 1428 func (m *Manager) installHostTrafficMarkRule(prog runnable) error { 1429 // Mark all packets sourced from processes running on the host with a 1430 // special marker so that we can differentiate traffic sourced locally 1431 // vs. traffic from the outside world that was masqueraded to appear 1432 // like it's from the host. 1433 // 1434 // Originally we set this mark only for traffic destined to the 1435 // ifName device, to ensure that any traffic directly reaching 1436 // to a Cilium-managed IP could be classified as from the host. 1437 // 1438 // However, there's another case where a local process attempts to 1439 // reach a service IP which is backed by a Cilium-managed pod. The 1440 // service implementation is outside of Cilium's control, for example, 1441 // handled by kube-proxy. We can tag even this traffic with a magic 1442 // mark, then when the service implementation proxies it back into 1443 // Cilium the BPF will see this mark and understand that the packet 1444 // originated from the host. 1445 matchFromIPSecEncrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkDecrypt, linux_defaults.RouteMarkMask) 1446 matchFromIPSecDecrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkEncrypt, linux_defaults.RouteMarkMask) 1447 matchOverlay := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkOverlay, linux_defaults.MagicMarkHostMask) 1448 matchFromProxy := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIsProxy, linux_defaults.MagicMarkProxyMask) 1449 matchFromProxyEPID := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIsProxyEPID, linux_defaults.MagicMarkProxyMask) 1450 matchFromDNSProxy := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkIdentity, linux_defaults.MagicMarkHostMask) 1451 markAsFromHost := fmt.Sprintf("%#08x/%#08x", linux_defaults.MagicMarkHost, linux_defaults.MagicMarkHostMask) 1452 1453 return prog.runProg([]string{ 1454 "-t", "filter", 1455 "-A", ciliumOutputChain, 1456 "-m", "mark", "!", "--mark", matchFromIPSecDecrypt, // Don't match ipsec traffic 1457 "-m", "mark", "!", "--mark", matchFromIPSecEncrypt, // Don't match ipsec traffic 1458 "-m", "mark", "!", "--mark", matchOverlay, // Don't match Cilium's overlay traffic 1459 "-m", "mark", "!", "--mark", matchFromProxy, // Don't match proxy traffic 1460 "-m", "mark", "!", "--mark", matchFromProxyEPID, // Don't match proxy traffic 1461 "-m", "mark", "!", "--mark", matchFromDNSProxy, // Don't match DNS proxy egress traffic 1462 "-m", "comment", "--comment", "cilium: host->any mark as from host", 1463 "-j", "MARK", "--set-xmark", markAsFromHost}) 1464 } 1465 1466 func (m *Manager) doInstallRules(state desiredState, firstInit bool) error { 1467 m.lock.Lock() 1468 defer m.lock.Unlock() 1469 1470 // Make sure we have no old "backups" 1471 if err := m.removeRules(oldCiliumPrefix); err != nil { 1472 return fmt.Errorf("failed to remove old backup rules: %w", err) 1473 } 1474 1475 if err := m.renameChains(oldCiliumPrefix); err != nil { 1476 return fmt.Errorf("failed to rename chains: %w", err) 1477 } 1478 1479 // install rules if needed 1480 if state.installRules { 1481 if err := m.installRules(state); err != nil { 1482 return fmt.Errorf("failed to install rules: %w", err) 1483 } 1484 1485 // copy old proxy rules over at initialization 1486 if firstInit { 1487 if err := m.copyProxyRules(oldCiliumPrefix+ciliumPreMangleChain, "cilium-dns-egress"); err != nil { 1488 return fmt.Errorf("cannot copy old proxy rules, disruption to traffic selected by L7 policy possible: %w", err) 1489 } 1490 } 1491 1492 for _, proxy := range state.proxies { 1493 if err := m.doInstallProxyRules(proxy.port, proxy.name); err != nil { 1494 return fmt.Errorf("cannot install proxy rules for %s: %w", proxy.name, err) 1495 } 1496 } 1497 } 1498 1499 if err := m.removeRules(oldCiliumPrefix); err != nil { 1500 return fmt.Errorf("failed to remove old rules: %w", err) 1501 } 1502 1503 return nil 1504 } 1505 1506 // installRules installs iptables rules for Cilium in specific use-cases 1507 // (most specifically, interaction with kube-proxy). 1508 func (m *Manager) installRules(state desiredState) error { 1509 // Install new rules 1510 for _, c := range ciliumChains { 1511 if err := c.add(m.sharedCfg.EnableIPv4, m.sharedCfg.EnableIPv6); err != nil { 1512 // do not return error for chain creation that are linked to disabled feeder rules 1513 if isDisabledChain(m.cfg.DisableIptablesFeederRules, c.hook) { 1514 log.WithField(logfields.Chain, c.name).Warningf("ignoring creation of chain since feeder rules for %s is disabled", c.hook) 1515 continue 1516 } 1517 1518 return fmt.Errorf("cannot add custom chain %s: %w", c.name, err) 1519 } 1520 } 1521 1522 if err := m.installStaticProxyRules(); err != nil { 1523 return fmt.Errorf("cannot install static proxy rules: %w", err) 1524 } 1525 1526 if err := m.addCiliumAcceptXfrmRules(); err != nil { 1527 return fmt.Errorf("cannot install xfrm rules: %w", err) 1528 } 1529 1530 localDeliveryInterface := m.getDeliveryInterface(defaults.HostDevice) 1531 1532 if err := m.installForwardChainRules(defaults.HostDevice, localDeliveryInterface, ciliumForwardChain); err != nil { 1533 return fmt.Errorf("cannot install forward chain rules to %s: %w", ciliumForwardChain, err) 1534 } 1535 1536 if m.sharedCfg.EnableIPv4 { 1537 if err := m.installHostTrafficMarkRule(ip4tables); err != nil { 1538 return fmt.Errorf("cannot install host traffic mark rule: %w", err) 1539 } 1540 1541 if m.sharedCfg.IptablesMasqueradingIPv4Enabled && state.localNodeInfo.internalIPv4 != nil { 1542 if err := m.installMasqueradeRules(ip4tables, state.devices.UnsortedList(), localDeliveryInterface, 1543 m.remoteSNATDstAddrExclusionCIDR(state.localNodeInfo.ipv4NativeRoutingCIDR, state.localNodeInfo.ipv4AllocCIDR), 1544 state.localNodeInfo.ipv4AllocCIDR, 1545 state.localNodeInfo.internalIPv4.String(), 1546 ); err != nil { 1547 return fmt.Errorf("cannot install masquerade rules: %w", err) 1548 } 1549 } 1550 } 1551 1552 if m.sharedCfg.EnableIPv6 { 1553 if err := m.installHostTrafficMarkRule(ip6tables); err != nil { 1554 return fmt.Errorf("cannot install host traffic mark rule: %w", err) 1555 } 1556 1557 if m.sharedCfg.IptablesMasqueradingIPv6Enabled && state.localNodeInfo.internalIPv6 != nil { 1558 if err := m.installMasqueradeRules(ip6tables, state.devices.UnsortedList(), localDeliveryInterface, 1559 m.remoteSNATDstAddrExclusionCIDR(state.localNodeInfo.ipv6NativeRoutingCIDR, state.localNodeInfo.ipv6AllocCIDR), 1560 state.localNodeInfo.ipv6AllocCIDR, 1561 state.localNodeInfo.internalIPv6.String(), 1562 ); err != nil { 1563 return fmt.Errorf("cannot install masquerade rules: %w", err) 1564 } 1565 } 1566 } 1567 1568 // AWS ENI requires to mark packets ingressing on the primary interface 1569 // and route them back the same way even if the pod responding is using 1570 // the IP of a different interface. Please see note in Reinitialize() 1571 // in pkg/datapath/loader for more details. 1572 if m.sharedCfg.IPAM == ipamOption.IPAMENI || m.sharedCfg.IPAM == ipamOption.IPAMAlibabaCloud { 1573 if err := m.addCiliumENIRules(); err != nil { 1574 return fmt.Errorf("cannot install rules for ENI multi-node NodePort: %w", err) 1575 } 1576 } 1577 1578 if m.sharedCfg.EnableIPSec { 1579 if err := m.addCiliumNoTrackXfrmRules(); err != nil { 1580 return fmt.Errorf("cannot install xfrm rules: %w", err) 1581 } 1582 } 1583 1584 podsCIDR := state.localNodeInfo.ipv4NativeRoutingCIDR 1585 if m.sharedCfg.InstallNoConntrackIptRules && podsCIDR != "" { 1586 if err := m.addNoTrackPodTrafficRules(ip4tables, podsCIDR); err != nil { 1587 return fmt.Errorf("cannot install pod traffic no CT rules: %w", err) 1588 } 1589 } 1590 1591 for noTrackPodInfo := range state.noTrackPods { 1592 if err := m.installNoTrackRules(noTrackPodInfo.ip, noTrackPodInfo.port); err != nil { 1593 return err 1594 } 1595 } 1596 1597 for _, c := range ciliumChains { 1598 // do not install feeder for chains that are set to be disabled 1599 if isDisabledChain(m.cfg.DisableIptablesFeederRules, c.hook) { 1600 log.WithField(logfields.Chain, c.hook).Infof("Skipping the install of feeder rule") 1601 continue 1602 } 1603 1604 if err := c.installFeeder(m.sharedCfg.EnableIPv4, m.sharedCfg.EnableIPv6, m.cfg.PrependIptablesChains); err != nil { 1605 return fmt.Errorf("cannot install feeder rule: %w", err) 1606 } 1607 } 1608 1609 return nil 1610 } 1611 1612 func (m *Manager) remoteSNATDstAddrExclusionCIDR(nativeRoutingCIDR, allocCIDR string) string { 1613 if nativeRoutingCIDR != "" { 1614 // ip{v4,v6}-native-routing-cidr is set, so use it 1615 return nativeRoutingCIDR 1616 } 1617 1618 return allocCIDR 1619 } 1620 1621 func (m *Manager) ciliumNoTrackXfrmRules(prog iptablesInterface, input string) error { 1622 matchFromIPSecEncrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkDecrypt, linux_defaults.RouteMarkMask) 1623 matchFromIPSecDecrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkEncrypt, linux_defaults.RouteMarkMask) 1624 1625 for _, match := range []string{matchFromIPSecDecrypt, matchFromIPSecEncrypt} { 1626 if err := prog.runProg([]string{ 1627 "-t", "raw", input, ciliumPreRawChain, 1628 "-m", "mark", "--mark", match, 1629 "-m", "comment", "--comment", xfrmDescription, 1630 "-j", "CT", "--notrack"}); err != nil { 1631 return err 1632 } 1633 } 1634 return nil 1635 } 1636 1637 // Exclude crypto traffic from the filter and nat table rules. 1638 // This avoids encryption bits and keyID, 0x*d00 for decryption 1639 // and 0x*e00 for encryption, colliding with existing rules. Needed 1640 // for kube-proxy for example. 1641 func (m *Manager) addCiliumAcceptXfrmRules() error { 1642 if !m.sharedCfg.EnableIPSec { 1643 return nil 1644 } 1645 1646 insertAcceptXfrm := func(ipt *ipt, table, chain string) error { 1647 matchFromIPSecEncrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkDecrypt, linux_defaults.RouteMarkMask) 1648 matchFromIPSecDecrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkEncrypt, linux_defaults.RouteMarkMask) 1649 1650 comment := "exclude xfrm marks from " + table + " " + chain + " chain" 1651 1652 if err := ipt.runProg([]string{ 1653 "-t", table, 1654 "-A", chain, 1655 "-m", "mark", "--mark", matchFromIPSecEncrypt, 1656 "-m", "comment", "--comment", comment, 1657 "-j", "ACCEPT"}); err != nil { 1658 return err 1659 } 1660 1661 return ipt.runProg([]string{ 1662 "-t", table, 1663 "-A", chain, 1664 "-m", "mark", "--mark", matchFromIPSecDecrypt, 1665 "-m", "comment", "--comment", comment, 1666 "-j", "ACCEPT"}) 1667 } 1668 1669 for _, chain := range ciliumChains { 1670 switch chain.table { 1671 case "filter", "nat": 1672 if m.sharedCfg.EnableIPv4 { 1673 if err := insertAcceptXfrm(ip4tables, chain.table, chain.name); err != nil { 1674 return err 1675 } 1676 } 1677 // ip6tables chain exists only if chain.ipv6 is true 1678 if m.sharedCfg.EnableIPv6 && chain.ipv6 { 1679 if err := insertAcceptXfrm(ip6tables, chain.table, chain.name); err != nil { 1680 return err 1681 } 1682 } 1683 } 1684 } 1685 return nil 1686 } 1687 1688 func (m *Manager) addCiliumNoTrackXfrmRules() (err error) { 1689 if m.sharedCfg.EnableIPv4 { 1690 if err = m.ciliumNoTrackXfrmRules(ip4tables, "-I"); err != nil { 1691 return 1692 } 1693 } 1694 if m.sharedCfg.EnableIPv6 { 1695 return m.ciliumNoTrackXfrmRules(ip6tables, "-I") 1696 } 1697 return nil 1698 } 1699 1700 func (m *Manager) installNoTrackRules(addr netip.Addr, port uint16) error { 1701 // Do not install per endpoint NOTRACK rules if we are already skipping 1702 // conntrack for all pod traffic. 1703 if m.skipPodTrafficConntrack(addr) { 1704 return nil 1705 } 1706 1707 prog := ip4tables 1708 if addr.Is6() { 1709 prog = ip6tables 1710 } 1711 for _, p := range noTrackPorts(port) { 1712 if err := m.endpointNoTrackRules(prog, "-A", addr.String(), p); err != nil { 1713 return err 1714 } 1715 } 1716 return nil 1717 } 1718 1719 func (m *Manager) removeNoTrackRules(addr netip.Addr, port uint16) error { 1720 // Do not remove per endpoint NOTRACK rules if we are already skipping 1721 // conntrack for all pod traffic. 1722 if m.skipPodTrafficConntrack(addr) { 1723 return nil 1724 } 1725 1726 prog := ip4tables 1727 if addr.Is6() { 1728 prog = ip6tables 1729 } 1730 for _, p := range noTrackPorts(port) { 1731 if err := m.endpointNoTrackRules(prog, "-D", addr.String(), p); err != nil { 1732 return err 1733 } 1734 } 1735 return nil 1736 } 1737 1738 // skipPodTrafficConntrack returns true if it's possible to install iptables 1739 // `-j CT --notrack` rules to skip tracking pod traffic. 1740 func (m *Manager) skipPodTrafficConntrack(addr netip.Addr) bool { 1741 if addr.Is4() && m.sharedCfg.InstallNoConntrackIptRules { 1742 return true 1743 } 1744 return false 1745 } 1746 1747 func (m *Manager) addNoTrackPodTrafficRules(prog runnable, podsCIDR string) error { 1748 for _, chain := range []string{ciliumPreRawChain, ciliumOutputRawChain} { 1749 if err := prog.runProg([]string{ 1750 "-t", "raw", 1751 "-I", chain, 1752 "-s", podsCIDR, 1753 "-m", "comment", "--comment", "cilium: NOTRACK for pod traffic", 1754 "-j", "CT", "--notrack"}); err != nil { 1755 return err 1756 } 1757 1758 if err := prog.runProg([]string{ 1759 "-t", "raw", 1760 "-I", chain, 1761 "-d", podsCIDR, 1762 "-m", "comment", "--comment", "cilium: NOTRACK for pod traffic", 1763 "-j", "CT", "--notrack"}); err != nil { 1764 return err 1765 } 1766 } 1767 1768 return nil 1769 } 1770 1771 func (m *Manager) addCiliumENIRules() error { 1772 if !m.sharedCfg.EnableIPv4 { 1773 return nil 1774 } 1775 1776 iface, err := route.NodeDeviceWithDefaultRoute(m.sharedCfg.EnableIPv4, m.sharedCfg.EnableIPv6) 1777 if err != nil { 1778 return fmt.Errorf("failed to find interface with default route: %w", err) 1779 } 1780 1781 nfmask := fmt.Sprintf("%#08x", linux_defaults.MarkMultinodeNodeport) 1782 ctmask := fmt.Sprintf("%#08x", linux_defaults.MaskMultinodeNodeport) 1783 1784 // Note: these rules need the xt_connmark module (iptables usually 1785 // loads it when required, unless loading modules after boot has been 1786 // disabled). 1787 if err := ip4tables.runProg([]string{ 1788 "-t", "mangle", 1789 "-A", ciliumPreMangleChain, 1790 "-i", iface.Attrs().Name, 1791 "-m", "comment", "--comment", "cilium: primary ENI", 1792 "-m", "addrtype", "--dst-type", "LOCAL", "--limit-iface-in", 1793 "-j", "CONNMARK", "--set-xmark", nfmask + "/" + ctmask}); err != nil { 1794 return err 1795 } 1796 1797 return ip4tables.runProg([]string{ 1798 "-t", "mangle", 1799 "-A", ciliumPreMangleChain, 1800 "-i", "lxc+", 1801 "-m", "comment", "--comment", "cilium: primary ENI", 1802 "-j", "CONNMARK", "--restore-mark", "--nfmask", nfmask, "--ctmask", ctmask}) 1803 }