github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/iptables/iptables.go (about) 1 //go:build linux 2 3 package iptables 4 5 import ( 6 "context" 7 "errors" 8 "fmt" 9 "net" 10 "os/exec" 11 "strconv" 12 "strings" 13 "sync" 14 "time" 15 16 "github.com/containerd/log" 17 "github.com/Prakhar-Agarwal-byte/moby/errdefs" 18 "github.com/Prakhar-Agarwal-byte/moby/pkg/rootless" 19 ) 20 21 // Action signifies the iptable action. 22 type Action string 23 24 const ( 25 // Append appends the rule at the end of the chain. 26 Append Action = "-A" 27 // Delete deletes the rule from the chain. 28 Delete Action = "-D" 29 // Insert inserts the rule at the top of the chain. 30 Insert Action = "-I" 31 ) 32 33 // Policy is the default iptable policies 34 type Policy string 35 36 const ( 37 // Drop is the default iptables DROP policy. 38 Drop Policy = "DROP" 39 // Accept is the default iptables ACCEPT policy. 40 Accept Policy = "ACCEPT" 41 ) 42 43 // Table refers to Nat, Filter or Mangle. 44 type Table string 45 46 const ( 47 // Nat table is used for nat translation rules. 48 Nat Table = "nat" 49 // Filter table is used for filter rules. 50 Filter Table = "filter" 51 // Mangle table is used for mangling the packet. 52 Mangle Table = "mangle" 53 ) 54 55 // IPVersion refers to IP version, v4 or v6 56 type IPVersion string 57 58 const ( 59 // IPv4 is version 4. 60 IPv4 IPVersion = "IPV4" 61 // IPv6 is version 6. 62 IPv6 IPVersion = "IPV6" 63 ) 64 65 var ( 66 iptablesPath string 67 ip6tablesPath string 68 supportsXlock = false 69 // used to lock iptables commands if xtables lock is not supported 70 bestEffortLock sync.Mutex 71 initOnce sync.Once 72 ) 73 74 // IPTable defines struct with [IPVersion]. 75 type IPTable struct { 76 ipVersion IPVersion 77 } 78 79 // ChainInfo defines the iptables chain. 80 type ChainInfo struct { 81 Name string 82 Table Table 83 HairpinMode bool 84 IPVersion IPVersion 85 } 86 87 // ChainError is returned to represent errors during ip table operation. 88 type ChainError struct { 89 Chain string 90 Output []byte 91 } 92 93 func (e ChainError) Error() string { 94 return fmt.Sprintf("error iptables %s: %s", e.Chain, string(e.Output)) 95 } 96 97 // loopbackAddress returns the loopback address for the given IP version. 98 func loopbackAddress(version IPVersion) string { 99 switch version { 100 case IPv4, "": 101 // IPv4 (default for backward-compatibility) 102 return "127.0.0.0/8" 103 case IPv6: 104 return "::1/128" 105 default: 106 panic("unknown IP version: " + version) 107 } 108 } 109 110 func detectIptables() { 111 path, err := exec.LookPath("iptables") 112 if err != nil { 113 log.G(context.TODO()).WithError(err).Warnf("failed to find iptables") 114 return 115 } 116 iptablesPath = path 117 118 // The --wait flag was added in iptables v1.6.0. 119 // TODO remove this check once we drop support for CentOS/RHEL 7, which uses an older version of iptables 120 if out, err := exec.Command(path, "--wait", "-L", "-n").CombinedOutput(); err != nil { 121 log.G(context.TODO()).WithError(err).Infof("unable to detect if iptables supports xlock: 'iptables --wait -L -n': `%s`", strings.TrimSpace(string(out))) 122 } else { 123 supportsXlock = true 124 } 125 126 path, err = exec.LookPath("ip6tables") 127 if err != nil { 128 log.G(context.TODO()).WithError(err).Warnf("unable to find ip6tables") 129 } else { 130 ip6tablesPath = path 131 } 132 } 133 134 func initFirewalld() { 135 // When running with RootlessKit, firewalld is running as the root outside our network namespace 136 // https://github.com/moby/moby/issues/43781 137 if rootless.RunningWithRootlessKit() { 138 log.G(context.TODO()).Info("skipping firewalld management for rootless mode") 139 return 140 } 141 if err := firewalldInit(); err != nil { 142 log.G(context.TODO()).WithError(err).Debugf("unable to initialize firewalld; using raw iptables instead") 143 } 144 } 145 146 func initDependencies() { 147 initFirewalld() 148 detectIptables() 149 } 150 151 func initCheck() error { 152 initOnce.Do(initDependencies) 153 154 if iptablesPath == "" { 155 return errors.New("iptables not found") 156 } 157 return nil 158 } 159 160 // GetIptable returns an instance of IPTable with specified version ([IPv4] 161 // or [IPv6]). It panics if an invalid [IPVersion] is provided. 162 func GetIptable(version IPVersion) *IPTable { 163 switch version { 164 case IPv4, IPv6: 165 // valid version 166 case "": 167 // default is IPv4 for backward-compatibility 168 version = IPv4 169 default: 170 panic("unknown IP version: " + version) 171 } 172 return &IPTable{ipVersion: version} 173 } 174 175 // NewChain adds a new chain to ip table. 176 func (iptable IPTable) NewChain(name string, table Table, hairpinMode bool) (*ChainInfo, error) { 177 if name == "" { 178 return nil, fmt.Errorf("could not create chain: chain name is empty") 179 } 180 if table == "" { 181 return nil, fmt.Errorf("could not create chain %s: invalid table name: table name is empty", name) 182 } 183 // Add chain if it doesn't exist 184 if _, err := iptable.Raw("-t", string(table), "-n", "-L", name); err != nil { 185 if output, err := iptable.Raw("-t", string(table), "-N", name); err != nil { 186 return nil, err 187 } else if len(output) != 0 { 188 return nil, fmt.Errorf("could not create %s/%s chain: %s", table, name, output) 189 } 190 } 191 return &ChainInfo{ 192 Name: name, 193 Table: table, 194 HairpinMode: hairpinMode, 195 IPVersion: iptable.ipVersion, 196 }, nil 197 } 198 199 // ProgramChain is used to add rules to a chain 200 func (iptable IPTable) ProgramChain(c *ChainInfo, bridgeName string, hairpinMode, enable bool) error { 201 if c.Name == "" { 202 return errors.New("could not program chain, missing chain name") 203 } 204 205 // Either add or remove the interface from the firewalld zone, if firewalld is running. 206 if enable { 207 if err := AddInterfaceFirewalld(bridgeName); err != nil { 208 return err 209 } 210 } else { 211 if err := DelInterfaceFirewalld(bridgeName); err != nil && !errdefs.IsNotFound(err) { 212 return err 213 } 214 } 215 216 switch c.Table { 217 case Nat: 218 preroute := []string{ 219 "-m", "addrtype", 220 "--dst-type", "LOCAL", 221 "-j", c.Name, 222 } 223 if !iptable.Exists(Nat, "PREROUTING", preroute...) && enable { 224 if err := c.Prerouting(Append, preroute...); err != nil { 225 return fmt.Errorf("failed to inject %s in PREROUTING chain: %s", c.Name, err) 226 } 227 } else if iptable.Exists(Nat, "PREROUTING", preroute...) && !enable { 228 if err := c.Prerouting(Delete, preroute...); err != nil { 229 return fmt.Errorf("failed to remove %s in PREROUTING chain: %s", c.Name, err) 230 } 231 } 232 output := []string{ 233 "-m", "addrtype", 234 "--dst-type", "LOCAL", 235 "-j", c.Name, 236 } 237 if !hairpinMode { 238 output = append(output, "!", "--dst", loopbackAddress(iptable.ipVersion)) 239 } 240 if !iptable.Exists(Nat, "OUTPUT", output...) && enable { 241 if err := c.Output(Append, output...); err != nil { 242 return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err) 243 } 244 } else if iptable.Exists(Nat, "OUTPUT", output...) && !enable { 245 if err := c.Output(Delete, output...); err != nil { 246 return fmt.Errorf("failed to inject %s in OUTPUT chain: %s", c.Name, err) 247 } 248 } 249 case Filter: 250 if bridgeName == "" { 251 return fmt.Errorf("could not program chain %s/%s, missing bridge name", c.Table, c.Name) 252 } 253 link := []string{ 254 "-o", bridgeName, 255 "-j", c.Name, 256 } 257 if !iptable.Exists(Filter, "FORWARD", link...) && enable { 258 insert := append([]string{string(Insert), "FORWARD"}, link...) 259 if output, err := iptable.Raw(insert...); err != nil { 260 return err 261 } else if len(output) != 0 { 262 return fmt.Errorf("could not create linking rule to %s/%s: %s", c.Table, c.Name, output) 263 } 264 } else if iptable.Exists(Filter, "FORWARD", link...) && !enable { 265 del := append([]string{string(Delete), "FORWARD"}, link...) 266 if output, err := iptable.Raw(del...); err != nil { 267 return err 268 } else if len(output) != 0 { 269 return fmt.Errorf("could not delete linking rule from %s/%s: %s", c.Table, c.Name, output) 270 } 271 } 272 establish := []string{ 273 "-o", bridgeName, 274 "-m", "conntrack", 275 "--ctstate", "RELATED,ESTABLISHED", 276 "-j", "ACCEPT", 277 } 278 if !iptable.Exists(Filter, "FORWARD", establish...) && enable { 279 insert := append([]string{string(Insert), "FORWARD"}, establish...) 280 if output, err := iptable.Raw(insert...); err != nil { 281 return err 282 } else if len(output) != 0 { 283 return fmt.Errorf("could not create establish rule to %s: %s", c.Table, output) 284 } 285 } else if iptable.Exists(Filter, "FORWARD", establish...) && !enable { 286 del := append([]string{string(Delete), "FORWARD"}, establish...) 287 if output, err := iptable.Raw(del...); err != nil { 288 return err 289 } else if len(output) != 0 { 290 return fmt.Errorf("could not delete establish rule from %s: %s", c.Table, output) 291 } 292 } 293 } 294 return nil 295 } 296 297 // RemoveExistingChain removes existing chain from the table. 298 func (iptable IPTable) RemoveExistingChain(name string, table Table) error { 299 if name == "" { 300 return fmt.Errorf("could not remove chain: chain name is empty") 301 } 302 if table == "" { 303 return fmt.Errorf("could not remove chain %s: invalid table name: table name is empty", name) 304 } 305 c := &ChainInfo{ 306 Name: name, 307 Table: table, 308 IPVersion: iptable.ipVersion, 309 } 310 return c.Remove() 311 } 312 313 // Forward adds forwarding rule to 'filter' table and corresponding nat rule to 'nat' table. 314 func (c *ChainInfo) Forward(action Action, ip net.IP, port int, proto, destAddr string, destPort int, bridgeName string) error { 315 iptable := GetIptable(c.IPVersion) 316 daddr := ip.String() 317 if ip.IsUnspecified() { 318 // iptables interprets "0.0.0.0" as "0.0.0.0/32", whereas we 319 // want "0.0.0.0/0". "0/0" is correctly interpreted as "any 320 // value" by both iptables and ip6tables. 321 daddr = "0/0" 322 } 323 324 args := []string{ 325 "-p", proto, 326 "-d", daddr, 327 "--dport", strconv.Itoa(port), 328 "-j", "DNAT", 329 "--to-destination", net.JoinHostPort(destAddr, strconv.Itoa(destPort)), 330 } 331 332 if !c.HairpinMode { 333 args = append(args, "!", "-i", bridgeName) 334 } 335 if err := iptable.ProgramRule(Nat, c.Name, action, args); err != nil { 336 return err 337 } 338 339 args = []string{ 340 "!", "-i", bridgeName, 341 "-o", bridgeName, 342 "-p", proto, 343 "-d", destAddr, 344 "--dport", strconv.Itoa(destPort), 345 "-j", "ACCEPT", 346 } 347 if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil { 348 return err 349 } 350 351 args = []string{ 352 "-p", proto, 353 "-s", destAddr, 354 "-d", destAddr, 355 "--dport", strconv.Itoa(destPort), 356 "-j", "MASQUERADE", 357 } 358 359 if err := iptable.ProgramRule(Nat, "POSTROUTING", action, args); err != nil { 360 return err 361 } 362 363 if proto == "sctp" { 364 // Linux kernel v4.9 and below enables NETIF_F_SCTP_CRC for veth by 365 // the following commit. 366 // This introduces a problem when conbined with a physical NIC without 367 // NETIF_F_SCTP_CRC. As for a workaround, here we add an iptables entry 368 // to fill the checksum. 369 // 370 // https://github.com/torvalds/linux/commit/c80fafbbb59ef9924962f83aac85531039395b18 371 args = []string{ 372 "-p", proto, 373 "--sport", strconv.Itoa(destPort), 374 "-j", "CHECKSUM", 375 "--checksum-fill", 376 } 377 if err := iptable.ProgramRule(Mangle, "POSTROUTING", action, args); err != nil { 378 return err 379 } 380 } 381 382 return nil 383 } 384 385 // Link adds reciprocal ACCEPT rule for two supplied IP addresses. 386 // Traffic is allowed from ip1 to ip2 and vice-versa 387 func (c *ChainInfo) Link(action Action, ip1, ip2 net.IP, port int, proto string, bridgeName string) error { 388 iptable := GetIptable(c.IPVersion) 389 // forward 390 args := []string{ 391 "-i", bridgeName, "-o", bridgeName, 392 "-p", proto, 393 "-s", ip1.String(), 394 "-d", ip2.String(), 395 "--dport", strconv.Itoa(port), 396 "-j", "ACCEPT", 397 } 398 399 if err := iptable.ProgramRule(Filter, c.Name, action, args); err != nil { 400 return err 401 } 402 // reverse 403 args[7], args[9] = args[9], args[7] 404 args[10] = "--sport" 405 return iptable.ProgramRule(Filter, c.Name, action, args) 406 } 407 408 // ProgramRule adds the rule specified by args only if the 409 // rule is not already present in the chain. Reciprocally, 410 // it removes the rule only if present. 411 func (iptable IPTable) ProgramRule(table Table, chain string, action Action, args []string) error { 412 if iptable.Exists(table, chain, args...) != (action == Delete) { 413 return nil 414 } 415 return iptable.RawCombinedOutput(append([]string{"-t", string(table), string(action), chain}, args...)...) 416 } 417 418 // Prerouting adds linking rule to nat/PREROUTING chain. 419 func (c *ChainInfo) Prerouting(action Action, args ...string) error { 420 iptable := GetIptable(c.IPVersion) 421 a := []string{"-t", string(Nat), string(action), "PREROUTING"} 422 if len(args) > 0 { 423 a = append(a, args...) 424 } 425 if output, err := iptable.Raw(a...); err != nil { 426 return err 427 } else if len(output) != 0 { 428 return ChainError{Chain: "PREROUTING", Output: output} 429 } 430 return nil 431 } 432 433 // Output adds linking rule to an OUTPUT chain. 434 func (c *ChainInfo) Output(action Action, args ...string) error { 435 a := []string{"-t", string(c.Table), string(action), "OUTPUT"} 436 if len(args) > 0 { 437 a = append(a, args...) 438 } 439 if output, err := GetIptable(c.IPVersion).Raw(a...); err != nil { 440 return err 441 } else if len(output) != 0 { 442 return ChainError{Chain: "OUTPUT", Output: output} 443 } 444 return nil 445 } 446 447 // Remove removes the chain. 448 func (c *ChainInfo) Remove() error { 449 // Ignore errors - This could mean the chains were never set up 450 if c.Table == Nat { 451 _ = c.Prerouting(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) 452 _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "!", "--dst", loopbackAddress(c.IPVersion), "-j", c.Name) 453 _ = c.Output(Delete, "-m", "addrtype", "--dst-type", "LOCAL", "-j", c.Name) // Created in versions <= 0.1.6 454 _ = c.Prerouting(Delete) 455 _ = c.Output(Delete) 456 } 457 iptable := GetIptable(c.IPVersion) 458 _, _ = iptable.Raw("-t", string(c.Table), "-F", c.Name) 459 _, _ = iptable.Raw("-t", string(c.Table), "-X", c.Name) 460 return nil 461 } 462 463 // Exists checks if a rule exists 464 func (iptable IPTable) Exists(table Table, chain string, rule ...string) bool { 465 return iptable.exists(false, table, chain, rule...) 466 } 467 468 // ExistsNative behaves as Exists with the difference it 469 // will always invoke `iptables` binary. 470 func (iptable IPTable) ExistsNative(table Table, chain string, rule ...string) bool { 471 return iptable.exists(true, table, chain, rule...) 472 } 473 474 func (iptable IPTable) exists(native bool, table Table, chain string, rule ...string) bool { 475 if err := initCheck(); err != nil { 476 // The exists() signature does not allow us to return an error, but at least 477 // we can skip the (likely invalid) exec invocation. 478 return false 479 } 480 481 f := iptable.Raw 482 if native { 483 f = iptable.raw 484 } 485 486 if table == "" { 487 table = Filter 488 } 489 490 // if exit status is 0 then return true, the rule exists 491 _, err := f(append([]string{"-t", string(table), "-C", chain}, rule...)...) 492 return err == nil 493 } 494 495 const ( 496 // opWarnTime is the maximum duration that an iptables operation can take before flagging a warning. 497 opWarnTime = 2 * time.Second 498 499 // xLockWaitMsg is the iptables warning about xtables lock that can be suppressed. 500 xLockWaitMsg = "Another app is currently holding the xtables lock" 501 ) 502 503 func filterOutput(start time.Time, output []byte, args ...string) []byte { 504 if opTime := time.Since(start); opTime > opWarnTime { 505 // Flag operations that have taken a long time to complete 506 log.G(context.TODO()).Warnf("xtables contention detected while running [%s]: Waited for %.2f seconds and received %q", strings.Join(args, " "), float64(opTime)/float64(time.Second), string(output)) 507 } 508 // ignore iptables' message about xtables lock: 509 // it is a warning, not an error. 510 if strings.Contains(string(output), xLockWaitMsg) { 511 output = []byte("") 512 } 513 // Put further filters here if desired 514 return output 515 } 516 517 // Raw calls 'iptables' system command, passing supplied arguments. 518 func (iptable IPTable) Raw(args ...string) ([]byte, error) { 519 if firewalldRunning { 520 // select correct IP version for firewalld 521 ipv := Iptables 522 if iptable.ipVersion == IPv6 { 523 ipv = IP6Tables 524 } 525 526 startTime := time.Now() 527 output, err := Passthrough(ipv, args...) 528 if err == nil || !strings.Contains(err.Error(), "was not provided by any .service files") { 529 return filterOutput(startTime, output, args...), err 530 } 531 } 532 return iptable.raw(args...) 533 } 534 535 func (iptable IPTable) raw(args ...string) ([]byte, error) { 536 if err := initCheck(); err != nil { 537 return nil, err 538 } 539 path := iptablesPath 540 commandName := "iptables" 541 if iptable.ipVersion == IPv6 { 542 if ip6tablesPath == "" { 543 return nil, fmt.Errorf("ip6tables is missing") 544 } 545 path = ip6tablesPath 546 commandName = "ip6tables" 547 } 548 549 if supportsXlock { 550 args = append([]string{"--wait"}, args...) 551 } else { 552 bestEffortLock.Lock() 553 defer bestEffortLock.Unlock() 554 } 555 556 log.G(context.TODO()).Debugf("%s, %v", path, args) 557 558 startTime := time.Now() 559 output, err := exec.Command(path, args...).CombinedOutput() 560 if err != nil { 561 return nil, fmt.Errorf("iptables failed: %s %v: %s (%s)", commandName, strings.Join(args, " "), output, err) 562 } 563 564 return filterOutput(startTime, output, args...), err 565 } 566 567 // RawCombinedOutput internally calls the Raw function and returns a non nil 568 // error if Raw returned a non nil error or a non empty output 569 func (iptable IPTable) RawCombinedOutput(args ...string) error { 570 if output, err := iptable.Raw(args...); err != nil || len(output) != 0 { 571 return fmt.Errorf("%s (%v)", string(output), err) 572 } 573 return nil 574 } 575 576 // RawCombinedOutputNative behave as RawCombinedOutput with the difference it 577 // will always invoke `iptables` binary 578 func (iptable IPTable) RawCombinedOutputNative(args ...string) error { 579 if output, err := iptable.raw(args...); err != nil || len(output) != 0 { 580 return fmt.Errorf("%s (%v)", string(output), err) 581 } 582 return nil 583 } 584 585 // ExistChain checks if a chain exists 586 func (iptable IPTable) ExistChain(chain string, table Table) bool { 587 _, err := iptable.Raw("-t", string(table), "-nL", chain) 588 return err == nil 589 } 590 591 // SetDefaultPolicy sets the passed default policy for the table/chain 592 func (iptable IPTable) SetDefaultPolicy(table Table, chain string, policy Policy) error { 593 if err := iptable.RawCombinedOutput("-t", string(table), "-P", chain, string(policy)); err != nil { 594 return fmt.Errorf("setting default policy to %v in %v chain failed: %v", policy, chain, err) 595 } 596 return nil 597 } 598 599 // AddReturnRule adds a return rule for the chain in the filter table 600 func (iptable IPTable) AddReturnRule(chain string) error { 601 if iptable.Exists(Filter, chain, "-j", "RETURN") { 602 return nil 603 } 604 if err := iptable.RawCombinedOutput("-A", chain, "-j", "RETURN"); err != nil { 605 return fmt.Errorf("unable to add return rule in %s chain: %v", chain, err) 606 } 607 return nil 608 } 609 610 // EnsureJumpRule ensures the jump rule is on top 611 func (iptable IPTable) EnsureJumpRule(fromChain, toChain string) error { 612 if iptable.Exists(Filter, fromChain, "-j", toChain) { 613 if err := iptable.RawCombinedOutput("-D", fromChain, "-j", toChain); err != nil { 614 return fmt.Errorf("unable to remove jump to %s rule in %s chain: %v", toChain, fromChain, err) 615 } 616 } 617 if err := iptable.RawCombinedOutput("-I", fromChain, "-j", toChain); err != nil { 618 return fmt.Errorf("unable to insert jump to %s rule in %s chain: %v", toChain, fromChain, err) 619 } 620 return nil 621 }