github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/tcpip/stack/iptables.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stack 16 17 import ( 18 "fmt" 19 "math/rand" 20 "time" 21 22 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip" 23 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/header" 24 ) 25 26 // TableID identifies a specific table. 27 type TableID int 28 29 // Each value identifies a specific table. 30 const ( 31 NATID TableID = iota 32 MangleID 33 FilterID 34 NumTables 35 ) 36 37 // HookUnset indicates that there is no hook set for an entrypoint or 38 // underflow. 39 const HookUnset = -1 40 41 // reaperDelay is how long to wait before starting to reap connections. 42 const reaperDelay = 5 * time.Second 43 44 // DefaultTables returns a default set of tables. Each chain is set to accept 45 // all packets. 46 func DefaultTables(clock tcpip.Clock, rand *rand.Rand) *IPTables { 47 return &IPTables{ 48 v4Tables: [NumTables]Table{ 49 NATID: { 50 Rules: []Rule{ 51 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 52 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 53 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 54 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 55 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 56 }, 57 BuiltinChains: [NumHooks]int{ 58 Prerouting: 0, 59 Input: 1, 60 Forward: HookUnset, 61 Output: 2, 62 Postrouting: 3, 63 }, 64 Underflows: [NumHooks]int{ 65 Prerouting: 0, 66 Input: 1, 67 Forward: HookUnset, 68 Output: 2, 69 Postrouting: 3, 70 }, 71 }, 72 MangleID: { 73 Rules: []Rule{ 74 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 75 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 76 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 77 }, 78 BuiltinChains: [NumHooks]int{ 79 Prerouting: 0, 80 Output: 1, 81 }, 82 Underflows: [NumHooks]int{ 83 Prerouting: 0, 84 Input: HookUnset, 85 Forward: HookUnset, 86 Output: 1, 87 Postrouting: HookUnset, 88 }, 89 }, 90 FilterID: { 91 Rules: []Rule{ 92 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 93 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 94 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 95 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 96 }, 97 BuiltinChains: [NumHooks]int{ 98 Prerouting: HookUnset, 99 Input: 0, 100 Forward: 1, 101 Output: 2, 102 Postrouting: HookUnset, 103 }, 104 Underflows: [NumHooks]int{ 105 Prerouting: HookUnset, 106 Input: 0, 107 Forward: 1, 108 Output: 2, 109 Postrouting: HookUnset, 110 }, 111 }, 112 }, 113 v6Tables: [NumTables]Table{ 114 NATID: { 115 Rules: []Rule{ 116 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 117 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 118 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 119 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 120 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 121 }, 122 BuiltinChains: [NumHooks]int{ 123 Prerouting: 0, 124 Input: 1, 125 Forward: HookUnset, 126 Output: 2, 127 Postrouting: 3, 128 }, 129 Underflows: [NumHooks]int{ 130 Prerouting: 0, 131 Input: 1, 132 Forward: HookUnset, 133 Output: 2, 134 Postrouting: 3, 135 }, 136 }, 137 MangleID: { 138 Rules: []Rule{ 139 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 140 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 141 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 142 }, 143 BuiltinChains: [NumHooks]int{ 144 Prerouting: 0, 145 Output: 1, 146 }, 147 Underflows: [NumHooks]int{ 148 Prerouting: 0, 149 Input: HookUnset, 150 Forward: HookUnset, 151 Output: 1, 152 Postrouting: HookUnset, 153 }, 154 }, 155 FilterID: { 156 Rules: []Rule{ 157 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 158 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 159 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 160 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 161 }, 162 BuiltinChains: [NumHooks]int{ 163 Prerouting: HookUnset, 164 Input: 0, 165 Forward: 1, 166 Output: 2, 167 Postrouting: HookUnset, 168 }, 169 Underflows: [NumHooks]int{ 170 Prerouting: HookUnset, 171 Input: 0, 172 Forward: 1, 173 Output: 2, 174 Postrouting: HookUnset, 175 }, 176 }, 177 }, 178 connections: ConnTrack{ 179 seed: rand.Uint32(), 180 clock: clock, 181 rand: rand, 182 }, 183 } 184 } 185 186 // EmptyFilterTable returns a Table with no rules and the filter table chains 187 // mapped to HookUnset. 188 func EmptyFilterTable() Table { 189 return Table{ 190 Rules: []Rule{}, 191 BuiltinChains: [NumHooks]int{ 192 Prerouting: HookUnset, 193 Postrouting: HookUnset, 194 }, 195 Underflows: [NumHooks]int{ 196 Prerouting: HookUnset, 197 Postrouting: HookUnset, 198 }, 199 } 200 } 201 202 // EmptyNATTable returns a Table with no rules and the filter table chains 203 // mapped to HookUnset. 204 func EmptyNATTable() Table { 205 return Table{ 206 Rules: []Rule{}, 207 BuiltinChains: [NumHooks]int{ 208 Forward: HookUnset, 209 }, 210 Underflows: [NumHooks]int{ 211 Forward: HookUnset, 212 }, 213 } 214 } 215 216 // GetTable returns a table with the given id and IP version. It panics when an 217 // invalid id is provided. 218 func (it *IPTables) GetTable(id TableID, ipv6 bool) Table { 219 it.mu.RLock() 220 defer it.mu.RUnlock() 221 return it.getTableRLocked(id, ipv6) 222 } 223 224 // +checklocksread:it.mu 225 func (it *IPTables) getTableRLocked(id TableID, ipv6 bool) Table { 226 if ipv6 { 227 return it.v6Tables[id] 228 } 229 return it.v4Tables[id] 230 } 231 232 // ReplaceTable replaces or inserts table by name. It panics when an invalid id 233 // is provided. 234 func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) { 235 it.mu.Lock() 236 defer it.mu.Unlock() 237 // If iptables is being enabled, initialize the conntrack table and 238 // reaper. 239 if !it.modified { 240 it.connections.init() 241 it.startReaper(reaperDelay) 242 } 243 it.modified = true 244 if ipv6 { 245 it.v6Tables[id] = table 246 } else { 247 it.v4Tables[id] = table 248 } 249 } 250 251 // A chainVerdict is what a table decides should be done with a packet. 252 type chainVerdict int 253 254 const ( 255 // chainAccept indicates the packet should continue through netstack. 256 chainAccept chainVerdict = iota 257 258 // chainDrop indicates the packet should be dropped. 259 chainDrop 260 261 // chainReturn indicates the packet should return to the calling chain 262 // or the underflow rule of a builtin chain. 263 chainReturn 264 ) 265 266 type checkTable struct { 267 fn checkTableFn 268 tableID TableID 269 table Table 270 } 271 272 // shouldSkipOrPopulateTables returns true iff IPTables should be skipped. 273 // 274 // If IPTables should not be skipped, tables will be updated with the 275 // specified table. 276 // 277 // This is called in the hot path even when iptables are disabled, so we ensure 278 // it does not allocate. We check recursively for heap allocations, but not for: 279 // - Stack splitting, which can allocate. 280 // - Calls to interfaces, which can allocate. 281 // - Calls to dynamic functions, which can allocate. 282 // 283 // +checkescape:hard 284 func (it *IPTables) shouldSkipOrPopulateTables(tables []checkTable, pkt PacketBufferPtr) bool { 285 switch pkt.NetworkProtocolNumber { 286 case header.IPv4ProtocolNumber, header.IPv6ProtocolNumber: 287 default: 288 // IPTables only supports IPv4/IPv6. 289 return true 290 } 291 292 it.mu.RLock() 293 defer it.mu.RUnlock() 294 295 if !it.modified { 296 // Many users never configure iptables. Spare them the cost of rule 297 // traversal if rules have never been set. 298 return true 299 } 300 301 for i := range tables { 302 table := &tables[i] 303 table.table = it.getTableRLocked(table.tableID, pkt.NetworkProtocolNumber == header.IPv6ProtocolNumber) 304 } 305 return false 306 } 307 308 // CheckPrerouting performs the prerouting hook on the packet. 309 // 310 // Returns true iff the packet may continue traversing the stack; the packet 311 // must be dropped if false is returned. 312 // 313 // Precondition: The packet's network and transport header must be set. 314 // 315 // This is called in the hot path even when iptables are disabled, so we ensure 316 // that it does not allocate. Note that called functions (e.g. 317 // getConnAndUpdate) can allocate. 318 // TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. 319 func (it *IPTables) CheckPrerouting(pkt PacketBufferPtr, addressEP AddressableEndpoint, inNicName string) bool { 320 tables := [...]checkTable{ 321 { 322 fn: check, 323 tableID: MangleID, 324 }, 325 { 326 fn: checkNAT, 327 tableID: NATID, 328 }, 329 } 330 331 if it.shouldSkipOrPopulateTables(tables[:], pkt) { 332 return true 333 } 334 335 pkt.tuple = it.connections.getConnAndUpdate(pkt, false /* skipChecksumValidation */) 336 337 for _, table := range tables { 338 if !table.fn(it, table.table, Prerouting, pkt, nil /* route */, addressEP, inNicName, "" /* outNicName */) { 339 return false 340 } 341 } 342 343 return true 344 } 345 346 // CheckInput performs the input hook on the packet. 347 // 348 // Returns true iff the packet may continue traversing the stack; the packet 349 // must be dropped if false is returned. 350 // 351 // Precondition: The packet's network and transport header must be set. 352 // 353 // This is called in the hot path even when iptables are disabled, so we ensure 354 // that it does not allocate. Note that called functions (e.g. 355 // getConnAndUpdate) can allocate. 356 // TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. 357 func (it *IPTables) CheckInput(pkt PacketBufferPtr, inNicName string) bool { 358 tables := [...]checkTable{ 359 { 360 fn: checkNAT, 361 tableID: NATID, 362 }, 363 { 364 fn: check, 365 tableID: FilterID, 366 }, 367 } 368 369 if it.shouldSkipOrPopulateTables(tables[:], pkt) { 370 return true 371 } 372 373 for _, table := range tables { 374 if !table.fn(it, table.table, Input, pkt, nil /* route */, nil /* addressEP */, inNicName, "" /* outNicName */) { 375 return false 376 } 377 } 378 379 if t := pkt.tuple; t != nil { 380 pkt.tuple = nil 381 return t.conn.finalize() 382 } 383 return true 384 } 385 386 // CheckForward performs the forward hook on the packet. 387 // 388 // Returns true iff the packet may continue traversing the stack; the packet 389 // must be dropped if false is returned. 390 // 391 // Precondition: The packet's network and transport header must be set. 392 // 393 // This is called in the hot path even when iptables are disabled, so we ensure 394 // that it does not allocate. Note that called functions (e.g. 395 // getConnAndUpdate) can allocate. 396 // TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. 397 func (it *IPTables) CheckForward(pkt PacketBufferPtr, inNicName, outNicName string) bool { 398 tables := [...]checkTable{ 399 { 400 fn: check, 401 tableID: FilterID, 402 }, 403 } 404 405 if it.shouldSkipOrPopulateTables(tables[:], pkt) { 406 return true 407 } 408 409 for _, table := range tables { 410 if !table.fn(it, table.table, Forward, pkt, nil /* route */, nil /* addressEP */, inNicName, outNicName) { 411 return false 412 } 413 } 414 415 return true 416 } 417 418 // CheckOutput performs the output hook on the packet. 419 // 420 // Returns true iff the packet may continue traversing the stack; the packet 421 // must be dropped if false is returned. 422 // 423 // Precondition: The packet's network and transport header must be set. 424 // 425 // This is called in the hot path even when iptables are disabled, so we ensure 426 // that it does not allocate. Note that called functions (e.g. 427 // getConnAndUpdate) can allocate. 428 // TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. 429 func (it *IPTables) CheckOutput(pkt PacketBufferPtr, r *Route, outNicName string) bool { 430 tables := [...]checkTable{ 431 { 432 fn: check, 433 tableID: MangleID, 434 }, 435 { 436 fn: checkNAT, 437 tableID: NATID, 438 }, 439 { 440 fn: check, 441 tableID: FilterID, 442 }, 443 } 444 445 if it.shouldSkipOrPopulateTables(tables[:], pkt) { 446 return true 447 } 448 449 // We don't need to validate the checksum in the Output path: we can assume 450 // we calculate it correctly, plus checksumming may be deferred due to GSO. 451 pkt.tuple = it.connections.getConnAndUpdate(pkt, true /* skipChecksumValidation */) 452 453 for _, table := range tables { 454 if !table.fn(it, table.table, Output, pkt, r, nil /* addressEP */, "" /* inNicName */, outNicName) { 455 return false 456 } 457 } 458 459 return true 460 } 461 462 // CheckPostrouting performs the postrouting hook on the packet. 463 // 464 // Returns true iff the packet may continue traversing the stack; the packet 465 // must be dropped if false is returned. 466 // 467 // Precondition: The packet's network and transport header must be set. 468 // 469 // This is called in the hot path even when iptables are disabled, so we ensure 470 // that it does not allocate. Note that called functions (e.g. 471 // getConnAndUpdate) can allocate. 472 // TODO(b/233951539): checkescape fails on arm sometimes. Fix and re-add. 473 func (it *IPTables) CheckPostrouting(pkt PacketBufferPtr, r *Route, addressEP AddressableEndpoint, outNicName string) bool { 474 tables := [...]checkTable{ 475 { 476 fn: check, 477 tableID: MangleID, 478 }, 479 { 480 fn: checkNAT, 481 tableID: NATID, 482 }, 483 } 484 485 if it.shouldSkipOrPopulateTables(tables[:], pkt) { 486 return true 487 } 488 489 for _, table := range tables { 490 if !table.fn(it, table.table, Postrouting, pkt, r, addressEP, "" /* inNicName */, outNicName) { 491 return false 492 } 493 } 494 495 if t := pkt.tuple; t != nil { 496 pkt.tuple = nil 497 return t.conn.finalize() 498 } 499 return true 500 } 501 502 // Note: this used to omit the *IPTables parameter, but doing so caused 503 // unnecessary allocations. 504 type checkTableFn func(it *IPTables, table Table, hook Hook, pkt PacketBufferPtr, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool 505 506 func checkNAT(it *IPTables, table Table, hook Hook, pkt PacketBufferPtr, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 507 return it.checkNAT(table, hook, pkt, r, addressEP, inNicName, outNicName) 508 } 509 510 // checkNAT runs the packet through the NAT table. 511 // 512 // See check. 513 func (it *IPTables) checkNAT(table Table, hook Hook, pkt PacketBufferPtr, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 514 t := pkt.tuple 515 if t != nil && t.conn.handlePacket(pkt, hook, r) { 516 return true 517 } 518 519 if !it.check(table, hook, pkt, r, addressEP, inNicName, outNicName) { 520 return false 521 } 522 523 if t == nil { 524 return true 525 } 526 527 dnat, natDone := func() (bool, bool) { 528 switch hook { 529 case Prerouting, Output: 530 return true, pkt.dnatDone 531 case Input, Postrouting: 532 return false, pkt.snatDone 533 case Forward: 534 panic("should not attempt NAT in forwarding") 535 default: 536 panic(fmt.Sprintf("unhandled hook = %d", hook)) 537 } 538 }() 539 540 // Make sure the connection is NATed. 541 // 542 // If the packet was already NATed, the connection must be NATed. 543 if !natDone { 544 t.conn.maybePerformNoopNAT(dnat) 545 _ = t.conn.handlePacket(pkt, hook, r) 546 } 547 548 return true 549 } 550 551 func check(it *IPTables, table Table, hook Hook, pkt PacketBufferPtr, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 552 return it.check(table, hook, pkt, r, addressEP, inNicName, outNicName) 553 } 554 555 // check runs the packet through the rules in the specified table for the 556 // hook. It returns true if the packet should continue to traverse through the 557 // network stack or tables, or false when it must be dropped. 558 // 559 // Precondition: The packet's network and transport header must be set. 560 func (it *IPTables) check(table Table, hook Hook, pkt PacketBufferPtr, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 561 ruleIdx := table.BuiltinChains[hook] 562 switch verdict := it.checkChain(hook, pkt, table, ruleIdx, r, addressEP, inNicName, outNicName); verdict { 563 // If the table returns Accept, move on to the next table. 564 case chainAccept: 565 return true 566 // The Drop verdict is final. 567 case chainDrop: 568 return false 569 case chainReturn: 570 // Any Return from a built-in chain means we have to 571 // call the underflow. 572 underflow := table.Rules[table.Underflows[hook]] 573 switch v, _ := underflow.Target.Action(pkt, hook, r, addressEP); v { 574 case RuleAccept: 575 return true 576 case RuleDrop: 577 return false 578 case RuleJump, RuleReturn: 579 panic("Underflows should only return RuleAccept or RuleDrop.") 580 default: 581 panic(fmt.Sprintf("Unknown verdict: %d", v)) 582 } 583 default: 584 panic(fmt.Sprintf("Unknown verdict %v.", verdict)) 585 } 586 } 587 588 // beforeSave is invoked by stateify. 589 func (it *IPTables) beforeSave() { 590 // Ensure the reaper exits cleanly. 591 it.reaper.Stop() 592 // Prevent others from modifying the connection table. 593 it.connections.mu.Lock() 594 } 595 596 // afterLoad is invoked by stateify. 597 func (it *IPTables) afterLoad() { 598 it.startReaper(reaperDelay) 599 } 600 601 // startReaper periodically reaps timed out connections. 602 func (it *IPTables) startReaper(interval time.Duration) { 603 bucket := 0 604 it.reaper = it.connections.clock.AfterFunc(interval, func() { 605 bucket, interval = it.connections.reapUnused(bucket, interval) 606 it.reaper.Reset(interval) 607 }) 608 } 609 610 // Preconditions: 611 // - pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 612 // - pkt.NetworkHeader is not nil. 613 func (it *IPTables) checkChain(hook Hook, pkt PacketBufferPtr, table Table, ruleIdx int, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) chainVerdict { 614 // Start from ruleIdx and walk the list of rules until a rule gives us 615 // a verdict. 616 for ruleIdx < len(table.Rules) { 617 switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, r, addressEP, inNicName, outNicName); verdict { 618 case RuleAccept: 619 return chainAccept 620 621 case RuleDrop: 622 return chainDrop 623 624 case RuleReturn: 625 return chainReturn 626 627 case RuleJump: 628 // "Jumping" to the next rule just means we're 629 // continuing on down the list. 630 if jumpTo == ruleIdx+1 { 631 ruleIdx++ 632 continue 633 } 634 switch verdict := it.checkChain(hook, pkt, table, jumpTo, r, addressEP, inNicName, outNicName); verdict { 635 case chainAccept: 636 return chainAccept 637 case chainDrop: 638 return chainDrop 639 case chainReturn: 640 ruleIdx++ 641 continue 642 default: 643 panic(fmt.Sprintf("Unknown verdict: %d", verdict)) 644 } 645 646 default: 647 panic(fmt.Sprintf("Unknown verdict: %d", verdict)) 648 } 649 650 } 651 652 // We got through the entire table without a decision. Default to DROP 653 // for safety. 654 return chainDrop 655 } 656 657 // Preconditions: 658 // - pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 659 // - pkt.NetworkHeader is not nil. 660 // 661 // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 662 // * pkt.NetworkHeader is not nil. 663 func (it *IPTables) checkRule(hook Hook, pkt PacketBufferPtr, table Table, ruleIdx int, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) (RuleVerdict, int) { 664 rule := table.Rules[ruleIdx] 665 666 // Check whether the packet matches the IP header filter. 667 if !rule.Filter.match(pkt, hook, inNicName, outNicName) { 668 // Continue on to the next rule. 669 return RuleJump, ruleIdx + 1 670 } 671 672 // Go through each rule matcher. If they all match, run 673 // the rule target. 674 for _, matcher := range rule.Matchers { 675 matches, hotdrop := matcher.Match(hook, pkt, inNicName, outNicName) 676 if hotdrop { 677 return RuleDrop, 0 678 } 679 if !matches { 680 // Continue on to the next rule. 681 return RuleJump, ruleIdx + 1 682 } 683 } 684 685 // All the matchers matched, so run the target. 686 return rule.Target.Action(pkt, hook, r, addressEP) 687 } 688 689 // OriginalDst returns the original destination of redirected connections. It 690 // returns an error if the connection doesn't exist or isn't redirected. 691 func (it *IPTables) OriginalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber) (tcpip.Address, uint16, tcpip.Error) { 692 it.mu.RLock() 693 defer it.mu.RUnlock() 694 if !it.modified { 695 return tcpip.Address{}, 0, &tcpip.ErrNotConnected{} 696 } 697 return it.connections.originalDst(epID, netProto, transProto) 698 }