// inet.af/netstack@v0.0.0-20220214151720-7585b01ddccf/tcpip/stack/iptables.go

// Copyright 2019 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package stack

import (
	"fmt"
	"math/rand"
	"time"

	"inet.af/netstack/tcpip"
	"inet.af/netstack/tcpip/header"
)

// TableID identifies a specific table.
type TableID int

// Each value identifies a specific table. The values are used as indices into
// the per-IP-version table arrays, which are sized by NumTables.
const (
	NATID TableID = iota
	MangleID
	FilterID
	// NumTables is the number of table IDs declared above.
	NumTables
)

// HookUnset indicates that there is no hook set for an entrypoint or
// underflow.
const HookUnset = -1

// reaperDelay is how long to wait before starting to reap connections. It is
// the interval handed to startReaper when the reaper is launched.
const reaperDelay = 5 * time.Second

// DefaultTables returns a default set of tables. Each chain is set to accept
// all packets.
46 func DefaultTables(clock tcpip.Clock, rand *rand.Rand) *IPTables { 47 return &IPTables{ 48 v4Tables: [NumTables]Table{ 49 NATID: { 50 Rules: []Rule{ 51 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 52 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 53 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 54 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 55 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 56 }, 57 BuiltinChains: [NumHooks]int{ 58 Prerouting: 0, 59 Input: 1, 60 Forward: HookUnset, 61 Output: 2, 62 Postrouting: 3, 63 }, 64 Underflows: [NumHooks]int{ 65 Prerouting: 0, 66 Input: 1, 67 Forward: HookUnset, 68 Output: 2, 69 Postrouting: 3, 70 }, 71 }, 72 MangleID: { 73 Rules: []Rule{ 74 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 75 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 76 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 77 }, 78 BuiltinChains: [NumHooks]int{ 79 Prerouting: 0, 80 Output: 1, 81 }, 82 Underflows: [NumHooks]int{ 83 Prerouting: 0, 84 Input: HookUnset, 85 Forward: HookUnset, 86 Output: 1, 87 Postrouting: HookUnset, 88 }, 89 }, 90 FilterID: { 91 Rules: []Rule{ 92 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 93 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 94 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 95 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 96 }, 97 BuiltinChains: [NumHooks]int{ 98 Prerouting: HookUnset, 99 Input: 0, 100 Forward: 1, 101 Output: 2, 102 Postrouting: HookUnset, 103 }, 104 Underflows: [NumHooks]int{ 105 Prerouting: HookUnset, 106 Input: 0, 107 Forward: 1, 108 Output: 2, 109 Postrouting: HookUnset, 110 }, 111 }, 112 }, 113 v6Tables: [NumTables]Table{ 114 NATID: { 115 Rules: []Rule{ 116 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 117 
{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 118 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 119 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 120 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 121 }, 122 BuiltinChains: [NumHooks]int{ 123 Prerouting: 0, 124 Input: 1, 125 Forward: HookUnset, 126 Output: 2, 127 Postrouting: 3, 128 }, 129 Underflows: [NumHooks]int{ 130 Prerouting: 0, 131 Input: 1, 132 Forward: HookUnset, 133 Output: 2, 134 Postrouting: 3, 135 }, 136 }, 137 MangleID: { 138 Rules: []Rule{ 139 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 140 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 141 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 142 }, 143 BuiltinChains: [NumHooks]int{ 144 Prerouting: 0, 145 Output: 1, 146 }, 147 Underflows: [NumHooks]int{ 148 Prerouting: 0, 149 Input: HookUnset, 150 Forward: HookUnset, 151 Output: 1, 152 Postrouting: HookUnset, 153 }, 154 }, 155 FilterID: { 156 Rules: []Rule{ 157 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 158 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 159 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 160 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 161 }, 162 BuiltinChains: [NumHooks]int{ 163 Prerouting: HookUnset, 164 Input: 0, 165 Forward: 1, 166 Output: 2, 167 Postrouting: HookUnset, 168 }, 169 Underflows: [NumHooks]int{ 170 Prerouting: HookUnset, 171 Input: 0, 172 Forward: 1, 173 Output: 2, 174 Postrouting: HookUnset, 175 }, 176 }, 177 }, 178 connections: ConnTrack{ 179 seed: rand.Uint32(), 180 clock: clock, 181 rand: rand, 182 }, 183 reaperDone: make(chan struct{}, 1), 184 } 185 } 186 187 // EmptyFilterTable returns a Table with no rules and the filter table chains 188 // mapped to HookUnset. 
189 func EmptyFilterTable() Table { 190 return Table{ 191 Rules: []Rule{}, 192 BuiltinChains: [NumHooks]int{ 193 Prerouting: HookUnset, 194 Postrouting: HookUnset, 195 }, 196 Underflows: [NumHooks]int{ 197 Prerouting: HookUnset, 198 Postrouting: HookUnset, 199 }, 200 } 201 } 202 203 // EmptyNATTable returns a Table with no rules and the filter table chains 204 // mapped to HookUnset. 205 func EmptyNATTable() Table { 206 return Table{ 207 Rules: []Rule{}, 208 BuiltinChains: [NumHooks]int{ 209 Forward: HookUnset, 210 }, 211 Underflows: [NumHooks]int{ 212 Forward: HookUnset, 213 }, 214 } 215 } 216 217 // GetTable returns a table with the given id and IP version. It panics when an 218 // invalid id is provided. 219 func (it *IPTables) GetTable(id TableID, ipv6 bool) Table { 220 it.mu.RLock() 221 defer it.mu.RUnlock() 222 if ipv6 { 223 return it.v6Tables[id] 224 } 225 return it.v4Tables[id] 226 } 227 228 // ReplaceTable replaces or inserts table by name. It panics when an invalid id 229 // is provided. 230 func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) tcpip.Error { 231 it.mu.Lock() 232 defer it.mu.Unlock() 233 // If iptables is being enabled, initialize the conntrack table and 234 // reaper. 235 if !it.modified { 236 it.connections.init() 237 it.startReaper(reaperDelay) 238 } 239 it.modified = true 240 if ipv6 { 241 it.v6Tables[id] = table 242 } else { 243 it.v4Tables[id] = table 244 } 245 return nil 246 } 247 248 // A chainVerdict is what a table decides should be done with a packet. 249 type chainVerdict int 250 251 const ( 252 // chainAccept indicates the packet should continue through netstack. 253 chainAccept chainVerdict = iota 254 255 // chainDrop indicates the packet should be dropped. 256 chainDrop 257 258 // chainReturn indicates the packet should return to the calling chain 259 // or the underflow rule of a builtin chain. 260 chainReturn 261 ) 262 263 // CheckPrerouting performs the prerouting hook on the packet. 
264 // 265 // Returns true iff the packet may continue traversing the stack; the packet 266 // must be dropped if false is returned. 267 // 268 // Precondition: The packet's network and transport header must be set. 269 func (it *IPTables) CheckPrerouting(pkt *PacketBuffer, addressEP AddressableEndpoint, inNicName string) bool { 270 it.mu.RLock() 271 defer it.mu.RUnlock() 272 273 if it.shouldSkipRLocked(pkt.NetworkProtocolNumber) { 274 return true 275 } 276 277 pkt.tuple = it.connections.getConnAndUpdate(pkt) 278 279 for _, check := range [...]checkTableFn{ 280 it.checkMangleRLocked, 281 it.checkNATRLocked, 282 } { 283 if !check(Prerouting, pkt, nil /* route */, addressEP, inNicName, "" /* outNicName */) { 284 return false 285 } 286 } 287 288 return true 289 } 290 291 // CheckInput performs the input hook on the packet. 292 // 293 // Returns true iff the packet may continue traversing the stack; the packet 294 // must be dropped if false is returned. 295 // 296 // Precondition: The packet's network and transport header must be set. 297 func (it *IPTables) CheckInput(pkt *PacketBuffer, inNicName string) bool { 298 it.mu.RLock() 299 defer it.mu.RUnlock() 300 301 if it.shouldSkipRLocked(pkt.NetworkProtocolNumber) { 302 return true 303 } 304 305 for _, check := range [...]checkTableFn{ 306 it.checkNATRLocked, 307 it.checkFilterRLocked, 308 } { 309 if !check(Input, pkt, nil /* route */, nil /* addressEP */, inNicName, "" /* outNicName */) { 310 return false 311 } 312 } 313 314 if t := pkt.tuple; t != nil { 315 pkt.tuple = nil 316 return t.conn.finalize() 317 } 318 return true 319 } 320 321 // CheckForward performs the forward hook on the packet. 322 // 323 // Returns true iff the packet may continue traversing the stack; the packet 324 // must be dropped if false is returned. 325 // 326 // Precondition: The packet's network and transport header must be set. 
327 func (it *IPTables) CheckForward(pkt *PacketBuffer, inNicName, outNicName string) bool { 328 it.mu.RLock() 329 defer it.mu.RUnlock() 330 331 if it.shouldSkipRLocked(pkt.NetworkProtocolNumber) { 332 return true 333 } 334 335 return it.checkFilterRLocked(Forward, pkt, nil /* route */, nil /* addressEP */, inNicName, outNicName) 336 } 337 338 // CheckOutput performs the output hook on the packet. 339 // 340 // Returns true iff the packet may continue traversing the stack; the packet 341 // must be dropped if false is returned. 342 // 343 // Precondition: The packet's network and transport header must be set. 344 func (it *IPTables) CheckOutput(pkt *PacketBuffer, r *Route, outNicName string) bool { 345 it.mu.RLock() 346 defer it.mu.RUnlock() 347 348 if it.shouldSkipRLocked(pkt.NetworkProtocolNumber) { 349 return true 350 } 351 352 pkt.tuple = it.connections.getConnAndUpdate(pkt) 353 354 for _, check := range [...]checkTableFn{ 355 it.checkMangleRLocked, 356 it.checkNATRLocked, 357 it.checkFilterRLocked, 358 } { 359 if !check(Output, pkt, r, nil /* addressEP */, "" /* inNicName */, outNicName) { 360 return false 361 } 362 } 363 364 return true 365 } 366 367 // CheckPostrouting performs the postrouting hook on the packet. 368 // 369 // Returns true iff the packet may continue traversing the stack; the packet 370 // must be dropped if false is returned. 371 // 372 // Precondition: The packet's network and transport header must be set. 
373 func (it *IPTables) CheckPostrouting(pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, outNicName string) bool { 374 it.mu.RLock() 375 defer it.mu.RUnlock() 376 377 if it.shouldSkipRLocked(pkt.NetworkProtocolNumber) { 378 return true 379 } 380 381 for _, check := range [...]checkTableFn{ 382 it.checkMangleRLocked, 383 it.checkNATRLocked, 384 } { 385 if !check(Postrouting, pkt, r, addressEP, "" /* inNicName */, outNicName) { 386 return false 387 } 388 } 389 390 if t := pkt.tuple; t != nil { 391 pkt.tuple = nil 392 return t.conn.finalize() 393 } 394 return true 395 } 396 397 // +checklocksread:it.mu 398 func (it *IPTables) shouldSkipRLocked(netProto tcpip.NetworkProtocolNumber) bool { 399 switch netProto { 400 case header.IPv4ProtocolNumber, header.IPv6ProtocolNumber: 401 default: 402 // IPTables only supports IPv4/IPv6. 403 return true 404 } 405 406 // Many users never configure iptables. Spare them the cost of rule 407 // traversal if rules have never been set. 408 return !it.modified 409 } 410 411 type checkTableFn func(hook Hook, pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool 412 413 // checkMangleRLocked runs the packet through the mangle table. 414 // 415 // See checkRLocked. 416 // 417 // +checklocksread:it.mu 418 func (it *IPTables) checkMangleRLocked(hook Hook, pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 419 return it.checkRLocked(MangleID, hook, pkt, r, addressEP, inNicName, outNicName) 420 } 421 422 // checkNATRLocked runs the packet through the NAT table. 423 // 424 // See checkRLocked. 
425 // 426 // +checklocksread:it.mu 427 func (it *IPTables) checkNATRLocked(hook Hook, pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 428 t := pkt.tuple 429 if t != nil && t.conn.handlePacket(pkt, hook, r) { 430 return true 431 } 432 433 if !it.checkRLocked(NATID, hook, pkt, r, addressEP, inNicName, outNicName) { 434 return false 435 } 436 437 if t == nil { 438 return true 439 } 440 441 var dnat bool 442 var natDone *bool 443 switch hook { 444 case Prerouting, Output: 445 dnat = true 446 natDone = &pkt.DNATDone 447 case Input, Postrouting: 448 dnat = false 449 natDone = &pkt.SNATDone 450 case Forward: 451 panic("should not attempt NAT in forwarding") 452 default: 453 panic(fmt.Sprintf("unhandled hook = %d", hook)) 454 } 455 456 // Make sure the connection is NATed. 457 // 458 // If the packet was already NATed, the connection must be NATed. 459 if !*natDone { 460 t.conn.maybePerformNoopNAT(dnat) 461 _ = t.conn.handlePacket(pkt, hook, r) 462 } 463 464 return true 465 } 466 467 // checkFilterRLocked runs the packet through the filter table. 468 // 469 // See checkRLocked. 470 // 471 // +checklocksread:it.mu 472 func (it *IPTables) checkFilterRLocked(hook Hook, pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 473 return it.checkRLocked(FilterID, hook, pkt, r, addressEP, inNicName, outNicName) 474 } 475 476 // checkRLocked runs the packet through the rules in the specified table for the 477 // hook. It returns true if the packet should continue to traverse through the 478 // network stack or tables, or false when it must be dropped. 479 // 480 // Precondition: The packet's network and transport header must be set. 
481 // 482 // +checklocksread:it.mu 483 func (it *IPTables) checkRLocked(tableID TableID, hook Hook, pkt *PacketBuffer, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) bool { 484 var table Table 485 if pkt.NetworkProtocolNumber == header.IPv6ProtocolNumber { 486 table = it.v6Tables[tableID] 487 } else { 488 table = it.v4Tables[tableID] 489 } 490 ruleIdx := table.BuiltinChains[hook] 491 switch verdict := it.checkChain(hook, pkt, table, ruleIdx, r, addressEP, inNicName, outNicName); verdict { 492 // If the table returns Accept, move on to the next table. 493 case chainAccept: 494 return true 495 // The Drop verdict is final. 496 case chainDrop: 497 return false 498 case chainReturn: 499 // Any Return from a built-in chain means we have to 500 // call the underflow. 501 underflow := table.Rules[table.Underflows[hook]] 502 switch v, _ := underflow.Target.Action(pkt, hook, r, addressEP); v { 503 case RuleAccept: 504 return true 505 case RuleDrop: 506 return false 507 case RuleJump, RuleReturn: 508 panic("Underflows should only return RuleAccept or RuleDrop.") 509 default: 510 panic(fmt.Sprintf("Unknown verdict: %d", v)) 511 } 512 default: 513 panic(fmt.Sprintf("Unknown verdict %v.", verdict)) 514 } 515 } 516 517 // beforeSave is invoked by stateify. 518 func (it *IPTables) beforeSave() { 519 // Ensure the reaper exits cleanly. 520 it.reaperDone <- struct{}{} 521 // Prevent others from modifying the connection table. 522 it.connections.mu.Lock() 523 } 524 525 // afterLoad is invoked by stateify. 526 func (it *IPTables) afterLoad() { 527 it.startReaper(reaperDelay) 528 } 529 530 // startReaper starts a goroutine that wakes up periodically to reap timed out 531 // connections. 532 func (it *IPTables) startReaper(interval time.Duration) { 533 go func() { // S/R-SAFE: reaperDone is signalled when iptables is saved. 
534 bucket := 0 535 for { 536 select { 537 case <-it.reaperDone: 538 return 539 // TODO(gvisor.dev/issue/5939): do not use the ambient clock. 540 case <-time.After(interval): 541 bucket, interval = it.connections.reapUnused(bucket, interval) 542 } 543 } 544 }() 545 } 546 547 // CheckOutputPackets performs the output hook on the packets. 548 // 549 // Returns a map of packets that must be dropped. 550 // 551 // Precondition: The packets' network and transport header must be set. 552 func (it *IPTables) CheckOutputPackets(pkts PacketBufferList, r *Route, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) { 553 return checkPackets(pkts, func(pkt *PacketBuffer) bool { 554 return it.CheckOutput(pkt, r, outNicName) 555 }, true /* dnat */) 556 } 557 558 // CheckPostroutingPackets performs the postrouting hook on the packets. 559 // 560 // Returns a map of packets that must be dropped. 561 // 562 // Precondition: The packets' network and transport header must be set. 
563 func (it *IPTables) CheckPostroutingPackets(pkts PacketBufferList, r *Route, addressEP AddressableEndpoint, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) { 564 return checkPackets(pkts, func(pkt *PacketBuffer) bool { 565 return it.CheckPostrouting(pkt, r, addressEP, outNicName) 566 }, false /* dnat */) 567 } 568 569 func checkPackets(pkts PacketBufferList, f func(*PacketBuffer) bool, dnat bool) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) { 570 for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { 571 natDone := &pkt.SNATDone 572 if dnat { 573 natDone = &pkt.DNATDone 574 } 575 576 if ok := f(pkt); !ok { 577 if drop == nil { 578 drop = make(map[*PacketBuffer]struct{}) 579 } 580 drop[pkt] = struct{}{} 581 } 582 if *natDone { 583 if natPkts == nil { 584 natPkts = make(map[*PacketBuffer]struct{}) 585 } 586 natPkts[pkt] = struct{}{} 587 } 588 } 589 return drop, natPkts 590 } 591 592 // Preconditions: 593 // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 594 // * pkt.NetworkHeader is not nil. 595 func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) chainVerdict { 596 // Start from ruleIdx and walk the list of rules until a rule gives us 597 // a verdict. 598 for ruleIdx < len(table.Rules) { 599 switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, r, addressEP, inNicName, outNicName); verdict { 600 case RuleAccept: 601 return chainAccept 602 603 case RuleDrop: 604 return chainDrop 605 606 case RuleReturn: 607 return chainReturn 608 609 case RuleJump: 610 // "Jumping" to the next rule just means we're 611 // continuing on down the list. 
612 if jumpTo == ruleIdx+1 { 613 ruleIdx++ 614 continue 615 } 616 switch verdict := it.checkChain(hook, pkt, table, jumpTo, r, addressEP, inNicName, outNicName); verdict { 617 case chainAccept: 618 return chainAccept 619 case chainDrop: 620 return chainDrop 621 case chainReturn: 622 ruleIdx++ 623 continue 624 default: 625 panic(fmt.Sprintf("Unknown verdict: %d", verdict)) 626 } 627 628 default: 629 panic(fmt.Sprintf("Unknown verdict: %d", verdict)) 630 } 631 632 } 633 634 // We got through the entire table without a decision. Default to DROP 635 // for safety. 636 return chainDrop 637 } 638 639 // Preconditions: 640 // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 641 // * pkt.NetworkHeader is not nil. 642 func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, r *Route, addressEP AddressableEndpoint, inNicName, outNicName string) (RuleVerdict, int) { 643 rule := table.Rules[ruleIdx] 644 645 // Check whether the packet matches the IP header filter. 646 if !rule.Filter.match(pkt, hook, inNicName, outNicName) { 647 // Continue on to the next rule. 648 return RuleJump, ruleIdx + 1 649 } 650 651 // Go through each rule matcher. If they all match, run 652 // the rule target. 653 for _, matcher := range rule.Matchers { 654 matches, hotdrop := matcher.Match(hook, pkt, inNicName, outNicName) 655 if hotdrop { 656 return RuleDrop, 0 657 } 658 if !matches { 659 // Continue on to the next rule. 660 return RuleJump, ruleIdx + 1 661 } 662 } 663 664 // All the matchers matched, so run the target. 665 return rule.Target.Action(pkt, hook, r, addressEP) 666 } 667 668 // OriginalDst returns the original destination of redirected connections. It 669 // returns an error if the connection doesn't exist or isn't redirected. 
670 func (it *IPTables) OriginalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber, transProto tcpip.TransportProtocolNumber) (tcpip.Address, uint16, tcpip.Error) { 671 it.mu.RLock() 672 defer it.mu.RUnlock() 673 if !it.modified { 674 return "", 0, &tcpip.ErrNotConnected{} 675 } 676 return it.connections.originalDst(epID, netProto, transProto) 677 }