github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/iptables.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package stack 16 17 import ( 18 "fmt" 19 "time" 20 21 "github.com/SagerNet/gvisor/pkg/tcpip" 22 "github.com/SagerNet/gvisor/pkg/tcpip/header" 23 ) 24 25 // TableID identifies a specific table. 26 type TableID int 27 28 // Each value identifies a specific table. 29 const ( 30 NATID TableID = iota 31 MangleID 32 FilterID 33 NumTables 34 ) 35 36 // HookUnset indicates that there is no hook set for an entrypoint or 37 // underflow. 38 const HookUnset = -1 39 40 // reaperDelay is how long to wait before starting to reap connections. 41 const reaperDelay = 5 * time.Second 42 43 // DefaultTables returns a default set of tables. Each chain is set to accept 44 // all packets. 45 func DefaultTables(seed uint32) *IPTables { 46 return &IPTables{ 47 v4Tables: [NumTables]Table{ 48 NATID: { 49 Rules: []Rule{ 50 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 51 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 52 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 53 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 54 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 55 }, 56 BuiltinChains: [NumHooks]int{ 57 Prerouting: 0, 58 Input: 1, 59 Forward: HookUnset, 60 Output: 2, 61 Postrouting: 3, 62 }, 63 Underflows: [NumHooks]int{ 64 Prerouting: 0, 65 Input: 1, 66 Forward: HookUnset, 67 Output: 2, 68 Postrouting: 3, 69 }, 70 }, 71 MangleID: { 72 Rules: []Rule{ 73 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 74 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 75 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 76 }, 77 BuiltinChains: [NumHooks]int{ 78 Prerouting: 0, 79 Output: 1, 80 }, 81 Underflows: [NumHooks]int{ 82 Prerouting: 0, 83 Input: HookUnset, 84 Forward: HookUnset, 85 Output: 1, 86 Postrouting: HookUnset, 87 }, 88 }, 89 FilterID: { 90 Rules: []Rule{ 91 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 92 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 93 {Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 94 {Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}}, 95 }, 96 BuiltinChains: [NumHooks]int{ 97 Prerouting: HookUnset, 98 Input: 0, 99 Forward: 1, 100 Output: 2, 101 Postrouting: HookUnset, 102 }, 103 Underflows: [NumHooks]int{ 104 Prerouting: HookUnset, 105 Input: 0, 106 Forward: 1, 107 Output: 2, 108 Postrouting: HookUnset, 109 }, 110 }, 111 }, 112 v6Tables: [NumTables]Table{ 113 NATID: { 114 Rules: []Rule{ 115 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 116 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 117 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 118 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 119 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 120 }, 121 BuiltinChains: [NumHooks]int{ 122 Prerouting: 0, 123 Input: 1, 124 Forward: HookUnset, 125 Output: 2, 126 Postrouting: 3, 127 }, 128 Underflows: [NumHooks]int{ 129 Prerouting: 0, 130 Input: 1, 131 Forward: HookUnset, 132 Output: 2, 133 Postrouting: 3, 134 }, 135 }, 136 MangleID: { 137 Rules: []Rule{ 138 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 139 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 140 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 141 }, 142 BuiltinChains: [NumHooks]int{ 143 Prerouting: 0, 144 Output: 1, 145 }, 146 Underflows: [NumHooks]int{ 147 Prerouting: 0, 148 Input: HookUnset, 149 Forward: HookUnset, 150 Output: 1, 151 Postrouting: HookUnset, 152 }, 153 }, 154 FilterID: { 155 Rules: []Rule{ 156 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 157 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 158 {Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 159 {Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}}, 160 }, 161 BuiltinChains: [NumHooks]int{ 162 Prerouting: HookUnset, 163 Input: 0, 164 Forward: 1, 165 Output: 2, 166 Postrouting: HookUnset, 167 }, 168 Underflows: [NumHooks]int{ 169 Prerouting: HookUnset, 170 Input: 0, 171 Forward: 1, 172 Output: 2, 173 Postrouting: HookUnset, 174 }, 175 }, 176 }, 177 priorities: [NumHooks][]TableID{ 178 Prerouting: {MangleID, NATID}, 179 Input: {NATID, FilterID}, 180 Forward: {FilterID}, 181 Output: {MangleID, NATID, FilterID}, 182 Postrouting: {MangleID, NATID}, 183 }, 184 connections: ConnTrack{ 185 seed: seed, 186 }, 187 reaperDone: make(chan struct{}, 1), 188 } 189 } 190 191 // EmptyFilterTable returns a Table with no rules and the filter table chains 192 // mapped to HookUnset. 193 func EmptyFilterTable() Table { 194 return Table{ 195 Rules: []Rule{}, 196 BuiltinChains: [NumHooks]int{ 197 Prerouting: HookUnset, 198 Postrouting: HookUnset, 199 }, 200 Underflows: [NumHooks]int{ 201 Prerouting: HookUnset, 202 Postrouting: HookUnset, 203 }, 204 } 205 } 206 207 // EmptyNATTable returns a Table with no rules and the filter table chains 208 // mapped to HookUnset. 209 func EmptyNATTable() Table { 210 return Table{ 211 Rules: []Rule{}, 212 BuiltinChains: [NumHooks]int{ 213 Forward: HookUnset, 214 }, 215 Underflows: [NumHooks]int{ 216 Forward: HookUnset, 217 }, 218 } 219 } 220 221 // GetTable returns a table with the given id and IP version. It panics when an 222 // invalid id is provided. 223 func (it *IPTables) GetTable(id TableID, ipv6 bool) Table { 224 it.mu.RLock() 225 defer it.mu.RUnlock() 226 if ipv6 { 227 return it.v6Tables[id] 228 } 229 return it.v4Tables[id] 230 } 231 232 // ReplaceTable replaces or inserts table by name. It panics when an invalid id 233 // is provided. 234 func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) tcpip.Error { 235 it.mu.Lock() 236 defer it.mu.Unlock() 237 // If iptables is being enabled, initialize the conntrack table and 238 // reaper. 239 if !it.modified { 240 it.connections.init() 241 it.startReaper(reaperDelay) 242 } 243 it.modified = true 244 if ipv6 { 245 it.v6Tables[id] = table 246 } else { 247 it.v4Tables[id] = table 248 } 249 return nil 250 } 251 252 // A chainVerdict is what a table decides should be done with a packet. 253 type chainVerdict int 254 255 const ( 256 // chainAccept indicates the packet should continue through netstack. 257 chainAccept chainVerdict = iota 258 259 // chainAccept indicates the packet should be dropped. 260 chainDrop 261 262 // chainReturn indicates the packet should return to the calling chain 263 // or the underflow rule of a builtin chain. 264 chainReturn 265 ) 266 267 // Check runs pkt through the rules for hook. It returns true when the packet 268 // should continue traversing the network stack and false when it should be 269 // dropped. 270 // 271 // Precondition: pkt.NetworkHeader is set. 272 func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) bool { 273 if pkt.NetworkProtocolNumber != header.IPv4ProtocolNumber && pkt.NetworkProtocolNumber != header.IPv6ProtocolNumber { 274 return true 275 } 276 // Many users never configure iptables. Spare them the cost of rule 277 // traversal if rules have never been set. 278 it.mu.RLock() 279 defer it.mu.RUnlock() 280 if !it.modified { 281 return true 282 } 283 284 // Packets are manipulated only if connection and matching 285 // NAT rule exists. 286 shouldTrack := it.connections.handlePacket(pkt, hook, r) 287 288 // Go through each table containing the hook. 289 priorities := it.priorities[hook] 290 for _, tableID := range priorities { 291 // If handlePacket already NATed the packet, we don't need to 292 // check the NAT table. 293 if tableID == NATID && pkt.NatDone { 294 continue 295 } 296 var table Table 297 if pkt.NetworkProtocolNumber == header.IPv6ProtocolNumber { 298 table = it.v6Tables[tableID] 299 } else { 300 table = it.v4Tables[tableID] 301 } 302 ruleIdx := table.BuiltinChains[hook] 303 switch verdict := it.checkChain(hook, pkt, table, ruleIdx, r, preroutingAddr, inNicName, outNicName); verdict { 304 // If the table returns Accept, move on to the next table. 305 case chainAccept: 306 continue 307 // The Drop verdict is final. 308 case chainDrop: 309 return false 310 case chainReturn: 311 // Any Return from a built-in chain means we have to 312 // call the underflow. 313 underflow := table.Rules[table.Underflows[hook]] 314 switch v, _ := underflow.Target.Action(pkt, &it.connections, hook, r, preroutingAddr); v { 315 case RuleAccept: 316 continue 317 case RuleDrop: 318 return false 319 case RuleJump, RuleReturn: 320 panic("Underflows should only return RuleAccept or RuleDrop.") 321 default: 322 panic(fmt.Sprintf("Unknown verdict: %d", v)) 323 } 324 325 default: 326 panic(fmt.Sprintf("Unknown verdict %v.", verdict)) 327 } 328 } 329 330 // If this connection should be tracked, try to add an entry for it. If 331 // traversing the nat table didn't end in adding an entry, 332 // maybeInsertNoop will add a no-op entry for the connection. This is 333 // needeed when establishing connections so that the SYN/ACK reply to an 334 // outgoing SYN is delivered to the correct endpoint rather than being 335 // redirected by a prerouting rule. 336 // 337 // From the iptables documentation: "If there is no rule, a `null' 338 // binding is created: this usually does not map the packet, but exists 339 // to ensure we don't map another stream over an existing one." 340 if shouldTrack { 341 it.connections.maybeInsertNoop(pkt, hook) 342 } 343 344 // Every table returned Accept. 345 return true 346 } 347 348 // beforeSave is invoked by stateify. 349 func (it *IPTables) beforeSave() { 350 // Ensure the reaper exits cleanly. 351 it.reaperDone <- struct{}{} 352 // Prevent others from modifying the connection table. 353 it.connections.mu.Lock() 354 } 355 356 // afterLoad is invoked by stateify. 357 func (it *IPTables) afterLoad() { 358 it.startReaper(reaperDelay) 359 } 360 361 // startReaper starts a goroutine that wakes up periodically to reap timed out 362 // connections. 363 func (it *IPTables) startReaper(interval time.Duration) { 364 go func() { // S/R-SAFE: reaperDone is signalled when iptables is saved. 365 bucket := 0 366 for { 367 select { 368 case <-it.reaperDone: 369 return 370 // TODO(github.com/SagerNet/issue/5939): do not use the ambient clock. 371 case <-time.After(interval): 372 bucket, interval = it.connections.reapUnused(bucket, interval) 373 } 374 } 375 }() 376 } 377 378 // CheckPackets runs pkts through the rules for hook and returns a map of packets that 379 // should not go forward. 380 // 381 // Preconditions: 382 // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 383 // * pkt.NetworkHeader is not nil. 384 // 385 // NOTE: unlike the Check API the returned map contains packets that should be 386 // dropped. 387 func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, r *Route, inNicName, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) { 388 for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() { 389 if !pkt.NatDone { 390 if ok := it.Check(hook, pkt, r, "", inNicName, outNicName); !ok { 391 if drop == nil { 392 drop = make(map[*PacketBuffer]struct{}) 393 } 394 drop[pkt] = struct{}{} 395 } 396 if pkt.NatDone { 397 if natPkts == nil { 398 natPkts = make(map[*PacketBuffer]struct{}) 399 } 400 natPkts[pkt] = struct{}{} 401 } 402 } 403 } 404 return drop, natPkts 405 } 406 407 // Preconditions: 408 // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 409 // * pkt.NetworkHeader is not nil. 410 func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) chainVerdict { 411 // Start from ruleIdx and walk the list of rules until a rule gives us 412 // a verdict. 413 for ruleIdx < len(table.Rules) { 414 switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, r, preroutingAddr, inNicName, outNicName); verdict { 415 case RuleAccept: 416 return chainAccept 417 418 case RuleDrop: 419 return chainDrop 420 421 case RuleReturn: 422 return chainReturn 423 424 case RuleJump: 425 // "Jumping" to the next rule just means we're 426 // continuing on down the list. 427 if jumpTo == ruleIdx+1 { 428 ruleIdx++ 429 continue 430 } 431 switch verdict := it.checkChain(hook, pkt, table, jumpTo, r, preroutingAddr, inNicName, outNicName); verdict { 432 case chainAccept: 433 return chainAccept 434 case chainDrop: 435 return chainDrop 436 case chainReturn: 437 ruleIdx++ 438 continue 439 default: 440 panic(fmt.Sprintf("Unknown verdict: %d", verdict)) 441 } 442 443 default: 444 panic(fmt.Sprintf("Unknown verdict: %d", verdict)) 445 } 446 447 } 448 449 // We got through the entire table without a decision. Default to DROP 450 // for safety. 451 return chainDrop 452 } 453 454 // Preconditions: 455 // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize. 456 // * pkt.NetworkHeader is not nil. 457 func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) (RuleVerdict, int) { 458 rule := table.Rules[ruleIdx] 459 460 // Check whether the packet matches the IP header filter. 461 if !rule.Filter.match(pkt, hook, inNicName, outNicName) { 462 // Continue on to the next rule. 463 return RuleJump, ruleIdx + 1 464 } 465 466 // Go through each rule matcher. If they all match, run 467 // the rule target. 468 for _, matcher := range rule.Matchers { 469 matches, hotdrop := matcher.Match(hook, pkt, inNicName, outNicName) 470 if hotdrop { 471 return RuleDrop, 0 472 } 473 if !matches { 474 // Continue on to the next rule. 475 return RuleJump, ruleIdx + 1 476 } 477 } 478 479 // All the matchers matched, so run the target. 480 return rule.Target.Action(pkt, &it.connections, hook, r, preroutingAddr) 481 } 482 483 // OriginalDst returns the original destination of redirected connections. It 484 // returns an error if the connection doesn't exist or isn't redirected. 485 func (it *IPTables) OriginalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber) (tcpip.Address, uint16, tcpip.Error) { 486 it.mu.RLock() 487 defer it.mu.RUnlock() 488 if !it.modified { 489 return "", 0, &tcpip.ErrNotConnected{} 490 } 491 return it.connections.originalDst(epID, netProto) 492 }