github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/pkg/tcpip/stack/iptables.go (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stack
    16  
    17  import (
    18  	"fmt"
    19  	"time"
    20  
    21  	"github.com/SagerNet/gvisor/pkg/tcpip"
    22  	"github.com/SagerNet/gvisor/pkg/tcpip/header"
    23  )
    24  
// TableID identifies a specific table.
type TableID int

// Each value identifies a specific table. The values are also used as indices
// into the per-protocol table arrays, which have NumTables elements.
const (
	// NATID identifies the NAT table.
	NATID TableID = iota
	// MangleID identifies the mangle table.
	MangleID
	// FilterID identifies the filter table.
	FilterID
	// NumTables is the number of tables. It is not itself a valid table ID.
	NumTables
)

// HookUnset indicates that there is no hook set for an entrypoint or
// underflow.
const HookUnset = -1

// reaperDelay is how long to wait before starting to reap connections. It is
// the initial interval handed to startReaper.
const reaperDelay = 5 * time.Second
    42  
    43  // DefaultTables returns a default set of tables. Each chain is set to accept
    44  // all packets.
    45  func DefaultTables(seed uint32) *IPTables {
    46  	return &IPTables{
    47  		v4Tables: [NumTables]Table{
    48  			NATID: {
    49  				Rules: []Rule{
    50  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    51  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    52  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    53  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    54  					{Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    55  				},
    56  				BuiltinChains: [NumHooks]int{
    57  					Prerouting:  0,
    58  					Input:       1,
    59  					Forward:     HookUnset,
    60  					Output:      2,
    61  					Postrouting: 3,
    62  				},
    63  				Underflows: [NumHooks]int{
    64  					Prerouting:  0,
    65  					Input:       1,
    66  					Forward:     HookUnset,
    67  					Output:      2,
    68  					Postrouting: 3,
    69  				},
    70  			},
    71  			MangleID: {
    72  				Rules: []Rule{
    73  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    74  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    75  					{Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    76  				},
    77  				BuiltinChains: [NumHooks]int{
    78  					Prerouting: 0,
    79  					Output:     1,
    80  				},
    81  				Underflows: [NumHooks]int{
    82  					Prerouting:  0,
    83  					Input:       HookUnset,
    84  					Forward:     HookUnset,
    85  					Output:      1,
    86  					Postrouting: HookUnset,
    87  				},
    88  			},
    89  			FilterID: {
    90  				Rules: []Rule{
    91  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    92  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    93  					{Target: &AcceptTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    94  					{Target: &ErrorTarget{NetworkProtocol: header.IPv4ProtocolNumber}},
    95  				},
    96  				BuiltinChains: [NumHooks]int{
    97  					Prerouting:  HookUnset,
    98  					Input:       0,
    99  					Forward:     1,
   100  					Output:      2,
   101  					Postrouting: HookUnset,
   102  				},
   103  				Underflows: [NumHooks]int{
   104  					Prerouting:  HookUnset,
   105  					Input:       0,
   106  					Forward:     1,
   107  					Output:      2,
   108  					Postrouting: HookUnset,
   109  				},
   110  			},
   111  		},
   112  		v6Tables: [NumTables]Table{
   113  			NATID: {
   114  				Rules: []Rule{
   115  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   116  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   117  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   118  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   119  					{Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   120  				},
   121  				BuiltinChains: [NumHooks]int{
   122  					Prerouting:  0,
   123  					Input:       1,
   124  					Forward:     HookUnset,
   125  					Output:      2,
   126  					Postrouting: 3,
   127  				},
   128  				Underflows: [NumHooks]int{
   129  					Prerouting:  0,
   130  					Input:       1,
   131  					Forward:     HookUnset,
   132  					Output:      2,
   133  					Postrouting: 3,
   134  				},
   135  			},
   136  			MangleID: {
   137  				Rules: []Rule{
   138  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   139  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   140  					{Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   141  				},
   142  				BuiltinChains: [NumHooks]int{
   143  					Prerouting: 0,
   144  					Output:     1,
   145  				},
   146  				Underflows: [NumHooks]int{
   147  					Prerouting:  0,
   148  					Input:       HookUnset,
   149  					Forward:     HookUnset,
   150  					Output:      1,
   151  					Postrouting: HookUnset,
   152  				},
   153  			},
   154  			FilterID: {
   155  				Rules: []Rule{
   156  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   157  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   158  					{Target: &AcceptTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   159  					{Target: &ErrorTarget{NetworkProtocol: header.IPv6ProtocolNumber}},
   160  				},
   161  				BuiltinChains: [NumHooks]int{
   162  					Prerouting:  HookUnset,
   163  					Input:       0,
   164  					Forward:     1,
   165  					Output:      2,
   166  					Postrouting: HookUnset,
   167  				},
   168  				Underflows: [NumHooks]int{
   169  					Prerouting:  HookUnset,
   170  					Input:       0,
   171  					Forward:     1,
   172  					Output:      2,
   173  					Postrouting: HookUnset,
   174  				},
   175  			},
   176  		},
   177  		priorities: [NumHooks][]TableID{
   178  			Prerouting:  {MangleID, NATID},
   179  			Input:       {NATID, FilterID},
   180  			Forward:     {FilterID},
   181  			Output:      {MangleID, NATID, FilterID},
   182  			Postrouting: {MangleID, NATID},
   183  		},
   184  		connections: ConnTrack{
   185  			seed: seed,
   186  		},
   187  		reaperDone: make(chan struct{}, 1),
   188  	}
   189  }
   190  
   191  // EmptyFilterTable returns a Table with no rules and the filter table chains
   192  // mapped to HookUnset.
   193  func EmptyFilterTable() Table {
   194  	return Table{
   195  		Rules: []Rule{},
   196  		BuiltinChains: [NumHooks]int{
   197  			Prerouting:  HookUnset,
   198  			Postrouting: HookUnset,
   199  		},
   200  		Underflows: [NumHooks]int{
   201  			Prerouting:  HookUnset,
   202  			Postrouting: HookUnset,
   203  		},
   204  	}
   205  }
   206  
   207  // EmptyNATTable returns a Table with no rules and the filter table chains
   208  // mapped to HookUnset.
   209  func EmptyNATTable() Table {
   210  	return Table{
   211  		Rules: []Rule{},
   212  		BuiltinChains: [NumHooks]int{
   213  			Forward: HookUnset,
   214  		},
   215  		Underflows: [NumHooks]int{
   216  			Forward: HookUnset,
   217  		},
   218  	}
   219  }
   220  
   221  // GetTable returns a table with the given id and IP version. It panics when an
   222  // invalid id is provided.
   223  func (it *IPTables) GetTable(id TableID, ipv6 bool) Table {
   224  	it.mu.RLock()
   225  	defer it.mu.RUnlock()
   226  	if ipv6 {
   227  		return it.v6Tables[id]
   228  	}
   229  	return it.v4Tables[id]
   230  }
   231  
   232  // ReplaceTable replaces or inserts table by name. It panics when an invalid id
   233  // is provided.
   234  func (it *IPTables) ReplaceTable(id TableID, table Table, ipv6 bool) tcpip.Error {
   235  	it.mu.Lock()
   236  	defer it.mu.Unlock()
   237  	// If iptables is being enabled, initialize the conntrack table and
   238  	// reaper.
   239  	if !it.modified {
   240  		it.connections.init()
   241  		it.startReaper(reaperDelay)
   242  	}
   243  	it.modified = true
   244  	if ipv6 {
   245  		it.v6Tables[id] = table
   246  	} else {
   247  		it.v4Tables[id] = table
   248  	}
   249  	return nil
   250  }
   251  
// A chainVerdict is what a table decides should be done with a packet.
type chainVerdict int

const (
	// chainAccept indicates the packet should continue through netstack.
	chainAccept chainVerdict = iota

	// chainDrop indicates the packet should be dropped.
	chainDrop

	// chainReturn indicates the packet should return to the calling chain
	// or the underflow rule of a builtin chain.
	chainReturn
)
   266  
   267  // Check runs pkt through the rules for hook. It returns true when the packet
   268  // should continue traversing the network stack and false when it should be
   269  // dropped.
   270  //
   271  // Precondition: pkt.NetworkHeader is set.
   272  func (it *IPTables) Check(hook Hook, pkt *PacketBuffer, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) bool {
   273  	if pkt.NetworkProtocolNumber != header.IPv4ProtocolNumber && pkt.NetworkProtocolNumber != header.IPv6ProtocolNumber {
   274  		return true
   275  	}
   276  	// Many users never configure iptables. Spare them the cost of rule
   277  	// traversal if rules have never been set.
   278  	it.mu.RLock()
   279  	defer it.mu.RUnlock()
   280  	if !it.modified {
   281  		return true
   282  	}
   283  
   284  	// Packets are manipulated only if connection and matching
   285  	// NAT rule exists.
   286  	shouldTrack := it.connections.handlePacket(pkt, hook, r)
   287  
   288  	// Go through each table containing the hook.
   289  	priorities := it.priorities[hook]
   290  	for _, tableID := range priorities {
   291  		// If handlePacket already NATed the packet, we don't need to
   292  		// check the NAT table.
   293  		if tableID == NATID && pkt.NatDone {
   294  			continue
   295  		}
   296  		var table Table
   297  		if pkt.NetworkProtocolNumber == header.IPv6ProtocolNumber {
   298  			table = it.v6Tables[tableID]
   299  		} else {
   300  			table = it.v4Tables[tableID]
   301  		}
   302  		ruleIdx := table.BuiltinChains[hook]
   303  		switch verdict := it.checkChain(hook, pkt, table, ruleIdx, r, preroutingAddr, inNicName, outNicName); verdict {
   304  		// If the table returns Accept, move on to the next table.
   305  		case chainAccept:
   306  			continue
   307  		// The Drop verdict is final.
   308  		case chainDrop:
   309  			return false
   310  		case chainReturn:
   311  			// Any Return from a built-in chain means we have to
   312  			// call the underflow.
   313  			underflow := table.Rules[table.Underflows[hook]]
   314  			switch v, _ := underflow.Target.Action(pkt, &it.connections, hook, r, preroutingAddr); v {
   315  			case RuleAccept:
   316  				continue
   317  			case RuleDrop:
   318  				return false
   319  			case RuleJump, RuleReturn:
   320  				panic("Underflows should only return RuleAccept or RuleDrop.")
   321  			default:
   322  				panic(fmt.Sprintf("Unknown verdict: %d", v))
   323  			}
   324  
   325  		default:
   326  			panic(fmt.Sprintf("Unknown verdict %v.", verdict))
   327  		}
   328  	}
   329  
   330  	// If this connection should be tracked, try to add an entry for it. If
   331  	// traversing the nat table didn't end in adding an entry,
   332  	// maybeInsertNoop will add a no-op entry for the connection. This is
   333  	// needeed when establishing connections so that the SYN/ACK reply to an
   334  	// outgoing SYN is delivered to the correct endpoint rather than being
   335  	// redirected by a prerouting rule.
   336  	//
   337  	// From the iptables documentation: "If there is no rule, a `null'
   338  	// binding is created: this usually does not map the packet, but exists
   339  	// to ensure we don't map another stream over an existing one."
   340  	if shouldTrack {
   341  		it.connections.maybeInsertNoop(pkt, hook)
   342  	}
   343  
   344  	// Every table returned Accept.
   345  	return true
   346  }
   347  
// beforeSave is invoked by stateify.
//
// It signals the reaper goroutine to stop and then takes the connection
// table's lock. The lock is not released by this function.
func (it *IPTables) beforeSave() {
	// Ensure the reaper exits cleanly.
	it.reaperDone <- struct{}{}
	// Prevent others from modifying the connection table.
	it.connections.mu.Lock()
}
   355  
// afterLoad is invoked by stateify.
//
// It starts the reaper goroutine again with the initial reaperDelay. The
// reaper is started unconditionally, i.e. without consulting it.modified.
func (it *IPTables) afterLoad() {
	it.startReaper(reaperDelay)
}
   360  
   361  // startReaper starts a goroutine that wakes up periodically to reap timed out
   362  // connections.
   363  func (it *IPTables) startReaper(interval time.Duration) {
   364  	go func() { // S/R-SAFE: reaperDone is signalled when iptables is saved.
   365  		bucket := 0
   366  		for {
   367  			select {
   368  			case <-it.reaperDone:
   369  				return
   370  				// TODO(github.com/SagerNet/issue/5939): do not use the ambient clock.
   371  			case <-time.After(interval):
   372  				bucket, interval = it.connections.reapUnused(bucket, interval)
   373  			}
   374  		}
   375  	}()
   376  }
   377  
   378  // CheckPackets runs pkts through the rules for hook and returns a map of packets that
   379  // should not go forward.
   380  //
   381  // Preconditions:
   382  // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
   383  // * pkt.NetworkHeader is not nil.
   384  //
   385  // NOTE: unlike the Check API the returned map contains packets that should be
   386  // dropped.
   387  func (it *IPTables) CheckPackets(hook Hook, pkts PacketBufferList, r *Route, inNicName, outNicName string) (drop map[*PacketBuffer]struct{}, natPkts map[*PacketBuffer]struct{}) {
   388  	for pkt := pkts.Front(); pkt != nil; pkt = pkt.Next() {
   389  		if !pkt.NatDone {
   390  			if ok := it.Check(hook, pkt, r, "", inNicName, outNicName); !ok {
   391  				if drop == nil {
   392  					drop = make(map[*PacketBuffer]struct{})
   393  				}
   394  				drop[pkt] = struct{}{}
   395  			}
   396  			if pkt.NatDone {
   397  				if natPkts == nil {
   398  					natPkts = make(map[*PacketBuffer]struct{})
   399  				}
   400  				natPkts[pkt] = struct{}{}
   401  			}
   402  		}
   403  	}
   404  	return drop, natPkts
   405  }
   406  
   407  // Preconditions:
   408  // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
   409  // * pkt.NetworkHeader is not nil.
   410  func (it *IPTables) checkChain(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) chainVerdict {
   411  	// Start from ruleIdx and walk the list of rules until a rule gives us
   412  	// a verdict.
   413  	for ruleIdx < len(table.Rules) {
   414  		switch verdict, jumpTo := it.checkRule(hook, pkt, table, ruleIdx, r, preroutingAddr, inNicName, outNicName); verdict {
   415  		case RuleAccept:
   416  			return chainAccept
   417  
   418  		case RuleDrop:
   419  			return chainDrop
   420  
   421  		case RuleReturn:
   422  			return chainReturn
   423  
   424  		case RuleJump:
   425  			// "Jumping" to the next rule just means we're
   426  			// continuing on down the list.
   427  			if jumpTo == ruleIdx+1 {
   428  				ruleIdx++
   429  				continue
   430  			}
   431  			switch verdict := it.checkChain(hook, pkt, table, jumpTo, r, preroutingAddr, inNicName, outNicName); verdict {
   432  			case chainAccept:
   433  				return chainAccept
   434  			case chainDrop:
   435  				return chainDrop
   436  			case chainReturn:
   437  				ruleIdx++
   438  				continue
   439  			default:
   440  				panic(fmt.Sprintf("Unknown verdict: %d", verdict))
   441  			}
   442  
   443  		default:
   444  			panic(fmt.Sprintf("Unknown verdict: %d", verdict))
   445  		}
   446  
   447  	}
   448  
   449  	// We got through the entire table without a decision. Default to DROP
   450  	// for safety.
   451  	return chainDrop
   452  }
   453  
   454  // Preconditions:
   455  // * pkt is a IPv4 packet of at least length header.IPv4MinimumSize.
   456  // * pkt.NetworkHeader is not nil.
   457  func (it *IPTables) checkRule(hook Hook, pkt *PacketBuffer, table Table, ruleIdx int, r *Route, preroutingAddr tcpip.Address, inNicName, outNicName string) (RuleVerdict, int) {
   458  	rule := table.Rules[ruleIdx]
   459  
   460  	// Check whether the packet matches the IP header filter.
   461  	if !rule.Filter.match(pkt, hook, inNicName, outNicName) {
   462  		// Continue on to the next rule.
   463  		return RuleJump, ruleIdx + 1
   464  	}
   465  
   466  	// Go through each rule matcher. If they all match, run
   467  	// the rule target.
   468  	for _, matcher := range rule.Matchers {
   469  		matches, hotdrop := matcher.Match(hook, pkt, inNicName, outNicName)
   470  		if hotdrop {
   471  			return RuleDrop, 0
   472  		}
   473  		if !matches {
   474  			// Continue on to the next rule.
   475  			return RuleJump, ruleIdx + 1
   476  		}
   477  	}
   478  
   479  	// All the matchers matched, so run the target.
   480  	return rule.Target.Action(pkt, &it.connections, hook, r, preroutingAddr)
   481  }
   482  
   483  // OriginalDst returns the original destination of redirected connections. It
   484  // returns an error if the connection doesn't exist or isn't redirected.
   485  func (it *IPTables) OriginalDst(epID TransportEndpointID, netProto tcpip.NetworkProtocolNumber) (tcpip.Address, uint16, tcpip.Error) {
   486  	it.mu.RLock()
   487  	defer it.mu.RUnlock()
   488  	if !it.modified {
   489  		return "", 0, &tcpip.ErrNotConnected{}
   490  	}
   491  	return it.connections.originalDst(epID, netProto)
   492  }