k8s.io/kubernetes@v1.31.0-alpha.0.0.20240520171757-56147500dadc/pkg/proxy/nftables/helpers_test.go (about)

     1  //go:build linux
     2  // +build linux
     3  
     4  /*
     5  Copyright 2015 The Kubernetes Authors.
     6  
     7  Licensed under the Apache License, Version 2.0 (the "License");
     8  you may not use this file except in compliance with the License.
     9  You may obtain a copy of the License at
    10  
    11      http://www.apache.org/licenses/LICENSE-2.0
    12  
    13  Unless required by applicable law or agreed to in writing, software
    14  distributed under the License is distributed on an "AS IS" BASIS,
    15  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    16  See the License for the specific language governing permissions and
    17  limitations under the License.
    18  */
    19  
    20  package nftables
    21  
    22  import (
    23  	"context"
    24  	"fmt"
    25  	"net"
    26  	"regexp"
    27  	"runtime"
    28  	"sort"
    29  	"strings"
    30  	"testing"
    31  
    32  	"github.com/google/go-cmp/cmp"
    33  	"github.com/lithammer/dedent"
    34  
    35  	"k8s.io/api/core/v1"
    36  	"k8s.io/apimachinery/pkg/util/sets"
    37  	netutils "k8s.io/utils/net"
    38  	"sigs.k8s.io/knftables"
    39  )
    40  
    41  // getLine returns a string containing the file and line number of the caller, if
    42  // possible. This is useful in tests with a large number of cases - when something goes
    43  // wrong you can find which case more easily.
    44  func getLine() string {
    45  	_, file, line, ok := runtime.Caller(1)
    46  	if !ok {
    47  		return ""
    48  	}
    49  	return fmt.Sprintf(" (from %s:%d)", file, line)
    50  }
    51  
    52  // objectOrder defines the order we sort different types into (higher = earlier); while
    53  // not necessary just for comparison purposes, it's more intuitive in the Diff output to
    54  // see rules/sets/maps before chains/elements.
    55  var objectOrder = map[string]int{
    56  	"table":   10,
    57  	"chain":   9,
    58  	"rule":    8,
    59  	"set":     7,
    60  	"map":     6,
    61  	"element": 5,
    62  	// anything else: 0
    63  }
    64  
    65  // sortNFTablesTransaction sorts an nftables transaction into a standard order for comparison
    66  func sortNFTablesTransaction(tx string) string {
    67  	lines := strings.Split(tx, "\n")
    68  
    69  	// strip blank lines and comments
    70  	for i := 0; i < len(lines); {
    71  		if lines[i] == "" || lines[i][0] == '#' {
    72  			lines = append(lines[:i], lines[i+1:]...)
    73  		} else {
    74  			i++
    75  		}
    76  	}
    77  
    78  	// sort remaining lines
    79  	sort.SliceStable(lines, func(i, j int) bool {
    80  		li := lines[i]
    81  		wi := strings.Split(li, " ")
    82  		lj := lines[j]
    83  		wj := strings.Split(lj, " ")
    84  
    85  		// All lines will start with "add OBJECTTYPE ip kube-proxy". Everything
    86  		// except "add table" will have an object name after the table name, and
    87  		// "add table" will have a comment after the table name. So every line
    88  		// should have at least 5 words.
    89  		if len(wi) < 5 || len(wj) < 5 {
    90  			return false
    91  		}
    92  
    93  		// Sort by object type first.
    94  		if wi[1] != wj[1] {
    95  			return objectOrder[wi[1]] >= objectOrder[wj[1]]
    96  		}
    97  
    98  		// Sort by object name when object type is identical.
    99  		if wi[4] != wj[4] {
   100  			return wi[4] < wj[4]
   101  		}
   102  
   103  		// Leave rules in the order they were originally added.
   104  		if wi[1] == "rule" {
   105  			return false
   106  		}
   107  
   108  		// Sort by the whole line when object type and name is identical. (e.g.,
   109  		// individual "add rule" and "add element" lines in a chain/set/map.)
   110  		return li < lj
   111  	})
   112  	return strings.Join(lines, "\n")
   113  }
   114  
   115  // diffNFTablesTransaction is a (testable) helper function for assertNFTablesTransactionEqual
   116  func diffNFTablesTransaction(expected, result string) string {
   117  	expected = sortNFTablesTransaction(expected)
   118  	result = sortNFTablesTransaction(result)
   119  
   120  	return cmp.Diff(expected, result)
   121  }
   122  
   123  // assertNFTablesTransactionEqual asserts that expected and result are equal, ignoring
   124  // irrelevant differences.
   125  func assertNFTablesTransactionEqual(t *testing.T, line string, expected, result string) {
   126  	diff := diffNFTablesTransaction(expected, result)
   127  	if diff != "" {
   128  		t.Errorf("tables do not match%s:\ndiff:\n%s\nfull result: %+v", line, diff, result)
   129  	}
   130  }
   131  
   132  // diffNFTablesChain is a (testable) helper function for assertNFTablesChainEqual
   133  func diffNFTablesChain(nft *knftables.Fake, chain, expected string) string {
   134  	expected = strings.TrimSpace(expected)
   135  	result := ""
   136  	if ch := nft.Table.Chains[chain]; ch != nil {
   137  		for i, rule := range ch.Rules {
   138  			if i > 0 {
   139  				result += "\n"
   140  			}
   141  			result += rule.Rule
   142  		}
   143  	}
   144  
   145  	return cmp.Diff(expected, result)
   146  }
   147  
// nftablesTracer holds data used while virtually tracing a packet through a set of
// nftables rules
type nftablesTracer struct {
	// nft is the fake nftables state whose chains, sets, and maps are consulted while
	// tracing. nodeIPs is the set of IPs treated as local to the node (used for the
	// "fib saddr type local" / "fib daddr type local" checks). t is the test that
	// errors are reported against.
	nft     *knftables.Fake
	nodeIPs sets.Set[string]
	t       *testing.T

	// matches accumulates the list of rules that were matched, for debugging purposes.
	matches []string

	// outputs accumulates the list of matched terminal rule targets (endpoint
	// IP:ports, or a special target like "REJECT") and is eventually used to generate
	// the return value of tracePacket.
	outputs []string

	// markMasq tracks whether the packet has been marked for masquerading
	markMasq bool
}
   166  
   167  // newNFTablesTracer creates an nftablesTracer. nodeIPs are the IP to treat as local node
   168  // IPs (for determining whether rules with "fib saddr type local" or "fib daddr type
   169  // local" match).
   170  func newNFTablesTracer(t *testing.T, nft *knftables.Fake, nodeIPs []string) *nftablesTracer {
   171  	return &nftablesTracer{
   172  		nft:     nft,
   173  		nodeIPs: sets.New(nodeIPs...),
   174  		t:       t,
   175  	}
   176  }
   177  
   178  func (tracer *nftablesTracer) addressMatches(ipStr string, wantMatch bool, ruleAddress string) bool {
   179  	ip := netutils.ParseIPSloppy(ipStr)
   180  	if ip == nil {
   181  		tracer.t.Fatalf("Bad IP in test case: %s", ipStr)
   182  	}
   183  
   184  	var match bool
   185  	if strings.Contains(ruleAddress, "/") {
   186  		_, cidr, err := netutils.ParseCIDRSloppy(ruleAddress)
   187  		if err != nil {
   188  			tracer.t.Errorf("Bad CIDR in kube-proxy output: %v", err)
   189  		}
   190  		match = cidr.Contains(ip)
   191  	} else {
   192  		ip2 := netutils.ParseIPSloppy(ruleAddress)
   193  		if ip2 == nil {
   194  			tracer.t.Errorf("Bad IP/CIDR in kube-proxy output: %s", ruleAddress)
   195  		}
   196  		match = ip.Equal(ip2)
   197  	}
   198  
   199  	return match == wantMatch
   200  }
   201  
   202  func (tracer *nftablesTracer) addressMatchesSet(ipStr string, wantMatch bool, ruleAddress string) bool {
   203  	ruleAddress = strings.ReplaceAll(ruleAddress, " ", "")
   204  	addresses := strings.Split(ruleAddress, ",")
   205  	var match bool
   206  	for _, address := range addresses {
   207  		match = tracer.addressMatches(ipStr, true, address)
   208  		if match != wantMatch {
   209  			return false
   210  		}
   211  	}
   212  	return true
   213  }
   214  
   215  // matchDestIPOnly checks an "ip daddr" against a set/map, and returns the matching
   216  // Element, if found.
   217  func (tracer *nftablesTracer) matchDestIPOnly(elements []*knftables.Element, destIP string) *knftables.Element {
   218  	for _, element := range elements {
   219  		if element.Key[0] == destIP {
   220  			return element
   221  		}
   222  	}
   223  	return nil
   224  }
   225  
   226  // matchDest checks an "ip daddr . meta l4proto . th dport" against a set/map, and returns
   227  // the matching Element, if found.
   228  func (tracer *nftablesTracer) matchDest(elements []*knftables.Element, destIP, protocol, destPort string) *knftables.Element {
   229  	for _, element := range elements {
   230  		if element.Key[0] == destIP && element.Key[1] == protocol && element.Key[2] == destPort {
   231  			return element
   232  		}
   233  	}
   234  	return nil
   235  }
   236  
   237  // matchDestPort checks an "meta l4proto . th dport" against a set/map, and returns the
   238  // matching Element, if found.
   239  func (tracer *nftablesTracer) matchDestPort(elements []*knftables.Element, protocol, destPort string) *knftables.Element {
   240  	for _, element := range elements {
   241  		if element.Key[0] == protocol && element.Key[1] == destPort {
   242  			return element
   243  		}
   244  	}
   245  	return nil
   246  }
   247  
   248  // We intentionally don't try to parse arbitrary nftables rules, as the syntax is quite
   249  // complicated and context sensitive. (E.g., "ip daddr" could be the start of an address
   250  // comparison, or it could be the start of a set/map lookup.) Instead, we just have
   251  // regexps to recognize the specific pieces of rules that we create in proxier.go.
   252  // Anything matching ignoredRegexp gets stripped out of the rule, and then what's left
   253  // *must* match one of the cases in runChain or an error will be logged. In cases where
   254  // the regexp doesn't end with `$`, and the matched rule succeeds against the input data,
   255  // runChain will continue trying to match the rest of the rule. E.g., "ip daddr 10.0.0.1
   256  // drop" would first match destAddrRegexp, and then (assuming destIP was "10.0.0.1") would
   257  // match verdictRegexp.
   258  
   259  var destAddrRegexp = regexp.MustCompile(`^ip6* daddr (!= )?(\S+)`)
   260  var destAddrLookupRegexp = regexp.MustCompile(`^ip6* daddr (!= )?\{([^}]*)\}`)
   261  var destAddrLocalRegexp = regexp.MustCompile(`^fib daddr type local`)
   262  var destPortRegexp = regexp.MustCompile(`^(tcp|udp|sctp) dport (\d+)`)
   263  var destIPOnlyLookupRegexp = regexp.MustCompile(`^ip6* daddr @(\S+)`)
   264  
   265  var destDispatchRegexp = regexp.MustCompile(`^ip6* daddr \. meta l4proto \. th dport vmap @(\S+)$`)
   266  var destPortDispatchRegexp = regexp.MustCompile(`^meta l4proto \. th dport vmap @(\S+)$`)
   267  
   268  var sourceAddrRegexp = regexp.MustCompile(`^ip6* saddr (!= )?(\S+)`)
   269  var sourceAddrLookupRegexp = regexp.MustCompile(`^ip6* saddr (!= )?\{([^}]*)\}`)
   270  var sourceAddrLocalRegexp = regexp.MustCompile(`^fib saddr type local`)
   271  
   272  var endpointVMAPRegexp = regexp.MustCompile(`^numgen random mod \d+ vmap \{(.*)\}$`)
   273  var endpointVMapEntryRegexp = regexp.MustCompile(`\d+ : goto (\S+)`)
   274  
   275  var masqueradeRegexp = regexp.MustCompile(`^jump ` + markMasqChain + `$`)
   276  var jumpRegexp = regexp.MustCompile(`^(jump|goto) (\S+)$`)
   277  var returnRegexp = regexp.MustCompile(`^return$`)
   278  var verdictRegexp = regexp.MustCompile(`^(drop|reject)$`)
   279  var dnatRegexp = regexp.MustCompile(`^meta l4proto (tcp|udp|sctp) dnat to (\S+)$`)
   280  
   281  var ignoredRegexp = regexp.MustCompile(strings.Join(
   282  	[]string{
   283  		// Ignore comments (which can only appear at the end of a rule).
   284  		` *comment "[^"]*"$`,
   285  
   286  		// The trace tests only check new connections, so for our purposes, this
   287  		// check always succeeds (and thus can be ignored).
   288  		`^ct state new`,
   289  	},
   290  	"|",
   291  ))
   292  
   293  // runChain runs the given packet through the rules in the given table and chain, updating
   294  // tracer's internal state accordingly. It returns true if it hits a terminal action.
   295  func (tracer *nftablesTracer) runChain(chname, sourceIP, protocol, destIP, destPort string) bool {
   296  	ch := tracer.nft.Table.Chains[chname]
   297  	if ch == nil {
   298  		tracer.t.Errorf("unknown chain %q", chname)
   299  		return true
   300  	}
   301  
   302  	for _, ruleObj := range ch.Rules {
   303  		rule := ignoredRegexp.ReplaceAllLiteralString(ruleObj.Rule, "")
   304  		for rule != "" {
   305  			rule = strings.TrimLeft(rule, " ")
   306  
   307  			// Note that the order of (some of) the cases is important. e.g.,
   308  			// masqueradeRegexp must be checked before jumpRegexp, since
   309  			// jumpRegexp would also match masqueradeRegexp but do the wrong
   310  			// thing with it.
   311  
   312  			switch {
   313  			case destIPOnlyLookupRegexp.MatchString(rule):
   314  				// `^ip6* daddr @(\S+)`
   315  				// Tests whether destIP is a member of the indicated set.
   316  				match := destIPOnlyLookupRegexp.FindStringSubmatch(rule)
   317  				rule = strings.TrimPrefix(rule, match[0])
   318  				set := match[1]
   319  				if tracer.matchDestIPOnly(tracer.nft.Table.Sets[set].Elements, destIP) == nil {
   320  					rule = ""
   321  					break
   322  				}
   323  
   324  			case destDispatchRegexp.MatchString(rule):
   325  				// `^ip6* daddr \. meta l4proto \. th dport vmap @(\S+)$`
   326  				// Looks up "destIP . protocol . destPort" in the indicated
   327  				// verdict map, and if found, runs the associated verdict.
   328  				match := destDispatchRegexp.FindStringSubmatch(rule)
   329  				mapName := match[1]
   330  				element := tracer.matchDest(tracer.nft.Table.Maps[mapName].Elements, destIP, protocol, destPort)
   331  				if element == nil {
   332  					rule = ""
   333  					break
   334  				} else {
   335  					rule = element.Value[0]
   336  				}
   337  
   338  			case destPortDispatchRegexp.MatchString(rule):
   339  				// `^meta l4proto \. th dport vmap @(\S+)$`
   340  				// Looks up "protocol . destPort" in the indicated verdict map,
   341  				// and if found, runs the assocated verdict.
   342  				match := destPortDispatchRegexp.FindStringSubmatch(rule)
   343  				mapName := match[1]
   344  				element := tracer.matchDestPort(tracer.nft.Table.Maps[mapName].Elements, protocol, destPort)
   345  				if element == nil {
   346  					rule = ""
   347  					break
   348  				} else {
   349  					rule = element.Value[0]
   350  				}
   351  
   352  			case destAddrLookupRegexp.MatchString(rule):
   353  				// `^ip6* daddr (!= )?\{([^}]*)\}`
   354  				// Tests whether destIP doesn't match an anonymous set.
   355  				match := destAddrLookupRegexp.FindStringSubmatch(rule)
   356  				rule = strings.TrimPrefix(rule, match[0])
   357  				wantMatch, set := match[1] != "!= ", match[2]
   358  				if !tracer.addressMatchesSet(destIP, wantMatch, set) {
   359  					rule = ""
   360  					break
   361  				}
   362  
   363  			case destAddrRegexp.MatchString(rule):
   364  				// `^ip6* daddr (!= )?(\S+)`
   365  				// Tests whether destIP does/doesn't match a literal.
   366  				match := destAddrRegexp.FindStringSubmatch(rule)
   367  				rule = strings.TrimPrefix(rule, match[0])
   368  				wantMatch, ip := match[1] != "!= ", match[2]
   369  				if !tracer.addressMatches(destIP, wantMatch, ip) {
   370  					rule = ""
   371  					break
   372  				}
   373  
   374  			case destAddrLocalRegexp.MatchString(rule):
   375  				// `^fib daddr type local`
   376  				// Tests whether destIP is a local IP.
   377  				match := destAddrLocalRegexp.FindStringSubmatch(rule)
   378  				rule = strings.TrimPrefix(rule, match[0])
   379  				if !tracer.nodeIPs.Has(destIP) {
   380  					rule = ""
   381  					break
   382  				}
   383  
   384  			case destPortRegexp.MatchString(rule):
   385  				// `^(tcp|udp|sctp) dport (\d+)`
   386  				// Tests whether destPort matches a literal.
   387  				match := destPortRegexp.FindStringSubmatch(rule)
   388  				rule = strings.TrimPrefix(rule, match[0])
   389  				proto, port := match[1], match[2]
   390  				if protocol != proto || destPort != port {
   391  					rule = ""
   392  					break
   393  				}
   394  
   395  			case sourceAddrLookupRegexp.MatchString(rule):
   396  				// `^ip6* saddr (!= )?\{([^}]*)\}`
   397  				// Tests whether sourceIP doesn't match an anonymous set.
   398  				match := sourceAddrLookupRegexp.FindStringSubmatch(rule)
   399  				rule = strings.TrimPrefix(rule, match[0])
   400  				wantMatch, set := match[1] != "!= ", match[2]
   401  				if !tracer.addressMatchesSet(sourceIP, wantMatch, set) {
   402  					rule = ""
   403  					break
   404  				}
   405  
   406  			case sourceAddrRegexp.MatchString(rule):
   407  				// `^ip6* saddr (!= )?(\S+)`
   408  				// Tests whether sourceIP does/doesn't match a literal.
   409  				match := sourceAddrRegexp.FindStringSubmatch(rule)
   410  				rule = strings.TrimPrefix(rule, match[0])
   411  				wantMatch, ip := match[1] != "!= ", match[2]
   412  				if !tracer.addressMatches(sourceIP, wantMatch, ip) {
   413  					rule = ""
   414  					break
   415  				}
   416  
   417  			case sourceAddrLocalRegexp.MatchString(rule):
   418  				// `^fib saddr type local`
   419  				// Tests whether sourceIP is a local IP.
   420  				match := sourceAddrLocalRegexp.FindStringSubmatch(rule)
   421  				rule = strings.TrimPrefix(rule, match[0])
   422  				if !tracer.nodeIPs.Has(sourceIP) {
   423  					rule = ""
   424  					break
   425  				}
   426  
   427  			case masqueradeRegexp.MatchString(rule):
   428  				// `^jump mark-for-masquerade$`
   429  				// Mark for masquerade: we just treat the jump rule itself as
   430  				// being what creates the mark, rather than trying to handle
   431  				// the rules inside that chain and the "masquerading" chain.
   432  				match := jumpRegexp.FindStringSubmatch(rule)
   433  				rule = strings.TrimPrefix(rule, match[0])
   434  
   435  				tracer.matches = append(tracer.matches, ruleObj.Rule)
   436  				tracer.markMasq = true
   437  
   438  			case jumpRegexp.MatchString(rule):
   439  				// `^(jump|goto) (\S+)$`
   440  				// Jumps to another chain.
   441  				match := jumpRegexp.FindStringSubmatch(rule)
   442  				rule = strings.TrimPrefix(rule, match[0])
   443  				action, destChain := match[1], match[2]
   444  
   445  				tracer.matches = append(tracer.matches, ruleObj.Rule)
   446  				terminated := tracer.runChain(destChain, sourceIP, protocol, destIP, destPort)
   447  				if terminated {
   448  					// destChain reached a terminal statement, so we
   449  					// terminate too.
   450  					return true
   451  				} else if action == "goto" {
   452  					// After a goto, return to our calling chain
   453  					// (without terminating) rather than continuing
   454  					// with this chain.
   455  					return false
   456  				}
   457  
   458  			case verdictRegexp.MatchString(rule):
   459  				// `^(drop|reject)$`
   460  				// Drop/reject the packet and terminate processing.
   461  				match := verdictRegexp.FindStringSubmatch(rule)
   462  				verdict := match[1]
   463  
   464  				tracer.matches = append(tracer.matches, ruleObj.Rule)
   465  				tracer.outputs = append(tracer.outputs, strings.ToUpper(verdict))
   466  				return true
   467  
   468  			case returnRegexp.MatchString(rule):
   469  				// `^return$`
   470  				// Returns to the calling chain.
   471  				tracer.matches = append(tracer.matches, ruleObj.Rule)
   472  				return false
   473  
   474  			case dnatRegexp.MatchString(rule):
   475  				// `meta l4proto (tcp|udp|sctp) dnat to (\S+)`
   476  				// DNAT to an endpoint IP and terminate processing.
   477  				match := dnatRegexp.FindStringSubmatch(rule)
   478  				destEndpoint := match[2]
   479  
   480  				tracer.matches = append(tracer.matches, ruleObj.Rule)
   481  				tracer.outputs = append(tracer.outputs, destEndpoint)
   482  				return true
   483  
   484  			case endpointVMAPRegexp.MatchString(rule):
   485  				// `^numgen random mod \d+ vmap \{(.*)\}$`
   486  				// Selects a random endpoint and jumps to it. For tracePacket's
   487  				// purposes, we jump to *all* of the endpoints.
   488  				match := endpointVMAPRegexp.FindStringSubmatch(rule)
   489  				elements := match[1]
   490  
   491  				for _, match = range endpointVMapEntryRegexp.FindAllStringSubmatch(elements, -1) {
   492  					// `\d+ : goto (\S+)`
   493  					destChain := match[1]
   494  
   495  					tracer.matches = append(tracer.matches, ruleObj.Rule)
   496  					// Ignore return value; we know each endpoint has a
   497  					// terminating dnat verdict, but we want to gather all
   498  					// of the endpoints into tracer.output.
   499  					_ = tracer.runChain(destChain, sourceIP, protocol, destIP, destPort)
   500  				}
   501  				return true
   502  
   503  			default:
   504  				tracer.t.Errorf("unmatched rule: %s", ruleObj.Rule)
   505  				rule = ""
   506  			}
   507  		}
   508  	}
   509  
   510  	return false
   511  }
   512  
   513  // tracePacket determines what would happen to a packet with the given sourceIP, destIP,
   514  // and destPort, given the indicated iptables ruleData. nodeIPs are the local node IPs (for
   515  // rules matching "local"). (The protocol value should be lowercase as in nftables
   516  // rules, not uppercase as in corev1.)
   517  //
   518  // The return values are: an array of matched rules (for debugging), the final packet
   519  // destinations (a comma-separated list of IPs, or one of the special targets "ACCEPT",
   520  // "DROP", or "REJECT"), and whether the packet would be masqueraded.
   521  func tracePacket(t *testing.T, nft *knftables.Fake, sourceIP, protocol, destIP, destPort string, nodeIPs []string) ([]string, string, bool) {
   522  	var err error
   523  	tracer := newNFTablesTracer(t, nft, nodeIPs)
   524  
   525  	// filter-prerouting goes first, then nat-prerouting if not terminated.
   526  	if tracer.runChain("filter-prerouting", sourceIP, protocol, destIP, destPort) {
   527  		return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq
   528  	}
   529  	tracer.runChain("nat-prerouting", sourceIP, protocol, destIP, destPort)
   530  	// After the prerouting rules run, pending DNATs are processed (which would affect
   531  	// the destination IP that later rules match against).
   532  	if len(tracer.outputs) != 0 {
   533  		destIP, _, err = net.SplitHostPort(tracer.outputs[0])
   534  		if err != nil {
   535  			t.Errorf("failed to parse host port '%s': %s", tracer.outputs[0], err.Error())
   536  		}
   537  	}
   538  
   539  	// Run filter-forward, return if packet is terminated.
   540  	if tracer.runChain("filter-forward", sourceIP, protocol, destIP, destPort) {
   541  		return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq
   542  	}
   543  
   544  	// Run filter-input
   545  	tracer.runChain("filter-input", sourceIP, protocol, destIP, destPort)
   546  
   547  	// Skip filter-output and nat-output as they ought to be fully redundant with the prerouting chains.
   548  	// Skip nat-postrouting because it only does masquerading and we handle that separately.
   549  	return tracer.matches, strings.Join(tracer.outputs, ", "), tracer.markMasq
   550  }
   551  
// packetFlowTest describes one packet to trace with runPacketFlowTests and the result
// that is expected for it. name is used as the subtest name. sourceIP, protocol, destIP,
// and destPort describe the packet; an empty protocol is treated as TCP. output is the
// expected destination string returned by tracePacket, and masq is whether the packet is
// expected to be marked for masquerading.
type packetFlowTest struct {
	name     string
	sourceIP string
	protocol v1.Protocol
	destIP   string
	destPort int
	output   string
	masq     bool
}
   561  
   562  func runPacketFlowTests(t *testing.T, line string, nft *knftables.Fake, nodeIPs []string, testCases []packetFlowTest) {
   563  	for _, tc := range testCases {
   564  		t.Run(tc.name, func(t *testing.T) {
   565  			protocol := strings.ToLower(string(tc.protocol))
   566  			if protocol == "" {
   567  				protocol = "tcp"
   568  			}
   569  			matches, output, masq := tracePacket(t, nft, tc.sourceIP, protocol, tc.destIP, fmt.Sprintf("%d", tc.destPort), nodeIPs)
   570  			var errors []string
   571  			if output != tc.output {
   572  				errors = append(errors, fmt.Sprintf("wrong output: expected %q got %q", tc.output, output))
   573  			}
   574  			if masq != tc.masq {
   575  				errors = append(errors, fmt.Sprintf("wrong masq: expected %v got %v", tc.masq, masq))
   576  			}
   577  			if errors != nil {
   578  				t.Errorf("Test %q of a packet from %s to %s:%d%s got result:\n%s\n\nBy matching:\n%s\n\n",
   579  					tc.name, tc.sourceIP, tc.destIP, tc.destPort, line, strings.Join(errors, "\n"), strings.Join(matches, "\n"))
   580  			}
   581  		})
   582  	}
   583  }
   584  
   585  // helpers_test unit tests
   586  
// testInput is an unsorted nftables transaction (including blank lines and "#" comment
// lines) used as input for the sortNFTablesTransaction and diffNFTablesTransaction unit
// tests below.
var testInput = dedent.Dedent(`
	add table ip testing { comment "rules for kube-proxy" ; }

	add chain ip testing mark-for-masquerade
	add rule ip testing mark-for-masquerade mark set mark or 0x4000
	add chain ip testing masquerading
	add rule ip testing masquerading mark and 0x4000 == 0 return
	add rule ip testing masquerading mark set mark xor 0x4000
	add rule ip testing masquerading masquerade fully-random

	add set ip testing firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
	add set ip testing firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
	add chain ip testing firewall-check
	add chain ip testing firewall-allow-check
	add rule ip testing firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
	add rule ip testing firewall-allow-check drop
	add rule ip testing firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check

	# svc1
	add chain ip testing service-ULMVA6XW-ns1/svc1/tcp/p80
	add rule ip testing service-ULMVA6XW-ns1/svc1/tcp/p80 ip daddr 172.30.0.41 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
	add rule ip testing service-ULMVA6XW-ns1/svc1/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 }

	add chain ip testing endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80
	add rule ip testing endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 ip saddr 10.180.0.1 jump mark-for-masquerade
	add rule ip testing endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 meta l4proto tcp dnat to 10.180.0.1:80

	add element ip testing service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }

	# svc2
	add chain ip testing service-42NFTM6N-ns2/svc2/tcp/p80
	add rule ip testing service-42NFTM6N-ns2/svc2/tcp/p80 ip daddr 172.30.0.42 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
	add rule ip testing service-42NFTM6N-ns2/svc2/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 }
	add chain ip testing external-42NFTM6N-ns2/svc2/tcp/p80
	add rule ip testing external-42NFTM6N-ns2/svc2/tcp/p80 ip saddr 10.0.0.0/8 goto service-42NFTM6N-ns2/svc2/tcp/p80 comment "short-circuit pod traffic"
	add rule ip testing external-42NFTM6N-ns2/svc2/tcp/p80 fib saddr type local jump mark-for-masquerade comment "masquerade local traffic"
	add rule ip testing external-42NFTM6N-ns2/svc2/tcp/p80 fib saddr type local goto service-42NFTM6N-ns2/svc2/tcp/p80 comment "short-circuit local traffic"
	add chain ip testing endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80
	add rule ip testing endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 ip saddr 10.180.0.2 jump mark-for-masquerade
	add rule ip testing endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 meta l4proto tcp dnat to 10.180.0.2:80

	add element ip testing service-ips { 172.30.0.42 . tcp . 80 : goto service-42NFTM6N-ns2/svc2/tcp/p80 }
	add element ip testing service-ips { 192.168.99.22 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
	add element ip testing service-ips { 1.2.3.4 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
	add element ip testing service-nodeports { tcp . 3001 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }

	add element ip testing no-endpoint-nodeports { tcp . 3001 comment "ns2/svc2:p80" : drop }
	add element ip testing no-endpoint-services { 1.2.3.4 . tcp . 80 comment "ns2/svc2:p80" : drop }
	add element ip testing no-endpoint-services { 192.168.99.22 . tcp . 80 comment "ns2/svc2:p80" : drop }
	`)
   637  
// testExpected is the transaction from testInput in the order that
// sortNFTablesTransaction is expected to produce: the table, then chains, rules, sets,
// and elements, each group ordered by object name, with the rules of each chain kept in
// their original order.
var testExpected = dedent.Dedent(`
	add table ip testing { comment "rules for kube-proxy" ; }
	add chain ip testing endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80
	add chain ip testing endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80
	add chain ip testing external-42NFTM6N-ns2/svc2/tcp/p80
	add chain ip testing firewall-allow-check
	add chain ip testing firewall-check
	add chain ip testing mark-for-masquerade
	add chain ip testing masquerading
	add chain ip testing service-42NFTM6N-ns2/svc2/tcp/p80
	add chain ip testing service-ULMVA6XW-ns1/svc1/tcp/p80
	add rule ip testing endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 ip saddr 10.180.0.1 jump mark-for-masquerade
	add rule ip testing endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 meta l4proto tcp dnat to 10.180.0.1:80
	add rule ip testing endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 ip saddr 10.180.0.2 jump mark-for-masquerade
	add rule ip testing endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 meta l4proto tcp dnat to 10.180.0.2:80
	add rule ip testing external-42NFTM6N-ns2/svc2/tcp/p80 ip saddr 10.0.0.0/8 goto service-42NFTM6N-ns2/svc2/tcp/p80 comment "short-circuit pod traffic"
	add rule ip testing external-42NFTM6N-ns2/svc2/tcp/p80 fib saddr type local jump mark-for-masquerade comment "masquerade local traffic"
	add rule ip testing external-42NFTM6N-ns2/svc2/tcp/p80 fib saddr type local goto service-42NFTM6N-ns2/svc2/tcp/p80 comment "short-circuit local traffic"
	add rule ip testing firewall-allow-check ip daddr . meta l4proto . th dport . ip saddr @firewall-allow return
	add rule ip testing firewall-allow-check drop
	add rule ip testing firewall-check ip daddr . meta l4proto . th dport @firewall jump firewall-allow-check
	add rule ip testing mark-for-masquerade mark set mark or 0x4000
	add rule ip testing masquerading mark and 0x4000 == 0 return
	add rule ip testing masquerading mark set mark xor 0x4000
	add rule ip testing masquerading masquerade fully-random
	add rule ip testing service-42NFTM6N-ns2/svc2/tcp/p80 ip daddr 172.30.0.42 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
	add rule ip testing service-42NFTM6N-ns2/svc2/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 }
	add rule ip testing service-ULMVA6XW-ns1/svc1/tcp/p80 ip daddr 172.30.0.41 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
	add rule ip testing service-ULMVA6XW-ns1/svc1/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 }
	add set ip testing firewall { type ipv4_addr . inet_proto . inet_service ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
	add set ip testing firewall-allow { type ipv4_addr . inet_proto . inet_service . ipv4_addr ; flags interval ; comment "destinations+sources that are allowed by LoadBalancerSourceRanges" ; }
	add element ip testing no-endpoint-nodeports { tcp . 3001 comment "ns2/svc2:p80" : drop }
	add element ip testing no-endpoint-services { 1.2.3.4 . tcp . 80 comment "ns2/svc2:p80" : drop }
	add element ip testing no-endpoint-services { 192.168.99.22 . tcp . 80 comment "ns2/svc2:p80" : drop }
	add element ip testing service-ips { 1.2.3.4 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
	add element ip testing service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
	add element ip testing service-ips { 172.30.0.42 . tcp . 80 : goto service-42NFTM6N-ns2/svc2/tcp/p80 }
	add element ip testing service-ips { 192.168.99.22 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
	add element ip testing service-nodeports { tcp . 3001 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
	`)
   678  
   679  func Test_sortNFTablesTransaction(t *testing.T) {
   680  	output := sortNFTablesTransaction(testInput)
   681  	expected := strings.TrimSpace(testExpected)
   682  
   683  	diff := cmp.Diff(expected, output)
   684  	if diff != "" {
   685  		t.Errorf("output does not match expected:\n%s", diff)
   686  	}
   687  }
   688  
   689  func Test_diffNFTablesTransaction(t *testing.T) {
   690  	diff := diffNFTablesTransaction(testInput, testExpected)
   691  	if diff != "" {
   692  		t.Errorf("found diff in inputs that should have been equal:\n%s", diff)
   693  	}
   694  
   695  	notExpected := strings.Join(strings.Split(testExpected, "\n")[2:], "\n")
   696  	diff = diffNFTablesTransaction(testInput, notExpected)
   697  	if diff == "" {
   698  		t.Errorf("found no diff in inputs that should have been different")
   699  	}
   700  }
   701  
   702  func Test_diffNFTablesChain(t *testing.T) {
   703  	fake := knftables.NewFake(knftables.IPv4Family, "testing")
   704  	tx := fake.NewTransaction()
   705  
   706  	tx.Add(&knftables.Table{})
   707  	tx.Add(&knftables.Chain{
   708  		Name: "mark-masq-chain",
   709  	})
   710  	tx.Add(&knftables.Chain{
   711  		Name: "masquerade-chain",
   712  	})
   713  	tx.Add(&knftables.Chain{
   714  		Name: "empty-chain",
   715  	})
   716  
   717  	tx.Add(&knftables.Rule{
   718  		Chain: "mark-masq-chain",
   719  		Rule:  "mark set mark or 0x4000",
   720  	})
   721  
   722  	tx.Add(&knftables.Rule{
   723  		Chain: "masquerade-chain",
   724  		Rule:  "mark and 0x4000 == 0 return",
   725  	})
   726  	tx.Add(&knftables.Rule{
   727  		Chain: "masquerade-chain",
   728  		Rule:  "mark set mark xor 0x4000",
   729  	})
   730  	tx.Add(&knftables.Rule{
   731  		Chain: "masquerade-chain",
   732  		Rule:  "masquerade fully-random",
   733  	})
   734  
   735  	err := fake.Run(context.Background(), tx)
   736  	if err != nil {
   737  		t.Fatalf("Unexpected error running transaction: %v", err)
   738  	}
   739  
   740  	diff := diffNFTablesChain(fake, "mark-masq-chain", "mark set mark or 0x4000")
   741  	if diff != "" {
   742  		t.Errorf("unexpected difference in mark-masq-chain:\n%s", diff)
   743  	}
   744  	diff = diffNFTablesChain(fake, "mark-masq-chain", "mark set mark or 0x4000\n")
   745  	if diff != "" {
   746  		t.Errorf("unexpected difference in mark-masq-chain with trailing newline:\n%s", diff)
   747  	}
   748  
   749  	diff = diffNFTablesChain(fake, "masquerade-chain", "mark and 0x4000 == 0 return\nmark set mark xor 0x4000\nmasquerade fully-random")
   750  	if diff != "" {
   751  		t.Errorf("unexpected difference in masquerade-chain:\n%s", diff)
   752  	}
   753  	diff = diffNFTablesChain(fake, "masquerade-chain", "mark set mark xor 0x4000\nmasquerade fully-random")
   754  	if diff == "" {
   755  		t.Errorf("unexpected lack of difference in wrong masquerade-chain")
   756  	}
   757  
   758  	diff = diffNFTablesChain(fake, "empty-chain", "")
   759  	if diff != "" {
   760  		t.Errorf("unexpected difference in empty-chain:\n%s", diff)
   761  	}
   762  	diff = diffNFTablesChain(fake, "empty-chain", "\n")
   763  	if diff != "" {
   764  		t.Errorf("unexpected difference in empty-chain with trailing newline:\n%s", diff)
   765  	}
   766  }
   767  
   768  // This tests tracePacket against static data, just to make sure we match things in the
   769  // way we expect to. We need separate tests for ipv4 and ipv6 because knftables.Fake only supports
   770  // one address family at a time.
   771  // The test data is based on the TestOverallNFTablesRules.
   772  func TestTracePacketV4(t *testing.T) {
   773  	rules := dedent.Dedent(`
   774  		add table ip kube-proxy { comment "rules for kube-proxy" ; }
   775  		
   776  		add chain ip kube-proxy mark-for-masquerade
   777  		add chain ip kube-proxy masquerading
   778  		add chain ip kube-proxy services
   779  		add chain ip kube-proxy firewall-check
   780  		add chain ip kube-proxy endpoints-check
   781  		add chain ip kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; }
   782  		add chain ip kube-proxy filter-forward { type filter hook forward priority -110 ; }
   783  		add chain ip kube-proxy filter-input { type filter hook input priority -110 ; }
   784  		add chain ip kube-proxy filter-output { type filter hook output priority -110 ; }
   785  		add chain ip kube-proxy nat-output { type nat hook output priority -100 ; }
   786  		add chain ip kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; }
   787  		add chain ip kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
   788  		add chain ip kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
   789  		add chain ip kube-proxy service-ULMVA6XW-ns1/svc1/tcp/p80
   790  		add chain ip kube-proxy endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80
   791  		add chain ip kube-proxy service-42NFTM6N-ns2/svc2/tcp/p80
   792  		add chain ip kube-proxy endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80
   793  		add chain ip kube-proxy external-42NFTM6N-ns2/svc2/tcp/p80
   794  		add chain ip kube-proxy service-4AT6LBPK-ns3/svc3/tcp/p80
   795  		add chain ip kube-proxy endpoint-UEIP74TE-ns3/svc3/tcp/p80__10.180.0.3/80
   796  		add chain ip kube-proxy external-4AT6LBPK-ns3/svc3/tcp/p80
   797  		add chain ip kube-proxy service-LAUZTJTB-ns4/svc4/tcp/p80
   798  		add chain ip kube-proxy endpoint-UNZV3OEC-ns4/svc4/tcp/p80__10.180.0.4/80
   799  		add chain ip kube-proxy endpoint-5RFCDDV7-ns4/svc4/tcp/p80__10.180.0.5/80
   800  		add chain ip kube-proxy external-LAUZTJTB-ns4/svc4/tcp/p80
   801  		add chain ip kube-proxy service-HVFWP5L3-ns5/svc5/tcp/p80
   802  		add chain ip kube-proxy external-HVFWP5L3-ns5/svc5/tcp/p80
   803  		add chain ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80
   804  		add chain ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80
   805  
   806  		add rule ip kube-proxy mark-for-masquerade mark set mark or 0x4000
   807  		add rule ip kube-proxy masquerading mark and 0x4000 == 0 return
   808  		add rule ip kube-proxy masquerading mark set mark xor 0x4000
   809  		add rule ip kube-proxy masquerading masquerade fully-random
   810  		add rule ip kube-proxy filter-prerouting ct state new jump firewall-check
   811  		add rule ip kube-proxy filter-forward ct state new jump endpoints-check
   812  		add rule ip kube-proxy filter-input ct state new jump endpoints-check
   813  		add rule ip kube-proxy filter-output ct state new jump endpoints-check
   814  		add rule ip kube-proxy filter-output ct state new jump firewall-check
   815  		add rule ip kube-proxy nat-output jump services
   816  		add rule ip kube-proxy nat-postrouting jump masquerading
   817  		add rule ip kube-proxy nat-prerouting jump services
   818  
   819  		add map ip kube-proxy firewall-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
   820  		add rule ip kube-proxy firewall-check ip daddr . meta l4proto . th dport vmap @firewall-ips
   821  
   822  		add rule ip kube-proxy reject-chain reject
   823  
   824  		add map ip kube-proxy no-endpoint-services { type ipv4_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
   825  		add map ip kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }
   826  
   827  		add rule ip kube-proxy endpoints-check ip daddr . meta l4proto . th dport vmap @no-endpoint-services
   828  		add rule ip kube-proxy endpoints-check fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @no-endpoint-nodeports
   829  
   830  		add map ip kube-proxy service-ips { type ipv4_addr . inet_proto . inet_service : verdict ; comment "ClusterIP, ExternalIP and LoadBalancer IP traffic" ; }
   831  		add map ip kube-proxy service-nodeports { type inet_proto . inet_service : verdict ; comment "NodePort traffic" ; }
   832  		add rule ip kube-proxy services ip daddr . meta l4proto . th dport vmap @service-ips
   833  		add rule ip kube-proxy services fib daddr type local ip daddr != 127.0.0.0/8 meta l4proto . th dport vmap @service-nodeports
   834  
   835  		# svc1
   836  		add rule ip kube-proxy service-ULMVA6XW-ns1/svc1/tcp/p80 ip daddr 172.30.0.41 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
   837  		add rule ip kube-proxy service-ULMVA6XW-ns1/svc1/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 }
   838  
   839  		add rule ip kube-proxy endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 ip saddr 10.180.0.1 jump mark-for-masquerade
   840  		add rule ip kube-proxy endpoint-5OJB2KTY-ns1/svc1/tcp/p80__10.180.0.1/80 meta l4proto tcp dnat to 10.180.0.1:80
   841  
   842  		add element ip kube-proxy service-ips { 172.30.0.41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
   843  
   844  		# svc2
   845  		add rule ip kube-proxy service-42NFTM6N-ns2/svc2/tcp/p80 ip daddr 172.30.0.42 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
   846  		add rule ip kube-proxy service-42NFTM6N-ns2/svc2/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 }
   847  		add rule ip kube-proxy external-42NFTM6N-ns2/svc2/tcp/p80 ip saddr 10.0.0.0/8 goto service-42NFTM6N-ns2/svc2/tcp/p80 comment "short-circuit pod traffic"
   848  		add rule ip kube-proxy external-42NFTM6N-ns2/svc2/tcp/p80 fib saddr type local jump mark-for-masquerade comment "masquerade local traffic"
   849  		add rule ip kube-proxy external-42NFTM6N-ns2/svc2/tcp/p80 fib saddr type local goto service-42NFTM6N-ns2/svc2/tcp/p80 comment "short-circuit local traffic"
   850  		add rule ip kube-proxy endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 ip saddr 10.180.0.2 jump mark-for-masquerade
   851  		add rule ip kube-proxy endpoint-SGOXE6O3-ns2/svc2/tcp/p80__10.180.0.2/80 meta l4proto tcp dnat to 10.180.0.2:80
   852  
   853  		add element ip kube-proxy service-ips { 172.30.0.42 . tcp . 80 : goto service-42NFTM6N-ns2/svc2/tcp/p80 }
   854  		add element ip kube-proxy service-ips { 192.168.99.22 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
   855  		add element ip kube-proxy service-ips { 1.2.3.4 . tcp . 80 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
   856  		add element ip kube-proxy service-nodeports { tcp . 3001 : goto external-42NFTM6N-ns2/svc2/tcp/p80 }
   857  
   858  		add element ip kube-proxy no-endpoint-nodeports { tcp . 3001 comment "ns2/svc2:p80" : drop }
   859  		add element ip kube-proxy no-endpoint-services { 1.2.3.4 . tcp . 80 comment "ns2/svc2:p80" : drop }
   860  		add element ip kube-proxy no-endpoint-services { 192.168.99.22 . tcp . 80 comment "ns2/svc2:p80" : drop }
   861  
   862  		# svc3
   863  		add rule ip kube-proxy service-4AT6LBPK-ns3/svc3/tcp/p80 ip daddr 172.30.0.43 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
   864  		add rule ip kube-proxy service-4AT6LBPK-ns3/svc3/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-UEIP74TE-ns3/svc3/tcp/p80__10.180.0.3/80 }
   865  		add rule ip kube-proxy external-4AT6LBPK-ns3/svc3/tcp/p80 jump mark-for-masquerade
   866  		add rule ip kube-proxy external-4AT6LBPK-ns3/svc3/tcp/p80 goto service-4AT6LBPK-ns3/svc3/tcp/p80
   867  		add rule ip kube-proxy endpoint-UEIP74TE-ns3/svc3/tcp/p80__10.180.0.3/80 ip saddr 10.180.0.3 jump mark-for-masquerade
   868  		add rule ip kube-proxy endpoint-UEIP74TE-ns3/svc3/tcp/p80__10.180.0.3/80 meta l4proto tcp dnat to 10.180.0.3:80
   869  
   870  		add element ip kube-proxy service-ips { 172.30.0.43 . tcp . 80 : goto service-4AT6LBPK-ns3/svc3/tcp/p80 }
   871  		add element ip kube-proxy service-nodeports { tcp . 3003 : goto external-4AT6LBPK-ns3/svc3/tcp/p80 }
   872  
   873  		# svc4
   874  		add rule ip kube-proxy service-LAUZTJTB-ns4/svc4/tcp/p80 ip daddr 172.30.0.44 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
   875  		add rule ip kube-proxy service-LAUZTJTB-ns4/svc4/tcp/p80 numgen random mod 2 vmap { 0 : goto endpoint-UNZV3OEC-ns4/svc4/tcp/p80__10.180.0.4/80 , 1 : goto endpoint-5RFCDDV7-ns4/svc4/tcp/p80__10.180.0.5/80 }
   876  		add rule ip kube-proxy external-LAUZTJTB-ns4/svc4/tcp/p80 jump mark-for-masquerade
   877  		add rule ip kube-proxy external-LAUZTJTB-ns4/svc4/tcp/p80 goto service-LAUZTJTB-ns4/svc4/tcp/p80
   878  		add rule ip kube-proxy endpoint-5RFCDDV7-ns4/svc4/tcp/p80__10.180.0.5/80 ip saddr 10.180.0.5 jump mark-for-masquerade
   879  		add rule ip kube-proxy endpoint-5RFCDDV7-ns4/svc4/tcp/p80__10.180.0.5/80 meta l4proto tcp dnat to 10.180.0.5:80
   880  		add rule ip kube-proxy endpoint-UNZV3OEC-ns4/svc4/tcp/p80__10.180.0.4/80 ip saddr 10.180.0.4 jump mark-for-masquerade
   881  		add rule ip kube-proxy endpoint-UNZV3OEC-ns4/svc4/tcp/p80__10.180.0.4/80 meta l4proto tcp dnat to 10.180.0.4:80
   882  
   883  		add element ip kube-proxy service-ips { 172.30.0.44 . tcp . 80 : goto service-LAUZTJTB-ns4/svc4/tcp/p80 }
   884  		add element ip kube-proxy service-ips { 192.168.99.33 . tcp . 80 : goto external-LAUZTJTB-ns4/svc4/tcp/p80 }
   885  
   886  		# svc5
   887  		add set ip kube-proxy affinity-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 { type ipv4_addr ; flags dynamic,timeout ; timeout 10800s ; }
   888  		add rule ip kube-proxy service-HVFWP5L3-ns5/svc5/tcp/p80 ip daddr 172.30.0.45 tcp dport 80 ip saddr != 10.0.0.0/8 jump mark-for-masquerade
   889  		add rule ip kube-proxy service-HVFWP5L3-ns5/svc5/tcp/p80 ip saddr @affinity-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 goto endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80
   890  		add rule ip kube-proxy service-HVFWP5L3-ns5/svc5/tcp/p80 numgen random mod 1 vmap { 0 : goto endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 }
   891  		add rule ip kube-proxy external-HVFWP5L3-ns5/svc5/tcp/p80 jump mark-for-masquerade
   892  		add rule ip kube-proxy external-HVFWP5L3-ns5/svc5/tcp/p80 goto service-HVFWP5L3-ns5/svc5/tcp/p80
   893  
   894  		add rule ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 ip saddr 10.180.0.3 jump mark-for-masquerade
   895  		add rule ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 update @affinity-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 { ip saddr }
   896  		add rule ip kube-proxy endpoint-GTK6MW7G-ns5/svc5/tcp/p80__10.180.0.3/80 meta l4proto tcp dnat to 10.180.0.3:80
   897  
   898  		add rule ip kube-proxy firewall-HVFWP5L3-ns5/svc5/tcp/p80 ip saddr != { 203.0.113.0/25 } drop
   899  
   900  		add element ip kube-proxy service-ips { 172.30.0.45 . tcp . 80 : goto service-HVFWP5L3-ns5/svc5/tcp/p80 }
   901  		add element ip kube-proxy service-ips { 5.6.7.8 . tcp . 80 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
   902  		add element ip kube-proxy service-nodeports { tcp . 3002 : goto external-HVFWP5L3-ns5/svc5/tcp/p80 }
   903  		add element ip kube-proxy firewall-ips { 5.6.7.8 . tcp . 80 comment "ns5/svc5:p80" : goto firewall-HVFWP5L3-ns5/svc5/tcp/p80 }
   904  
   905  		# svc6
   906  		add element ip kube-proxy no-endpoint-services { 172.30.0.46 . tcp . 80 comment "ns6/svc6:p80" : goto reject-chain }
   907  		`)
   908  
   909  	nft := knftables.NewFake(knftables.IPv4Family, "kube-proxy")
   910  	err := nft.ParseDump(rules)
   911  	if err != nil {
   912  		t.Fatalf("failed to parse given nftables rules: %v", err)
   913  	}
   914  	// ensure rules were parsed correctly
   915  	assertNFTablesTransactionEqual(t, getLine(), rules, nft.Dump())
   916  	runPacketFlowTests(t, getLine(), nft, testNodeIPs, []packetFlowTest{
   917  		{
   918  			name:     "no match",
   919  			sourceIP: "10.0.0.2",
   920  			destIP:   "10.0.0.3",
   921  			destPort: 80,
   922  			output:   "",
   923  		},
   924  		{
   925  			name:     "single endpoint",
   926  			sourceIP: "10.0.0.2",
   927  			destIP:   "172.30.0.41",
   928  			destPort: 80,
   929  			output:   "10.180.0.1:80",
   930  		},
   931  		{
   932  			name:     "multiple endpoints",
   933  			sourceIP: "10.0.0.2",
   934  			destIP:   "172.30.0.44",
   935  			destPort: 80,
   936  			output:   "10.180.0.4:80, 10.180.0.5:80",
   937  		},
   938  		{
   939  			name:     "local, mark for masquerade",
   940  			sourceIP: testNodeIP,
   941  			destIP:   "192.168.99.22",
   942  			destPort: 80,
   943  			output:   "10.180.0.2:80",
   944  			masq:     true,
   945  		},
   946  		{
   947  			name:     "DROP",
   948  			sourceIP: testExternalClient,
   949  			destIP:   "192.168.99.22",
   950  			destPort: 80,
   951  			output:   "DROP",
   952  		},
   953  		{
   954  			name:     "REJECT",
   955  			sourceIP: "10.0.0.2",
   956  			destIP:   "172.30.0.46",
   957  			destPort: 80,
   958  			output:   "REJECT",
   959  		},
   960  		{
   961  			name:     "blocked external to loadbalancer IP",
   962  			sourceIP: testExternalClientBlocked,
   963  			destIP:   "5.6.7.8",
   964  			destPort: 80,
   965  			output:   "DROP",
   966  		},
   967  		{
   968  			name:     "pod to nodePort",
   969  			sourceIP: "10.0.0.2",
   970  			destIP:   testNodeIP,
   971  			destPort: 3001,
   972  			output:   "10.180.0.2:80",
   973  		},
   974  	})
   975  }
   976  
   977  // This tests tracePacket against static data, just to make sure we match things in the
   978  // way we expect to. We need separate tests for ipv4 and ipv6 because knftables.Fake only supports
   979  // one address family at a time.
   980  // The test data is based on "basic tests" of TestNodePorts for ipv6.
   981  func TestTracePacketV6(t *testing.T) {
   982  	rules := dedent.Dedent(`
   983  		add table ip6 kube-proxy { comment "rules for kube-proxy" ; }
   984  		add chain ip6 kube-proxy cluster-ips-check
   985  		add chain ip6 kube-proxy endpoint-2CRNCTTE-ns1/svc1/tcp/p80__fd00.10.180..2.1/80
   986  		add chain ip6 kube-proxy endpoint-ZVRFLKHO-ns1/svc1/tcp/p80__fd00.10.180..1/80
   987  		add chain ip6 kube-proxy external-ULMVA6XW-ns1/svc1/tcp/p80
   988  		add chain ip6 kube-proxy filter-forward { type filter hook forward priority -110 ; }
   989  		add chain ip6 kube-proxy filter-input { type filter hook input priority -110 ; }
   990  		add chain ip6 kube-proxy filter-output { type filter hook output priority -110 ; }
   991  		add chain ip6 kube-proxy filter-output-post-dnat { type filter hook output priority -90 ; }
   992  		add chain ip6 kube-proxy filter-prerouting { type filter hook prerouting priority -110 ; }
   993  		add chain ip6 kube-proxy firewall-check
   994  		add chain ip6 kube-proxy mark-for-masquerade
   995  		add chain ip6 kube-proxy masquerading
   996  		add chain ip6 kube-proxy nat-output { type nat hook output priority -100 ; }
   997  		add chain ip6 kube-proxy nat-postrouting { type nat hook postrouting priority 100 ; }
   998  		add chain ip6 kube-proxy nat-prerouting { type nat hook prerouting priority -100 ; }
   999  		add chain ip6 kube-proxy nodeport-endpoints-check
  1000  		add chain ip6 kube-proxy reject-chain { comment "helper for @no-endpoint-services / @no-endpoint-nodeports" ; }
  1001  		add chain ip6 kube-proxy service-ULMVA6XW-ns1/svc1/tcp/p80
  1002  		add chain ip6 kube-proxy service-endpoints-check
  1003  		add chain ip6 kube-proxy services
  1004  		add set ip6 kube-proxy cluster-ips { type ipv6_addr ; comment "Active ClusterIPs" ; }
  1005  		add set ip6 kube-proxy nodeport-ips { type ipv6_addr ; comment "IPs that accept NodePort traffic" ; }
  1006  		add map ip6 kube-proxy firewall-ips { type ipv6_addr . inet_proto . inet_service : verdict ; comment "destinations that are subject to LoadBalancerSourceRanges" ; }
  1007  		add map ip6 kube-proxy no-endpoint-nodeports { type inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to service nodeports with no endpoints" ; }
  1008  		add map ip6 kube-proxy no-endpoint-services { type ipv6_addr . inet_proto . inet_service : verdict ; comment "vmap to drop or reject packets to services with no endpoints" ; }
  1009  		add map ip6 kube-proxy service-ips { type ipv6_addr . inet_proto . inet_service : verdict ; comment "ClusterIP, ExternalIP and LoadBalancer IP traffic" ; }
  1010  		add map ip6 kube-proxy service-nodeports { type inet_proto . inet_service : verdict ; comment "NodePort traffic" ; }
  1011  		add rule ip6 kube-proxy cluster-ips-check ip6 daddr @cluster-ips reject comment "Reject traffic to invalid ports of ClusterIPs"
  1012  		add rule ip6 kube-proxy cluster-ips-check ip6 daddr { fd00:10:96::/112 } drop comment "Drop traffic to unallocated ClusterIPs"
  1013  		add rule ip6 kube-proxy endpoint-2CRNCTTE-ns1/svc1/tcp/p80__fd00.10.180..2.1/80 ip6 saddr fd00:10:180::2:1 jump mark-for-masquerade
  1014  		add rule ip6 kube-proxy endpoint-2CRNCTTE-ns1/svc1/tcp/p80__fd00.10.180..2.1/80 meta l4proto tcp dnat to [fd00:10:180::2:1]:80
  1015  		add rule ip6 kube-proxy endpoint-ZVRFLKHO-ns1/svc1/tcp/p80__fd00.10.180..1/80 ip6 saddr fd00:10:180::1 jump mark-for-masquerade
  1016  		add rule ip6 kube-proxy endpoint-ZVRFLKHO-ns1/svc1/tcp/p80__fd00.10.180..1/80 meta l4proto tcp dnat to [fd00:10:180::1]:80
  1017  		add rule ip6 kube-proxy external-ULMVA6XW-ns1/svc1/tcp/p80 jump mark-for-masquerade
  1018  		add rule ip6 kube-proxy external-ULMVA6XW-ns1/svc1/tcp/p80 goto service-ULMVA6XW-ns1/svc1/tcp/p80
  1019  		add rule ip6 kube-proxy filter-forward ct state new jump service-endpoints-check
  1020  		add rule ip6 kube-proxy filter-forward ct state new jump cluster-ips-check
  1021  		add rule ip6 kube-proxy filter-input ct state new jump nodeport-endpoints-check
  1022  		add rule ip6 kube-proxy filter-input ct state new jump service-endpoints-check
  1023  		add rule ip6 kube-proxy filter-output ct state new jump service-endpoints-check
  1024  		add rule ip6 kube-proxy filter-output ct state new jump firewall-check
  1025  		add rule ip6 kube-proxy filter-output-post-dnat ct state new jump cluster-ips-check
  1026  		add rule ip6 kube-proxy filter-prerouting ct state new jump firewall-check
  1027  		add rule ip6 kube-proxy firewall-check ip6 daddr . meta l4proto . th dport vmap @firewall-ips
  1028  		add rule ip6 kube-proxy mark-for-masquerade mark set mark or 0x4000
  1029  		add rule ip6 kube-proxy masquerading mark and 0x4000 == 0 return
  1030  		add rule ip6 kube-proxy masquerading mark set mark xor 0x4000
  1031  		add rule ip6 kube-proxy masquerading masquerade fully-random
  1032  		add rule ip6 kube-proxy nat-output jump services
  1033  		add rule ip6 kube-proxy nat-postrouting jump masquerading
  1034  		add rule ip6 kube-proxy nat-prerouting jump services
  1035  		add rule ip6 kube-proxy nodeport-endpoints-check ip6 daddr @nodeport-ips meta l4proto . th dport vmap @no-endpoint-nodeports
  1036  		add rule ip6 kube-proxy reject-chain reject
  1037  		add rule ip6 kube-proxy service-ULMVA6XW-ns1/svc1/tcp/p80 ip6 daddr fd00:172:30::41 tcp dport 80 ip6 saddr != fd00:10::/64 jump mark-for-masquerade
  1038  		add rule ip6 kube-proxy service-ULMVA6XW-ns1/svc1/tcp/p80 numgen random mod 2 vmap { 0 : goto endpoint-ZVRFLKHO-ns1/svc1/tcp/p80__fd00.10.180..1/80 , 1 : goto endpoint-2CRNCTTE-ns1/svc1/tcp/p80__fd00.10.180..2.1/80 }
  1039  		add rule ip6 kube-proxy service-endpoints-check ip6 daddr . meta l4proto . th dport vmap @no-endpoint-services
  1040  		add rule ip6 kube-proxy services ip6 daddr . meta l4proto . th dport vmap @service-ips
  1041  		add rule ip6 kube-proxy services ip6 daddr @nodeport-ips meta l4proto . th dport vmap @service-nodeports
  1042  		add element ip6 kube-proxy cluster-ips { fd00:172:30::41 }
  1043  		add element ip6 kube-proxy nodeport-ips { 2001:db8::1 }
  1044  		add element ip6 kube-proxy nodeport-ips { 2001:db8:1::2 }
  1045  		add element ip6 kube-proxy service-ips { fd00:172:30::41 . tcp . 80 : goto service-ULMVA6XW-ns1/svc1/tcp/p80 }
  1046  		add element ip6 kube-proxy service-nodeports { tcp . 3001 : goto external-ULMVA6XW-ns1/svc1/tcp/p80 }
  1047  		`)
  1048  
  1049  	nft := knftables.NewFake(knftables.IPv6Family, "kube-proxy")
  1050  	err := nft.ParseDump(rules)
  1051  	if err != nil {
  1052  		t.Fatalf("failed to parse given nftables rules: %v", err)
  1053  	}
  1054  	// ensure rules were parsed correctly
  1055  	assertNFTablesTransactionEqual(t, getLine(), rules, nft.Dump())
  1056  	output := "[fd00:10:180::1]:80, [fd00:10:180::2:1]:80"
  1057  
  1058  	runPacketFlowTests(t, getLine(), nft, testNodeIPs, []packetFlowTest{
  1059  		{
  1060  			name:     "pod to cluster IP",
  1061  			sourceIP: "fd00:10::2",
  1062  			destIP:   "fd00:172:30::41",
  1063  			destPort: 80,
  1064  			output:   output,
  1065  			masq:     false,
  1066  		},
  1067  		{
  1068  			name:     "external to nodePort",
  1069  			sourceIP: "2600:5200::1",
  1070  			destIP:   testNodeIPv6,
  1071  			destPort: 3001,
  1072  			output:   output,
  1073  			masq:     true,
  1074  		},
  1075  		{
  1076  			name:     "node to nodePort",
  1077  			sourceIP: testNodeIPv6,
  1078  			destIP:   testNodeIPv6,
  1079  			destPort: 3001,
  1080  			output:   output,
  1081  			masq:     true,
  1082  		},
  1083  	})
  1084  }