github.com/cilium/cilium@v1.16.2/pkg/datapath/linux/route/route_linux.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  //go:build linux
     5  
     6  package route
     7  
import (
	"errors"
	"fmt"
	"net"
	"sort"

	"github.com/vishvananda/netlink"
	"golang.org/x/sys/unix"

	"github.com/cilium/cilium/pkg/datapath/linux/linux_defaults"
	"github.com/cilium/cilium/pkg/time"
)
    19  
const (
	// RouteReplaceMaxTries is the maximum number of times Upsert issues
	// netlink.RouteReplace for the prefix route before giving up and
	// returning the last error reported by the kernel.
	RouteReplaceMaxTries = 10

	// RouteReplaceRetryInterval is how long Upsert sleeps after a failed
	// netlink.RouteReplace attempt.
	RouteReplaceRetryInterval = 100 * time.Millisecond

	// RTN_LOCAL is a route type used to indicate packet should be "routed"
	// locally and passed up the stack. Is used by IPSec to force encrypted
	// packets to pass through XFRM layer. getNetlinkRoute gives routes of
	// this type host scope when no other scope was requested.
	RTN_LOCAL = 0x2

	// MainTable is Linux's default routing table
	MainTable = 254

	// EncryptRouteProtocol is the route protocol number for encryption
	// specific routes.
	EncryptRouteProtocol = 192
)
    40  
    41  // getNetlinkRoute returns the route configuration as netlink.Route
    42  func (r *Route) getNetlinkRoute() netlink.Route {
    43  	rt := netlink.Route{
    44  		Dst:      &r.Prefix,
    45  		Src:      r.Local,
    46  		MTU:      r.MTU,
    47  		Priority: r.Priority,
    48  		Protocol: netlink.RouteProtocol(r.Proto),
    49  		Table:    r.Table,
    50  		Type:     r.Type,
    51  	}
    52  
    53  	if r.Nexthop != nil {
    54  		rt.Gw = *r.Nexthop
    55  	}
    56  
    57  	if r.Scope != netlink.SCOPE_UNIVERSE {
    58  		rt.Scope = r.Scope
    59  	} else if r.Scope == netlink.SCOPE_UNIVERSE && r.Type == RTN_LOCAL {
    60  		rt.Scope = netlink.SCOPE_HOST
    61  	}
    62  
    63  	return rt
    64  }
    65  
    66  // getNexthopAsIPNet returns the nexthop of the route as IPNet
    67  func (r *Route) getNexthopAsIPNet() *net.IPNet {
    68  	if r.Nexthop == nil {
    69  		return nil
    70  	}
    71  
    72  	if r.Nexthop.To4() != nil {
    73  		return &net.IPNet{IP: *r.Nexthop, Mask: net.CIDRMask(32, 32)}
    74  	}
    75  
    76  	return &net.IPNet{IP: *r.Nexthop, Mask: net.CIDRMask(128, 128)}
    77  }
    78  
    79  func ipFamily(ip net.IP) int {
    80  	if ip.To4() == nil {
    81  		return netlink.FAMILY_V6
    82  	}
    83  
    84  	return netlink.FAMILY_V4
    85  }
    86  
    87  // Lookup attempts to find the linux route based on the route specification.
    88  // If the route exists, the route is returned, otherwise an error is returned.
    89  func Lookup(route Route) (*Route, error) {
    90  	link, err := netlink.LinkByName(route.Device)
    91  	if err != nil {
    92  		return nil, fmt.Errorf("unable to find interface '%s' of route: %w", route.Device, err)
    93  	}
    94  
    95  	routeSpec := route.getNetlinkRoute()
    96  	routeSpec.LinkIndex = link.Attrs().Index
    97  
    98  	nlRoute := lookup(&routeSpec)
    99  	if nlRoute == nil {
   100  		return nil, nil
   101  	}
   102  
   103  	result := &Route{
   104  		Local:   nlRoute.Src,
   105  		Device:  link.Attrs().Name,
   106  		MTU:     nlRoute.MTU,
   107  		Scope:   nlRoute.Scope,
   108  		Nexthop: &nlRoute.Gw,
   109  	}
   110  
   111  	if nlRoute.Dst != nil {
   112  		result.Prefix = *nlRoute.Dst
   113  	}
   114  
   115  	return result, nil
   116  }
   117  
   118  // lookup finds a particular route as specified by the filter which points
   119  // to the specified device. The filter route can have the following fields set:
   120  //   - Dst
   121  //   - LinkIndex
   122  //   - Scope
   123  //   - Gw
   124  func lookup(route *netlink.Route) *netlink.Route {
   125  	var filter uint64
   126  	if route.Dst != nil {
   127  		filter |= netlink.RT_FILTER_DST
   128  	}
   129  	if route.Table != 0 {
   130  		filter |= netlink.RT_FILTER_TABLE
   131  	}
   132  	if route.Scope != 0 {
   133  		filter |= netlink.RT_FILTER_SCOPE
   134  	}
   135  	if route.Gw != nil {
   136  		filter |= netlink.RT_FILTER_GW
   137  	}
   138  	if route.LinkIndex != 0 {
   139  		filter |= netlink.RT_FILTER_OIF
   140  	}
   141  
   142  	routes, err := netlink.RouteListFiltered(ipFamily(route.Dst.IP), route, filter)
   143  	if err != nil {
   144  		return nil
   145  	}
   146  
   147  	for _, r := range routes {
   148  		if r.Dst != nil && route.Dst == nil {
   149  			continue
   150  		}
   151  
   152  		if route.Dst != nil && r.Dst == nil {
   153  			continue
   154  		}
   155  
   156  		if route.Table != 0 && route.Table != r.Table {
   157  			continue
   158  		}
   159  
   160  		aMaskLen, aMaskBits := r.Dst.Mask.Size()
   161  		bMaskLen, bMaskBits := route.Dst.Mask.Size()
   162  		if r.Scope == route.Scope &&
   163  			aMaskLen == bMaskLen && aMaskBits == bMaskBits &&
   164  			r.Dst.IP.Equal(route.Dst.IP) && r.Gw.Equal(route.Gw) {
   165  			return &r
   166  		}
   167  	}
   168  
   169  	return nil
   170  }
   171  
   172  func createNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) *netlink.Route {
   173  	// This is the L2 route which makes router IP available behind the
   174  	// interface.
   175  	rt := &netlink.Route{
   176  		LinkIndex: link.Attrs().Index,
   177  		Dst:       routerNet,
   178  		Table:     route.Table,
   179  		Protocol:  linux_defaults.RTProto,
   180  	}
   181  
   182  	// Known issue: scope for IPv6 routes is not propagated correctly. If
   183  	// we set the scope here, lookup() will be unable to identify the route
   184  	// again and we will continuously re-add the route
   185  	if routerNet.IP.To4() != nil {
   186  		rt.Scope = netlink.SCOPE_LINK
   187  	}
   188  
   189  	return rt
   190  }
   191  
   192  // replaceNexthopRoute verifies that the L2 route for the router IP which is
   193  // used as nexthop for all node routes is properly installed. If unavailable or
   194  // incorrect, it will be replaced with the proper L2 route.
   195  func replaceNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) (bool, error) {
   196  	if err := netlink.RouteReplace(createNexthopRoute(route, link, routerNet)); err != nil {
   197  		return false, fmt.Errorf("unable to add L2 nexthop route: %w", err)
   198  	}
   199  
   200  	return true, nil
   201  }
   202  
   203  // deleteNexthopRoute deletes
   204  func deleteNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) error {
   205  	if err := netlink.RouteDel(createNexthopRoute(route, link, routerNet)); err != nil {
   206  		return fmt.Errorf("unable to delete L2 nexthop route: %w", err)
   207  	}
   208  
   209  	return nil
   210  }
   211  
   212  // Upsert adds or updates a Linux kernel route. The route described can be in
   213  // the following two forms:
   214  //
   215  // direct:
   216  //
   217  //	prefix dev foo
   218  //
   219  // nexthop:
   220  //
   221  //	prefix via nexthop dev foo
   222  //
   223  // If a nexthop route is specified, this function will check whether a direct
   224  // route to the nexthop exists and add if required. This means that the
   225  // following two routes will exist afterwards:
   226  //
   227  //	nexthop dev foo
   228  //	prefix via nexthop dev foo
   229  //
   230  // Due to a bug in the Linux kernel, the prefix route is attempted to be
   231  // updated RouteReplaceMaxTries with an interval of RouteReplaceRetryInterval.
   232  // This is a workaround for a race condition in which the direct route to the
   233  // nexthop is not available immediately and the prefix route can fail with
   234  // EINVAL if the Netlink calls are issued in short order.
   235  //
   236  // An error is returned if the route can not be added or updated.
   237  func Upsert(route Route) error {
   238  	var nexthopRouteCreated bool
   239  
   240  	link, err := netlink.LinkByName(route.Device)
   241  	if err != nil {
   242  		return fmt.Errorf("unable to lookup interface %s: %w", route.Device, err)
   243  	}
   244  
   245  	// Can't add local routes to an interface that's down ('lo' in new netns).
   246  	if link.Attrs().OperState == netlink.OperDown {
   247  		if err := netlink.LinkSetUp(link); err != nil {
   248  			return fmt.Errorf("unable to set interface up: %w", err)
   249  		}
   250  	}
   251  
   252  	routerNet := route.getNexthopAsIPNet()
   253  	if routerNet != nil {
   254  		if _, err := replaceNexthopRoute(route, link, routerNet); err != nil {
   255  			return fmt.Errorf("unable to add nexthop route: %w", err)
   256  		}
   257  
   258  		nexthopRouteCreated = true
   259  	}
   260  
   261  	routeSpec := route.getNetlinkRoute()
   262  	routeSpec.LinkIndex = link.Attrs().Index
   263  
   264  	err = fmt.Errorf("routeReplace not called yet")
   265  
   266  	// Workaround: See description of this function
   267  	for i := 0; err != nil && i < RouteReplaceMaxTries; i++ {
   268  		err = netlink.RouteReplace(&routeSpec)
   269  		if err == nil {
   270  			break
   271  		}
   272  		time.Sleep(RouteReplaceRetryInterval)
   273  	}
   274  
   275  	if err != nil {
   276  		if nexthopRouteCreated {
   277  			if err2 := deleteNexthopRoute(route, link, routerNet); err2 != nil {
   278  				// TODO: If this fails, we may want to add some retry logic.
   279  				log.WithError(err2).
   280  					Errorf("unable to clean up nexthop route following failure to replace route")
   281  			}
   282  		}
   283  		return err
   284  	}
   285  
   286  	return nil
   287  }
   288  
   289  // Delete deletes a Linux route. An error is returned if the route does not
   290  // exist or if the route could not be deleted.
   291  func Delete(route Route) error {
   292  	link, err := netlink.LinkByName(route.Device)
   293  	if err != nil {
   294  		return fmt.Errorf("unable to lookup interface %s: %w", route.Device, err)
   295  	}
   296  
   297  	// Deletion of routes with Nexthop or Local set fails for IPv6.
   298  	// Therefore do not use getNetlinkRoute().
   299  	routeSpec := netlink.Route{
   300  		Dst:       &route.Prefix,
   301  		LinkIndex: link.Attrs().Index,
   302  		Table:     route.Table,
   303  	}
   304  
   305  	// Scope can only be specified for IPv4
   306  	if route.Prefix.IP.To4() != nil {
   307  		routeSpec.Scope = route.Scope
   308  	}
   309  
   310  	if err := netlink.RouteDel(&routeSpec); err != nil {
   311  		return err
   312  	}
   313  
   314  	return nil
   315  }
   316  
// Rule is the specification of an IP routing rule. When a Rule is used as a
// match specification (lookupRule, ListRules), fields left at their zero
// value (0 or nil) are not used for matching, with the exception of Table in
// lookupRule, which is always compared.
type Rule struct {
	// Priority is the routing rule priority
	Priority int

	// Mark is the skb mark that needs to match
	Mark int

	// Mask is the mask to apply to the skb mark before matching the Mark
	// field
	Mask int

	// From is the source address selector; nil is rendered as "all" by
	// String
	From *net.IPNet

	// To is the destination address selector; nil is rendered as "all" by
	// String
	To *net.IPNet

	// Table is the routing table to look up if the rule matches
	Table int

	// Protocol is the routing rule protocol (e.g. proto unspec/kernel)
	Protocol uint8
}
   341  
   342  // String returns the string representation of a Rule (adhering to the Stringer
   343  // interface).
   344  func (r Rule) String() string {
   345  	var (
   346  		str  string
   347  		from string
   348  		to   string
   349  	)
   350  
   351  	str += fmt.Sprintf("%d: ", r.Priority)
   352  
   353  	if r.From != nil {
   354  		from = r.From.String()
   355  	} else {
   356  		from = "all"
   357  	}
   358  
   359  	if r.To != nil {
   360  		to = r.To.String()
   361  	} else {
   362  		to = "all"
   363  	}
   364  
   365  	if r.Table == unix.RT_TABLE_MAIN {
   366  		str += fmt.Sprintf("from %s to %s lookup main", from, to)
   367  	} else {
   368  		str += fmt.Sprintf("from %s to %s lookup %d", from, to, r.Table)
   369  	}
   370  
   371  	if r.Mark != 0 {
   372  		str += fmt.Sprintf(" mark 0x%x mask 0x%x", r.Mark, r.Mask)
   373  	}
   374  
   375  	str += fmt.Sprintf(" proto %s", netlink.RouteProtocol(r.Protocol))
   376  
   377  	return str
   378  }
   379  
   380  func lookupRule(spec Rule, family int) (bool, error) {
   381  	rules, err := netlink.RuleList(family)
   382  	if err != nil {
   383  		return false, err
   384  	}
   385  	for _, r := range rules {
   386  		if spec.Priority != 0 && spec.Priority != r.Priority {
   387  			continue
   388  		}
   389  
   390  		if spec.From != nil && (r.Src == nil || r.Src.String() != spec.From.String()) {
   391  			continue
   392  		}
   393  
   394  		if spec.To != nil && (r.Dst == nil || r.Dst.String() != spec.To.String()) {
   395  			continue
   396  		}
   397  
   398  		if spec.Mark != 0 && r.Mark != spec.Mark {
   399  			continue
   400  		}
   401  
   402  		if spec.Mask != 0 && r.Mask != spec.Mask {
   403  			continue
   404  		}
   405  
   406  		if spec.Protocol != 0 && r.Protocol != spec.Protocol {
   407  			continue
   408  		}
   409  
   410  		if r.Table == spec.Table {
   411  			return true, nil
   412  		}
   413  	}
   414  	return false, nil
   415  }
   416  
   417  // ListRules will list IP routing rules on Linux, filtered by `filter`. When
   418  // `filter` is nil, this function will return all rules, "unfiltered". This
   419  // function is meant to replicate the behavior of `ip rule list`.
   420  func ListRules(family int, filter *Rule) ([]netlink.Rule, error) {
   421  	var nlFilter netlink.Rule
   422  	var mask uint64
   423  
   424  	if filter != nil {
   425  		if filter.From != nil {
   426  			mask |= netlink.RT_FILTER_SRC
   427  			nlFilter.Src = filter.From
   428  		}
   429  		if filter.To != nil {
   430  			mask |= netlink.RT_FILTER_DST
   431  			nlFilter.Dst = filter.To
   432  		}
   433  		if filter.Table != 0 {
   434  			mask |= netlink.RT_FILTER_TABLE
   435  			nlFilter.Table = filter.Table
   436  		}
   437  		if filter.Priority != 0 {
   438  			mask |= netlink.RT_FILTER_PRIORITY
   439  			nlFilter.Priority = filter.Priority
   440  		}
   441  		if filter.Mark != 0 {
   442  			mask |= netlink.RT_FILTER_MARK
   443  			nlFilter.Mark = filter.Mark
   444  		}
   445  		if filter.Mask != 0 {
   446  			mask |= netlink.RT_FILTER_MASK
   447  			nlFilter.Mask = filter.Mask
   448  		}
   449  
   450  		nlFilter.Priority = filter.Priority
   451  		nlFilter.Mark = filter.Mark
   452  		nlFilter.Mask = filter.Mask
   453  		nlFilter.Src = filter.From
   454  		nlFilter.Dst = filter.To
   455  		nlFilter.Table = filter.Table
   456  	}
   457  	return netlink.RuleListFiltered(family, &nlFilter, mask)
   458  }
   459  
// ReplaceRule makes sure an IPv4 routing rule matching spec exists: the rule
// is added unless replaceRule already finds a matching one, in which case
// nothing is changed. Used with BPF datapath to set mark and direct packets to
// route table.
func ReplaceRule(spec Rule) error {
	return replaceRule(spec, netlink.FAMILY_V4)
}
   465  
// ReplaceRuleIPv6 makes sure an IPv6 routing rule matching spec exists: the
// rule is added unless replaceRule already finds a matching one, in which case
// nothing is changed.
func ReplaceRuleIPv6(spec Rule) error {
	return replaceRule(spec, netlink.FAMILY_V6)
}
   471  
   472  func replaceRule(spec Rule, family int) error {
   473  	exists, err := lookupRule(spec, family)
   474  	if err != nil {
   475  		return err
   476  	}
   477  	if exists {
   478  		return nil
   479  	}
   480  	rule := netlink.NewRule()
   481  	rule.Mark = spec.Mark
   482  	rule.Mask = spec.Mask
   483  	rule.Table = spec.Table
   484  	rule.Family = family
   485  	rule.Priority = spec.Priority
   486  	rule.Src = spec.From
   487  	rule.Dst = spec.To
   488  	rule.Protocol = spec.Protocol
   489  	return netlink.RuleAdd(rule)
   490  }
   491  
   492  // DeleteRule delete a mark based rule from the routing table.
   493  func DeleteRule(family int, spec Rule) error {
   494  	rule := netlink.NewRule()
   495  	rule.Mark = spec.Mark
   496  	rule.Mask = spec.Mask
   497  	rule.Table = spec.Table
   498  	rule.Priority = spec.Priority
   499  	rule.Src = spec.From
   500  	rule.Dst = spec.To
   501  	rule.Family = family
   502  	rule.Protocol = spec.Protocol
   503  	return netlink.RuleDel(rule)
   504  }
   505  
   506  func lookupDefaultRoute(family int) (netlink.Route, error) {
   507  	routes, err := netlink.RouteListFiltered(family, &netlink.Route{Dst: nil}, netlink.RT_FILTER_DST)
   508  	if err != nil {
   509  		return netlink.Route{}, fmt.Errorf("Unable to list direct routes: %w", err)
   510  	}
   511  
   512  	sort.Slice(routes, func(i, j int) bool {
   513  		return routes[i].Priority < routes[j].Priority
   514  	})
   515  
   516  	switch {
   517  	case len(routes) == 0:
   518  		return netlink.Route{}, fmt.Errorf("Default route not found for family %d", family)
   519  	case len(routes) > 1 && routes[0].Priority == routes[1].Priority:
   520  		return netlink.Route{}, fmt.Errorf("Found multiple default routes with the same priority: %v vs %v", routes[0], routes[1])
   521  	}
   522  
   523  	log.Debugf("Found default route on node %v", routes[0])
   524  	return routes[0], nil
   525  }
   526  
   527  func DeleteRouteTable(table, family int) error {
   528  	var routeErr error
   529  
   530  	routes, err := netlink.RouteListFiltered(family, &netlink.Route{Table: table}, netlink.RT_FILTER_TABLE)
   531  	if err != nil {
   532  		return fmt.Errorf("Unable to list table %d routes: %w", table, err)
   533  	}
   534  
   535  	routeErr = nil
   536  	for _, route := range routes {
   537  		err := netlink.RouteDel(&route)
   538  		if err != nil {
   539  			routeErr = fmt.Errorf("%w: Failed to delete route: %w", routeErr, err)
   540  		}
   541  	}
   542  	return routeErr
   543  }
   544  
   545  // NodeDeviceWithDefaultRoute returns the node's device which handles the
   546  // default route in the current namespace
   547  func NodeDeviceWithDefaultRoute(enableIPv4, enableIPv6 bool) (netlink.Link, error) {
   548  	linkIndex := 0
   549  	if enableIPv4 {
   550  		route, err := lookupDefaultRoute(netlink.FAMILY_V4)
   551  		if err != nil {
   552  			return nil, err
   553  		}
   554  		linkIndex = route.LinkIndex
   555  	}
   556  	if enableIPv6 {
   557  		route, err := lookupDefaultRoute(netlink.FAMILY_V6)
   558  		if err != nil {
   559  			return nil, err
   560  		}
   561  		if linkIndex != 0 && linkIndex != route.LinkIndex {
   562  			return nil, fmt.Errorf("IPv4/IPv6 have different link indices")
   563  		}
   564  		linkIndex = route.LinkIndex
   565  	}
   566  	link, err := netlink.LinkByIndex(linkIndex)
   567  	if err != nil {
   568  		return nil, err
   569  	}
   570  	return link, nil
   571  }