github.com/cilium/cilium@v1.16.2/pkg/datapath/linux/route/route_linux.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 //go:build linux 5 6 package route 7 8 import ( 9 "fmt" 10 "net" 11 "sort" 12 13 "github.com/vishvananda/netlink" 14 "golang.org/x/sys/unix" 15 16 "github.com/cilium/cilium/pkg/datapath/linux/linux_defaults" 17 "github.com/cilium/cilium/pkg/time" 18 ) 19 20 const ( 21 // RouteReplaceMaxTries is the number of attempts the route will be 22 // attempted to be added or updated in case the kernel returns an error 23 RouteReplaceMaxTries = 10 24 25 // RouteReplaceRetryInterval is the interval in which 26 // RouteReplaceMaxTries attempts are attempted 27 RouteReplaceRetryInterval = 100 * time.Millisecond 28 29 // RTN_LOCAL is a route type used to indicate packet should be "routed" 30 // locally and passed up the stack. Is used by IPSec to force encrypted 31 // packets to pass through XFRM layer. 32 RTN_LOCAL = 0x2 33 34 // MainTable is Linux's default routing table 35 MainTable = 254 36 37 // EncryptRouteProtocol for Encryption specific routes 38 EncryptRouteProtocol = 192 39 ) 40 41 // getNetlinkRoute returns the route configuration as netlink.Route 42 func (r *Route) getNetlinkRoute() netlink.Route { 43 rt := netlink.Route{ 44 Dst: &r.Prefix, 45 Src: r.Local, 46 MTU: r.MTU, 47 Priority: r.Priority, 48 Protocol: netlink.RouteProtocol(r.Proto), 49 Table: r.Table, 50 Type: r.Type, 51 } 52 53 if r.Nexthop != nil { 54 rt.Gw = *r.Nexthop 55 } 56 57 if r.Scope != netlink.SCOPE_UNIVERSE { 58 rt.Scope = r.Scope 59 } else if r.Scope == netlink.SCOPE_UNIVERSE && r.Type == RTN_LOCAL { 60 rt.Scope = netlink.SCOPE_HOST 61 } 62 63 return rt 64 } 65 66 // getNexthopAsIPNet returns the nexthop of the route as IPNet 67 func (r *Route) getNexthopAsIPNet() *net.IPNet { 68 if r.Nexthop == nil { 69 return nil 70 } 71 72 if r.Nexthop.To4() != nil { 73 return &net.IPNet{IP: *r.Nexthop, Mask: net.CIDRMask(32, 32)} 74 } 75 76 return &net.IPNet{IP: *r.Nexthop, Mask: net.CIDRMask(128, 128)} 77 } 78 79 func ipFamily(ip net.IP) int { 80 if ip.To4() == nil { 81 return netlink.FAMILY_V6 82 } 83 84 return netlink.FAMILY_V4 85 } 86 87 // Lookup attempts to find the linux route based on the route specification. 88 // If the route exists, the route is returned, otherwise an error is returned. 89 func Lookup(route Route) (*Route, error) { 90 link, err := netlink.LinkByName(route.Device) 91 if err != nil { 92 return nil, fmt.Errorf("unable to find interface '%s' of route: %w", route.Device, err) 93 } 94 95 routeSpec := route.getNetlinkRoute() 96 routeSpec.LinkIndex = link.Attrs().Index 97 98 nlRoute := lookup(&routeSpec) 99 if nlRoute == nil { 100 return nil, nil 101 } 102 103 result := &Route{ 104 Local: nlRoute.Src, 105 Device: link.Attrs().Name, 106 MTU: nlRoute.MTU, 107 Scope: nlRoute.Scope, 108 Nexthop: &nlRoute.Gw, 109 } 110 111 if nlRoute.Dst != nil { 112 result.Prefix = *nlRoute.Dst 113 } 114 115 return result, nil 116 } 117 118 // lookup finds a particular route as specified by the filter which points 119 // to the specified device. The filter route can have the following fields set: 120 // - Dst 121 // - LinkIndex 122 // - Scope 123 // - Gw 124 func lookup(route *netlink.Route) *netlink.Route { 125 var filter uint64 126 if route.Dst != nil { 127 filter |= netlink.RT_FILTER_DST 128 } 129 if route.Table != 0 { 130 filter |= netlink.RT_FILTER_TABLE 131 } 132 if route.Scope != 0 { 133 filter |= netlink.RT_FILTER_SCOPE 134 } 135 if route.Gw != nil { 136 filter |= netlink.RT_FILTER_GW 137 } 138 if route.LinkIndex != 0 { 139 filter |= netlink.RT_FILTER_OIF 140 } 141 142 routes, err := netlink.RouteListFiltered(ipFamily(route.Dst.IP), route, filter) 143 if err != nil { 144 return nil 145 } 146 147 for _, r := range routes { 148 if r.Dst != nil && route.Dst == nil { 149 continue 150 } 151 152 if route.Dst != nil && r.Dst == nil { 153 continue 154 } 155 156 if route.Table != 0 && route.Table != r.Table { 157 continue 158 } 159 160 aMaskLen, aMaskBits := r.Dst.Mask.Size() 161 bMaskLen, bMaskBits := route.Dst.Mask.Size() 162 if r.Scope == route.Scope && 163 aMaskLen == bMaskLen && aMaskBits == bMaskBits && 164 r.Dst.IP.Equal(route.Dst.IP) && r.Gw.Equal(route.Gw) { 165 return &r 166 } 167 } 168 169 return nil 170 } 171 172 func createNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) *netlink.Route { 173 // This is the L2 route which makes router IP available behind the 174 // interface. 175 rt := &netlink.Route{ 176 LinkIndex: link.Attrs().Index, 177 Dst: routerNet, 178 Table: route.Table, 179 Protocol: linux_defaults.RTProto, 180 } 181 182 // Known issue: scope for IPv6 routes is not propagated correctly. If 183 // we set the scope here, lookup() will be unable to identify the route 184 // again and we will continuously re-add the route 185 if routerNet.IP.To4() != nil { 186 rt.Scope = netlink.SCOPE_LINK 187 } 188 189 return rt 190 } 191 192 // replaceNexthopRoute verifies that the L2 route for the router IP which is 193 // used as nexthop for all node routes is properly installed. If unavailable or 194 // incorrect, it will be replaced with the proper L2 route. 195 func replaceNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) (bool, error) { 196 if err := netlink.RouteReplace(createNexthopRoute(route, link, routerNet)); err != nil { 197 return false, fmt.Errorf("unable to add L2 nexthop route: %w", err) 198 } 199 200 return true, nil 201 } 202 203 // deleteNexthopRoute deletes 204 func deleteNexthopRoute(route Route, link netlink.Link, routerNet *net.IPNet) error { 205 if err := netlink.RouteDel(createNexthopRoute(route, link, routerNet)); err != nil { 206 return fmt.Errorf("unable to delete L2 nexthop route: %w", err) 207 } 208 209 return nil 210 } 211 212 // Upsert adds or updates a Linux kernel route. The route described can be in 213 // the following two forms: 214 // 215 // direct: 216 // 217 // prefix dev foo 218 // 219 // nexthop: 220 // 221 // prefix via nexthop dev foo 222 // 223 // If a nexthop route is specified, this function will check whether a direct 224 // route to the nexthop exists and add if required. This means that the 225 // following two routes will exist afterwards: 226 // 227 // nexthop dev foo 228 // prefix via nexthop dev foo 229 // 230 // Due to a bug in the Linux kernel, the prefix route is attempted to be 231 // updated RouteReplaceMaxTries with an interval of RouteReplaceRetryInterval. 232 // This is a workaround for a race condition in which the direct route to the 233 // nexthop is not available immediately and the prefix route can fail with 234 // EINVAL if the Netlink calls are issued in short order. 235 // 236 // An error is returned if the route can not be added or updated. 237 func Upsert(route Route) error { 238 var nexthopRouteCreated bool 239 240 link, err := netlink.LinkByName(route.Device) 241 if err != nil { 242 return fmt.Errorf("unable to lookup interface %s: %w", route.Device, err) 243 } 244 245 // Can't add local routes to an interface that's down ('lo' in new netns). 246 if link.Attrs().OperState == netlink.OperDown { 247 if err := netlink.LinkSetUp(link); err != nil { 248 return fmt.Errorf("unable to set interface up: %w", err) 249 } 250 } 251 252 routerNet := route.getNexthopAsIPNet() 253 if routerNet != nil { 254 if _, err := replaceNexthopRoute(route, link, routerNet); err != nil { 255 return fmt.Errorf("unable to add nexthop route: %w", err) 256 } 257 258 nexthopRouteCreated = true 259 } 260 261 routeSpec := route.getNetlinkRoute() 262 routeSpec.LinkIndex = link.Attrs().Index 263 264 err = fmt.Errorf("routeReplace not called yet") 265 266 // Workaround: See description of this function 267 for i := 0; err != nil && i < RouteReplaceMaxTries; i++ { 268 err = netlink.RouteReplace(&routeSpec) 269 if err == nil { 270 break 271 } 272 time.Sleep(RouteReplaceRetryInterval) 273 } 274 275 if err != nil { 276 if nexthopRouteCreated { 277 if err2 := deleteNexthopRoute(route, link, routerNet); err2 != nil { 278 // TODO: If this fails, we may want to add some retry logic. 279 log.WithError(err2). 280 Errorf("unable to clean up nexthop route following failure to replace route") 281 } 282 } 283 return err 284 } 285 286 return nil 287 } 288 289 // Delete deletes a Linux route. An error is returned if the route does not 290 // exist or if the route could not be deleted. 291 func Delete(route Route) error { 292 link, err := netlink.LinkByName(route.Device) 293 if err != nil { 294 return fmt.Errorf("unable to lookup interface %s: %w", route.Device, err) 295 } 296 297 // Deletion of routes with Nexthop or Local set fails for IPv6. 298 // Therefore do not use getNetlinkRoute(). 299 routeSpec := netlink.Route{ 300 Dst: &route.Prefix, 301 LinkIndex: link.Attrs().Index, 302 Table: route.Table, 303 } 304 305 // Scope can only be specified for IPv4 306 if route.Prefix.IP.To4() != nil { 307 routeSpec.Scope = route.Scope 308 } 309 310 if err := netlink.RouteDel(&routeSpec); err != nil { 311 return err 312 } 313 314 return nil 315 } 316 317 // Rule is the specification of an IP routing rule 318 type Rule struct { 319 // Priority is the routing rule priority 320 Priority int 321 322 // Mark is the skb mark that needs to match 323 Mark int 324 325 // Mask is the mask to apply to the skb mark before matching the Mark 326 // field 327 Mask int 328 329 // From is the source address selector 330 From *net.IPNet 331 332 // To is the destination address selector 333 To *net.IPNet 334 335 // Table is the routing table to look up if the rule matches 336 Table int 337 338 // Protocol is the routing rule protocol (e.g. proto unspec/kernel) 339 Protocol uint8 340 } 341 342 // String returns the string representation of a Rule (adhering to the Stringer 343 // interface). 344 func (r Rule) String() string { 345 var ( 346 str string 347 from string 348 to string 349 ) 350 351 str += fmt.Sprintf("%d: ", r.Priority) 352 353 if r.From != nil { 354 from = r.From.String() 355 } else { 356 from = "all" 357 } 358 359 if r.To != nil { 360 to = r.To.String() 361 } else { 362 to = "all" 363 } 364 365 if r.Table == unix.RT_TABLE_MAIN { 366 str += fmt.Sprintf("from %s to %s lookup main", from, to) 367 } else { 368 str += fmt.Sprintf("from %s to %s lookup %d", from, to, r.Table) 369 } 370 371 if r.Mark != 0 { 372 str += fmt.Sprintf(" mark 0x%x mask 0x%x", r.Mark, r.Mask) 373 } 374 375 str += fmt.Sprintf(" proto %s", netlink.RouteProtocol(r.Protocol)) 376 377 return str 378 } 379 380 func lookupRule(spec Rule, family int) (bool, error) { 381 rules, err := netlink.RuleList(family) 382 if err != nil { 383 return false, err 384 } 385 for _, r := range rules { 386 if spec.Priority != 0 && spec.Priority != r.Priority { 387 continue 388 } 389 390 if spec.From != nil && (r.Src == nil || r.Src.String() != spec.From.String()) { 391 continue 392 } 393 394 if spec.To != nil && (r.Dst == nil || r.Dst.String() != spec.To.String()) { 395 continue 396 } 397 398 if spec.Mark != 0 && r.Mark != spec.Mark { 399 continue 400 } 401 402 if spec.Mask != 0 && r.Mask != spec.Mask { 403 continue 404 } 405 406 if spec.Protocol != 0 && r.Protocol != spec.Protocol { 407 continue 408 } 409 410 if r.Table == spec.Table { 411 return true, nil 412 } 413 } 414 return false, nil 415 } 416 417 // ListRules will list IP routing rules on Linux, filtered by `filter`. When 418 // `filter` is nil, this function will return all rules, "unfiltered". This 419 // function is meant to replicate the behavior of `ip rule list`. 420 func ListRules(family int, filter *Rule) ([]netlink.Rule, error) { 421 var nlFilter netlink.Rule 422 var mask uint64 423 424 if filter != nil { 425 if filter.From != nil { 426 mask |= netlink.RT_FILTER_SRC 427 nlFilter.Src = filter.From 428 } 429 if filter.To != nil { 430 mask |= netlink.RT_FILTER_DST 431 nlFilter.Dst = filter.To 432 } 433 if filter.Table != 0 { 434 mask |= netlink.RT_FILTER_TABLE 435 nlFilter.Table = filter.Table 436 } 437 if filter.Priority != 0 { 438 mask |= netlink.RT_FILTER_PRIORITY 439 nlFilter.Priority = filter.Priority 440 } 441 if filter.Mark != 0 { 442 mask |= netlink.RT_FILTER_MARK 443 nlFilter.Mark = filter.Mark 444 } 445 if filter.Mask != 0 { 446 mask |= netlink.RT_FILTER_MASK 447 nlFilter.Mask = filter.Mask 448 } 449 450 nlFilter.Priority = filter.Priority 451 nlFilter.Mark = filter.Mark 452 nlFilter.Mask = filter.Mask 453 nlFilter.Src = filter.From 454 nlFilter.Dst = filter.To 455 nlFilter.Table = filter.Table 456 } 457 return netlink.RuleListFiltered(family, &nlFilter, mask) 458 } 459 460 // ReplaceRule add or replace rule in the routing table using a mark to indicate 461 // table. Used with BPF datapath to set mark and direct packets to route table. 462 func ReplaceRule(spec Rule) error { 463 return replaceRule(spec, netlink.FAMILY_V4) 464 } 465 466 // ReplaceRuleIPv6 add or replace IPv6 rule in the routing table using a mark to 467 // indicate table. 468 func ReplaceRuleIPv6(spec Rule) error { 469 return replaceRule(spec, netlink.FAMILY_V6) 470 } 471 472 func replaceRule(spec Rule, family int) error { 473 exists, err := lookupRule(spec, family) 474 if err != nil { 475 return err 476 } 477 if exists { 478 return nil 479 } 480 rule := netlink.NewRule() 481 rule.Mark = spec.Mark 482 rule.Mask = spec.Mask 483 rule.Table = spec.Table 484 rule.Family = family 485 rule.Priority = spec.Priority 486 rule.Src = spec.From 487 rule.Dst = spec.To 488 rule.Protocol = spec.Protocol 489 return netlink.RuleAdd(rule) 490 } 491 492 // DeleteRule delete a mark based rule from the routing table. 493 func DeleteRule(family int, spec Rule) error { 494 rule := netlink.NewRule() 495 rule.Mark = spec.Mark 496 rule.Mask = spec.Mask 497 rule.Table = spec.Table 498 rule.Priority = spec.Priority 499 rule.Src = spec.From 500 rule.Dst = spec.To 501 rule.Family = family 502 rule.Protocol = spec.Protocol 503 return netlink.RuleDel(rule) 504 } 505 506 func lookupDefaultRoute(family int) (netlink.Route, error) { 507 routes, err := netlink.RouteListFiltered(family, &netlink.Route{Dst: nil}, netlink.RT_FILTER_DST) 508 if err != nil { 509 return netlink.Route{}, fmt.Errorf("Unable to list direct routes: %w", err) 510 } 511 512 sort.Slice(routes, func(i, j int) bool { 513 return routes[i].Priority < routes[j].Priority 514 }) 515 516 switch { 517 case len(routes) == 0: 518 return netlink.Route{}, fmt.Errorf("Default route not found for family %d", family) 519 case len(routes) > 1 && routes[0].Priority == routes[1].Priority: 520 return netlink.Route{}, fmt.Errorf("Found multiple default routes with the same priority: %v vs %v", routes[0], routes[1]) 521 } 522 523 log.Debugf("Found default route on node %v", routes[0]) 524 return routes[0], nil 525 } 526 527 func DeleteRouteTable(table, family int) error { 528 var routeErr error 529 530 routes, err := netlink.RouteListFiltered(family, &netlink.Route{Table: table}, netlink.RT_FILTER_TABLE) 531 if err != nil { 532 return fmt.Errorf("Unable to list table %d routes: %w", table, err) 533 } 534 535 routeErr = nil 536 for _, route := range routes { 537 err := netlink.RouteDel(&route) 538 if err != nil { 539 routeErr = fmt.Errorf("%w: Failed to delete route: %w", routeErr, err) 540 } 541 } 542 return routeErr 543 } 544 545 // NodeDeviceWithDefaultRoute returns the node's device which handles the 546 // default route in the current namespace 547 func NodeDeviceWithDefaultRoute(enableIPv4, enableIPv6 bool) (netlink.Link, error) { 548 linkIndex := 0 549 if enableIPv4 { 550 route, err := lookupDefaultRoute(netlink.FAMILY_V4) 551 if err != nil { 552 return nil, err 553 } 554 linkIndex = route.LinkIndex 555 } 556 if enableIPv6 { 557 route, err := lookupDefaultRoute(netlink.FAMILY_V6) 558 if err != nil { 559 return nil, err 560 } 561 if linkIndex != 0 && linkIndex != route.LinkIndex { 562 return nil, fmt.Errorf("IPv4/IPv6 have different link indices") 563 } 564 linkIndex = route.LinkIndex 565 } 566 link, err := netlink.LinkByIndex(linkIndex) 567 if err != nil { 568 return nil, err 569 } 570 return link, nil 571 }