github.com/cilium/cilium@v1.16.2/pkg/datapath/linux/routing/migrate_test.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package linuxrouting 5 6 import ( 7 "errors" 8 "fmt" 9 "net" 10 "os/exec" 11 "testing" 12 13 "github.com/stretchr/testify/require" 14 "github.com/vishvananda/netlink" 15 16 "github.com/cilium/cilium/pkg/datapath/linux/linux_defaults" 17 "github.com/cilium/cilium/pkg/testutils" 18 "github.com/cilium/cilium/pkg/testutils/netns" 19 ) 20 21 type MigrateSuite struct { 22 // rpdb interface mock 23 OnRuleList func(int) ([]netlink.Rule, error) 24 OnRuleAdd func(*netlink.Rule) error 25 OnRuleDel func(*netlink.Rule) error 26 27 OnRouteListFiltered func(int, *netlink.Route, uint64) ([]netlink.Route, error) 28 OnRouteAdd func(*netlink.Route) error 29 OnRouteDel func(*netlink.Route) error 30 OnRouteReplace func(*netlink.Route) error 31 32 OnLinkList func() ([]netlink.Link, error) 33 OnLinkByIndex func(int) (netlink.Link, error) 34 35 // interfaceDB interface mock 36 OnGetInterfaceNumberByMAC func(mac string) (int, error) 37 OnGetMACByInterfaceNumber func(ifaceNum int) (string, error) 38 } 39 40 func setupMigrateSuite(tb testing.TB) *MigrateSuite { 41 testutils.PrivilegedTest(tb) 42 return &MigrateSuite{} 43 } 44 45 // n is the number of devices, routes, and rules that will be created in 46 // setUpRoutingTable() as fixtures for this test suite. 47 const n = 5 48 49 func TestMigrateENIDatapathUpgradeSuccess(t *testing.T) { 50 m := setupMigrateSuite(t) 51 // First, we need to setupMigrateSuite the Linux routing policy database to mimic a 52 // broken setupMigrateSuite (1). Then we will call MigrateENIDatapath (2). 53 54 // This test case will cover the successful path. We will create: 55 // - One rule with the old priority referencing the old table ID. 56 // - One route with the old table ID. 57 // After we call MigrateENIDatapath(), we assert that: 58 // - The rule has switched to the new priority and references the new 59 // table ID. 60 // - The route has the new table ID. 61 62 ns := netns.NewNetNS(t) 63 ns.Do(func() error { 64 // (1) Setting up the routing table. 65 66 // Pick an arbitrary iface index. In the old table ID scheme, we used this 67 // index as the table ID. All the old rules and routes will be set up with 68 // this table ID. 69 index := 5 70 tableID := 11 71 72 // (1) Setting up the routing table for testing upgrade. 73 // 74 // The reason we pass index twice is because we want to use the ifindex as 75 // the table ID. 76 devIfNumLookup, _ := setUpRoutingTable(t, index, index, linux_defaults.RulePriorityEgress) 77 78 // Set up the rpdb mocks to just forward to netlink implementation. 79 m.defaultNetlinkMock() 80 81 // Set up the interfaceDB mock. We don't actually need to search by MAC 82 // address in this test because we only have just one device. The actual 83 // implementation will search the CiliumNode resource for the ENI device 84 // matching. 85 m.OnGetInterfaceNumberByMAC = func(mac string) (int, error) { 86 // In setUpRoutingTable(), we used an arbitrary scheme that maps 87 // each device created with an interface number of loop count (i) 88 // plus one. 89 return devIfNumLookup[mac], nil 90 } 91 92 // (2) Make the call to modifying the routing table. 93 mig := migrator{rpdb: m, getter: m} 94 migrated, failed := mig.MigrateENIDatapath(false) 95 require.Equal(t, n, migrated) 96 require.Equal(t, 0, failed) 97 98 routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 99 Table: index, 100 }, netlink.RT_FILTER_TABLE) 101 require.Nil(t, err) 102 require.Equal(t, 0, len(routes)) // We don't expect any routes with the old table ID. 103 104 routes, err = netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 105 Table: tableID, 106 }, netlink.RT_FILTER_TABLE) 107 require.Nil(t, err) 108 require.Equal(t, 1, len(routes)) // We only expect one route that we created above in the setupMigrateSuite. 109 require.NotEqual(t, index, routes[0].Table) 110 111 rules, err := findRulesByPriority(linux_defaults.RulePriorityEgress) 112 require.Nil(t, err) 113 require.Equal(t, 0, len(rules)) // We don't expect any rules from old priority. 114 115 rules, err = findRulesByPriority(linux_defaults.RulePriorityEgressv2) 116 require.Nil(t, err) 117 require.Equal(t, 5, len(rules)) // We expect all rules to be migrated to new priority. 118 require.NotEqual(t, index, rules[0].Table) 119 return nil 120 }) 121 } 122 123 func TestMigrateENIDatapathUpgradeFailure(t *testing.T) { 124 // This test case will cover one failure path where we successfully migrate 125 // all the old rules and routes, but fail to cleanup the old rule. This 126 // test case will be set up identically to the successful case. After we 127 // call MigrateENIDatapath(), we assert that we failed to migrate 1 rule. 128 // We assert that the revert of the upgrade was successfully as well, 129 // meaning we expect the old rules and routes to be reinstated. 130 m := setupMigrateSuite(t) 131 132 ns := netns.NewNetNS(t) 133 ns.Do(func() error { 134 index := 5 135 devIfNumLookup, _ := setUpRoutingTable(t, index, index, linux_defaults.RulePriorityEgress) 136 137 m.defaultNetlinkMock() 138 139 // Here we inject the error on deleting a rule. The first call we want to 140 // fail, but the second we want to succeed, because that will be the 141 // revert. 142 var onRuleDelCount int 143 m.OnRuleDel = func(r *netlink.Rule) error { 144 if onRuleDelCount == 0 { 145 onRuleDelCount++ 146 return errors.New("fake error") 147 } 148 return netlink.RuleDel(r) 149 } 150 151 // Set up the interfaceDB mock. We don't actually need to search by MAC 152 // address in this test because we only have just one device. The actual 153 // implementation will search the CiliumNode resource for the ENI device 154 // matching. 155 m.OnGetInterfaceNumberByMAC = func(mac string) (int, error) { 156 // In setUpRoutingTable(), we used an arbitrary scheme that maps 157 // each device created with an interface number of loop count (i) 158 // plus one. 159 return devIfNumLookup[mac], nil 160 } 161 162 mig := migrator{rpdb: m, getter: m} 163 migrated, failed := mig.MigrateENIDatapath(false) 164 require.Equal(t, 4, migrated) 165 require.Equal(t, 1, failed) 166 167 tableID := 11 168 routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 169 Table: index, 170 }, netlink.RT_FILTER_TABLE) 171 require.Nil(t, err) 172 require.Equal(t, 1, len(routes)) // We expect old route to be untouched b/c we failed. 173 require.Equal(t, index, routes[0].Table) 174 175 routes, err = netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 176 Table: tableID, 177 }, netlink.RT_FILTER_TABLE) 178 require.Nil(t, err) 179 require.Equal(t, 0, len(routes)) // We don't expect any routes under new table ID b/c of revert. 180 181 rules, err := findRulesByPriority(linux_defaults.RulePriorityEgress) 182 require.Nil(t, err) 183 require.Equal(t, 1, len(rules)) // We expect the old rule to be reinstated. 184 require.Equal(t, index, rules[0].Table) 185 186 rules, err = findRulesByPriority(linux_defaults.RulePriorityEgressv2) 187 require.Nil(t, err) 188 require.Equal(t, 4, len(rules)) // We expect the rest of the rules to be upgraded. 189 return nil 190 }) 191 } 192 193 func TestMigrateENIDatapathDowngradeSuccess(t *testing.T) { 194 // This test case will cover the successful downgrade path. We will create: 195 // - One rule with the new priority referencing the new table ID. 196 // - One route with the new table ID. 197 // After we call MigrateENIDatapath(), we assert that: 198 // - The rule has switched to the old priority and references the old 199 // table ID. 200 // - The route has the old table ID. 201 m := setupMigrateSuite(t) 202 ns := netns.NewNetNS(t) 203 ns.Do(func() error { 204 // (1) Setting up the routing table. 205 206 // Pick an arbitrary table ID. In the new table ID scheme, it is the 207 // interface number + an offset of 10 208 // (linux_defaults.RouteTableInterfacesOffset). 209 // 210 // Pick an ifindex and table ID. 211 index := 5 212 tableID := 11 213 214 // (1) Setting up the routing table for testing downgrade, hence creating 215 // rules with RulePriorityEgressv2. 216 _, devMACLookup := setUpRoutingTable(t, index, tableID, linux_defaults.RulePriorityEgressv2) 217 218 // Set up the rpdb mocks to just forward to netlink implementation. 219 m.defaultNetlinkMock() 220 221 // Set up the interfaceDB mock. The MAC address returned is coming from the 222 // dummy ENI device we set up in setUpRoutingTable(). The actual 223 // implementation will search the CiliumNode resource for the ENI device 224 // matching. 225 m.OnGetMACByInterfaceNumber = func(i int) (string, error) { 226 // In setUpRoutingTable(), we used an arbitrary scheme for the 227 // device name. It is simply the loop counter. 228 return devMACLookup[fmt.Sprintf("gotestdummy%d", i)], nil 229 } 230 231 // (2) Make the call to modifying the routing table. 232 mig := migrator{rpdb: m, getter: m} 233 migrated, failed := mig.MigrateENIDatapath(true) 234 require.Equal(t, n, migrated) 235 require.Equal(t, 0, failed) 236 237 routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 238 Table: tableID, 239 }, netlink.RT_FILTER_TABLE) 240 require.Nil(t, err) 241 require.Equal(t, 0, len(routes)) // We don't expect any routes with the new table ID. 242 243 routes, err = netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 244 Table: index, 245 }, netlink.RT_FILTER_TABLE) 246 require.Nil(t, err) 247 require.Equal(t, 1, len(routes)) // We only expect one route with the old table ID. 248 require.NotEqual(t, tableID, routes[0].Table) 249 250 rules, err := findRulesByPriority(linux_defaults.RulePriorityEgressv2) 251 require.Nil(t, err) 252 require.Equal(t, 0, len(rules)) // We don't expect any rules with this priority. 253 254 rules, err = findRulesByPriority(linux_defaults.RulePriorityEgress) 255 require.Nil(t, err) 256 require.Equal(t, 5, len(rules)) // We expect all rules to have the original priority. 257 require.NotEqual(t, tableID, rules[0].Table) 258 return nil 259 }) 260 } 261 262 func TestMigrateENIDatapathDowngradeFailure(t *testing.T) { 263 // This test case will cover one downgrade failure path where we failed to 264 // migrate the rule to the old scheme. This test case will be set up 265 // identically to the successful case. "New" meaning the rules and routes 266 // using the new datapath scheme, hence downgrading. After we call 267 // MigrateENIDatapath(), we assert that we failed to migrate 1 rule. We 268 // assert that the revert of the downgrade was successfully as well, 269 // meaning we expect the "newer" rules and routes to be reinstated. 270 m := setupMigrateSuite(t) 271 ns := netns.NewNetNS(t) 272 ns.Do(func() error { 273 index := 5 274 tableID := 11 275 _, devMACLookup := setUpRoutingTable(t, index, tableID, linux_defaults.RulePriorityEgressv2) 276 277 m.defaultNetlinkMock() 278 279 // Here we inject the error on adding a rule. The first call we want to 280 // fail, but the second we want to succeed, because that will be the 281 // revert. 282 var onRuleAddCount int 283 m.OnRuleAdd = func(r *netlink.Rule) error { 284 if onRuleAddCount == 0 { 285 onRuleAddCount++ 286 return errors.New("fake error") 287 } 288 return netlink.RuleAdd(r) 289 } 290 291 // Set up the interfaceDB mock. The MAC address returned is coming from the 292 // dummy ENI device we set up in setUpRoutingTable(). 293 m.OnGetMACByInterfaceNumber = func(i int) (string, error) { 294 // In setUpRoutingTable(), we used an arbitrary scheme for the 295 // device name. It is simply the loop counter. 296 return devMACLookup[fmt.Sprintf("gotestdummy%d", i)], nil 297 } 298 299 mig := migrator{rpdb: m, getter: m} 300 migrated, failed := mig.MigrateENIDatapath(true) 301 require.Equal(t, n-1, migrated) // One failed migration. 302 require.Equal(t, 1, failed) 303 304 routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 305 Table: tableID, 306 }, netlink.RT_FILTER_TABLE) 307 require.Nil(t, err) 308 require.Equal(t, 1, len(routes)) // We expect "new" route to be untouched b/c we failed to delete. 309 require.Equal(t, tableID, routes[0].Table) 310 311 routes, err = netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 312 Table: index, 313 }, netlink.RT_FILTER_TABLE) 314 require.Nil(t, err) 315 require.Equal(t, 0, len(routes)) // We don't expect routes under original table ID b/c of revert. 316 317 rules, err := findRulesByPriority(linux_defaults.RulePriorityEgressv2) 318 require.Nil(t, err) 319 require.Equal(t, 1, len(rules)) // We expect the "new" rule to be reinstated. 320 require.Equal(t, tableID, rules[0].Table) 321 322 rules, err = findRulesByPriority(linux_defaults.RulePriorityEgress) 323 require.Nil(t, err) 324 require.Equal(t, n-1, len(rules)) // Successfully migrated rules. 325 return nil 326 }) 327 } 328 329 func TestMigrateENIDatapathPartial(t *testing.T) { 330 // This test case will cover one case where we find a partial rule. It will 331 // be set up with a rule with the newer priority and the user has indicated 332 // compatbility=false, meaning they intend to upgrade. The fact that 333 // there's already a rule with a newer priority indicates that a previous 334 // migration has taken place and potentially failed. This simulates Cilium 335 // starting up from a potentially failed previous migration. 336 // After we call MigrateENIDatapath(), we assert that: 337 // - We still upgrade the remaining rules that need to be migrated. 338 // - We ignore the partially migrated rule. 339 m := setupMigrateSuite(t) 340 341 ns := netns.NewNetNS(t) 342 ns.Do(func() error { 343 index := 5 344 // ifaceNumber := 1 345 newTableID := 11 346 347 devIfNumLookup, _ := setUpRoutingTable(t, index, index, linux_defaults.RulePriorityEgress) 348 349 // Insert fake rule that has the newer priority to simulate it as 350 // "partially migrated". 351 err := exec.Command("ip", "rule", "add", 352 "from", "10.1.0.0/24", 353 "to", "all", 354 "table", fmt.Sprintf("%d", newTableID), 355 "priority", fmt.Sprintf("%d", linux_defaults.RulePriorityEgressv2)).Run() 356 require.Nil(t, err) 357 358 m.defaultNetlinkMock() 359 360 m.OnGetInterfaceNumberByMAC = func(mac string) (int, error) { 361 return devIfNumLookup[mac], nil 362 } 363 364 mig := migrator{rpdb: m, getter: m} 365 migrated, failed := mig.MigrateENIDatapath(false) 366 require.Equal(t, n, migrated) 367 require.Equal(t, 0, failed) 368 369 routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 370 Table: newTableID, 371 }, netlink.RT_FILTER_TABLE) 372 require.Nil(t, err) 373 require.Equal(t, 1, len(routes)) // We expect one migrated route. 374 require.Equal(t, newTableID, routes[0].Table) 375 376 routes, err = netlink.RouteListFiltered(netlink.FAMILY_V4, &netlink.Route{ 377 Table: index, 378 }, netlink.RT_FILTER_TABLE) 379 require.Nil(t, err) 380 require.Equal(t, 0, len(routes)) // We don't expect any routes under old table ID. 381 382 rules, err := findRulesByPriority(linux_defaults.RulePriorityEgressv2) 383 require.Nil(t, err) 384 require.Equal(t, n+1, len(rules)) // We expect all migrated rules and the partially migrated rule. 385 require.Equal(t, newTableID, rules[0].Table) 386 require.Equal(t, newTableID, rules[1].Table) 387 388 rules, err = findRulesByPriority(linux_defaults.RulePriorityEgress) 389 require.Nil(t, err) 390 require.Equal(t, 0, len(rules)) // We don't expect any rules with the old priority. 391 392 return nil 393 }) 394 } 395 396 // setUpRoutingTable initializes the routing table for this test suite. The 397 // starting ifindex, tableID, and the priority are passed in to give contron to 398 // the caller on the setupMigrateSuite. The two return values are: 399 // 1. Map of string to int, representing a mapping from MAC addrs to 400 // interface numbers. 401 // 2. Map of string to string, representing a mapping from device name to MAC 402 // addrs. 403 // 404 // (1) is used for the upgrade test cases where the GetInterfaceNumberByMAC 405 // mock is used. (2) is used for the downgrade test cases where the 406 // GetMACByInterfaceNumber mock is used. These maps are used in their 407 // respectives mocks to return the desired result data depending on the test. 408 func setUpRoutingTable(t *testing.T, ifindex, tableID, priority int) (map[string]int, map[string]string) { 409 devIfNum := make(map[string]int) 410 devMAC := make(map[string]string) 411 412 // Create n sets of a dummy interface, a route, and a rule. 413 // 414 // Each dummy interface has a /24 from the private range of 172.16.0.0/20. 415 // 416 // Each route will be a default route to the gateway IP of the interface's 417 // subnet. 418 // 419 // Each rule will be from the interface's subnet to all. 420 for i := 1; i <= n; i++ { 421 devName := fmt.Sprintf("gotestdummy%d", i) 422 423 gw := net.ParseIP(fmt.Sprintf("172.16.%d.1", i)) 424 _, linkCIDR, err := net.ParseCIDR(fmt.Sprintf("172.16.%d.2/24", i)) 425 require.Nil(t, err) 426 427 linkIndex := ifindex + (i - 1) 428 newTableID := tableID + (i - 1) 429 430 dummyTmpl := &netlink.Dummy{ 431 LinkAttrs: netlink.LinkAttrs{ 432 Name: devName, 433 Index: linkIndex, 434 }, 435 } 436 require.Nil(t, netlink.LinkAdd(dummyTmpl)) 437 require.Nil(t, netlink.LinkSetUp(dummyTmpl)) 438 require.Nil(t, netlink.AddrAdd(dummyTmpl, &netlink.Addr{ 439 IPNet: linkCIDR, 440 })) 441 require.Nil(t, netlink.RouteAdd(&netlink.Route{ 442 Dst: &net.IPNet{IP: net.IPv4zero, Mask: net.CIDRMask(0, 32)}, 443 Gw: gw, 444 LinkIndex: dummyTmpl.Index, 445 Table: newTableID, 446 })) 447 448 rule := netlink.NewRule() 449 rule.Src = linkCIDR 450 rule.Priority = priority 451 rule.Table = newTableID 452 require.Nil(t, netlink.RuleAdd(rule)) 453 454 // Return the MAC address of the dummy device, which acts as the ENI. 455 link, err := netlink.LinkByName(devName) 456 require.Nil(t, err) 457 458 mac := link.Attrs().HardwareAddr.String() 459 460 // Arbitrarily use an offset of 1 as the interface number. It doesn't 461 // matter as long as we're consistent. 462 devIfNum[mac] = i 463 devMAC[devName] = mac 464 } 465 466 return devIfNum, devMAC 467 } 468 469 func findRulesByPriority(prio int) ([]netlink.Rule, error) { 470 rules, err := netlink.RuleList(netlink.FAMILY_V4) 471 if err != nil { 472 return nil, err 473 } 474 475 return filterRulesByPriority(rules, prio), nil 476 } 477 478 func (m *MigrateSuite) defaultNetlinkMock() { 479 m.OnRuleList = func(family int) ([]netlink.Rule, error) { return netlink.RuleList(family) } 480 m.OnRuleAdd = func(rule *netlink.Rule) error { return netlink.RuleAdd(rule) } 481 m.OnRuleDel = func(rule *netlink.Rule) error { return netlink.RuleDel(rule) } 482 m.OnRouteListFiltered = func(family int, filter *netlink.Route, mask uint64) ([]netlink.Route, error) { 483 return netlink.RouteListFiltered(family, filter, mask) 484 } 485 m.OnRouteAdd = func(route *netlink.Route) error { return netlink.RouteAdd(route) } 486 m.OnRouteDel = func(route *netlink.Route) error { return netlink.RouteDel(route) } 487 m.OnRouteReplace = func(route *netlink.Route) error { return netlink.RouteReplace(route) } 488 m.OnLinkList = func() ([]netlink.Link, error) { return netlink.LinkList() } 489 m.OnLinkByIndex = func(ifindex int) (netlink.Link, error) { return netlink.LinkByIndex(ifindex) } 490 } 491 492 func (m *MigrateSuite) RuleList(family int) ([]netlink.Rule, error) { 493 if m.OnRuleList != nil { 494 return m.OnRuleList(family) 495 } 496 panic("OnRuleList should not have been called") 497 } 498 499 func (m *MigrateSuite) RuleAdd(rule *netlink.Rule) error { 500 if m.OnRuleAdd != nil { 501 return m.OnRuleAdd(rule) 502 } 503 panic("OnRuleAdd should not have been called") 504 } 505 506 func (m *MigrateSuite) RuleDel(rule *netlink.Rule) error { 507 if m.OnRuleDel != nil { 508 return m.OnRuleDel(rule) 509 } 510 panic("OnRuleDel should not have been called") 511 } 512 513 func (m *MigrateSuite) RouteListFiltered(family int, filter *netlink.Route, mask uint64) ([]netlink.Route, error) { 514 if m.OnRouteListFiltered != nil { 515 return m.OnRouteListFiltered(family, filter, mask) 516 } 517 panic("OnRouteListFiltered should not have been called") 518 } 519 520 func (m *MigrateSuite) RouteAdd(route *netlink.Route) error { 521 if m.OnRouteAdd != nil { 522 return m.OnRouteAdd(route) 523 } 524 panic("OnRouteAdd should not have been called") 525 } 526 527 func (m *MigrateSuite) RouteDel(route *netlink.Route) error { 528 if m.OnRouteDel != nil { 529 return m.OnRouteDel(route) 530 } 531 panic("OnRouteDel should not have been called") 532 } 533 534 func (m *MigrateSuite) RouteReplace(route *netlink.Route) error { 535 if m.OnRouteReplace != nil { 536 return m.OnRouteReplace(route) 537 } 538 panic("OnRouteReplace should not have been called") 539 } 540 541 func (m *MigrateSuite) LinkList() ([]netlink.Link, error) { 542 if m.OnLinkList != nil { 543 return m.OnLinkList() 544 } 545 panic("OnLinkList should not have been called") 546 } 547 548 func (m *MigrateSuite) LinkByIndex(ifindex int) (netlink.Link, error) { 549 if m.OnLinkByIndex != nil { 550 return m.OnLinkByIndex(ifindex) 551 } 552 panic("OnLinkByIndex should not have been called") 553 } 554 555 func (m *MigrateSuite) GetInterfaceNumberByMAC(mac string) (int, error) { 556 if m.OnGetInterfaceNumberByMAC != nil { 557 return m.OnGetInterfaceNumberByMAC(mac) 558 } 559 panic("OnGetInterfaceNumberByMAC should not have been called") 560 } 561 562 func (m *MigrateSuite) GetMACByInterfaceNumber(ifaceNum int) (string, error) { 563 if m.OnGetMACByInterfaceNumber != nil { 564 return m.OnGetMACByInterfaceNumber(ifaceNum) 565 } 566 panic("OnGetMACByInterfaceNumber should not have been called") 567 }