github.com/k8snetworkplumbingwg/sriov-network-operator@v1.2.1-0.20240408194816-2d2e5a45d453/pkg/host/internal/sriov/sriov.go (about) 1 package sriov 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "path/filepath" 8 "strconv" 9 "strings" 10 "syscall" 11 "time" 12 13 "github.com/jaypipes/ghw" 14 "github.com/vishvananda/netlink" 15 "k8s.io/apimachinery/pkg/util/wait" 16 "sigs.k8s.io/controller-runtime/pkg/log" 17 18 sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" 19 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" 20 dputilsPkg "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/dputils" 21 netlinkPkg "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/internal/lib/netlink" 22 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/store" 23 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/host/types" 24 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/utils" 25 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" 26 ) 27 28 type interfaceToConfigure struct { 29 iface sriovnetworkv1.Interface 30 ifaceStatus sriovnetworkv1.InterfaceExt 31 } 32 33 type sriov struct { 34 utilsHelper utils.CmdInterface 35 kernelHelper types.KernelInterface 36 networkHelper types.NetworkInterface 37 udevHelper types.UdevInterface 38 vdpaHelper types.VdpaInterface 39 netlinkLib netlinkPkg.NetlinkLib 40 dputilsLib dputilsPkg.DPUtilsLib 41 } 42 43 func New(utilsHelper utils.CmdInterface, 44 kernelHelper types.KernelInterface, 45 networkHelper types.NetworkInterface, 46 udevHelper types.UdevInterface, 47 vdpaHelper types.VdpaInterface, 48 netlinkLib netlinkPkg.NetlinkLib, 49 dputilsLib dputilsPkg.DPUtilsLib) types.SriovInterface { 50 return &sriov{utilsHelper: utilsHelper, 51 kernelHelper: kernelHelper, 52 networkHelper: networkHelper, 53 udevHelper: udevHelper, 54 vdpaHelper: vdpaHelper, 55 netlinkLib: netlinkLib, 56 dputilsLib: dputilsLib, 57 } 58 } 59 60 func (s 
*sriov) SetSriovNumVfs(pciAddr string, numVfs int) error { 61 log.Log.V(2).Info("SetSriovNumVfs(): set NumVfs", "device", pciAddr, "numVfs", numVfs) 62 numVfsFilePath := filepath.Join(vars.FilesystemRoot, consts.SysBusPciDevices, pciAddr, consts.NumVfsFile) 63 bs := []byte(strconv.Itoa(numVfs)) 64 err := os.WriteFile(numVfsFilePath, []byte("0"), os.ModeAppend) 65 if err != nil { 66 log.Log.Error(err, "SetSriovNumVfs(): fail to reset NumVfs file", "path", numVfsFilePath) 67 return err 68 } 69 if numVfs == 0 { 70 return nil 71 } 72 err = os.WriteFile(numVfsFilePath, bs, os.ModeAppend) 73 if err != nil { 74 log.Log.Error(err, "SetSriovNumVfs(): fail to set NumVfs file", "path", numVfsFilePath) 75 return err 76 } 77 return nil 78 } 79 80 func (s *sriov) ResetSriovDevice(ifaceStatus sriovnetworkv1.InterfaceExt) error { 81 log.Log.V(2).Info("ResetSriovDevice(): reset SRIOV device", "address", ifaceStatus.PciAddress) 82 if ifaceStatus.LinkType == consts.LinkTypeETH { 83 var mtu int 84 eswitchMode := sriovnetworkv1.ESwithModeLegacy 85 is := sriovnetworkv1.InitialState.GetInterfaceStateByPciAddress(ifaceStatus.PciAddress) 86 if is != nil { 87 mtu = is.Mtu 88 eswitchMode = sriovnetworkv1.GetEswitchModeFromStatus(is) 89 } else { 90 mtu = 1500 91 } 92 log.Log.V(2).Info("ResetSriovDevice(): reset mtu", "value", mtu) 93 if err := s.networkHelper.SetNetdevMTU(ifaceStatus.PciAddress, mtu); err != nil { 94 return err 95 } 96 log.Log.V(2).Info("ResetSriovDevice(): reset eswitch mode and number of VFs", "mode", eswitchMode) 97 if err := s.setEswitchModeAndNumVFs(ifaceStatus.PciAddress, eswitchMode, 0); err != nil { 98 return err 99 } 100 } else if ifaceStatus.LinkType == consts.LinkTypeIB { 101 if err := s.SetSriovNumVfs(ifaceStatus.PciAddress, 0); err != nil { 102 return err 103 } 104 if err := s.networkHelper.SetNetdevMTU(ifaceStatus.PciAddress, 2048); err != nil { 105 return err 106 } 107 } 108 return nil 109 } 110 111 func (s *sriov) GetVfInfo(pciAddr string, devices 
[]*ghw.PCIDevice) sriovnetworkv1.VirtualFunction { 112 driver, err := s.dputilsLib.GetDriverName(pciAddr) 113 if err != nil { 114 log.Log.Error(err, "getVfInfo(): unable to parse device driver", "device", pciAddr) 115 } 116 id, err := s.dputilsLib.GetVFID(pciAddr) 117 if err != nil { 118 log.Log.Error(err, "getVfInfo(): unable to get VF index", "device", pciAddr) 119 } 120 vf := sriovnetworkv1.VirtualFunction{ 121 PciAddress: pciAddr, 122 Driver: driver, 123 VfID: id, 124 VdpaType: s.vdpaHelper.DiscoverVDPAType(pciAddr), 125 } 126 127 if name := s.networkHelper.TryGetInterfaceName(pciAddr); name != "" { 128 link, err := s.netlinkLib.LinkByName(name) 129 if err != nil { 130 log.Log.Error(err, "getVfInfo(): unable to get VF Link Object", "name", name, "device", pciAddr) 131 } else { 132 vf.Name = name 133 vf.Mtu = link.Attrs().MTU 134 vf.Mac = link.Attrs().HardwareAddr.String() 135 } 136 } 137 138 for _, device := range devices { 139 if pciAddr == device.Address { 140 vf.Vendor = device.Vendor.ID 141 vf.DeviceID = device.Product.ID 142 break 143 } 144 } 145 return vf 146 } 147 148 func (s *sriov) SetVfGUID(vfAddr string, pfLink netlink.Link) error { 149 log.Log.Info("SetVfGUID()", "vf", vfAddr) 150 vfID, err := s.dputilsLib.GetVFID(vfAddr) 151 if err != nil { 152 log.Log.Error(err, "SetVfGUID(): unable to get VF id", "address", vfAddr) 153 return err 154 } 155 guid := utils.GenerateRandomGUID() 156 if err := s.netlinkLib.LinkSetVfNodeGUID(pfLink, vfID, guid); err != nil { 157 return err 158 } 159 if err := s.netlinkLib.LinkSetVfPortGUID(pfLink, vfID, guid); err != nil { 160 return err 161 } 162 if err = s.kernelHelper.Unbind(vfAddr); err != nil { 163 return err 164 } 165 166 return nil 167 } 168 169 func (s *sriov) VFIsReady(pciAddr string) (netlink.Link, error) { 170 log.Log.Info("VFIsReady()", "device", pciAddr) 171 var err error 172 var vfLink netlink.Link 173 err = wait.PollImmediate(time.Second, 10*time.Second, func() (bool, error) { 174 vfName := 
s.networkHelper.TryGetInterfaceName(pciAddr) 175 vfLink, err = s.netlinkLib.LinkByName(vfName) 176 if err != nil { 177 log.Log.Error(err, "VFIsReady(): unable to get VF link", "device", pciAddr) 178 } 179 return err == nil, nil 180 }) 181 if err != nil { 182 return vfLink, err 183 } 184 return vfLink, nil 185 } 186 187 func (s *sriov) SetVfAdminMac(vfAddr string, pfLink, vfLink netlink.Link) error { 188 log.Log.Info("SetVfAdminMac()", "vf", vfAddr) 189 190 vfID, err := s.dputilsLib.GetVFID(vfAddr) 191 if err != nil { 192 log.Log.Error(err, "SetVfAdminMac(): unable to get VF id", "address", vfAddr) 193 return err 194 } 195 196 if err := s.netlinkLib.LinkSetVfHardwareAddr(pfLink, vfID, vfLink.Attrs().HardwareAddr); err != nil { 197 return err 198 } 199 200 return nil 201 } 202 203 func (s *sriov) DiscoverSriovDevices(storeManager store.ManagerInterface) ([]sriovnetworkv1.InterfaceExt, error) { 204 log.Log.V(2).Info("DiscoverSriovDevices") 205 pfList := []sriovnetworkv1.InterfaceExt{} 206 207 pci, err := ghw.PCI() 208 if err != nil { 209 return nil, fmt.Errorf("DiscoverSriovDevices(): error getting PCI info: %v", err) 210 } 211 212 devices := pci.ListDevices() 213 if len(devices) == 0 { 214 return nil, fmt.Errorf("DiscoverSriovDevices(): could not retrieve PCI devices") 215 } 216 217 for _, device := range devices { 218 devClass, err := strconv.ParseInt(device.Class.ID, 16, 64) 219 if err != nil { 220 log.Log.Error(err, "DiscoverSriovDevices(): unable to parse device class, skipping", 221 "device", device) 222 continue 223 } 224 if devClass != consts.NetClass { 225 // Not network device 226 continue 227 } 228 229 // TODO: exclude devices used by host system 230 231 if s.dputilsLib.IsSriovVF(device.Address) { 232 continue 233 } 234 235 if !vars.DevMode { 236 if !sriovnetworkv1.IsSupportedModel(device.Vendor.ID, device.Product.ID) { 237 log.Log.Info("DiscoverSriovDevices(): unsupported device", "device", device) 238 continue 239 } 240 } 241 242 driver, err := 
s.dputilsLib.GetDriverName(device.Address) 243 if err != nil { 244 log.Log.Error(err, "DiscoverSriovDevices(): unable to parse device driver for device, skipping", "device", device) 245 continue 246 } 247 248 pfNetName := s.networkHelper.TryGetInterfaceName(device.Address) 249 250 if pfNetName == "" { 251 log.Log.Error(err, "DiscoverSriovDevices(): unable to get device name for device, skipping", "device", device.Address) 252 continue 253 } 254 255 link, err := s.netlinkLib.LinkByName(pfNetName) 256 if err != nil { 257 log.Log.Error(err, "DiscoverSriovDevices(): unable to get Link for device, skipping", "device", device.Address) 258 continue 259 } 260 261 iface := sriovnetworkv1.InterfaceExt{ 262 Name: pfNetName, 263 PciAddress: device.Address, 264 Driver: driver, 265 Vendor: device.Vendor.ID, 266 DeviceID: device.Product.ID, 267 Mtu: link.Attrs().MTU, 268 Mac: link.Attrs().HardwareAddr.String(), 269 LinkType: s.encapTypeToLinkType(link.Attrs().EncapType), 270 LinkSpeed: s.networkHelper.GetNetDevLinkSpeed(pfNetName), 271 } 272 273 pfStatus, exist, err := storeManager.LoadPfsStatus(iface.PciAddress) 274 if err != nil { 275 log.Log.Error(err, "DiscoverSriovDevices(): failed to load PF status from disk") 276 } else { 277 if exist { 278 iface.ExternallyManaged = pfStatus.ExternallyManaged 279 } 280 } 281 282 if s.dputilsLib.IsSriovPF(device.Address) { 283 iface.TotalVfs = s.dputilsLib.GetSriovVFcapacity(device.Address) 284 iface.NumVfs = s.dputilsLib.GetVFconfigured(device.Address) 285 iface.EswitchMode = s.GetNicSriovMode(device.Address) 286 if s.dputilsLib.SriovConfigured(device.Address) { 287 vfs, err := s.dputilsLib.GetVFList(device.Address) 288 if err != nil { 289 log.Log.Error(err, "DiscoverSriovDevices(): unable to parse VFs for device, skipping", 290 "device", device) 291 continue 292 } 293 for _, vf := range vfs { 294 instance := s.GetVfInfo(vf, devices) 295 iface.VFs = append(iface.VFs, instance) 296 } 297 } 298 } 299 pfList = append(pfList, iface) 300 } 301 
302 return pfList, nil 303 } 304 305 func (s *sriov) configSriovPFDevice(iface *sriovnetworkv1.Interface) error { 306 log.Log.V(2).Info("configSriovPFDevice(): configure PF sriov device", 307 "device", iface.PciAddress) 308 totalVfs := s.dputilsLib.GetSriovVFcapacity(iface.PciAddress) 309 if iface.NumVfs > totalVfs { 310 err := fmt.Errorf("cannot config SRIOV device: NumVfs (%d) is larger than TotalVfs (%d)", iface.NumVfs, totalVfs) 311 log.Log.Error(err, "configSriovPFDevice(): fail to set NumVfs for device", "device", iface.PciAddress) 312 return err 313 } 314 if err := s.configureHWOptionsForSwitchdev(iface); err != nil { 315 return err 316 } 317 // remove all UDEV rules for the PF before adding new rules to 318 // make sure that rules are always in a consistent state, e.g. there is no 319 // switchdev-related rules for PF in legacy mode 320 if err := s.removeUdevRules(iface.PciAddress); err != nil { 321 log.Log.Error(err, "configSriovPFDevice(): fail to remove udev rules", "device", iface.PciAddress) 322 return err 323 } 324 err := s.addUdevRules(iface) 325 if err != nil { 326 log.Log.Error(err, "configSriovPFDevice(): fail to add udev rules", "device", iface.PciAddress) 327 return err 328 } 329 err = s.createVFs(iface) 330 if err != nil { 331 log.Log.Error(err, "configSriovPFDevice(): fail to set NumVfs for device", "device", iface.PciAddress) 332 return err 333 } 334 if err := s.addVfRepresentorUdevRule(iface); err != nil { 335 log.Log.Error(err, "configSriovPFDevice(): fail to add VR representor udev rule", "device", iface.PciAddress) 336 return err 337 } 338 // set PF mtu 339 if iface.Mtu > 0 && iface.Mtu > s.networkHelper.GetNetdevMTU(iface.PciAddress) { 340 err = s.networkHelper.SetNetdevMTU(iface.PciAddress, iface.Mtu) 341 if err != nil { 342 log.Log.Error(err, "configSriovPFDevice(): fail to set mtu for PF", "device", iface.PciAddress) 343 return err 344 } 345 } 346 return nil 347 } 348 349 func (s *sriov) configureHWOptionsForSwitchdev(iface 
*sriovnetworkv1.Interface) error { 350 log.Log.V(2).Info("configureHWOptionsForSwitchdev(): configure HW options for device", 351 "device", iface.PciAddress) 352 if sriovnetworkv1.GetEswitchModeFromSpec(iface) != sriovnetworkv1.ESwithModeSwitchDev { 353 // we need to configure HW options only for PFs for which switchdev is a target mode 354 return nil 355 } 356 if err := s.networkHelper.EnableHwTcOffload(iface.Name); err != nil { 357 return err 358 } 359 desiredFlowSteeringMode := "smfs" 360 currentFlowSteeringMode, err := s.networkHelper.GetDevlinkDeviceParam(iface.PciAddress, "flow_steering_mode") 361 if err != nil { 362 if errors.Is(err, syscall.EINVAL) || errors.Is(err, syscall.ENODEV) { 363 log.Log.V(2).Info("configureHWOptionsForSwitchdev(): device has no flow_steering_mode parameter, skip", 364 "device", iface.PciAddress) 365 return nil 366 } 367 log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to read current flow steering mode for the device", "device", iface.PciAddress) 368 return err 369 } 370 if currentFlowSteeringMode == desiredFlowSteeringMode { 371 return nil 372 } 373 // flow steering mode can be changed only when NIC is in legacy mode 374 if s.GetNicSriovMode(iface.PciAddress) != sriovnetworkv1.ESwithModeLegacy { 375 s.setEswitchModeAndNumVFs(iface.PciAddress, sriovnetworkv1.ESwithModeLegacy, 0) 376 } 377 if err := s.networkHelper.SetDevlinkDeviceParam(iface.PciAddress, "flow_steering_mode", desiredFlowSteeringMode); err != nil { 378 if errors.Is(err, syscall.ENOTSUP) { 379 log.Log.V(2).Info("configureHWOptionsForSwitchdev(): device doesn't support changing of flow_steering_mode, skip", "device", iface.PciAddress) 380 return nil 381 } 382 log.Log.Error(err, "configureHWOptionsForSwitchdev(): fail to configure flow steering mode for the device", "device", iface.PciAddress) 383 return err 384 } 385 return nil 386 } 387 388 func (s *sriov) checkExternallyManagedPF(iface *sriovnetworkv1.Interface) error { 389 
log.Log.V(2).Info("checkExternallyManagedPF(): configure PF sriov device", 390 "device", iface.PciAddress) 391 currentNumVfs := s.dputilsLib.GetVFconfigured(iface.PciAddress) 392 if iface.NumVfs > currentNumVfs { 393 errMsg := fmt.Sprintf("checkExternallyManagedPF(): number of request virtual functions %d is not equal to configured virtual "+ 394 "functions %d but the policy is configured as ExternallyManaged for device %s", 395 iface.NumVfs, currentNumVfs, iface.PciAddress) 396 log.Log.Error(nil, errMsg) 397 return fmt.Errorf(errMsg) 398 } 399 currentEswitchMode := s.GetNicSriovMode(iface.PciAddress) 400 expectedEswitchMode := sriovnetworkv1.GetEswitchModeFromSpec(iface) 401 if currentEswitchMode != expectedEswitchMode { 402 errMsg := fmt.Sprintf("checkExternallyManagedPF(): requested ESwitchMode mode \"%s\" is not equal to configured \"%s\" "+ 403 "but the policy is configured as ExternallyManaged for device %s", expectedEswitchMode, currentEswitchMode, iface.PciAddress) 404 log.Log.Error(nil, errMsg) 405 return fmt.Errorf(errMsg) 406 } 407 currentMtu := s.networkHelper.GetNetdevMTU(iface.PciAddress) 408 if iface.Mtu > 0 && iface.Mtu > currentMtu { 409 err := fmt.Errorf("checkExternallyManagedPF(): requested MTU(%d) is greater than configured MTU(%d) for device %s. 
cannot change MTU as policy is configured as ExternallyManaged", 410 iface.Mtu, currentMtu, iface.PciAddress) 411 log.Log.Error(nil, err.Error()) 412 return err 413 } 414 return nil 415 } 416 417 func (s *sriov) configSriovVFDevices(iface *sriovnetworkv1.Interface) error { 418 log.Log.V(2).Info("configSriovVFDevices(): configure PF sriov device", 419 "device", iface.PciAddress) 420 if iface.NumVfs > 0 { 421 vfAddrs, err := s.dputilsLib.GetVFList(iface.PciAddress) 422 if err != nil { 423 log.Log.Error(err, "configSriovVFDevices(): unable to parse VFs for device", "device", iface.PciAddress) 424 } 425 pfLink, err := s.netlinkLib.LinkByName(iface.Name) 426 if err != nil { 427 log.Log.Error(err, "configSriovVFDevices(): unable to get PF link for device", "device", iface) 428 return err 429 } 430 431 for _, addr := range vfAddrs { 432 hasDriver, _ := s.kernelHelper.HasDriver(addr) 433 if !hasDriver { 434 if err := s.kernelHelper.BindDefaultDriver(addr); err != nil { 435 log.Log.Error(err, "configSriovVFDevices(): fail to bind default driver for device", "device", addr) 436 return err 437 } 438 } 439 var group *sriovnetworkv1.VfGroup 440 441 vfID, err := s.dputilsLib.GetVFID(addr) 442 if err != nil { 443 log.Log.Error(err, "configSriovVFDevices(): unable to get VF id", "device", iface.PciAddress) 444 return err 445 } 446 447 for i := range iface.VfGroups { 448 if sriovnetworkv1.IndexInRange(vfID, iface.VfGroups[i].VfRange) { 449 group = &iface.VfGroups[i] 450 break 451 } 452 } 453 454 // VF group not found. 455 if group == nil { 456 continue 457 } 458 459 // only set GUID and MAC for VF with default driver 460 // for userspace drivers like vfio we configure the vf mac using the kernel nic mac address 461 // before we switch to the userspace driver 462 if yes, d := s.kernelHelper.HasDriver(addr); yes && !sriovnetworkv1.StringInArray(d, vars.DpdkDrivers) { 463 // LinkType is an optional field. 
Let's fallback to current link type 464 // if nothing is specified in the SriovNodePolicy 465 linkType := iface.LinkType 466 if linkType == "" { 467 linkType = s.GetLinkType(iface.Name) 468 } 469 if strings.EqualFold(linkType, consts.LinkTypeIB) { 470 if err = s.SetVfGUID(addr, pfLink); err != nil { 471 return err 472 } 473 } else { 474 vfLink, err := s.VFIsReady(addr) 475 if err != nil { 476 log.Log.Error(err, "configSriovVFDevices(): VF link is not ready", "address", addr) 477 err = s.kernelHelper.RebindVfToDefaultDriver(addr) 478 if err != nil { 479 log.Log.Error(err, "configSriovVFDevices(): failed to rebind VF", "address", addr) 480 return err 481 } 482 483 // Try to check the VF status again 484 vfLink, err = s.VFIsReady(addr) 485 if err != nil { 486 log.Log.Error(err, "configSriovVFDevices(): VF link is not ready", "address", addr) 487 return err 488 } 489 } 490 if err = s.SetVfAdminMac(addr, pfLink, vfLink); err != nil { 491 log.Log.Error(err, "configSriovVFDevices(): fail to configure VF admin mac", "device", addr) 492 return err 493 } 494 } 495 } 496 497 if err = s.kernelHelper.UnbindDriverIfNeeded(addr, group.IsRdma); err != nil { 498 return err 499 } 500 // we set eswitch mode before this point and if the desired mode (and current at this point) 501 // is legacy, then VDPA device is already automatically disappeared, 502 // so we don't need to check it 503 if sriovnetworkv1.GetEswitchModeFromSpec(iface) == sriovnetworkv1.ESwithModeSwitchDev && group.VdpaType == "" { 504 if err := s.vdpaHelper.DeleteVDPADevice(addr); err != nil { 505 log.Log.Error(err, "configSriovVFDevices(): fail to delete VDPA device", 506 "device", addr) 507 return err 508 } 509 } 510 if !sriovnetworkv1.StringInArray(group.DeviceType, vars.DpdkDrivers) { 511 if err := s.kernelHelper.BindDefaultDriver(addr); err != nil { 512 log.Log.Error(err, "configSriovVFDevices(): fail to bind default driver for device", "device", addr) 513 return err 514 } 515 // only set MTU for VF with default 
driver 516 if group.Mtu > 0 { 517 if err := s.networkHelper.SetNetdevMTU(addr, group.Mtu); err != nil { 518 log.Log.Error(err, "configSriovVFDevices(): fail to set mtu for VF", "address", addr) 519 return err 520 } 521 } 522 if sriovnetworkv1.GetEswitchModeFromSpec(iface) == sriovnetworkv1.ESwithModeSwitchDev && group.VdpaType != "" { 523 if err := s.vdpaHelper.CreateVDPADevice(addr, group.VdpaType); err != nil { 524 log.Log.Error(err, "configSriovVFDevices(): fail to create VDPA device", 525 "vdpaType", group.VdpaType, "device", addr) 526 return err 527 } 528 } 529 } else { 530 if err := s.kernelHelper.BindDpdkDriver(addr, group.DeviceType); err != nil { 531 log.Log.Error(err, "configSriovVFDevices(): fail to bind driver for device", 532 "driver", group.DeviceType, "device", addr) 533 return err 534 } 535 } 536 } 537 } 538 return nil 539 } 540 541 func (s *sriov) configSriovDevice(iface *sriovnetworkv1.Interface, skipVFConfiguration bool) error { 542 log.Log.V(2).Info("configSriovDevice(): configure sriov device", 543 "device", iface.PciAddress, "config", iface, "skipVFConfiguration", skipVFConfiguration) 544 if !iface.ExternallyManaged { 545 if err := s.configSriovPFDevice(iface); err != nil { 546 return err 547 } 548 } 549 if skipVFConfiguration { 550 if iface.ExternallyManaged { 551 return nil 552 } 553 log.Log.V(2).Info("configSriovDevice(): skipVFConfiguration is true, unbind all VFs from drivers", 554 "device", iface.PciAddress) 555 return s.unbindAllVFsOnPF(iface.PciAddress) 556 } 557 // we don't need to validate externally managed PFs when skipVFConfiguration is true. 558 // The function usually called with skipVFConfiguration true when running in the systemd mode and configuration is 559 // in pre phase. 
Externally managed PFs may not be configured at this stage yet (preConfig stage is executed before NetworkManager, netplan) 560 561 if iface.ExternallyManaged { 562 if err := s.checkExternallyManagedPF(iface); err != nil { 563 return err 564 } 565 } 566 if err := s.configSriovVFDevices(iface); err != nil { 567 return err 568 } 569 // Set PF link up 570 pfLink, err := s.netlinkLib.LinkByName(iface.Name) 571 if err != nil { 572 return err 573 } 574 if pfLink.Attrs().OperState != netlink.OperUp { 575 err = s.netlinkLib.LinkSetUp(pfLink) 576 if err != nil { 577 return err 578 } 579 } 580 return nil 581 } 582 583 func (s *sriov) ConfigSriovInterfaces(storeManager store.ManagerInterface, 584 interfaces []sriovnetworkv1.Interface, ifaceStatuses []sriovnetworkv1.InterfaceExt, skipVFConfiguration bool) error { 585 toBeConfigured, toBeResetted, err := s.getConfigureAndReset(storeManager, interfaces, ifaceStatuses) 586 if err != nil { 587 log.Log.Error(err, "cannot get a list of interfaces to configure") 588 return fmt.Errorf("cannot get a list of interfaces to configure") 589 } 590 591 if vars.ParallelNicConfig { 592 err = s.configSriovInterfacesInParallel(storeManager, toBeConfigured, skipVFConfiguration) 593 } else { 594 err = s.configSriovInterfaces(storeManager, toBeConfigured, skipVFConfiguration) 595 } 596 if err != nil { 597 log.Log.Error(err, "cannot configure sriov interfaces") 598 return fmt.Errorf("cannot configure sriov interfaces") 599 } 600 if sriovnetworkv1.ContainsSwitchdevInterface(interfaces) && len(toBeConfigured) > 0 { 601 // for switchdev devices we create udev rule that renames VF representors 602 // after VFs are created. 
Reload rules to update interfaces 603 if err := s.udevHelper.LoadUdevRules(); err != nil { 604 log.Log.Error(err, "cannot reload udev rules") 605 return fmt.Errorf("failed to reload udev rules: %v", err) 606 } 607 } 608 609 if vars.ParallelNicConfig { 610 err = s.resetSriovInterfacesInParallel(storeManager, toBeResetted) 611 } else { 612 err = s.resetSriovInterfaces(storeManager, toBeResetted) 613 } 614 if err != nil { 615 log.Log.Error(err, "cannot reset sriov interfaces") 616 return fmt.Errorf("cannot reset sriov interfaces") 617 } 618 return nil 619 } 620 621 func (s *sriov) getConfigureAndReset(storeManager store.ManagerInterface, interfaces []sriovnetworkv1.Interface, 622 ifaceStatuses []sriovnetworkv1.InterfaceExt) ([]interfaceToConfigure, []sriovnetworkv1.InterfaceExt, error) { 623 toBeConfigured := []interfaceToConfigure{} 624 toBeResetted := []sriovnetworkv1.InterfaceExt{} 625 for _, ifaceStatus := range ifaceStatuses { 626 configured := false 627 for _, iface := range interfaces { 628 if iface.PciAddress == ifaceStatus.PciAddress { 629 configured = true 630 skip, err := skipSriovConfig(&iface, &ifaceStatus, storeManager) 631 if err != nil { 632 log.Log.Error(err, "getConfigureAndReset(): failed to check interface") 633 return nil, nil, err 634 } 635 if skip { 636 break 637 } 638 iface := iface 639 ifaceStatus := ifaceStatus 640 toBeConfigured = append(toBeConfigured, interfaceToConfigure{iface: iface, ifaceStatus: ifaceStatus}) 641 } 642 } 643 644 if !configured && ifaceStatus.NumVfs > 0 { 645 toBeResetted = append(toBeResetted, ifaceStatus) 646 } 647 } 648 return toBeConfigured, toBeResetted, nil 649 } 650 651 func (s *sriov) configSriovInterfacesInParallel(storeManager store.ManagerInterface, interfaces []interfaceToConfigure, skipVFConfiguration bool) error { 652 log.Log.V(2).Info("configSriovInterfacesInParallel(): start sriov configuration") 653 654 var result error 655 errChannel := make(chan error) 656 interfacesToConfigure := 0 657 for ifaceIndex, 
iface := range interfaces { 658 interfacesToConfigure += 1 659 go func(iface *interfaceToConfigure) { 660 var err error 661 if err = s.configSriovDevice(&iface.iface, skipVFConfiguration); err != nil { 662 log.Log.Error(err, "configSriovInterfacesInParallel(): fail to configure sriov interface. resetting interface.", "address", iface.iface.PciAddress) 663 if iface.iface.ExternallyManaged { 664 log.Log.V(2).Info("configSriovInterfacesInParallel(): skipping device reset as the nic is marked as externally created") 665 } else { 666 if resetErr := s.ResetSriovDevice(iface.ifaceStatus); resetErr != nil { 667 log.Log.Error(resetErr, "configSriovInterfacesInParallel(): failed to reset on error SR-IOV interface") 668 err = resetErr 669 } 670 } 671 } 672 errChannel <- err 673 }(&interfaces[ifaceIndex]) 674 // Save the PF status to the host 675 err := storeManager.SaveLastPfAppliedStatus(&iface.iface) 676 if err != nil { 677 log.Log.Error(err, "configSriovInterfacesInParallel(): failed to save PF applied config to host") 678 return err 679 } 680 } 681 682 for i := 0; i < interfacesToConfigure; i++ { 683 errMsg := <-errChannel 684 result = errors.Join(result, errMsg) 685 } 686 if result != nil { 687 log.Log.Error(result, "configSriovInterfacesInParallel(): fail to configure sriov interfaces") 688 return result 689 } 690 log.Log.V(2).Info("configSriovInterfacesInParallel(): sriov configuration finished") 691 return nil 692 } 693 694 func (s *sriov) resetSriovInterfacesInParallel(storeManager store.ManagerInterface, interfaces []sriovnetworkv1.InterfaceExt) error { 695 var result error 696 errChannel := make(chan error, len(interfaces)) 697 interfacesToReset := 0 698 for ifaceIndex := range interfaces { 699 interfacesToReset += 1 700 go func(iface *sriovnetworkv1.InterfaceExt) { 701 var err error 702 if err = s.checkForConfigAndReset(*iface, storeManager); err != nil { 703 log.Log.Error(err, "resetSriovInterfacesInParallel(): fail to reset sriov interface. 
resetting interface.", "address", iface.PciAddress) 704 } 705 errChannel <- err 706 }(&interfaces[ifaceIndex]) 707 } 708 709 for i := 0; i < interfacesToReset; i++ { 710 errMsg := <-errChannel 711 result = errors.Join(result, errMsg) 712 } 713 if result != nil { 714 log.Log.Error(result, "resetSriovInterfacesInParallel(): fail to reset sriov interface") 715 return result 716 } 717 log.Log.V(2).Info("resetSriovInterfacesInParallel(): sriov reset finished") 718 719 return nil 720 } 721 722 func (s *sriov) configSriovInterfaces(storeManager store.ManagerInterface, interfaces []interfaceToConfigure, skipVFConfiguration bool) error { 723 log.Log.V(2).Info("configSriovInterfaces(): start sriov configuration") 724 for _, iface := range interfaces { 725 if err := s.configSriovDevice(&iface.iface, skipVFConfiguration); err != nil { 726 log.Log.Error(err, "configSriovInterfaces(): fail to configure sriov interface. resetting interface.", "address", iface.iface.PciAddress) 727 if iface.iface.ExternallyManaged { 728 log.Log.V(2).Info("configSriovInterfaces(): skipping device reset as the nic is marked as externally created") 729 } else { 730 if resetErr := s.ResetSriovDevice(iface.ifaceStatus); resetErr != nil { 731 log.Log.Error(resetErr, "configSriovInterfaces(): failed to reset on error SR-IOV interface") 732 } 733 } 734 return err 735 } 736 737 // Save the PF status to the host 738 err := storeManager.SaveLastPfAppliedStatus(&iface.iface) 739 if err != nil { 740 log.Log.Error(err, "configSriovInterfaces(): failed to save PF applied config to host") 741 return err 742 } 743 } 744 log.Log.V(2).Info("configSriovInterfaces(): sriov configuration finished") 745 return nil 746 } 747 748 func (s *sriov) resetSriovInterfaces(storeManager store.ManagerInterface, interfaces []sriovnetworkv1.InterfaceExt) error { 749 for _, iface := range interfaces { 750 if err := s.checkForConfigAndReset(iface, storeManager); err != nil { 751 log.Log.Error(err, "resetSriovInterfaces(): failed to 
reset sriov interface. resetting interface.", "address", iface.PciAddress) 752 return err 753 } 754 } 755 log.Log.V(2).Info("resetSriovInterfaces(): sriov reset finished") 756 return nil 757 }

// skipSriovConfig reports whether applying the SR-IOV configuration for the
// given interface can be skipped.
//
// When sriovnetworkv1.NeedToUpdateSriov reports that no update is needed, the
// requested PF configuration is saved through storeManager and (true, nil) is
// returned; if saving fails, (false, err) is returned. When an update is
// needed, it returns (false, nil) without touching the store.
func skipSriovConfig(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.InterfaceExt, storeManager store.ManagerInterface) (bool, error) {
	if sriovnetworkv1.NeedToUpdateSriov(iface, ifaceStatus) {
		return false, nil
	}
	log.Log.V(2).Info("ConfigSriovInterfaces(): no need update interface", "address", iface.PciAddress)

	// Save the PF status to the host
	if err := storeManager.SaveLastPfAppliedStatus(iface); err != nil {
		log.Log.Error(err, "ConfigSriovInterfaces(): failed to save PF applied status config to host")
		return false, err
	}
	return true, nil
}

// checkForConfigAndReset resets the SR-IOV configuration of a PF, but only if
// the store holds a PF status for it that is not marked as externally managed.
//
// The reset is skipped (nil is returned) when no stored status exists for the
// PCI address or when the stored status has ExternallyManaged set. Otherwise
// the udev rules for the PF are removed first and then ResetSriovDevice is
// called; the first error encountered is returned.
func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt, storeManager store.ManagerInterface) error {
	// load the PF info
	pfStatus, exist, err := storeManager.LoadPfsStatus(ifaceStatus.PciAddress)
	if err != nil {
		log.Log.Error(err, "checkForConfigAndReset(): failed to load info about PF status for device",
			"address", ifaceStatus.PciAddress)
		return err
	}

	if !exist {
		log.Log.V(2).Info("checkForConfigAndReset(): PF name with pci address has VFs configured but they weren't created by the sriov operator. Skipping the device reset",
			"pf-name", ifaceStatus.Name,
			"address", ifaceStatus.PciAddress)
		return nil
	}

	if pfStatus.ExternallyManaged {
		log.Log.V(2).Info("checkForConfigAndReset(): PF name with pci address was externally created skipping the device reset",
			"pf-name", ifaceStatus.Name,
			"address", ifaceStatus.PciAddress)
		return nil
	}

	if err := s.removeUdevRules(ifaceStatus.PciAddress); err != nil {
		return err
	}
	return s.ResetSriovDevice(ifaceStatus)
}

// ConfigSriovDeviceVirtual binds the device at iface.PciAddress to a driver
// according to the interface spec, for use in a virtual environment.
//
// Nothing is done when iface.NumVfs is not positive. Otherwise exactly one VF
// and exactly one VF group are required; an error is returned if either
// condition is violated. If the group covering VF index 0 requests a device
// type listed in vars.DpdkDrivers, that driver is bound; in every other case
// the default driver is bound.
func (s *sriov) ConfigSriovDeviceVirtual(iface *sriovnetworkv1.Interface) error {
	log.Log.V(2).Info("ConfigSriovDeviceVirtual(): config interface", "address", iface.PciAddress, "config", iface)
	// Config VFs
	if iface.NumVfs <= 0 {
		return nil
	}
	if iface.NumVfs > 1 {
		log.Log.Error(nil, "ConfigSriovDeviceVirtual(): in a virtual environment, only one VF per interface",
			"numVfs", iface.NumVfs)
		return errors.New("NumVfs > 1")
	}
	if len(iface.VfGroups) != 1 {
		log.Log.Error(nil, "ConfigSriovDeviceVirtual(): missing VFGroup")
		// The failed check is on the number of VF groups, not on NumVfs.
		return errors.New("len(VfGroups) != 1")
	}

	addr := iface.PciAddress
	log.Log.V(2).Info("ConfigSriovDeviceVirtual()", "address", addr)

	// Pick the DPDK driver from the first group whose range contains VF 0;
	// an empty driver means "use the default driver".
	driver := ""
	vfID := 0
	for _, group := range iface.VfGroups {
		log.Log.V(2).Info("ConfigSriovDeviceVirtual()", "group", group)
		if !sriovnetworkv1.IndexInRange(vfID, group.VfRange) {
			continue
		}
		log.Log.V(2).Info("ConfigSriovDeviceVirtual()", "indexInRange", vfID)
		if sriovnetworkv1.StringInArray(group.DeviceType, vars.DpdkDrivers) {
			log.Log.V(2).Info("ConfigSriovDeviceVirtual()", "driver", group.DeviceType)
			driver = group.DeviceType
		}
		break
	}

	if driver == "" {
		log.Log.V(2).Info("ConfigSriovDeviceVirtual(): bind default")
		if err := s.kernelHelper.BindDefaultDriver(addr); err != nil {
			log.Log.Error(err, "ConfigSriovDeviceVirtual(): fail to bind default driver", "device", addr)
			return err
		}
		return nil
	}

	log.Log.V(2).Info("ConfigSriovDeviceVirtual(): bind driver", "driver", driver)
	if err := s.kernelHelper.BindDpdkDriver(addr, driver); err != nil {
		log.Log.Error(err, "ConfigSriovDeviceVirtual(): fail to bind driver for device",
			"driver", driver, "device", addr)
		return err
	}
	return nil
}

// GetNicSriovMode returns the eswitch mode reported by devlink for the PCI
// device, falling back to sriovnetworkv1.ESwithModeLegacy when the device
// cannot be queried or reports an empty mode.
//
// Lookup errors are logged, except syscall.ENODEV which is silently treated
// as legacy mode.
func (s *sriov) GetNicSriovMode(pciAddress string) string {
	log.Log.V(2).Info("GetNicSriovMode()", "device", pciAddress)
	devLink, err := s.netlinkLib.DevLinkGetDeviceByName("pci", pciAddress)
	if err != nil && !errors.Is(err, syscall.ENODEV) {
		log.Log.Error(err, "GetNicSriovMode(): failed to get eswitch mode, assume legacy", "device", pciAddress)
	}
	if devLink != nil && devLink.Attrs.Eswitch.Mode != "" {
		return devLink.Attrs.Eswitch.Mode
	}

	return sriovnetworkv1.ESwithModeLegacy
}

// SetNicSriovMode looks up the devlink device for the PCI address and sets its
// eswitch mode to mode. It returns the lookup error, or the result of the
// devlink set call.
func (s *sriov) SetNicSriovMode(pciAddress string, mode string) error {
	log.Log.V(2).Info("SetNicSriovMode()", "device", pciAddress, "mode", mode)

	dev, err := s.netlinkLib.DevLinkGetDeviceByName("pci", pciAddress)
	if err != nil {
		return err
	}
	return s.netlinkLib.DevLinkSetEswitchMode(dev, mode)
}

// GetLinkType returns the link type (consts.LinkTypeETH or consts.LinkTypeIB)
// of the named network interface. It returns an empty string when the link
// cannot be found or its encapsulation type is not recognized.
func (s *sriov) GetLinkType(name string) string {
	log.Log.V(2).Info("GetLinkType()", "name", name)
	link, err := s.netlinkLib.LinkByName(name)
	if err != nil {
		log.Log.Error(err, "GetLinkType(): failed to get link", "device", name)
		return ""
	}
	return s.encapTypeToLinkType(link.Attrs().EncapType)
}

// encapTypeToLinkType maps a netlink encapsulation type to a link type
// constant: "ether" to consts.LinkTypeETH and "infiniband" to
// consts.LinkTypeIB. Any other value maps to an empty string.
func (s *sriov) encapTypeToLinkType(encapType string) string {
	switch encapType {
	case "ether":
		return consts.LinkTypeETH
	case "infiniband":
		return consts.LinkTypeIB
	default:
		return ""
	}
}

// addUdevRules creates the required udev rules for a PF:
//   - a rule to disable NetworkManager for VFs - for all modes
//   - a rule to keep the PF name after switching to switchdev mode - only
//     when the spec requests switchdev mode
func (s *sriov) addUdevRules(iface *sriovnetworkv1.Interface) error {
	log.Log.V(2).Info("addUdevRules(): add udev rules for device",
		"device", iface.PciAddress)
	if err := s.udevHelper.AddDisableNMUdevRule(iface.PciAddress); err != nil {
		return err
	}
	if sriovnetworkv1.GetEswitchModeFromSpec(iface) != sriovnetworkv1.ESwithModeSwitchDev {
		return nil
	}
	return s.udevHelper.AddPersistPFNameUdevRule(iface.PciAddress, iface.Name)
}

// addVfRepresentorUdevRule adds the switchdev-specific udev rule that renames
// representors. It is a no-op unless the spec requests switchdev mode.
//
// The rule relies on the phys_port_name and phys_switch_id parameters, which
// on old kernels can be read only after switching the PF to switchdev mode.
// If the PF doesn't expose either of them, the failure is logged, rule
// creation is skipped and nil is returned.
func (s *sriov) addVfRepresentorUdevRule(iface *sriovnetworkv1.Interface) error {
	if sriovnetworkv1.GetEswitchModeFromSpec(iface) != sriovnetworkv1.ESwithModeSwitchDev {
		return nil
	}
	portName, err := s.networkHelper.GetPhysPortName(iface.Name)
	if err != nil {
		log.Log.Error(err, "addVfRepresentorUdevRule(): WARNING: can't read phys_port_name for device, skip creation of UDEV rule")
		return nil
	}
	switchID, err := s.networkHelper.GetPhysSwitchID(iface.Name)
	if err != nil {
		log.Log.Error(err, "addVfRepresentorUdevRule(): WARNING: can't read phys_switch_id for device, skip creation of UDEV rule")
		return nil
	}
	return s.udevHelper.AddVfRepresentorUdevRule(iface.PciAddress, iface.Name, switchID, portName)
}

// removeUdevRules removes all udev rules created by the operator for the PF:
// the disable-NetworkManager rule, the VF representor rule and the
// persist-PF-name rule, in that order. It stops at the first error.
func (s *sriov) removeUdevRules(pciAddress string) error {
	log.Log.V(2).Info("removeUdevRules(): remove udev rules for device",
		"device", pciAddress)
	if err := s.udevHelper.RemoveDisableNMUdevRule(pciAddress); err != nil {
		return err
	}
	if err := s.udevHelper.RemoveVfRepresentorUdevRule(pciAddress); err != nil {
		return err
	}
	return s.udevHelper.RemovePersistPFNameUdevRule(pciAddress)
}

// createVFs creates the VFs on the PF described by iface.
//
// Nothing is changed when the PF already has iface.NumVfs VFs configured and
// its current eswitch mode equals the mode requested in the spec; otherwise
// the eswitch mode and VF count are (re)applied.
func (s *sriov) createVFs(iface *sriovnetworkv1.Interface) error {
	expectedEswitchMode := sriovnetworkv1.GetEswitchModeFromSpec(iface)
	log.Log.V(2).Info("createVFs(): configure VFs for device",
		"device", iface.PciAddress, "count", iface.NumVfs, "mode", expectedEswitchMode)

	// The eswitch mode is only queried when the VF count already matches.
	if s.dputilsLib.GetVFconfigured(iface.PciAddress) == iface.NumVfs &&
		s.GetNicSriovMode(iface.PciAddress) == expectedEswitchMode {
		log.Log.V(2).Info("createVFs(): device is already configured",
			"device", iface.PciAddress, "count", iface.NumVfs, "mode", expectedEswitchMode)
		return nil
	}
	return s.setEswitchModeAndNumVFs(iface.PciAddress, expectedEswitchMode, iface.NumVfs)
}

// setEswitchMode unbinds every VF of the PF from its driver and then switches
// the PF to the given eswitch mode. The returned error wraps the underlying
// cause so callers can inspect it with errors.Is / errors.As.
func (s *sriov) setEswitchMode(pciAddr, eswitchMode string) error {
	log.Log.V(2).Info("setEswitchMode(): set eswitch mode", "device", pciAddr, "mode", eswitchMode)
	if err := s.unbindAllVFsOnPF(pciAddr); err != nil {
		log.Log.Error(err, "setEswitchMode(): failed to unbind VFs", "device", pciAddr, "mode", eswitchMode)
		return err
	}
	if err := s.SetNicSriovMode(pciAddr, eswitchMode); err != nil {
		err = fmt.Errorf("failed to switch NIC to SRIOV %s mode: %w", eswitchMode, err)
		log.Log.Error(err, "setEswitchMode(): failed to set mode", "device", pciAddr, "mode", eswitchMode)
		return err
	}
	return nil
}

// setEswitchModeAndNumVFs configures the number of VFs on the PF and leaves
// the PF in desiredEswitchMode.
//
// The PF is first switched to legacy mode if it is not already there, then
// the VF count is written, and finally the PF is switched to switchdev mode
// when that is the desired mode.
func (s *sriov) setEswitchModeAndNumVFs(pciAddr string, desiredEswitchMode string, numVFs int) error {
	log.Log.V(2).Info("setEswitchModeAndNumVFs(): configure VFs for device",
		"device", pciAddr, "count", numVFs, "mode", desiredEswitchMode)

	// always switch NIC to the legacy mode before creating VFs. This is required because some drivers
	// may not support VF creation in the switchdev mode
	if s.GetNicSriovMode(pciAddr) != sriovnetworkv1.ESwithModeLegacy {
		if err := s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeLegacy); err != nil {
			return err
		}
	}
	if err := s.SetSriovNumVfs(pciAddr, numVFs); err != nil {
		return err
	}

	if desiredEswitchMode == sriovnetworkv1.ESwithModeSwitchDev {
		return s.setEswitchMode(pciAddr, sriovnetworkv1.ESwithModeSwitchDev)
	}
	return nil
}

// unbindAllVFsOnPF retrieves all VFs of the PF at addr and unbinds each of
// them from its driver, stopping at the first failure. Returned errors wrap
// the underlying cause.
func (s *sriov) unbindAllVFsOnPF(addr string) error {
	log.Log.V(2).Info("unbindAllVFsOnPF(): unbind all VFs on PF", "device", addr)
	vfAddrs, err := s.dputilsLib.GetVFList(addr)
	if err != nil {
		return fmt.Errorf("failed to read VF list: %w", err)
	}
	for _, vfAddr := range vfAddrs {
		if err := s.kernelHelper.Unbind(vfAddr); err != nil {
			return fmt.Errorf("failed to unbind VF from the driver: %w", err)
		}
	}
	return nil
}