github.com/k8snetworkplumbingwg/sriov-network-operator@v1.2.1-0.20240408194816-2d2e5a45d453/cmd/sriov-network-config-daemon/service.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 package main 17 18 import ( 19 "errors" 20 "fmt" 21 "os" 22 23 "github.com/go-logr/logr" 24 "github.com/spf13/cobra" 25 "sigs.k8s.io/controller-runtime/pkg/log" 26 27 sriovv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" 28 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" 29 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" 30 snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" 31 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" 32 plugin "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins" 33 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/generic" 34 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/plugins/virtual" 35 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/systemd" 36 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" 37 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/version" 38 ) 39 40 const ( 41 PhasePre = "pre" 42 PhasePost = "post" 43 ) 44 45 var ( 46 serviceCmd = &cobra.Command{ 47 Use: "service", 48 Short: "Starts SR-IOV service Config", 49 Long: "", 50 RunE: runServiceCmd, 51 } 52 phaseArg string 53 54 newGenericPluginFunc = generic.NewGenericPlugin 55 newVirtualPluginFunc = virtual.NewVirtualPlugin 56 newHostHelpersFunc = helper.NewDefaultHostHelpers 57 newPlatformHelperFunc = platforms.NewDefaultPlatformHelper 58 ) 59 60 func init() { 61 rootCmd.AddCommand(serviceCmd) 62 serviceCmd.Flags().StringVarP(&phaseArg, "phase", "p", PhasePre, fmt.Sprintf("configuration phase, supported values are: %s, %s", PhasePre, PhasePost)) 63 } 64 65 // The service supports two configuration phases: 66 // * pre(default) - before the NetworkManager or systemd-networkd 67 // * post - after the NetworkManager or systemd-networkd 68 // "sriov-config" systemd unit is responsible for starting the service in the "pre" phase mode. 69 // "sriov-config-post-network" systemd unit starts the service in the "post" phase mode. 70 // The service may use different plugins for each phase and call different initialization flows. 71 // The "post" phase checks the completion status of the "pre" phase by reading the sriov result file. 72 // The "pre" phase should set "InProgress" status if it succeeds or "Failed" otherwise. 73 // If the result of the "pre" phase is different than "InProgress", then the "post" phase will not be executed 74 // and the execution result will be forcefully set to "Failed". 75 func runServiceCmd(cmd *cobra.Command, args []string) error { 76 if phaseArg != PhasePre && phaseArg != PhasePost { 77 return fmt.Errorf("invalid value for \"--phase\" argument, valid values are: %s, %s", PhasePre, PhasePost) 78 } 79 // init logger 80 snolog.InitLog() 81 setupLog := log.Log.WithName("sriov-config-service").WithValues("phase", phaseArg) 82 83 setupLog.V(0).Info("Starting sriov-config-service", "version", version.Version) 84 85 // Mark that we are running on host 86 vars.UsingSystemdMode = true 87 vars.InChroot = true 88 89 sriovConf, err := readConf(setupLog) 90 if err != nil { 91 return updateSriovResultErr(setupLog, phaseArg, err) 92 } 93 setupLog.V(2).Info("sriov-config-service", "config", sriovConf) 94 vars.DevMode = sriovConf.UnsupportedNics 95 96 if err := initSupportedNics(); err != nil { 97 return updateSriovResultErr(setupLog, phaseArg, fmt.Errorf("failed to initialize list of supported NIC ids: %v", err)) 98 } 99 100 hostHelpers, err := newHostHelpersFunc() 101 if err != nil { 102 return updateSriovResultErr(setupLog, phaseArg, fmt.Errorf("failed to create hostHelpers: %v", err)) 103 } 104 105 if phaseArg == PhasePre { 106 err = phasePre(setupLog, sriovConf, hostHelpers) 107 } else { 108 err = phasePost(setupLog, sriovConf, hostHelpers) 109 } 110 if err != nil { 111 return updateSriovResultErr(setupLog, phaseArg, err) 112 } 113 return updateSriovResultOk(setupLog, phaseArg) 114 } 115 116 func readConf(setupLog logr.Logger) (*systemd.SriovConfig, error) { 117 nodeStateSpec, err := systemd.ReadConfFile() 118 if err != nil { 119 if _, err := os.Stat(systemd.SriovSystemdConfigPath); !errors.Is(err, os.ErrNotExist) { 120 return nil, fmt.Errorf("failed to read the sriov configuration file in path %s: %v", systemd.SriovSystemdConfigPath, err) 121 } 122 setupLog.Info("configuration file not found, use default config") 123 nodeStateSpec = &systemd.SriovConfig{ 124 Spec: sriovv1.SriovNetworkNodeStateSpec{}, 125 UnsupportedNics: false, 126 PlatformType: consts.Baremetal, 127 } 128 } 129 return nodeStateSpec, nil 130 } 131 132 func initSupportedNics() error { 133 supportedNicIds, err := systemd.ReadSriovSupportedNics() 134 if err != nil { 135 return fmt.Errorf("failed to read list of supported nic ids: %v", err) 136 } 137 sriovv1.InitNicIDMapFromList(supportedNicIds) 138 return nil 139 } 140 141 func phasePre(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helper.HostHelpersInterface) error { 142 // make sure there is no stale result file to avoid situation when we 143 // read outdated info in the Post phase when the Pre silently failed (should not happen) 144 if err := systemd.RemoveSriovResult(); err != nil { 145 return fmt.Errorf("failed to remove sriov result file: %v", err) 146 } 147 148 _, err := hostHelpers.TryEnableRdma() 149 if err != nil { 150 setupLog.Error(err, "warning, failed to enable RDMA") 151 } 152 hostHelpers.TryEnableTun() 153 hostHelpers.TryEnableVhostNet() 154 155 return callPlugin(setupLog, PhasePre, conf, hostHelpers) 156 } 157 158 func phasePost(setupLog logr.Logger, conf *systemd.SriovConfig, hostHelpers helper.HostHelpersInterface) error { 159 setupLog.V(0).Info("check result of the Pre phase") 160 prePhaseResult, err := systemd.ReadSriovResult() 161 if err != nil { 162 return fmt.Errorf("failed to read result of the pre phase: %v", err) 163 } 164 if prePhaseResult.SyncStatus != consts.SyncStatusInProgress { 165 return fmt.Errorf("unexpected result of the pre phase: %s, syncError: %s", prePhaseResult.SyncStatus, prePhaseResult.LastSyncError) 166 } 167 setupLog.V(0).Info("Pre phase succeed, continue execution") 168 169 return callPlugin(setupLog, PhasePost, conf, hostHelpers) 170 } 171 172 func callPlugin(setupLog logr.Logger, phase string, conf *systemd.SriovConfig, hostHelpers helper.HostHelpersInterface) error { 173 configPlugin, err := getPlugin(setupLog, phase, conf, hostHelpers) 174 if err != nil { 175 return err 176 } 177 178 if configPlugin == nil { 179 setupLog.V(0).Info("no plugin for the platform for the current phase, skip calling", "platform", conf.PlatformType) 180 return nil 181 } 182 183 nodeState, err := getNetworkNodeState(setupLog, conf, hostHelpers) 184 if err != nil { 185 return nil 186 } 187 _, _, err = configPlugin.OnNodeStateChange(nodeState) 188 if err != nil { 189 return fmt.Errorf("failed to run OnNodeStateChange to update the plugin status %v", err) 190 } 191 192 if err = configPlugin.Apply(); err != nil { 193 return fmt.Errorf("failed to apply configuration: %v", err) 194 } 195 setupLog.V(0).Info("plugin call succeed") 196 return nil 197 } 198 199 func getPlugin(setupLog logr.Logger, phase string, 200 conf *systemd.SriovConfig, hostHelpers helper.HostHelpersInterface) (plugin.VendorPlugin, error) { 201 var ( 202 configPlugin plugin.VendorPlugin 203 err error 204 ) 205 switch conf.PlatformType { 206 case consts.Baremetal: 207 switch phase { 208 case PhasePre: 209 configPlugin, err = newGenericPluginFunc(hostHelpers, generic.WithSkipVFConfiguration()) 210 case PhasePost: 211 configPlugin, err = newGenericPluginFunc(hostHelpers) 212 } 213 if err != nil { 214 return nil, fmt.Errorf("failed to create generic plugin for %v", err) 215 } 216 case consts.VirtualOpenStack: 217 switch phase { 218 case PhasePre: 219 configPlugin, err = newVirtualPluginFunc(hostHelpers) 220 if err != nil { 221 return nil, fmt.Errorf("failed to create virtual plugin %v", err) 222 } 223 case PhasePost: 224 setupLog.Info("skip post configuration phase for virtual cluster") 225 return nil, nil 226 } 227 } 228 return configPlugin, nil 229 } 230 231 func getNetworkNodeState(setupLog logr.Logger, conf *systemd.SriovConfig, 232 hostHelpers helper.HostHelpersInterface) (*sriovv1.SriovNetworkNodeState, error) { 233 var ( 234 ifaceStatuses []sriovv1.InterfaceExt 235 err error 236 ) 237 switch conf.PlatformType { 238 case consts.Baremetal: 239 ifaceStatuses, err = hostHelpers.DiscoverSriovDevices(hostHelpers) 240 if err != nil { 241 return nil, fmt.Errorf("failed to discover sriov devices on the host: %v", err) 242 } 243 case consts.VirtualOpenStack: 244 platformHelper, err := newPlatformHelperFunc() 245 if err != nil { 246 return nil, fmt.Errorf("failed to create platformHelpers") 247 } 248 err = platformHelper.CreateOpenstackDevicesInfo() 249 if err != nil { 250 return nil, fmt.Errorf("failed to read OpenStack data: %v", err) 251 } 252 ifaceStatuses, err = platformHelper.DiscoverSriovDevicesVirtual() 253 if err != nil { 254 return nil, fmt.Errorf("failed to discover devices: %v", err) 255 } 256 } 257 return &sriovv1.SriovNetworkNodeState{ 258 Spec: conf.Spec, 259 Status: sriovv1.SriovNetworkNodeStateStatus{Interfaces: ifaceStatuses}, 260 }, nil 261 } 262 263 func updateSriovResultErr(setupLog logr.Logger, phase string, origErr error) error { 264 setupLog.Error(origErr, "service call failed") 265 err := updateResult(setupLog, consts.SyncStatusFailed, fmt.Sprintf("%s: %v", phase, origErr)) 266 if err != nil { 267 return err 268 } 269 return origErr 270 } 271 272 func updateSriovResultOk(setupLog logr.Logger, phase string) error { 273 setupLog.V(0).Info("service call succeed") 274 syncStatus := consts.SyncStatusSucceeded 275 if phase == PhasePre { 276 syncStatus = consts.SyncStatusInProgress 277 } 278 return updateResult(setupLog, syncStatus, "") 279 } 280 281 func updateResult(setupLog logr.Logger, result, msg string) error { 282 sriovResult := &systemd.SriovResult{ 283 SyncStatus: result, 284 LastSyncError: msg, 285 } 286 err := systemd.WriteSriovResult(sriovResult) 287 if err != nil { 288 setupLog.Error(err, "failed to write sriov result file", "content", *sriovResult) 289 return fmt.Errorf("sriov-config-service failed to write sriov result file with content %v error: %v", *sriovResult, err) 290 } 291 setupLog.V(0).Info("result file updated", "SyncStatus", sriovResult.SyncStatus, "LastSyncError", msg) 292 return nil 293 }