github.com/k8snetworkplumbingwg/sriov-network-operator@v1.2.1-0.20240408194816-2d2e5a45d453/cmd/sriov-network-config-daemon/start.go (about) 1 /* 2 Copyright 2023. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 package main 17 18 import ( 19 "context" 20 "fmt" 21 "net" 22 "net/url" 23 "os" 24 "strings" 25 "time" 26 27 "github.com/spf13/cobra" 28 v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 29 "k8s.io/client-go/kubernetes" 30 "k8s.io/client-go/kubernetes/scheme" 31 "k8s.io/client-go/rest" 32 "k8s.io/client-go/tools/clientcmd" 33 "k8s.io/client-go/util/connrotation" 34 "sigs.k8s.io/controller-runtime/pkg/client" 35 "sigs.k8s.io/controller-runtime/pkg/log" 36 37 configv1 "github.com/openshift/api/config/v1" 38 mcfgv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" 39 40 sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1" 41 snclientset "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/client/clientset/versioned" 42 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/consts" 43 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/daemon" 44 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/helper" 45 snolog "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/log" 46 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms" 47 "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars" 48 ) 49 50 // stringList is a list of strings, implements pflag.Value interface 51 type stringList []string 52 53 func (sl *stringList) String() string { 54 return strings.Join(*sl, ",") 55 } 56 57 func (sl *stringList) Set(arg string) error { 58 elems := strings.Split(arg, ",") 59 60 for _, elem := range elems { 61 if len(elem) == 0 { 62 return fmt.Errorf("empty plugin name") 63 } 64 *sl = append(*sl, elem) 65 } 66 return nil 67 } 68 69 func (sl *stringList) Type() string { 70 return "CommaSeparatedString" 71 } 72 73 var ( 74 startCmd = &cobra.Command{ 75 Use: "start", 76 Short: "Starts SR-IOV Network Config Daemon", 77 Long: "", 78 RunE: runStartCmd, 79 } 80 81 startOpts struct { 82 kubeconfig string 83 nodeName string 84 systemd bool 85 disabledPlugins stringList 86 parallelNicConfig bool 87 } 88 ) 89 90 func init() { 91 rootCmd.AddCommand(startCmd) 92 startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)") 93 startCmd.PersistentFlags().StringVar(&startOpts.nodeName, "node-name", "", "kubernetes node name daemon is managing") 94 startCmd.PersistentFlags().BoolVar(&startOpts.systemd, "use-systemd-service", false, "use config daemon in systemd mode") 95 startCmd.PersistentFlags().VarP(&startOpts.disabledPlugins, "disable-plugins", "", "comma-separated list of plugins to disable") 96 startCmd.PersistentFlags().BoolVar(&startOpts.parallelNicConfig, "parallel-nic-config", false, "perform NIC configuration in parallel") 97 } 98 99 func runStartCmd(cmd *cobra.Command, args []string) error { 100 // init logger 101 snolog.InitLog() 102 setupLog := log.Log.WithName("sriov-network-config-daemon") 103 104 // Mark that we are running inside a container 105 vars.UsingSystemdMode = false 106 if startOpts.systemd { 107 vars.UsingSystemdMode = true 108 } 109 110 vars.ParallelNicConfig = startOpts.parallelNicConfig 111 112 if startOpts.nodeName == "" { 113 name, ok := os.LookupEnv("NODE_NAME") 114 if !ok || name == "" { 115 return fmt.Errorf("node-name is required") 116 } 117 startOpts.nodeName = name 118 } 119 vars.NodeName = startOpts.nodeName 120 121 for _, p := range startOpts.disabledPlugins { 122 if _, ok := vars.DisableablePlugins[p]; !ok { 123 return fmt.Errorf("%s plugin cannot be disabled", p) 124 } 125 } 126 127 // This channel is used to ensure all spawned goroutines exit when we exit. 128 stopCh := make(chan struct{}) 129 defer close(stopCh) 130 131 // This channel is used to signal Run() something failed and to jump ship. 132 // It's purely a chan<- in the Daemon struct for goroutines to write to, and 133 // a <-chan in Run() for the main thread to listen on. 134 exitCh := make(chan error) 135 defer close(exitCh) 136 137 // This channel is to make sure main thread will wait until the writer finish 138 // to report lastSyncError in SriovNetworkNodeState object. 139 syncCh := make(chan struct{}) 140 defer close(syncCh) 141 142 refreshCh := make(chan daemon.Message) 143 defer close(refreshCh) 144 145 var config *rest.Config 146 var err error 147 148 // On openshift we use the kubeconfig from kubelet on the node where the daemon is running 149 // this allow us to improve security as every daemon has access only to its own node 150 if vars.ClusterType == consts.ClusterTypeOpenshift { 151 kubeconfig, err := clientcmd.LoadFromFile("/host/etc/kubernetes/kubeconfig") 152 if err != nil { 153 setupLog.Error(err, "failed to load kubelet kubeconfig") 154 } 155 clusterName := kubeconfig.Contexts[kubeconfig.CurrentContext].Cluster 156 apiURL := kubeconfig.Clusters[clusterName].Server 157 158 urlPath, err := url.Parse(apiURL) 159 if err != nil { 160 setupLog.Error(err, "failed to parse api url from kubelet kubeconfig") 161 } 162 163 // The kubernetes in-cluster functions don't let you override the apiserver 164 // directly; gotta "pass" it via environment vars. 165 setupLog.V(0).Info("overriding kubernetes api", "new-url", apiURL) 166 err = os.Setenv("KUBERNETES_SERVICE_HOST", urlPath.Hostname()) 167 if err != nil { 168 setupLog.Error(err, "failed to set KUBERNETES_SERVICE_HOST environment variable") 169 } 170 err = os.Setenv("KUBERNETES_SERVICE_PORT", urlPath.Port()) 171 if err != nil { 172 setupLog.Error(err, "failed to set KUBERNETES_SERVICE_PORT environment variable") 173 } 174 } 175 176 kubeconfig := os.Getenv("KUBECONFIG") 177 if kubeconfig != "" { 178 config, err = clientcmd.BuildConfigFromFlags("", kubeconfig) 179 } else { 180 // creates the in-cluster config 181 config, err = rest.InClusterConfig() 182 } 183 184 if err != nil { 185 return err 186 } 187 188 vars.Config = config 189 vars.Scheme = scheme.Scheme 190 191 closeAllConns, err := updateDialer(config) 192 if err != nil { 193 return err 194 } 195 196 err = sriovnetworkv1.AddToScheme(scheme.Scheme) 197 if err != nil { 198 setupLog.Error(err, "failed to load sriov network CRDs to scheme") 199 return err 200 } 201 202 err = mcfgv1.AddToScheme(scheme.Scheme) 203 if err != nil { 204 setupLog.Error(err, "failed to load machine config CRDs to scheme") 205 return err 206 } 207 208 err = configv1.Install(scheme.Scheme) 209 if err != nil { 210 setupLog.Error(err, "failed to load openshift config CRDs to scheme") 211 return err 212 } 213 214 kClient, err := client.New(config, client.Options{Scheme: scheme.Scheme}) 215 if err != nil { 216 setupLog.Error(err, "couldn't create client") 217 os.Exit(1) 218 } 219 220 snclient := snclientset.NewForConfigOrDie(config) 221 kubeclient := kubernetes.NewForConfigOrDie(config) 222 223 hostHelpers, err := helper.NewDefaultHostHelpers() 224 if err != nil { 225 setupLog.Error(err, "failed to create hostHelpers") 226 return err 227 } 228 229 platformHelper, err := platforms.NewDefaultPlatformHelper() 230 if err != nil { 231 setupLog.Error(err, "failed to create platformHelper") 232 return err 233 } 234 235 config.Timeout = 5 * time.Second 236 writerclient := snclientset.NewForConfigOrDie(config) 237 238 eventRecorder := daemon.NewEventRecorder(writerclient, kubeclient) 239 defer eventRecorder.Shutdown() 240 241 setupLog.V(0).Info("starting node writer") 242 nodeWriter := daemon.NewNodeStateStatusWriter(writerclient, 243 closeAllConns, 244 eventRecorder, 245 hostHelpers, 246 platformHelper) 247 248 nodeInfo, err := kubeclient.CoreV1().Nodes().Get(context.Background(), startOpts.nodeName, v1.GetOptions{}) 249 if err == nil { 250 for key, pType := range vars.PlatformsMap { 251 if strings.Contains(strings.ToLower(nodeInfo.Spec.ProviderID), strings.ToLower(key)) { 252 vars.PlatformType = pType 253 } 254 } 255 } else { 256 setupLog.Error(err, "failed to fetch node state, exiting", "node-name", startOpts.nodeName) 257 return err 258 } 259 setupLog.Info("Running on", "platform", vars.PlatformType.String()) 260 261 if err := sriovnetworkv1.InitNicIDMapFromConfigMap(kubeclient, vars.Namespace); err != nil { 262 setupLog.Error(err, "failed to run init NicIdMap") 263 return err 264 } 265 266 eventRecorder.SendEvent("ConfigDaemonStart", "Config Daemon starting") 267 268 // block the deamon process until nodeWriter finish first its run 269 err = nodeWriter.RunOnce() 270 if err != nil { 271 setupLog.Error(err, "failed to run writer") 272 return err 273 } 274 go nodeWriter.Run(stopCh, refreshCh, syncCh) 275 276 setupLog.V(0).Info("Starting SriovNetworkConfigDaemon") 277 err = daemon.New( 278 kClient, 279 snclient, 280 kubeclient, 281 hostHelpers, 282 platformHelper, 283 exitCh, 284 stopCh, 285 syncCh, 286 refreshCh, 287 eventRecorder, 288 startOpts.disabledPlugins, 289 ).Run(stopCh, exitCh) 290 if err != nil { 291 setupLog.Error(err, "failed to run daemon") 292 } 293 setupLog.V(0).Info("Shutting down SriovNetworkConfigDaemon") 294 return err 295 } 296 297 // updateDialer instruments a restconfig with a dial. the returned function allows forcefully closing all active connections. 298 func updateDialer(clientConfig *rest.Config) (func(), error) { 299 if clientConfig.Transport != nil || clientConfig.Dial != nil { 300 return nil, fmt.Errorf("there is already a transport or dialer configured") 301 } 302 f := &net.Dialer{Timeout: 30 * time.Second, KeepAlive: 30 * time.Second} 303 d := connrotation.NewDialer(f.DialContext) 304 clientConfig.Dial = d.DialContext 305 return d.CloseAll, nil 306 }