github.com/webmeshproj/webmesh-cni@v0.0.27/internal/cmd/node/node.go (about) 1 /* 2 Copyright 2023 Avi Zimmerman <avi.zimmerman@gmail.com>. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 // Package node contains the entrypoint for the webmesh-cni node component. 18 package node 19 20 import ( 21 "context" 22 "errors" 23 "flag" 24 "fmt" 25 "net/netip" 26 "os" 27 "strings" 28 "time" 29 30 "github.com/knadh/koanf/parsers/json" 31 "github.com/knadh/koanf/providers/posflag" 32 "github.com/knadh/koanf/v2" 33 "github.com/spf13/pflag" 34 storagev1 "github.com/webmeshproj/storage-provider-k8s/api/storage/v1" 35 storageprovider "github.com/webmeshproj/storage-provider-k8s/provider" 36 "github.com/webmeshproj/webmesh/pkg/cmd/cmdutil" 37 meshconfig "github.com/webmeshproj/webmesh/pkg/config" 38 "github.com/webmeshproj/webmesh/pkg/plugins/builtins" 39 meshservices "github.com/webmeshproj/webmesh/pkg/services" 40 "github.com/webmeshproj/webmesh/pkg/services/meshdns" 41 "github.com/webmeshproj/webmesh/pkg/version" 42 "k8s.io/apimachinery/pkg/runtime" 43 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 44 clientgoscheme "k8s.io/client-go/kubernetes/scheme" 45 _ "k8s.io/client-go/plugin/pkg/client/auth" 46 ctrl "sigs.k8s.io/controller-runtime" 47 "sigs.k8s.io/controller-runtime/pkg/client" 48 ctrlconfig "sigs.k8s.io/controller-runtime/pkg/config" 49 "sigs.k8s.io/controller-runtime/pkg/healthz" 50 ctrllog "sigs.k8s.io/controller-runtime/pkg/log" 51 "sigs.k8s.io/controller-runtime/pkg/log/zap" 52 metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" 53 54 cniv1 "github.com/webmeshproj/webmesh-cni/api/v1" 55 "github.com/webmeshproj/webmesh-cni/internal/config" 56 "github.com/webmeshproj/webmesh-cni/internal/controllers" 57 "github.com/webmeshproj/webmesh-cni/internal/host" 58 "github.com/webmeshproj/webmesh-cni/internal/metadata" 59 ) 60 61 var ( 62 scheme = runtime.NewScheme() 63 log = ctrl.Log.WithName("webmesh-cni") 64 cniopts = config.NewDefaultConfig() 65 zapopts = zap.Options{Development: true} 66 ) 67 68 func init() { 69 utilruntime.Must(clientgoscheme.AddToScheme(scheme)) 70 utilruntime.Must(cniv1.AddToScheme(scheme)) 71 utilruntime.Must(storagev1.AddToScheme(scheme)) 72 } 73 74 func pluginInArgs(pluginName string) bool { 75 for _, arg := range os.Args { 76 if strings.HasPrefix(arg, fmt.Sprintf("--host.plugins.%s", pluginName)) { 77 return true 78 } 79 } 80 return false 81 } 82 83 // Main runs the webmesh-cni daemon. 84 func Main(build version.BuildInfo) { 85 // Build the flagset 86 var configMap string 87 var configMapNamespace string 88 zapset := flag.NewFlagSet("zap", flag.ContinueOnError) 89 fs := pflag.NewFlagSet("webmesh-cni", pflag.ContinueOnError) 90 cniopts.BindFlags(fs) 91 zapopts.BindFlags(zapset) 92 fs.AddGoFlagSet(zapset) 93 fs.StringVar(&configMap, "configmap", "", "The name of the configmap to load configuration from.") 94 fs.StringVar(&configMapNamespace, "configmap-namespace", "kube-system", "The namespace of the configmap to load configuration from.") 95 96 // Create a separate flag set with all plugins for usage. 97 usage := pflag.NewFlagSet("usage", pflag.ContinueOnError) 98 usage.AddFlagSet(fs) 99 pluginConfigs := builtins.NewPluginConfigs() 100 for pluginName, pluginConfig := range pluginConfigs { 101 if !pluginInArgs(pluginName) { 102 pluginConfig.BindFlags(fmt.Sprintf("host.plugins.%s.", pluginName), usage) 103 } 104 } 105 fs.Usage = cmdutil.NewUsageFunc(cmdutil.UsageConfig{ 106 Name: "webmesh-cni-node", 107 Description: "The webmesh-cni node component.", 108 Prefixes: []string{ 109 "manager", 110 "host", 111 "host.auth", 112 "host.network", 113 "host.services", 114 "host.wireguard", 115 "host.plugins", 116 "storage", 117 }, 118 Flagset: usage, 119 }) 120 121 // Parse flags and setup logging. 122 err := fs.Parse(os.Args[1:]) 123 if err != nil { 124 if errors.Is(err, pflag.ErrHelp) { 125 os.Exit(0) 126 } 127 fmt.Println("ERROR: Failed to parse flags:", err) 128 os.Exit(1) 129 } 130 ctrl.SetLogger(zap.New(zap.UseFlagOptions(&zapopts))) 131 132 // Load the configuration from flags and configmap 133 k := koanf.New(".") 134 if configMap != "" { 135 provider := config.NewConfigMapProvider(ctrl.GetConfigOrDie(), client.ObjectKey{ 136 Name: configMap, 137 Namespace: configMapNamespace, 138 }) 139 err := k.Load(provider, json.Parser()) 140 if err != nil { 141 log.Error(err, "Failed to load configuration from configmap") 142 os.Exit(1) 143 } 144 } 145 err = k.Load(posflag.Provider(fs, ".", k), nil) 146 if err != nil { 147 log.Error(err, "Failed to load configuration from flags") 148 os.Exit(1) 149 } 150 err = k.Unmarshal("", &cniopts) 151 if err != nil { 152 log.Error(err, "Failed to unmarshal configuration") 153 os.Exit(1) 154 } 155 156 // Validate the configuration. 157 err = cniopts.Validate() 158 if err != nil { 159 log.Error(err, "Invalid CNI configuration") 160 os.Exit(1) 161 } 162 163 log.Info("Starting webmesh-cni node", "version", build) 164 165 // Create the manager. 166 ctx := ctrl.SetupSignalHandler() 167 ctx = ctrllog.IntoContext(ctx, log) 168 mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ 169 Scheme: scheme, 170 Metrics: metricsserver.Options{ 171 BindAddress: cniopts.Manager.MetricsAddress, 172 }, 173 HealthProbeBindAddress: cniopts.Manager.ProbeAddress, 174 GracefulShutdownTimeout: &cniopts.Manager.ShutdownTimeout, 175 Controller: ctrlconfig.Controller{ 176 MaxConcurrentReconciles: cniopts.Manager.MaxConcurrentReconciles, 177 GroupKindConcurrency: map[string]int{ 178 "PeerContainer.cni.webmesh.io": 1, 179 "RemoteNetwork.cni.webmesh.io": 1, 180 }, 181 NeedLeaderElection: &[]bool{false}[0], 182 }, 183 }) 184 if err != nil { 185 log.Error(err, "Failed to create controller manager") 186 os.Exit(1) 187 } 188 189 // Create the storage provider. 190 storageOpts := storageprovider.Options{ 191 NodeID: cniopts.Host.NodeID, 192 Namespace: cniopts.Host.Namespace, 193 ListenPort: int(cniopts.Host.Services.API.ListenPort()), 194 LeaderElectionLeaseDuration: cniopts.Storage.LeaderElectLeaseDuration, 195 LeaderElectionRenewDeadline: cniopts.Storage.LeaderElectRenewDeadline, 196 LeaderElectionRetryPeriod: cniopts.Storage.LeaderElectRetryPeriod, 197 ShutdownTimeout: cniopts.Manager.ShutdownTimeout, 198 } 199 log.V(1).Info("Creating webmesh storage provider", "options", storageOpts) 200 storageProvider, err := storageprovider.NewWithManager(mgr, storageOpts) 201 if err != nil { 202 log.Error(err, "Failed to create webmesh storage provider") 203 os.Exit(1) 204 } 205 206 // Setup the host node. 207 var metaaddr netip.AddrPort 208 if cniopts.Manager.EnableMetadataServer { 209 // Append the metadata server to the allowed routes. 210 metaaddr, err = netip.ParseAddrPort(cniopts.Manager.MetadataAddress) 211 if err != nil { 212 log.Error(err, "Failed to parse metadata address") 213 os.Exit(1) 214 } 215 metaaddrPreifx := netip.PrefixFrom(metaaddr.Addr(), 32) 216 cniopts.Host.Network.Routes = append(cniopts.Host.Network.Routes, metaaddrPreifx.String()) 217 } 218 hostnode := host.NewNode(storageProvider, cniopts.Host) 219 220 // Register the main peer container controller. 221 log.V(1).Info("Registering peer container controller") 222 containerReconciler := &controllers.PeerContainerReconciler{ 223 Client: mgr.GetClient(), 224 Host: hostnode, 225 Provider: storageProvider, 226 Config: cniopts, 227 } 228 if err = containerReconciler.SetupWithManager(mgr); err != nil { 229 log.Error(err, "Failed to setup container reconciler with manager", "controller", "PeerContainer") 230 os.Exit(1) 231 } 232 // Register a node reconciler to make sure edges exist across the cluster. 233 log.V(1).Info("Registering node controller") 234 nodeReconciler := &controllers.NodeReconciler{ 235 Client: mgr.GetClient(), 236 Host: hostnode, 237 Provider: storageProvider, 238 } 239 if err = nodeReconciler.SetupWithManager(mgr); err != nil { 240 log.Error(err, "Failed to setup node reconciler with manager", "controller", "Node") 241 os.Exit(1) 242 } 243 // Register a pod reconciler to check for containers that can broadcast features 244 // to the outside world. 245 log.V(1).Info("Registering pod controller") 246 podRecondiler := &controllers.PodReconciler{ 247 Client: mgr.GetClient(), 248 Host: hostnode, 249 Provider: storageProvider, 250 DNSSelector: cniopts.Manager.ClusterDNSSelector, 251 DNSNamespace: cniopts.Manager.ClusterDNSNamespace, 252 DNSPort: cniopts.Manager.ClusterDNSPortSelector, 253 } 254 if err = podRecondiler.SetupWithManager(mgr); err != nil { 255 log.Error(err, "Failed to setup pod reconciler with manager", "controller", "Node") 256 os.Exit(1) 257 } 258 // Register the remote network reconciler for maintaining bridge connections to 259 // other clusters. 260 log.V(1).Info("Registering remote network controller") 261 remoteNetworkReconciler := &controllers.RemoteNetworkReconciler{ 262 Client: mgr.GetClient(), 263 Config: cniopts, 264 Provider: storageProvider, 265 HostNode: hostnode, 266 } 267 if err = remoteNetworkReconciler.SetupWithManager(mgr); err != nil { 268 log.Error(err, "Failed to setup remote network reconciler with manager", "controller", "RemoteNetwork") 269 os.Exit(1) 270 } 271 272 // Register the health and ready checks. 273 log.V(1).Info("Registering health and ready checks") 274 if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { 275 log.Error(err, "Failed to set up health check") 276 os.Exit(1) 277 } 278 if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { 279 log.Error(err, "Failed to set up ready check") 280 os.Exit(1) 281 } 282 283 donec := make(chan struct{}) 284 go func() { 285 defer close(donec) 286 log.Info("Starting peer container manager") 287 if err := mgr.Start(ctx); err != nil { 288 log.Error(err, "Problem running manager") 289 os.Exit(1) 290 } 291 log.Info("Peer container manager finished") 292 ctx, cancel := context.WithTimeout( 293 ctrllog.IntoContext(context.Background(), log), 294 cniopts.Manager.ShutdownTimeout, 295 ) 296 defer cancel() 297 log.Info("Shutting down managed container nodes") 298 containerReconciler.Shutdown(ctx) 299 }() 300 301 // Start the storage provider in unmanaged mode. 302 log.Info("Starting webmesh storage provider") 303 err = storageProvider.StartUnmanaged(ctx) 304 if err != nil { 305 log.Error(err, "Failed to start webmesh storage provider") 306 os.Exit(1) 307 } 308 309 // Wait for the manager cache to sync and then get ready to handle requests 310 311 log.Info("Waiting for manager cache to sync", "timeout", cniopts.Storage.CacheSyncTimeout) 312 cacheCtx, cancel := context.WithTimeout(ctx, cniopts.Storage.CacheSyncTimeout) 313 if synced := mgr.GetCache().WaitForCacheSync(cacheCtx); !synced { 314 if err := storageProvider.Close(); err != nil { 315 log.Error(err, "Failed to stop storage provider") 316 } 317 cancel() 318 log.Error(err, "Timed out waiting for caches to sync") 319 os.Exit(1) 320 } 321 cancel() 322 log.V(1).Info("Caches synced, bootstrapping network state") 323 324 log.Info("Starting host node for routing traffic") 325 host := containerReconciler.Host 326 err = host.Start(ctx, mgr.GetConfig()) 327 if err != nil { 328 if err := storageProvider.Close(); err != nil { 329 log.Error(err, "Failed to stop storage provider") 330 } 331 cancel() 332 log.Error(err, "Failed to start host node") 333 os.Exit(1) 334 } 335 336 log.Info("Webmesh CNI node started") 337 338 // Start any configured services. 339 340 if cniopts.Manager.EnableMetadataServer { 341 // Add the metadata address to the wireguard interface. 342 addr := netip.PrefixFrom(metaaddr.Addr(), 32) 343 err = host.Node().Network().WireGuard().AddAddress(ctx, addr) 344 if err != nil { 345 err := host.Stop(ctx) 346 if err != nil { 347 log.Error(err, "Failed to stop host node") 348 } 349 log.Error(err, "Failed to add metadata address to wireguard interface") 350 os.Exit(1) 351 } 352 metasrv := metadata.NewServer(metadata.Config{ 353 Address: metaaddr, 354 Host: host, 355 Storage: storageProvider, 356 KeyResolver: containerReconciler, 357 EnableIDTokens: cniopts.Manager.EnableMetadataIDTokens, 358 }) 359 go func() { 360 log.Info("Starting metadata server") 361 err := metasrv.ListenAndServe() 362 if err != nil { 363 err := host.Stop(ctx) 364 if err != nil { 365 log.Error(err, "Failed to stop host node") 366 } 367 log.Error(err, "Failed to start metadata server") 368 os.Exit(1) 369 } 370 }() 371 defer func() { 372 if err := metasrv.Shutdown(context.Background()); err != nil { 373 log.Error(err, "Failed to shutdown metadata server") 374 } 375 }() 376 } 377 378 hostCtx := host.NodeContext(context.Background()) 379 if cniopts.Host.Services.MeshDNS.Enabled { 380 // We force subscribe forwarders to true or otherwise it would serve very little purpose. 381 // This makes sure we wind up with CoreDNS as a forwarding server for non-root zones. 382 cniopts.Host.Services.MeshDNS.SubscribeForwarders = true 383 } 384 srvOpts, err := cniopts.Host.Services.NewServiceOptions(hostCtx, host.Node()) 385 if err != nil { 386 err := host.Stop(ctx) 387 if err != nil { 388 log.Error(err, "Failed to stop host node") 389 } 390 log.Error(err, "Failed to create webmesh service options") 391 os.Exit(1) 392 } 393 if cniopts.Host.Services.MeshDNS.Enabled { 394 // Set the DNS server to the remote network controller 395 dnssrv, ok := srvOpts.GetServer(&meshdns.Server{}) 396 if !ok { 397 // Something bizarre happened. 398 err := host.Stop(ctx) 399 if err != nil { 400 log.Error(err, "Failed to stop host node") 401 } 402 log.Error(err, "Failed to get meshdns server") 403 os.Exit(1) 404 } 405 remoteNetworkReconciler.SetDNSServer(dnssrv.(*meshdns.Server)) 406 containerReconciler.SetDNSServer(dnssrv.(*meshdns.Server)) 407 } 408 srv, err := meshservices.NewServer(hostCtx, srvOpts) 409 if err != nil { 410 err := host.Stop(ctx) 411 if err != nil { 412 log.Error(err, "Failed to stop host node") 413 } 414 log.Error(err, "Failed to create webmesh services server") 415 os.Exit(1) 416 } 417 if !cniopts.Host.Services.API.Disabled { 418 err = cniopts.Host.Services.RegisterAPIs(hostCtx, meshconfig.APIRegistrationOptions{ 419 Node: host.Node(), 420 Server: srv, 421 Features: cniopts.Host.Services.NewFeatureSet(storageProvider, srv.GRPCListenPort()), 422 Description: "webmesh-cni", 423 BuildInfo: build, 424 }) 425 if err != nil { 426 err := host.Stop(ctx) 427 if err != nil { 428 log.Error(err, "Failed to stop host node") 429 } 430 log.Error(err, "Failed to register webmesh services APIs") 431 os.Exit(1) 432 } 433 } 434 go func() { 435 log.Info("Starting webmesh services") 436 err := srv.ListenAndServe() 437 if err != nil { 438 err := host.Stop(ctx) 439 if err != nil { 440 log.Error(err, "Failed to stop host node") 441 } 442 log.Error(err, "Failed to start webmesh services server") 443 os.Exit(1) 444 } 445 }() 446 447 // Wait for the manager to exit. 448 <-ctx.Done() 449 450 log.Info("Shutting down webmesh node and services") 451 shutdownCtx, cancel := context.WithTimeout( 452 ctrllog.IntoContext(context.Background(), log), 453 cniopts.Manager.ShutdownTimeout, 454 ) 455 defer cancel() 456 err = host.Stop(shutdownCtx) 457 if err != nil { 458 log.Error(err, "Failed to stop host node") 459 } 460 srv.Shutdown(hostCtx) 461 462 // Wait for the manager to exit. 463 select { 464 case <-donec: 465 log.Info("Finished running manager") 466 case <-time.After(cniopts.Manager.ShutdownTimeout): 467 log.Info("Shutdown timeout reached, exiting") 468 } 469 }