github.com/cilium/cilium@v1.16.2/pkg/envoy/embedded_envoy.go (about) 1 // SPDX-License-Identifier: Apache-2.0 2 // Copyright Authors of Cilium 3 4 package envoy 5 6 import ( 7 "bufio" 8 "errors" 9 "fmt" 10 "io" 11 "os" 12 "os/exec" 13 "path/filepath" 14 "strconv" 15 "strings" 16 17 "github.com/cilium/lumberjack/v2" 18 cilium "github.com/cilium/proxy/go/cilium/api" 19 envoy_config_bootstrap "github.com/cilium/proxy/go/envoy/config/bootstrap/v3" 20 envoy_config_cluster "github.com/cilium/proxy/go/envoy/config/cluster/v3" 21 envoy_config_core "github.com/cilium/proxy/go/envoy/config/core/v3" 22 envoy_config_endpoint "github.com/cilium/proxy/go/envoy/config/endpoint/v3" 23 envoy_extensions_bootstrap_internal_listener_v3 "github.com/cilium/proxy/go/envoy/extensions/bootstrap/internal_listener/v3" 24 envoy_config_upstream "github.com/cilium/proxy/go/envoy/extensions/upstreams/http/v3" 25 "github.com/sirupsen/logrus" 26 "google.golang.org/protobuf/proto" 27 "google.golang.org/protobuf/types/known/anypb" 28 "google.golang.org/protobuf/types/known/durationpb" 29 "google.golang.org/protobuf/types/known/structpb" 30 "google.golang.org/protobuf/types/known/wrapperspb" 31 32 "github.com/cilium/cilium/pkg/flowdebug" 33 "github.com/cilium/cilium/pkg/logging" 34 "github.com/cilium/cilium/pkg/logging/logfields" 35 "github.com/cilium/cilium/pkg/metrics" 36 "github.com/cilium/cilium/pkg/time" 37 ) 38 39 var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "envoy-manager") 40 41 var ( 42 // envoyLevelMap maps logrus.Level values to Envoy (spdlog) log levels. 43 envoyLevelMap = map[logrus.Level]string{ 44 logrus.PanicLevel: "off", 45 logrus.FatalLevel: "critical", 46 logrus.ErrorLevel: "error", 47 logrus.WarnLevel: "warning", 48 logrus.InfoLevel: "info", 49 logrus.DebugLevel: "debug", 50 // spdlog "trace" not mapped 51 } 52 53 tracing = false 54 ) 55 56 const ( 57 ciliumEnvoyStarter = "cilium-envoy-starter" 58 ciliumEnvoy = "cilium-envoy" 59 ) 60 61 // EnableTracing changes Envoy log level to "trace", producing the most logs. 62 func EnableTracing() { 63 tracing = true 64 } 65 66 func mapLogLevel(level logrus.Level) string { 67 // Set Envoy loglevel to trace if debug AND verbose Engoy logging is enabled 68 if level == logrus.DebugLevel && tracing { 69 return "trace" 70 } 71 72 // Suppress the debug level if not debugging at flow level. 73 if level == logrus.DebugLevel && !flowdebug.Enabled() { 74 level = logrus.InfoLevel 75 } 76 77 return envoyLevelMap[level] 78 } 79 80 // Envoy manages a running Envoy proxy instance via the 81 // ListenerDiscoveryService and RouteDiscoveryService gRPC APIs. 82 type EmbeddedEnvoy struct { 83 stopCh chan struct{} 84 errCh chan error 85 admin *EnvoyAdminClient 86 } 87 88 type embeddedEnvoyConfig struct { 89 runDir string 90 logPath string 91 baseID uint64 92 keepCapNetBindService bool 93 connectTimeout int64 94 maxRequestsPerConnection uint32 95 maxConnectionDuration time.Duration 96 idleTimeout time.Duration 97 } 98 99 // startEmbeddedEnvoy starts an Envoy proxy instance. 100 func startEmbeddedEnvoy(config embeddedEnvoyConfig) (*EmbeddedEnvoy, error) { 101 envoy := &EmbeddedEnvoy{ 102 stopCh: make(chan struct{}), 103 errCh: make(chan error, 1), 104 admin: NewEnvoyAdminClientForSocket(GetSocketDir(config.runDir)), 105 } 106 107 bootstrapFilePath := filepath.Join(config.runDir, "envoy", "bootstrap.pb") 108 109 writeBootstrapConfigFile(bootstrapConfig{ 110 filePath: bootstrapFilePath, 111 nodeId: "host~127.0.0.1~no-id~localdomain", // node id format inherited from Istio 112 cluster: ingressClusterName, 113 adminPath: getAdminSocketPath(GetSocketDir(config.runDir)), 114 xdsSock: getXDSSocketPath(GetSocketDir(config.runDir)), 115 egressClusterName: egressClusterName, 116 ingressClusterName: ingressClusterName, 117 connectTimeout: config.connectTimeout, 118 maxRequestsPerConnection: config.maxRequestsPerConnection, 119 maxConnectionDuration: config.maxConnectionDuration, 120 idleTimeout: config.idleTimeout, 121 }) 122 123 log.Debugf("Envoy: Starting: %v", *envoy) 124 125 // make it a buffered channel, so we can not only 126 // read the written value but also skip it in 127 // case no one reader reads it. 128 started := make(chan bool, 1) 129 go func() { 130 var logWriter io.WriteCloser 131 var logFormat string 132 if config.logPath != "" { 133 // Use the Envoy default log format when logging to a separate file 134 logFormat = "[%Y-%m-%d %T.%e][%t][%l][%n] %v" 135 logger := &lumberjack.Logger{ 136 Filename: config.logPath, 137 MaxSize: 100, // megabytes 138 MaxBackups: 3, 139 MaxAge: 28, // days 140 Compress: true, // disabled by default 141 } 142 logWriter = logger 143 } else { 144 // Use log format that looks like Cilium logs when integrating logs 145 // The logs will be reported as coming from the cilium-agent, so 146 // we add the thread id to be able to differentiate between Envoy's 147 // main and worker threads. 148 logFormat = "%t|%l|%n|%v" 149 150 // Create a piper that parses and writes into logrus the log 151 // messages from Envoy. 152 logWriter = newEnvoyLogPiper() 153 } 154 defer logWriter.Close() 155 156 envoyArgs := []string{"-l", mapLogLevel(logging.GetLevel(logging.DefaultLogger)), "-c", bootstrapFilePath, "--base-id", strconv.FormatUint(config.baseID, 10), "--log-format", logFormat} 157 envoyStarterArgs := []string{} 158 if config.keepCapNetBindService { 159 envoyStarterArgs = append(envoyStarterArgs, "--keep-cap-net-bind-service", "--") 160 } 161 envoyStarterArgs = append(envoyStarterArgs, envoyArgs...) 162 163 for { 164 cmd := exec.Command(ciliumEnvoyStarter, envoyStarterArgs...) 165 cmd.Stderr = logWriter 166 cmd.Stdout = logWriter 167 168 if err := cmd.Start(); err != nil { 169 log.WithError(err).Warn("Envoy: Failed to start proxy") 170 select { 171 case started <- false: 172 default: 173 } 174 return 175 } 176 log.Debugf("Envoy: Started proxy") 177 select { 178 case started <- true: 179 default: 180 } 181 182 log.Infof("Envoy: Proxy started with pid %d", cmd.Process.Pid) 183 metrics.SubprocessStart.WithLabelValues(ciliumEnvoyStarter).Inc() 184 185 // We do not return after a successful start, but watch the Envoy process 186 // and restart it if it crashes. 187 // Waiting for the process execution is done in the goroutime. 188 // The purpose of the "crash channel" is to inform the loop about their 189 // Envoy process crash - after closing that channel by the goroutime, 190 // the loop continues, the channel is recreated and the new process 191 // is watched again. 192 crashCh := make(chan struct{}) 193 go func() { 194 if err := cmd.Wait(); err != nil { 195 log.WithError(err).Warn("Envoy: Proxy crashed") 196 // Avoid busy loop & hogging CPU resources by waiting before restarting envoy. 197 time.Sleep(100 * time.Millisecond) 198 } 199 close(crashCh) 200 }() 201 202 select { 203 case <-crashCh: 204 // Start Envoy again 205 continue 206 case <-envoy.stopCh: 207 log.Infof("Envoy: Stopping proxy with pid %d", cmd.Process.Pid) 208 if err := envoy.admin.quit(); err != nil { 209 log.WithError(err).Fatalf("Envoy: Envoy admin quit failed, killing process with pid %d", cmd.Process.Pid) 210 211 if err := cmd.Process.Kill(); err != nil { 212 log.WithError(err).Fatal("Envoy: Stopping Envoy failed") 213 envoy.errCh <- err 214 } 215 } 216 close(envoy.errCh) 217 return 218 } 219 } 220 }() 221 222 if <-started { 223 return envoy, nil 224 } 225 226 return nil, errors.New("failed to start embedded Envoy server") 227 } 228 229 // newEnvoyLogPiper creates a writer that parses and logs log messages written by Envoy. 230 func newEnvoyLogPiper() io.WriteCloser { 231 reader, writer := io.Pipe() 232 scanner := bufio.NewScanner(reader) 233 scanner.Buffer(nil, 1024*1024) 234 go func() { 235 scopedLog := log.WithFields(logrus.Fields{ 236 logfields.LogSubsys: "unknown", 237 logfields.ThreadID: "unknown", 238 }) 239 level := "debug" 240 241 for scanner.Scan() { 242 line := scanner.Text() 243 var msg string 244 245 parts := strings.SplitN(line, "|", 4) 246 // Parse the line as a log message written by Envoy, assuming it 247 // uses the configured format: "%t|%l|%n|%v". 248 if len(parts) == 4 { 249 threadID := parts[0] 250 level = parts[1] 251 loggerName := parts[2] 252 // TODO: Parse msg to extract the source filename, line number, etc. 253 msg = fmt.Sprintf("[%s", parts[3]) 254 255 scopedLog = log.WithFields(logrus.Fields{ 256 logfields.LogSubsys: fmt.Sprintf("envoy-%s", loggerName), 257 logfields.ThreadID: threadID, 258 }) 259 } else { 260 // If this line can't be parsed, it continues a multi-line log 261 // message. In this case, log it at the same level and with the 262 // same fields as the previous line. 263 msg = line 264 } 265 266 if len(msg) == 0 { 267 continue 268 } 269 270 // Map the Envoy log level to a logrus level. 271 switch level { 272 case "off", "critical", "error": 273 scopedLog.Error(msg) 274 case "warning": 275 scopedLog.Warn(msg) 276 case "info": 277 scopedLog.Info(msg) 278 case "debug", "trace": 279 scopedLog.Debug(msg) 280 default: 281 scopedLog.Debug(msg) 282 } 283 } 284 if err := scanner.Err(); err != nil { 285 log.WithError(err).Error("Error while parsing Envoy logs") 286 } 287 reader.Close() 288 }() 289 return writer 290 } 291 292 // Stop kills the Envoy process started with startEmbeddedEnvoy. The gRPC API streams are terminated 293 // first. 294 func (e *EmbeddedEnvoy) Stop() error { 295 close(e.stopCh) 296 err, ok := <-e.errCh 297 if ok { 298 return err 299 } 300 return nil 301 } 302 303 func (e *EmbeddedEnvoy) GetAdminClient() *EnvoyAdminClient { 304 return e.admin 305 } 306 307 type bootstrapConfig struct { 308 filePath string 309 nodeId string 310 cluster string 311 adminPath string 312 xdsSock string 313 egressClusterName string 314 ingressClusterName string 315 connectTimeout int64 316 maxRequestsPerConnection uint32 317 maxConnectionDuration time.Duration 318 idleTimeout time.Duration 319 } 320 321 func writeBootstrapConfigFile(config bootstrapConfig) { 322 useDownstreamProtocol := map[string]*anypb.Any{ 323 "envoy.extensions.upstreams.http.v3.HttpProtocolOptions": toAny(&envoy_config_upstream.HttpProtocolOptions{ 324 CommonHttpProtocolOptions: &envoy_config_core.HttpProtocolOptions{ 325 IdleTimeout: durationpb.New(config.idleTimeout), 326 MaxRequestsPerConnection: wrapperspb.UInt32(config.maxRequestsPerConnection), 327 MaxConnectionDuration: durationpb.New(config.maxConnectionDuration), 328 }, 329 UpstreamProtocolOptions: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamProtocolConfig{ 330 UseDownstreamProtocolConfig: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamHttpConfig{}, 331 }, 332 }), 333 } 334 335 useDownstreamProtocolAutoSNI := map[string]*anypb.Any{ 336 "envoy.extensions.upstreams.http.v3.HttpProtocolOptions": toAny(&envoy_config_upstream.HttpProtocolOptions{ 337 UpstreamHttpProtocolOptions: &envoy_config_core.UpstreamHttpProtocolOptions{ 338 // Setting AutoSni or AutoSanValidation options here may crash 339 // Envoy, when Cilium Network filter already passes these from 340 // downstream to upstream. 341 }, 342 CommonHttpProtocolOptions: &envoy_config_core.HttpProtocolOptions{ 343 IdleTimeout: durationpb.New(config.idleTimeout), 344 MaxRequestsPerConnection: wrapperspb.UInt32(config.maxRequestsPerConnection), 345 MaxConnectionDuration: durationpb.New(config.maxConnectionDuration), 346 }, 347 UpstreamProtocolOptions: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamProtocolConfig{ 348 UseDownstreamProtocolConfig: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamHttpConfig{}, 349 }, 350 }), 351 } 352 353 http2ProtocolOptions := map[string]*anypb.Any{ 354 "envoy.extensions.upstreams.http.v3.HttpProtocolOptions": toAny(&envoy_config_upstream.HttpProtocolOptions{ 355 UpstreamProtocolOptions: &envoy_config_upstream.HttpProtocolOptions_ExplicitHttpConfig_{ 356 ExplicitHttpConfig: &envoy_config_upstream.HttpProtocolOptions_ExplicitHttpConfig{ 357 ProtocolConfig: &envoy_config_upstream.HttpProtocolOptions_ExplicitHttpConfig_Http2ProtocolOptions{}, 358 }, 359 }, 360 }), 361 } 362 363 bs := &envoy_config_bootstrap.Bootstrap{ 364 Node: &envoy_config_core.Node{Id: config.nodeId, Cluster: config.cluster}, 365 StaticResources: &envoy_config_bootstrap.Bootstrap_StaticResources{ 366 Clusters: []*envoy_config_cluster.Cluster{ 367 { 368 Name: egressClusterName, 369 ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST}, 370 ConnectTimeout: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0}, 371 CleanupInterval: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000}, 372 LbPolicy: envoy_config_cluster.Cluster_CLUSTER_PROVIDED, 373 TypedExtensionProtocolOptions: useDownstreamProtocol, 374 }, 375 { 376 Name: egressTLSClusterName, 377 ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST}, 378 ConnectTimeout: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0}, 379 CleanupInterval: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000}, 380 LbPolicy: envoy_config_cluster.Cluster_CLUSTER_PROVIDED, 381 TypedExtensionProtocolOptions: useDownstreamProtocolAutoSNI, 382 TransportSocket: &envoy_config_core.TransportSocket{ 383 Name: "cilium.tls_wrapper", 384 ConfigType: &envoy_config_core.TransportSocket_TypedConfig{ 385 TypedConfig: toAny(&cilium.UpstreamTlsWrapperContext{}), 386 }, 387 }, 388 }, 389 { 390 Name: ingressClusterName, 391 ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST}, 392 ConnectTimeout: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0}, 393 CleanupInterval: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000}, 394 LbPolicy: envoy_config_cluster.Cluster_CLUSTER_PROVIDED, 395 TypedExtensionProtocolOptions: useDownstreamProtocol, 396 }, 397 { 398 Name: ingressTLSClusterName, 399 ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST}, 400 ConnectTimeout: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0}, 401 CleanupInterval: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000}, 402 LbPolicy: envoy_config_cluster.Cluster_CLUSTER_PROVIDED, 403 TypedExtensionProtocolOptions: useDownstreamProtocolAutoSNI, 404 TransportSocket: &envoy_config_core.TransportSocket{ 405 Name: "cilium.tls_wrapper", 406 ConfigType: &envoy_config_core.TransportSocket_TypedConfig{ 407 TypedConfig: toAny(&cilium.UpstreamTlsWrapperContext{}), 408 }, 409 }, 410 }, 411 { 412 Name: CiliumXDSClusterName, 413 ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_STATIC}, 414 ConnectTimeout: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0}, 415 LbPolicy: envoy_config_cluster.Cluster_ROUND_ROBIN, 416 LoadAssignment: &envoy_config_endpoint.ClusterLoadAssignment{ 417 ClusterName: CiliumXDSClusterName, 418 Endpoints: []*envoy_config_endpoint.LocalityLbEndpoints{{ 419 LbEndpoints: []*envoy_config_endpoint.LbEndpoint{{ 420 HostIdentifier: &envoy_config_endpoint.LbEndpoint_Endpoint{ 421 Endpoint: &envoy_config_endpoint.Endpoint{ 422 Address: &envoy_config_core.Address{ 423 Address: &envoy_config_core.Address_Pipe{ 424 Pipe: &envoy_config_core.Pipe{Path: config.xdsSock}, 425 }, 426 }, 427 }, 428 }, 429 }}, 430 }}, 431 }, 432 TypedExtensionProtocolOptions: http2ProtocolOptions, 433 }, 434 { 435 Name: adminClusterName, 436 ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_STATIC}, 437 ConnectTimeout: &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0}, 438 LbPolicy: envoy_config_cluster.Cluster_ROUND_ROBIN, 439 LoadAssignment: &envoy_config_endpoint.ClusterLoadAssignment{ 440 ClusterName: adminClusterName, 441 Endpoints: []*envoy_config_endpoint.LocalityLbEndpoints{{ 442 LbEndpoints: []*envoy_config_endpoint.LbEndpoint{{ 443 HostIdentifier: &envoy_config_endpoint.LbEndpoint_Endpoint{ 444 Endpoint: &envoy_config_endpoint.Endpoint{ 445 Address: &envoy_config_core.Address{ 446 Address: &envoy_config_core.Address_Pipe{ 447 Pipe: &envoy_config_core.Pipe{Path: config.adminPath}, 448 }, 449 }, 450 }, 451 }, 452 }}, 453 }}, 454 }, 455 }, 456 }, 457 }, 458 DynamicResources: &envoy_config_bootstrap.Bootstrap_DynamicResources{ 459 LdsConfig: CiliumXDSConfigSource, 460 CdsConfig: CiliumXDSConfigSource, 461 }, 462 Admin: &envoy_config_bootstrap.Admin{ 463 Address: &envoy_config_core.Address{ 464 Address: &envoy_config_core.Address_Pipe{ 465 Pipe: &envoy_config_core.Pipe{Path: config.adminPath}, 466 }, 467 }, 468 }, 469 BootstrapExtensions: []*envoy_config_core.TypedExtensionConfig{ 470 { 471 Name: "envoy.bootstrap.internal_listener", 472 TypedConfig: toAny(&envoy_extensions_bootstrap_internal_listener_v3.InternalListener{}), 473 }, 474 }, 475 LayeredRuntime: &envoy_config_bootstrap.LayeredRuntime{ 476 Layers: []*envoy_config_bootstrap.RuntimeLayer{ 477 { 478 Name: "static_layer_0", 479 LayerSpecifier: &envoy_config_bootstrap.RuntimeLayer_StaticLayer{ 480 StaticLayer: &structpb.Struct{Fields: map[string]*structpb.Value{ 481 "overload": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{Fields: map[string]*structpb.Value{ 482 "global_downstream_max_connections": {Kind: &structpb.Value_NumberValue{NumberValue: 50000}}, 483 }}}}, 484 }}, 485 }, 486 }, 487 }, 488 }, 489 } 490 491 log.Debugf("Envoy: Bootstrap: %s", bs) 492 data, err := proto.Marshal(bs) 493 if err != nil { 494 log.WithError(err).Fatal("Envoy: Error marshaling Envoy bootstrap") 495 } 496 err = os.WriteFile(config.filePath, data, 0644) 497 if err != nil { 498 log.WithError(err).Fatal("Envoy: Error writing Envoy bootstrap file") 499 } 500 } 501 502 // getEmbeddedEnvoyVersion returns the envoy binary version string 503 func getEmbeddedEnvoyVersion() (string, error) { 504 out, err := exec.Command(ciliumEnvoy, "--version").Output() 505 if err != nil { 506 return "", fmt.Errorf("failed to execute '%s --version': %w", ciliumEnvoy, err) 507 } 508 envoyVersionString := strings.TrimSpace(string(out)) 509 510 envoyVersionArray := strings.Fields(envoyVersionString) 511 if len(envoyVersionArray) < 3 { 512 return "", fmt.Errorf("failed to extract version from truncated Envoy version string") 513 } 514 515 return envoyVersionArray[2], nil 516 }