
     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     4  package envoy
     6  import (
     7  	"bufio"
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"os"
    12  	"os/exec"
    13  	"path/filepath"
    14  	"strconv"
    15  	"strings"
    17  	""
    18  	cilium ""
    19  	envoy_config_bootstrap ""
    20  	envoy_config_cluster ""
    21  	envoy_config_core ""
    22  	envoy_config_endpoint ""
    23  	envoy_extensions_bootstrap_internal_listener_v3 ""
    24  	envoy_config_upstream ""
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  	""
    32  	""
    33  	""
    34  	""
    35  	""
    36  	""
    37  )
// log is the package-level logger; every entry it emits carries the
// "envoy-manager" value in the log subsystem field.
var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "envoy-manager")
var (
	// envoyLevelMap maps logrus.Level values to Envoy (spdlog) log levels.
	// Levels without an entry here resolve to the empty string when looked up.
	envoyLevelMap = map[logrus.Level]string{
		logrus.PanicLevel: "off",
		logrus.FatalLevel: "critical",
		logrus.ErrorLevel: "error",
		logrus.WarnLevel:  "warning",
		logrus.InfoLevel:  "info",
		logrus.DebugLevel: "debug",
		// spdlog "trace" not mapped
	}

	// tracing, when true, makes mapLogLevel translate the debug level to
	// Envoy's "trace" level. It is switched on by EnableTracing.
	tracing = false
)
const (
	// ciliumEnvoyStarter is the executable that startEmbeddedEnvoy runs to
	// launch the proxy; the Envoy arguments are appended to its command line.
	ciliumEnvoyStarter = "cilium-envoy-starter"
	// ciliumEnvoy is the executable that getEmbeddedEnvoyVersion runs with
	// "--version".
	ciliumEnvoy = "cilium-envoy"
)
// EnableTracing sets the package-level tracing flag, which makes mapLogLevel
// translate the debug log level to Envoy's "trace" level, producing the most
// logs. In this file the level is read when startEmbeddedEnvoy builds the
// Envoy command line, so the flag only affects command lines built afterwards.
func EnableTracing() {
	tracing = true
}
    66  func mapLogLevel(level logrus.Level) string {
    67  	// Set Envoy loglevel to trace if debug AND verbose Engoy logging is enabled
    68  	if level == logrus.DebugLevel && tracing {
    69  		return "trace"
    70  	}
    72  	// Suppress the debug level if not debugging at flow level.
    73  	if level == logrus.DebugLevel && !flowdebug.Enabled() {
    74  		level = logrus.InfoLevel
    75  	}
    77  	return envoyLevelMap[level]
    78  }
// EmbeddedEnvoy is the handle returned by startEmbeddedEnvoy for an Envoy
// proxy process that is supervised by a background goroutine.
type EmbeddedEnvoy struct {
	// stopCh is closed by Stop to ask the supervising goroutine to shut the
	// proxy down.
	stopCh chan struct{}
	// errCh is the channel Stop receives from, after closing stopCh, to learn
	// the outcome of the shutdown.
	errCh chan error
	// admin is the client for the Envoy admin socket; it is used to ask Envoy
	// to quit and is exposed through GetAdminClient.
	admin *EnvoyAdminClient
}
// embeddedEnvoyConfig collects the settings startEmbeddedEnvoy needs to write
// the bootstrap file and launch the Envoy process.
type embeddedEnvoyConfig struct {
	// runDir is the base directory: the bootstrap file is written to
	// <runDir>/envoy/bootstrap.pb and the socket directory is derived from it.
	runDir string
	// logPath, when non-empty, is the file Envoy's output is written to (with
	// rotation). When empty, the output is parsed and forwarded to the agent log.
	logPath string
	// baseID is passed to Envoy as the "--base-id" argument.
	baseID uint64
	// keepCapNetBindService adds "--keep-cap-net-bind-service" to the starter
	// command line.
	keepCapNetBindService bool
	// connectTimeout is used as the Seconds part of the cluster connect
	// timeouts in the bootstrap configuration.
	connectTimeout int64
	// maxRequestsPerConnection, maxConnectionDuration and idleTimeout are
	// copied into the common HTTP protocol options of the bootstrap clusters.
	maxRequestsPerConnection uint32
	maxConnectionDuration    time.Duration
	idleTimeout              time.Duration
}
    99  // startEmbeddedEnvoy starts an Envoy proxy instance.
   100  func startEmbeddedEnvoy(config embeddedEnvoyConfig) (*EmbeddedEnvoy, error) {
   101  	envoy := &EmbeddedEnvoy{
   102  		stopCh: make(chan struct{}),
   103  		errCh:  make(chan error, 1),
   104  		admin:  NewEnvoyAdminClientForSocket(GetSocketDir(config.runDir)),
   105  	}
   107  	bootstrapFilePath := filepath.Join(config.runDir, "envoy", "bootstrap.pb")
   109  	writeBootstrapConfigFile(bootstrapConfig{
   110  		filePath:                 bootstrapFilePath,
   111  		nodeId:                   "host~", // node id format inherited from Istio
   112  		cluster:                  ingressClusterName,
   113  		adminPath:                getAdminSocketPath(GetSocketDir(config.runDir)),
   114  		xdsSock:                  getXDSSocketPath(GetSocketDir(config.runDir)),
   115  		egressClusterName:        egressClusterName,
   116  		ingressClusterName:       ingressClusterName,
   117  		connectTimeout:           config.connectTimeout,
   118  		maxRequestsPerConnection: config.maxRequestsPerConnection,
   119  		maxConnectionDuration:    config.maxConnectionDuration,
   120  		idleTimeout:              config.idleTimeout,
   121  	})
   123  	log.Debugf("Envoy: Starting: %v", *envoy)
   125  	// make it a buffered channel, so we can not only
   126  	// read the written value but also skip it in
   127  	// case no one reader reads it.
   128  	started := make(chan bool, 1)
   129  	go func() {
   130  		var logWriter io.WriteCloser
   131  		var logFormat string
   132  		if config.logPath != "" {
   133  			// Use the Envoy default log format when logging to a separate file
   134  			logFormat = "[%Y-%m-%d %T.%e][%t][%l][%n] %v"
   135  			logger := &lumberjack.Logger{
   136  				Filename:   config.logPath,
   137  				MaxSize:    100, // megabytes
   138  				MaxBackups: 3,
   139  				MaxAge:     28,   // days
   140  				Compress:   true, // disabled by default
   141  			}
   142  			logWriter = logger
   143  		} else {
   144  			// Use log format that looks like Cilium logs when integrating logs
   145  			// The logs will be reported as coming from the cilium-agent, so
   146  			// we add the thread id to be able to differentiate between Envoy's
   147  			// main and worker threads.
   148  			logFormat = "%t|%l|%n|%v"
   150  			// Create a piper that parses and writes into logrus the log
   151  			// messages from Envoy.
   152  			logWriter = newEnvoyLogPiper()
   153  		}
   154  		defer logWriter.Close()
   156  		envoyArgs := []string{"-l", mapLogLevel(logging.GetLevel(logging.DefaultLogger)), "-c", bootstrapFilePath, "--base-id", strconv.FormatUint(config.baseID, 10), "--log-format", logFormat}
   157  		envoyStarterArgs := []string{}
   158  		if config.keepCapNetBindService {
   159  			envoyStarterArgs = append(envoyStarterArgs, "--keep-cap-net-bind-service", "--")
   160  		}
   161  		envoyStarterArgs = append(envoyStarterArgs, envoyArgs...)
   163  		for {
   164  			cmd := exec.Command(ciliumEnvoyStarter, envoyStarterArgs...)
   165  			cmd.Stderr = logWriter
   166  			cmd.Stdout = logWriter
   168  			if err := cmd.Start(); err != nil {
   169  				log.WithError(err).Warn("Envoy: Failed to start proxy")
   170  				select {
   171  				case started <- false:
   172  				default:
   173  				}
   174  				return
   175  			}
   176  			log.Debugf("Envoy: Started proxy")
   177  			select {
   178  			case started <- true:
   179  			default:
   180  			}
   182  			log.Infof("Envoy: Proxy started with pid %d", cmd.Process.Pid)
   183  			metrics.SubprocessStart.WithLabelValues(ciliumEnvoyStarter).Inc()
   185  			// We do not return after a successful start, but watch the Envoy process
   186  			// and restart it if it crashes.
   187  			// Waiting for the process execution is done in the goroutime.
   188  			// The purpose of the "crash channel" is to inform the loop about their
   189  			// Envoy process crash - after closing that channel by the goroutime,
   190  			// the loop continues, the channel is recreated and the new process
   191  			// is watched again.
   192  			crashCh := make(chan struct{})
   193  			go func() {
   194  				if err := cmd.Wait(); err != nil {
   195  					log.WithError(err).Warn("Envoy: Proxy crashed")
   196  					// Avoid busy loop & hogging CPU resources by waiting before restarting envoy.
   197  					time.Sleep(100 * time.Millisecond)
   198  				}
   199  				close(crashCh)
   200  			}()
   202  			select {
   203  			case <-crashCh:
   204  				// Start Envoy again
   205  				continue
   206  			case <-envoy.stopCh:
   207  				log.Infof("Envoy: Stopping proxy with pid %d", cmd.Process.Pid)
   208  				if err := envoy.admin.quit(); err != nil {
   209  					log.WithError(err).Fatalf("Envoy: Envoy admin quit failed, killing process with pid %d", cmd.Process.Pid)
   211  					if err := cmd.Process.Kill(); err != nil {
   212  						log.WithError(err).Fatal("Envoy: Stopping Envoy failed")
   213  						envoy.errCh <- err
   214  					}
   215  				}
   216  				close(envoy.errCh)
   217  				return
   218  			}
   219  		}
   220  	}()
   222  	if <-started {
   223  		return envoy, nil
   224  	}
   226  	return nil, errors.New("failed to start embedded Envoy server")
   227  }
   229  // newEnvoyLogPiper creates a writer that parses and logs log messages written by Envoy.
   230  func newEnvoyLogPiper() io.WriteCloser {
   231  	reader, writer := io.Pipe()
   232  	scanner := bufio.NewScanner(reader)
   233  	scanner.Buffer(nil, 1024*1024)
   234  	go func() {
   235  		scopedLog := log.WithFields(logrus.Fields{
   236  			logfields.LogSubsys: "unknown",
   237  			logfields.ThreadID:  "unknown",
   238  		})
   239  		level := "debug"
   241  		for scanner.Scan() {
   242  			line := scanner.Text()
   243  			var msg string
   245  			parts := strings.SplitN(line, "|", 4)
   246  			// Parse the line as a log message written by Envoy, assuming it
   247  			// uses the configured format: "%t|%l|%n|%v".
   248  			if len(parts) == 4 {
   249  				threadID := parts[0]
   250  				level = parts[1]
   251  				loggerName := parts[2]
   252  				// TODO: Parse msg to extract the source filename, line number, etc.
   253  				msg = fmt.Sprintf("[%s", parts[3])
   255  				scopedLog = log.WithFields(logrus.Fields{
   256  					logfields.LogSubsys: fmt.Sprintf("envoy-%s", loggerName),
   257  					logfields.ThreadID:  threadID,
   258  				})
   259  			} else {
   260  				// If this line can't be parsed, it continues a multi-line log
   261  				// message. In this case, log it at the same level and with the
   262  				// same fields as the previous line.
   263  				msg = line
   264  			}
   266  			if len(msg) == 0 {
   267  				continue
   268  			}
   270  			// Map the Envoy log level to a logrus level.
   271  			switch level {
   272  			case "off", "critical", "error":
   273  				scopedLog.Error(msg)
   274  			case "warning":
   275  				scopedLog.Warn(msg)
   276  			case "info":
   277  				scopedLog.Info(msg)
   278  			case "debug", "trace":
   279  				scopedLog.Debug(msg)
   280  			default:
   281  				scopedLog.Debug(msg)
   282  			}
   283  		}
   284  		if err := scanner.Err(); err != nil {
   285  			log.WithError(err).Error("Error while parsing Envoy logs")
   286  		}
   287  		reader.Close()
   288  	}()
   289  	return writer
   290  }
   292  // Stop kills the Envoy process started with startEmbeddedEnvoy. The gRPC API streams are terminated
   293  // first.
   294  func (e *EmbeddedEnvoy) Stop() error {
   295  	close(e.stopCh)
   296  	err, ok := <-e.errCh
   297  	if ok {
   298  		return err
   299  	}
   300  	return nil
   301  }
// GetAdminClient returns the admin client that was created for this Envoy
// instance when it was started.
func (e *EmbeddedEnvoy) GetAdminClient() *EnvoyAdminClient {
	return e.admin
}
// bootstrapConfig holds the values writeBootstrapConfigFile uses to build and
// store the Envoy bootstrap configuration.
type bootstrapConfig struct {
	// filePath is where the serialized bootstrap configuration is written.
	filePath string
	// nodeId and cluster populate the Node section of the bootstrap.
	nodeId  string
	cluster string
	// adminPath is the pipe path used for both the admin listener and the
	// static admin cluster endpoint.
	adminPath string
	// xdsSock is the pipe path of the static xDS cluster endpoint.
	xdsSock string
	// NOTE(review): writeBootstrapConfigFile in this file names the clusters
	// from the package-level egressClusterName/ingressClusterName identifiers
	// and does not read these two fields — confirm whether they are still needed.
	egressClusterName  string
	ingressClusterName string
	// connectTimeout is used as the Seconds part of every cluster's connect
	// timeout (and, plus half a second, of the cleanup interval).
	connectTimeout int64
	// maxRequestsPerConnection, maxConnectionDuration and idleTimeout are
	// copied into the common HTTP protocol options of the original-destination
	// clusters.
	maxRequestsPerConnection uint32
	maxConnectionDuration    time.Duration
	idleTimeout              time.Duration
}
   321  func writeBootstrapConfigFile(config bootstrapConfig) {
   322  	useDownstreamProtocol := map[string]*anypb.Any{
   323  		"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": toAny(&envoy_config_upstream.HttpProtocolOptions{
   324  			CommonHttpProtocolOptions: &envoy_config_core.HttpProtocolOptions{
   325  				IdleTimeout:              durationpb.New(config.idleTimeout),
   326  				MaxRequestsPerConnection: wrapperspb.UInt32(config.maxRequestsPerConnection),
   327  				MaxConnectionDuration:    durationpb.New(config.maxConnectionDuration),
   328  			},
   329  			UpstreamProtocolOptions: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamProtocolConfig{
   330  				UseDownstreamProtocolConfig: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamHttpConfig{},
   331  			},
   332  		}),
   333  	}
   335  	useDownstreamProtocolAutoSNI := map[string]*anypb.Any{
   336  		"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": toAny(&envoy_config_upstream.HttpProtocolOptions{
   337  			UpstreamHttpProtocolOptions: &envoy_config_core.UpstreamHttpProtocolOptions{
   338  				//	Setting AutoSni or AutoSanValidation options here may crash
   339  				//	Envoy, when Cilium Network filter already passes these from
   340  				//	downstream to upstream.
   341  			},
   342  			CommonHttpProtocolOptions: &envoy_config_core.HttpProtocolOptions{
   343  				IdleTimeout:              durationpb.New(config.idleTimeout),
   344  				MaxRequestsPerConnection: wrapperspb.UInt32(config.maxRequestsPerConnection),
   345  				MaxConnectionDuration:    durationpb.New(config.maxConnectionDuration),
   346  			},
   347  			UpstreamProtocolOptions: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamProtocolConfig{
   348  				UseDownstreamProtocolConfig: &envoy_config_upstream.HttpProtocolOptions_UseDownstreamHttpConfig{},
   349  			},
   350  		}),
   351  	}
   353  	http2ProtocolOptions := map[string]*anypb.Any{
   354  		"envoy.extensions.upstreams.http.v3.HttpProtocolOptions": toAny(&envoy_config_upstream.HttpProtocolOptions{
   355  			UpstreamProtocolOptions: &envoy_config_upstream.HttpProtocolOptions_ExplicitHttpConfig_{
   356  				ExplicitHttpConfig: &envoy_config_upstream.HttpProtocolOptions_ExplicitHttpConfig{
   357  					ProtocolConfig: &envoy_config_upstream.HttpProtocolOptions_ExplicitHttpConfig_Http2ProtocolOptions{},
   358  				},
   359  			},
   360  		}),
   361  	}
   363  	bs := &envoy_config_bootstrap.Bootstrap{
   364  		Node: &envoy_config_core.Node{Id: config.nodeId, Cluster: config.cluster},
   365  		StaticResources: &envoy_config_bootstrap.Bootstrap_StaticResources{
   366  			Clusters: []*envoy_config_cluster.Cluster{
   367  				{
   368  					Name:                          egressClusterName,
   369  					ClusterDiscoveryType:          &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST},
   370  					ConnectTimeout:                &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0},
   371  					CleanupInterval:               &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000},
   372  					LbPolicy:                      envoy_config_cluster.Cluster_CLUSTER_PROVIDED,
   373  					TypedExtensionProtocolOptions: useDownstreamProtocol,
   374  				},
   375  				{
   376  					Name:                          egressTLSClusterName,
   377  					ClusterDiscoveryType:          &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST},
   378  					ConnectTimeout:                &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0},
   379  					CleanupInterval:               &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000},
   380  					LbPolicy:                      envoy_config_cluster.Cluster_CLUSTER_PROVIDED,
   381  					TypedExtensionProtocolOptions: useDownstreamProtocolAutoSNI,
   382  					TransportSocket: &envoy_config_core.TransportSocket{
   383  						Name: "cilium.tls_wrapper",
   384  						ConfigType: &envoy_config_core.TransportSocket_TypedConfig{
   385  							TypedConfig: toAny(&cilium.UpstreamTlsWrapperContext{}),
   386  						},
   387  					},
   388  				},
   389  				{
   390  					Name:                          ingressClusterName,
   391  					ClusterDiscoveryType:          &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST},
   392  					ConnectTimeout:                &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0},
   393  					CleanupInterval:               &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000},
   394  					LbPolicy:                      envoy_config_cluster.Cluster_CLUSTER_PROVIDED,
   395  					TypedExtensionProtocolOptions: useDownstreamProtocol,
   396  				},
   397  				{
   398  					Name:                          ingressTLSClusterName,
   399  					ClusterDiscoveryType:          &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_ORIGINAL_DST},
   400  					ConnectTimeout:                &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0},
   401  					CleanupInterval:               &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 500000000},
   402  					LbPolicy:                      envoy_config_cluster.Cluster_CLUSTER_PROVIDED,
   403  					TypedExtensionProtocolOptions: useDownstreamProtocolAutoSNI,
   404  					TransportSocket: &envoy_config_core.TransportSocket{
   405  						Name: "cilium.tls_wrapper",
   406  						ConfigType: &envoy_config_core.TransportSocket_TypedConfig{
   407  							TypedConfig: toAny(&cilium.UpstreamTlsWrapperContext{}),
   408  						},
   409  					},
   410  				},
   411  				{
   412  					Name:                 CiliumXDSClusterName,
   413  					ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_STATIC},
   414  					ConnectTimeout:       &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0},
   415  					LbPolicy:             envoy_config_cluster.Cluster_ROUND_ROBIN,
   416  					LoadAssignment: &envoy_config_endpoint.ClusterLoadAssignment{
   417  						ClusterName: CiliumXDSClusterName,
   418  						Endpoints: []*envoy_config_endpoint.LocalityLbEndpoints{{
   419  							LbEndpoints: []*envoy_config_endpoint.LbEndpoint{{
   420  								HostIdentifier: &envoy_config_endpoint.LbEndpoint_Endpoint{
   421  									Endpoint: &envoy_config_endpoint.Endpoint{
   422  										Address: &envoy_config_core.Address{
   423  											Address: &envoy_config_core.Address_Pipe{
   424  												Pipe: &envoy_config_core.Pipe{Path: config.xdsSock},
   425  											},
   426  										},
   427  									},
   428  								},
   429  							}},
   430  						}},
   431  					},
   432  					TypedExtensionProtocolOptions: http2ProtocolOptions,
   433  				},
   434  				{
   435  					Name:                 adminClusterName,
   436  					ClusterDiscoveryType: &envoy_config_cluster.Cluster_Type{Type: envoy_config_cluster.Cluster_STATIC},
   437  					ConnectTimeout:       &durationpb.Duration{Seconds: config.connectTimeout, Nanos: 0},
   438  					LbPolicy:             envoy_config_cluster.Cluster_ROUND_ROBIN,
   439  					LoadAssignment: &envoy_config_endpoint.ClusterLoadAssignment{
   440  						ClusterName: adminClusterName,
   441  						Endpoints: []*envoy_config_endpoint.LocalityLbEndpoints{{
   442  							LbEndpoints: []*envoy_config_endpoint.LbEndpoint{{
   443  								HostIdentifier: &envoy_config_endpoint.LbEndpoint_Endpoint{
   444  									Endpoint: &envoy_config_endpoint.Endpoint{
   445  										Address: &envoy_config_core.Address{
   446  											Address: &envoy_config_core.Address_Pipe{
   447  												Pipe: &envoy_config_core.Pipe{Path: config.adminPath},
   448  											},
   449  										},
   450  									},
   451  								},
   452  							}},
   453  						}},
   454  					},
   455  				},
   456  			},
   457  		},
   458  		DynamicResources: &envoy_config_bootstrap.Bootstrap_DynamicResources{
   459  			LdsConfig: CiliumXDSConfigSource,
   460  			CdsConfig: CiliumXDSConfigSource,
   461  		},
   462  		Admin: &envoy_config_bootstrap.Admin{
   463  			Address: &envoy_config_core.Address{
   464  				Address: &envoy_config_core.Address_Pipe{
   465  					Pipe: &envoy_config_core.Pipe{Path: config.adminPath},
   466  				},
   467  			},
   468  		},
   469  		BootstrapExtensions: []*envoy_config_core.TypedExtensionConfig{
   470  			{
   471  				Name:        "envoy.bootstrap.internal_listener",
   472  				TypedConfig: toAny(&envoy_extensions_bootstrap_internal_listener_v3.InternalListener{}),
   473  			},
   474  		},
   475  		LayeredRuntime: &envoy_config_bootstrap.LayeredRuntime{
   476  			Layers: []*envoy_config_bootstrap.RuntimeLayer{
   477  				{
   478  					Name: "static_layer_0",
   479  					LayerSpecifier: &envoy_config_bootstrap.RuntimeLayer_StaticLayer{
   480  						StaticLayer: &structpb.Struct{Fields: map[string]*structpb.Value{
   481  							"overload": {Kind: &structpb.Value_StructValue{StructValue: &structpb.Struct{Fields: map[string]*structpb.Value{
   482  								"global_downstream_max_connections": {Kind: &structpb.Value_NumberValue{NumberValue: 50000}},
   483  							}}}},
   484  						}},
   485  					},
   486  				},
   487  			},
   488  		},
   489  	}
   491  	log.Debugf("Envoy: Bootstrap: %s", bs)
   492  	data, err := proto.Marshal(bs)
   493  	if err != nil {
   494  		log.WithError(err).Fatal("Envoy: Error marshaling Envoy bootstrap")
   495  	}
   496  	err = os.WriteFile(config.filePath, data, 0644)
   497  	if err != nil {
   498  		log.WithError(err).Fatal("Envoy: Error writing Envoy bootstrap file")
   499  	}
   500  }
   502  // getEmbeddedEnvoyVersion returns the envoy binary version string
   503  func getEmbeddedEnvoyVersion() (string, error) {
   504  	out, err := exec.Command(ciliumEnvoy, "--version").Output()
   505  	if err != nil {
   506  		return "", fmt.Errorf("failed to execute '%s --version': %w", ciliumEnvoy, err)
   507  	}
   508  	envoyVersionString := strings.TrimSpace(string(out))
   510  	envoyVersionArray := strings.Fields(envoyVersionString)
   511  	if len(envoyVersionArray) < 3 {
   512  		return "", fmt.Errorf("failed to extract version from truncated Envoy version string")
   513  	}
   515  	return envoyVersionArray[2], nil
   516  }