github.phpd.cn/cilium/cilium@v1.6.12/pkg/envoy/envoy.go (about)

     1  // Copyright 2017, 2018 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package envoy
    16  
    17  import (
    18  	"bufio"
    19  	"fmt"
    20  	"io"
    21  	"io/ioutil"
    22  	"net"
    23  	"net/http"
    24  	"os/exec"
    25  	"path/filepath"
    26  	"strconv"
    27  	"strings"
    28  	"time"
    29  
    30  	"github.com/cilium/cilium/pkg/flowdebug"
    31  	"github.com/cilium/cilium/pkg/logging"
    32  	"github.com/cilium/cilium/pkg/logging/logfields"
    33  	"github.com/cilium/cilium/pkg/metrics"
    34  
    35  	"github.com/sirupsen/logrus"
    36  	"gopkg.in/natefinch/lumberjack.v2"
    37  )
    38  
    39  var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "envoy-manager")
    40  
    41  var (
    42  	// RequiredEnvoyVersionSHA is set during build
    43  	// Running Envoy version will be checked against `RequiredEnvoyVersionSHA`.
    44  	// By default cilium-agent will fail to start if there is a version mismatch.
    45  	RequiredEnvoyVersionSHA string
    46  
    47  	// envoyLevelMap maps logrus.Level values to Envoy (spdlog) log levels.
    48  	envoyLevelMap = map[logrus.Level]string{
    49  		logrus.PanicLevel: "off",
    50  		logrus.FatalLevel: "critical",
    51  		logrus.ErrorLevel: "error",
    52  		logrus.WarnLevel:  "warning",
    53  		logrus.InfoLevel:  "info",
    54  		logrus.DebugLevel: "debug",
    55  		// spdlog "trace" not mapped
    56  	}
    57  
    58  	tracing = false
    59  )
    60  
    61  const (
    62  	adminSock   = "envoy-admin.sock"
    63  	ciliumEnvoy = "cilium-envoy"
    64  )
    65  
    66  // EnableTracing changes Envoy log level to "trace", producing the most logs.
    67  func EnableTracing() {
    68  	tracing = true
    69  }
    70  
    71  func mapLogLevel(level logrus.Level) string {
    72  	if tracing {
    73  		return "trace"
    74  	}
    75  
    76  	// Suppress the debug level if not debugging at flow level.
    77  	if level == logrus.DebugLevel && !flowdebug.Enabled() {
    78  		level = logrus.InfoLevel
    79  	}
    80  	return envoyLevelMap[level]
    81  }
    82  
    83  type admin struct {
    84  	adminURL string
    85  	unixPath string
    86  	level    string
    87  }
    88  
    89  func (a *admin) transact(query string) error {
    90  	// Use a custom dialer to use a Unix domain socket for a HTTP connection.
    91  	client := &http.Client{
    92  		Transport: &http.Transport{
    93  			Dial: func(_, _ string) (net.Conn, error) { return net.Dial("unix", a.unixPath) },
    94  		},
    95  	}
    96  
    97  	resp, err := client.Post(a.adminURL+query, "", nil)
    98  	if err != nil {
    99  		return err
   100  	}
   101  	defer resp.Body.Close()
   102  	body, err := ioutil.ReadAll(resp.Body)
   103  	if err != nil {
   104  		return err
   105  	}
   106  	ret := strings.Replace(string(body), "\r", "", -1)
   107  	log.Debugf("Envoy: Admin response to %s: %s", query, ret)
   108  	return nil
   109  }
   110  
   111  func (a *admin) changeLogLevel(level logrus.Level) error {
   112  	envoyLevel := mapLogLevel(level)
   113  
   114  	if envoyLevel == a.level {
   115  		log.Debugf("Envoy: Log level is already set as: %v", envoyLevel)
   116  		return nil
   117  	}
   118  
   119  	err := a.transact("logging?level=" + envoyLevel)
   120  	if err != nil {
   121  		log.WithError(err).Warnf("Envoy: Failed to set log level to: %v", envoyLevel)
   122  	} else {
   123  		a.level = envoyLevel
   124  	}
   125  	return err
   126  }
   127  
   128  func (a *admin) quit() error {
   129  	return a.transact("quitquitquit")
   130  }
   131  
   132  // Envoy manages a running Envoy proxy instance via the
   133  // ListenerDiscoveryService and RouteDiscoveryService gRPC APIs.
   134  type Envoy struct {
   135  	stopCh chan struct{}
   136  	errCh  chan error
   137  	admin  *admin
   138  }
   139  
   140  // GetEnvoyVersion returns the envoy binary version string
   141  func GetEnvoyVersion() string {
   142  	out, err := exec.Command(ciliumEnvoy, "--version").Output()
   143  	if err != nil {
   144  		log.WithError(err).Fatalf("Envoy: Binary %q cannot be executed", ciliumEnvoy)
   145  	}
   146  	return strings.TrimSpace(string(out))
   147  }
   148  
   149  // StartEnvoy starts an Envoy proxy instance.
   150  func StartEnvoy(stateDir, logPath string, baseID uint64) *Envoy {
   151  	bootstrapPath := filepath.Join(stateDir, "bootstrap.pb")
   152  	xdsPath := getXDSPath(stateDir)
   153  
   154  	// Have to use a fake IP address:port even when we Dial to a Unix domain socket.
   155  	// The address:port will be visible to Envoy as ':authority', but its value is
   156  	// not meaningful.
   157  	// Not using the normal localhost address to make it obvious that we are not
   158  	// connecting to Envoy's admin interface via the IP stack.
   159  	adminAddress := "192.0.2.34:56"
   160  	adminPath := filepath.Join(stateDir, adminSock)
   161  
   162  	e := &Envoy{
   163  		stopCh: make(chan struct{}),
   164  		errCh:  make(chan error, 1),
   165  		admin: &admin{
   166  			adminURL: "http://" + adminAddress + "/",
   167  			unixPath: adminPath,
   168  		},
   169  	}
   170  
   171  	// Use the same structure as Istio's pilot-agent for the node ID:
   172  	// nodeType~ipAddress~proxyId~domain
   173  	nodeId := "host~127.0.0.1~no-id~localdomain"
   174  
   175  	// Create static configuration
   176  	createBootstrap(bootstrapPath, nodeId, ingressClusterName,
   177  		xdsPath, egressClusterName, ingressClusterName, adminPath)
   178  
   179  	log.Debugf("Envoy: Starting: %v", *e)
   180  
   181  	// make it a buffered channel so we can not only
   182  	// read the written value but also skip it in
   183  	// case no one reader reads it.
   184  	started := make(chan bool, 1)
   185  	go func() {
   186  		var logWriter io.WriteCloser
   187  		var logFormat string
   188  		if logPath != "" {
   189  			// Use the Envoy default log format when logging to a separate file
   190  			logFormat = "[%Y-%m-%d %T.%e][%t][%l][%n] %v"
   191  			logger := &lumberjack.Logger{
   192  				Filename:   logPath,
   193  				MaxSize:    100, // megabytes
   194  				MaxBackups: 3,
   195  				MaxAge:     28,   //days
   196  				Compress:   true, // disabled by default
   197  			}
   198  			logWriter = logger
   199  		} else {
   200  			// Use log format that looks like Cilium logs when integrating logs
   201  			// The logs will be reported as coming from the cilium-agent, so
   202  			// we add the thread id to be able to differentiate between Envoy's
   203  			// main and worker threads.
   204  			logFormat = "%t|%l|%n|%v"
   205  
   206  			// Create a piper that parses and writes into logrus the log
   207  			// messages from Envoy.
   208  			logWriter = newEnvoyLogPiper()
   209  		}
   210  		defer logWriter.Close()
   211  
   212  		for {
   213  			logLevel := logging.GetLevel(logging.DefaultLogger)
   214  			cmd := exec.Command(ciliumEnvoy, "-l", mapLogLevel(logLevel), "-c", bootstrapPath, "--base-id", strconv.FormatUint(baseID, 10), "--log-format", logFormat)
   215  			cmd.Stderr = logWriter
   216  			cmd.Stdout = logWriter
   217  
   218  			if err := cmd.Start(); err != nil {
   219  				log.WithError(err).Warn("Envoy: Failed to start proxy")
   220  				select {
   221  				case started <- false:
   222  				default:
   223  				}
   224  				return
   225  			}
   226  			log.Debugf("Envoy: Started proxy")
   227  			select {
   228  			case started <- true:
   229  			default:
   230  			}
   231  
   232  			log.Infof("Envoy: Proxy started with pid %d", cmd.Process.Pid)
   233  			metrics.SubprocessStart.WithLabelValues(ciliumEnvoy).Inc()
   234  
   235  			// We do not return after a successful start, but watch the Envoy process
   236  			// and restart it if it crashes.
   237  			// Waiting for the process execution is done in the goroutime.
   238  			// The purpose of the "crash channel" is to inform the loop about their
   239  			// Envoy process crash - after closing that channel by the goroutime,
   240  			// the loop continues, the channel is recreated and the new process
   241  			// is watched again.
   242  			crashCh := make(chan struct{})
   243  			go func() {
   244  				if err := cmd.Wait(); err != nil {
   245  					log.WithError(err).Warn("Envoy: Proxy crashed")
   246  				}
   247  				close(crashCh)
   248  			}()
   249  
   250  			// start again after a short wait. If Cilium exits this should be enough
   251  			// time to not start Envoy again in that case.
   252  			log.Info("Envoy: Sleeping for 100ms before restarting proxy")
   253  			time.Sleep(100 * time.Millisecond)
   254  
   255  			select {
   256  			case <-crashCh:
   257  				// Start Envoy again
   258  				continue
   259  			case <-e.stopCh:
   260  				log.Infof("Envoy: Stopping proxy with pid %d", cmd.Process.Pid)
   261  				if err := e.admin.quit(); err != nil {
   262  					log.WithError(err).Fatalf("Envoy: Envoy admin quit failed, killing process with pid %d", cmd.Process.Pid)
   263  
   264  					if err := cmd.Process.Kill(); err != nil {
   265  						log.WithError(err).Fatal("Envoy: Stopping Envoy failed")
   266  						e.errCh <- err
   267  					}
   268  				}
   269  				close(e.errCh)
   270  				return
   271  			}
   272  		}
   273  	}()
   274  
   275  	if <-started {
   276  		return e
   277  	}
   278  
   279  	return nil
   280  }
   281  
   282  // newEnvoyLogPiper creates a writer that parses and logs log messages written by Envoy.
   283  func newEnvoyLogPiper() io.WriteCloser {
   284  	reader, writer := io.Pipe()
   285  	scanner := bufio.NewScanner(reader)
   286  	go func() {
   287  		scopedLog := log.WithFields(logrus.Fields{
   288  			logfields.LogSubsys: "unknown",
   289  			logfields.ThreadID:  "unknown",
   290  		})
   291  		level := "debug"
   292  
   293  		for scanner.Scan() {
   294  			line := scanner.Text()
   295  			var msg string
   296  
   297  			parts := strings.SplitN(line, "|", 4)
   298  			// Parse the line as a log message written by Envoy, assuming it
   299  			// uses the configured format: "%t|%l|%n|%v".
   300  			if len(parts) == 4 {
   301  				threadID := parts[0]
   302  				level = parts[1]
   303  				loggerName := parts[2]
   304  				// TODO: Parse msg to extract the source filename, line number, etc.
   305  				msg = fmt.Sprintf("[%s", parts[3])
   306  
   307  				scopedLog = log.WithFields(logrus.Fields{
   308  					logfields.LogSubsys: fmt.Sprintf("envoy-%s", loggerName),
   309  					logfields.ThreadID:  threadID,
   310  				})
   311  			} else {
   312  				// If this line can't be parsed, it continues a multi-line log
   313  				// message. In this case, log it at the same level and with the
   314  				// same fields as the previous line.
   315  				msg = line
   316  			}
   317  
   318  			if len(msg) == 0 {
   319  				continue
   320  			}
   321  
   322  			// Map the Envoy log level to a logrus level.
   323  			switch level {
   324  			case "off", "critical", "error":
   325  				scopedLog.Error(msg)
   326  			case "warning":
   327  				scopedLog.Warn(msg)
   328  			case "info":
   329  				scopedLog.Info(msg)
   330  			case "debug", "trace":
   331  				scopedLog.Debug(msg)
   332  			default:
   333  				scopedLog.Debug(msg)
   334  			}
   335  		}
   336  		if err := scanner.Err(); err != nil {
   337  			log.WithError(err).Error("Error while parsing Envoy logs")
   338  		}
   339  		reader.Close()
   340  	}()
   341  	return writer
   342  }
   343  
   344  // isEOF returns true if the error message ends in "EOF". ReadMsgUnix returns extra info in the beginning.
   345  func isEOF(err error) bool {
   346  	strerr := err.Error()
   347  	errlen := len(strerr)
   348  	return errlen >= 3 && strerr[errlen-3:] == io.EOF.Error()
   349  }
   350  
   351  // StopEnvoy kills the Envoy process started with StartEnvoy. The gRPC API streams are terminated
   352  // first.
   353  func (e *Envoy) StopEnvoy() error {
   354  	close(e.stopCh)
   355  	err, ok := <-e.errCh
   356  	if ok {
   357  		return err
   358  	}
   359  	return nil
   360  }
   361  
   362  // ChangeLogLevel changes Envoy log level to correspond to the logrus log level 'level'.
   363  func (e *Envoy) ChangeLogLevel(level logrus.Level) {
   364  	e.admin.changeLogLevel(level)
   365  }