github.phpd.cn/cilium/cilium@v1.6.12/pkg/envoy/envoy.go (about) 1 // Copyright 2017, 2018 Authors of Cilium 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package envoy 16 17 import ( 18 "bufio" 19 "fmt" 20 "io" 21 "io/ioutil" 22 "net" 23 "net/http" 24 "os/exec" 25 "path/filepath" 26 "strconv" 27 "strings" 28 "time" 29 30 "github.com/cilium/cilium/pkg/flowdebug" 31 "github.com/cilium/cilium/pkg/logging" 32 "github.com/cilium/cilium/pkg/logging/logfields" 33 "github.com/cilium/cilium/pkg/metrics" 34 35 "github.com/sirupsen/logrus" 36 "gopkg.in/natefinch/lumberjack.v2" 37 ) 38 39 var log = logging.DefaultLogger.WithField(logfields.LogSubsys, "envoy-manager") 40 41 var ( 42 // RequiredEnvoyVersionSHA is set during build 43 // Running Envoy version will be checked against `RequiredEnvoyVersionSHA`. 44 // By default cilium-agent will fail to start if there is a version mismatch. 45 RequiredEnvoyVersionSHA string 46 47 // envoyLevelMap maps logrus.Level values to Envoy (spdlog) log levels. 48 envoyLevelMap = map[logrus.Level]string{ 49 logrus.PanicLevel: "off", 50 logrus.FatalLevel: "critical", 51 logrus.ErrorLevel: "error", 52 logrus.WarnLevel: "warning", 53 logrus.InfoLevel: "info", 54 logrus.DebugLevel: "debug", 55 // spdlog "trace" not mapped 56 } 57 58 tracing = false 59 ) 60 61 const ( 62 adminSock = "envoy-admin.sock" 63 ciliumEnvoy = "cilium-envoy" 64 ) 65 66 // EnableTracing changes Envoy log level to "trace", producing the most logs. 67 func EnableTracing() { 68 tracing = true 69 } 70 71 func mapLogLevel(level logrus.Level) string { 72 if tracing { 73 return "trace" 74 } 75 76 // Suppress the debug level if not debugging at flow level. 77 if level == logrus.DebugLevel && !flowdebug.Enabled() { 78 level = logrus.InfoLevel 79 } 80 return envoyLevelMap[level] 81 } 82 83 type admin struct { 84 adminURL string 85 unixPath string 86 level string 87 } 88 89 func (a *admin) transact(query string) error { 90 // Use a custom dialer to use a Unix domain socket for a HTTP connection. 91 client := &http.Client{ 92 Transport: &http.Transport{ 93 Dial: func(_, _ string) (net.Conn, error) { return net.Dial("unix", a.unixPath) }, 94 }, 95 } 96 97 resp, err := client.Post(a.adminURL+query, "", nil) 98 if err != nil { 99 return err 100 } 101 defer resp.Body.Close() 102 body, err := ioutil.ReadAll(resp.Body) 103 if err != nil { 104 return err 105 } 106 ret := strings.Replace(string(body), "\r", "", -1) 107 log.Debugf("Envoy: Admin response to %s: %s", query, ret) 108 return nil 109 } 110 111 func (a *admin) changeLogLevel(level logrus.Level) error { 112 envoyLevel := mapLogLevel(level) 113 114 if envoyLevel == a.level { 115 log.Debugf("Envoy: Log level is already set as: %v", envoyLevel) 116 return nil 117 } 118 119 err := a.transact("logging?level=" + envoyLevel) 120 if err != nil { 121 log.WithError(err).Warnf("Envoy: Failed to set log level to: %v", envoyLevel) 122 } else { 123 a.level = envoyLevel 124 } 125 return err 126 } 127 128 func (a *admin) quit() error { 129 return a.transact("quitquitquit") 130 } 131 132 // Envoy manages a running Envoy proxy instance via the 133 // ListenerDiscoveryService and RouteDiscoveryService gRPC APIs. 134 type Envoy struct { 135 stopCh chan struct{} 136 errCh chan error 137 admin *admin 138 } 139 140 // GetEnvoyVersion returns the envoy binary version string 141 func GetEnvoyVersion() string { 142 out, err := exec.Command(ciliumEnvoy, "--version").Output() 143 if err != nil { 144 log.WithError(err).Fatalf("Envoy: Binary %q cannot be executed", ciliumEnvoy) 145 } 146 return strings.TrimSpace(string(out)) 147 } 148 149 // StartEnvoy starts an Envoy proxy instance. 150 func StartEnvoy(stateDir, logPath string, baseID uint64) *Envoy { 151 bootstrapPath := filepath.Join(stateDir, "bootstrap.pb") 152 xdsPath := getXDSPath(stateDir) 153 154 // Have to use a fake IP address:port even when we Dial to a Unix domain socket. 155 // The address:port will be visible to Envoy as ':authority', but its value is 156 // not meaningful. 157 // Not using the normal localhost address to make it obvious that we are not 158 // connecting to Envoy's admin interface via the IP stack. 159 adminAddress := "192.0.2.34:56" 160 adminPath := filepath.Join(stateDir, adminSock) 161 162 e := &Envoy{ 163 stopCh: make(chan struct{}), 164 errCh: make(chan error, 1), 165 admin: &admin{ 166 adminURL: "http://" + adminAddress + "/", 167 unixPath: adminPath, 168 }, 169 } 170 171 // Use the same structure as Istio's pilot-agent for the node ID: 172 // nodeType~ipAddress~proxyId~domain 173 nodeId := "host~127.0.0.1~no-id~localdomain" 174 175 // Create static configuration 176 createBootstrap(bootstrapPath, nodeId, ingressClusterName, 177 xdsPath, egressClusterName, ingressClusterName, adminPath) 178 179 log.Debugf("Envoy: Starting: %v", *e) 180 181 // make it a buffered channel so we can not only 182 // read the written value but also skip it in 183 // case no one reader reads it. 184 started := make(chan bool, 1) 185 go func() { 186 var logWriter io.WriteCloser 187 var logFormat string 188 if logPath != "" { 189 // Use the Envoy default log format when logging to a separate file 190 logFormat = "[%Y-%m-%d %T.%e][%t][%l][%n] %v" 191 logger := &lumberjack.Logger{ 192 Filename: logPath, 193 MaxSize: 100, // megabytes 194 MaxBackups: 3, 195 MaxAge: 28, //days 196 Compress: true, // disabled by default 197 } 198 logWriter = logger 199 } else { 200 // Use log format that looks like Cilium logs when integrating logs 201 // The logs will be reported as coming from the cilium-agent, so 202 // we add the thread id to be able to differentiate between Envoy's 203 // main and worker threads. 204 logFormat = "%t|%l|%n|%v" 205 206 // Create a piper that parses and writes into logrus the log 207 // messages from Envoy. 208 logWriter = newEnvoyLogPiper() 209 } 210 defer logWriter.Close() 211 212 for { 213 logLevel := logging.GetLevel(logging.DefaultLogger) 214 cmd := exec.Command(ciliumEnvoy, "-l", mapLogLevel(logLevel), "-c", bootstrapPath, "--base-id", strconv.FormatUint(baseID, 10), "--log-format", logFormat) 215 cmd.Stderr = logWriter 216 cmd.Stdout = logWriter 217 218 if err := cmd.Start(); err != nil { 219 log.WithError(err).Warn("Envoy: Failed to start proxy") 220 select { 221 case started <- false: 222 default: 223 } 224 return 225 } 226 log.Debugf("Envoy: Started proxy") 227 select { 228 case started <- true: 229 default: 230 } 231 232 log.Infof("Envoy: Proxy started with pid %d", cmd.Process.Pid) 233 metrics.SubprocessStart.WithLabelValues(ciliumEnvoy).Inc() 234 235 // We do not return after a successful start, but watch the Envoy process 236 // and restart it if it crashes. 237 // Waiting for the process execution is done in the goroutime. 238 // The purpose of the "crash channel" is to inform the loop about their 239 // Envoy process crash - after closing that channel by the goroutime, 240 // the loop continues, the channel is recreated and the new process 241 // is watched again. 242 crashCh := make(chan struct{}) 243 go func() { 244 if err := cmd.Wait(); err != nil { 245 log.WithError(err).Warn("Envoy: Proxy crashed") 246 } 247 close(crashCh) 248 }() 249 250 // start again after a short wait. If Cilium exits this should be enough 251 // time to not start Envoy again in that case. 252 log.Info("Envoy: Sleeping for 100ms before restarting proxy") 253 time.Sleep(100 * time.Millisecond) 254 255 select { 256 case <-crashCh: 257 // Start Envoy again 258 continue 259 case <-e.stopCh: 260 log.Infof("Envoy: Stopping proxy with pid %d", cmd.Process.Pid) 261 if err := e.admin.quit(); err != nil { 262 log.WithError(err).Fatalf("Envoy: Envoy admin quit failed, killing process with pid %d", cmd.Process.Pid) 263 264 if err := cmd.Process.Kill(); err != nil { 265 log.WithError(err).Fatal("Envoy: Stopping Envoy failed") 266 e.errCh <- err 267 } 268 } 269 close(e.errCh) 270 return 271 } 272 } 273 }() 274 275 if <-started { 276 return e 277 } 278 279 return nil 280 } 281 282 // newEnvoyLogPiper creates a writer that parses and logs log messages written by Envoy. 283 func newEnvoyLogPiper() io.WriteCloser { 284 reader, writer := io.Pipe() 285 scanner := bufio.NewScanner(reader) 286 go func() { 287 scopedLog := log.WithFields(logrus.Fields{ 288 logfields.LogSubsys: "unknown", 289 logfields.ThreadID: "unknown", 290 }) 291 level := "debug" 292 293 for scanner.Scan() { 294 line := scanner.Text() 295 var msg string 296 297 parts := strings.SplitN(line, "|", 4) 298 // Parse the line as a log message written by Envoy, assuming it 299 // uses the configured format: "%t|%l|%n|%v". 300 if len(parts) == 4 { 301 threadID := parts[0] 302 level = parts[1] 303 loggerName := parts[2] 304 // TODO: Parse msg to extract the source filename, line number, etc. 305 msg = fmt.Sprintf("[%s", parts[3]) 306 307 scopedLog = log.WithFields(logrus.Fields{ 308 logfields.LogSubsys: fmt.Sprintf("envoy-%s", loggerName), 309 logfields.ThreadID: threadID, 310 }) 311 } else { 312 // If this line can't be parsed, it continues a multi-line log 313 // message. In this case, log it at the same level and with the 314 // same fields as the previous line. 315 msg = line 316 } 317 318 if len(msg) == 0 { 319 continue 320 } 321 322 // Map the Envoy log level to a logrus level. 323 switch level { 324 case "off", "critical", "error": 325 scopedLog.Error(msg) 326 case "warning": 327 scopedLog.Warn(msg) 328 case "info": 329 scopedLog.Info(msg) 330 case "debug", "trace": 331 scopedLog.Debug(msg) 332 default: 333 scopedLog.Debug(msg) 334 } 335 } 336 if err := scanner.Err(); err != nil { 337 log.WithError(err).Error("Error while parsing Envoy logs") 338 } 339 reader.Close() 340 }() 341 return writer 342 } 343 344 // isEOF returns true if the error message ends in "EOF". ReadMsgUnix returns extra info in the beginning. 345 func isEOF(err error) bool { 346 strerr := err.Error() 347 errlen := len(strerr) 348 return errlen >= 3 && strerr[errlen-3:] == io.EOF.Error() 349 } 350 351 // StopEnvoy kills the Envoy process started with StartEnvoy. The gRPC API streams are terminated 352 // first. 353 func (e *Envoy) StopEnvoy() error { 354 close(e.stopCh) 355 err, ok := <-e.errCh 356 if ok { 357 return err 358 } 359 return nil 360 } 361 362 // ChangeLogLevel changes Envoy log level to correspond to the logrus log level 'level'. 363 func (e *Envoy) ChangeLogLevel(level logrus.Level) { 364 e.admin.changeLogLevel(level) 365 }