
     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    15  package status
    17  import (
    18  	"context"
    19  	"crypto/tls"
    20  	"encoding/json"
    21  	"errors"
    22  	"fmt"
    23  	"io"
    24  	"mime"
    25  	"net"
    26  	"net/http"
    27  	"net/http/pprof"
    28  	"os"
    29  	"regexp"
    30  	"strconv"
    31  	"strings"
    32  	"sync"
    33  	"syscall"
    34  	"time"
    36  	""
    37  	""
    38  	""
    39  	""
    40  	""
    41  	""
    42  	""
    43  	grpcHealth ""
    44  	grpcStatus ""
    45  	""
    46  	k8sUtilIo ""
    48  	""
    49  	""
    50  	""
    51  	""
    52  	dnsProto ""
    53  	""
    54  	commonFeatures ""
    55  	""
    56  	""
    57  	""
    58  	""
    59  	""
    60  	""
    61  	istioNetUtil ""
    62  )
    64  const (
    65  	// readyPath is for the pilot agent readiness itself.
    66  	readyPath = "/healthz/ready"
    67  	// quitPath is to notify the pilot agent to quit.
    68  	quitPath  = "/quitquitquit"
    69  	drainPath = "/drain"
    70  	// KubeAppProberEnvName is the name of the command line flag for pilot agent to pass app prober config.
    71  	// The json encoded string to pass app HTTP probe information from injector(istioctl or webhook).
    72  	// For example, ISTIO_KUBE_APP_PROBERS='{"/app-health/httpbin/livez":{"httpGet":{"path": "/hello", "port": 8080}}.
    73  	// indicates that httpbin container liveness prober port is 8080 and probing path is /hello.
    74  	// This environment variable should never be set manually.
    75  	KubeAppProberEnvName = "ISTIO_KUBE_APP_PROBERS"
    77  	localHostIPv4     = ""
    78  	localHostIPv6     = "::1"
    79  	maxRespBodyLength = 10 * 1 << 10
    80  )
    82  var (
    83  	UpstreamLocalAddressIPv4 = &net.TCPAddr{IP: net.ParseIP("")}
    84  	UpstreamLocalAddressIPv6 = &net.TCPAddr{IP: net.ParseIP("::6")}
    85  )
// PrometheusScrapingConfig is the ISTIO_PROMETHEUS_ANNOTATIONS environment variable.
// NewServer decodes its value as JSON into a PrometheusScrapeConfiguration to decide whether,
// and from which port/path, application metrics are scraped and merged.
var PrometheusScrapingConfig = env.Register("ISTIO_PROMETHEUS_ANNOTATIONS", "", "")
var (
	// appProberPattern is the only accepted shape for rewritten app prober paths:
	// /app-health/<single path segment>/(livez|readyz|startupz).
	appProberPattern = regexp.MustCompile(`^/app-health/[^/]+/(livez|readyz|startupz)$`)

	// EnableHTTP2Probing holds the value of ISTIO_ENABLE_HTTP2_PROBING read at package
	// initialization (default true).
	EnableHTTP2Probing = env.Register("ISTIO_ENABLE_HTTP2_PROBING", true,
		"If enabled, HTTP2 probes will be enabled for HTTPS probes, following Kubernetes").Get()

	// LegacyLocalhostProbeDestination is the registered variable itself (not its value);
	// NewServer calls Get on it and, when true, sends app probes to "localhost".
	LegacyLocalhostProbeDestination = env.Register("REWRITE_PROBE_LEGACY_LOCALHOST_DESTINATION", false,
		"If enabled, readiness probes will be sent to 'localhost'. Otherwise, they will be sent to the Pod's IP, matching Kubernetes' behavior.")

	// ProbeKeepaliveConnections holds the value of ENABLE_PROBE_KEEPALIVE_CONNECTIONS read at
	// package initialization (default false). NewServer disables keep-alives on the app probe
	// transport unless it is true.
	ProbeKeepaliveConnections = env.Register("ENABLE_PROBE_KEEPALIVE_CONNECTIONS", false,
		"If enabled, readiness probes will keep the connection from pilot-agent to the application alive. "+
			"This mirrors older Istio versions' behaviors, but not kubelet's.").Get()
)
// KubeAppProbers holds the information about a Kubernetes pod prober.
// It's a map from the prober URL path to the Kubernetes Prober config.
// For example, "/app-health/hello-world/livez" entry contains liveness prober config for
// container "hello-world".
// It is decoded from the JSON string in Options.KubeAppProbers; each key is checked
// against appProberPattern by validateAppKubeProber.
type KubeAppProbers map[string]*Prober
// Prober represents a single container prober.
// Exactly one of HTTPGet, TCPSocket or GRPC must be set; validateAppKubeProber rejects any
// other combination. TimeoutSeconds is converted to a time.Duration and used as the timeout
// for the probe request or dial; a zero value results in a zero duration.
type Prober struct {
	HTTPGet        *apimirror.HTTPGetAction   `json:"httpGet,omitempty"`
	TCPSocket      *apimirror.TCPSocketAction `json:"tcpSocket,omitempty"`
	GRPC           *apimirror.GRPCAction      `json:"grpc,omitempty"`
	TimeoutSeconds int32                      `json:"timeoutSeconds,omitempty"`
}
// Options for the status server. NewServer consumes these to build a Server.
type Options struct {
	// IP of the pod. Note: this is only applicable for Kubernetes pods and should only be used for
	// the prober.
	PodIP string
	// KubeAppProbers is a json with Kubernetes application prober config encoded.
	KubeAppProbers      string
	NodeType            model.NodeType // app metrics scraping is only enabled for model.SidecarProxy
	StatusPort          uint16         // port the status server listens on; 0 lets the OS pick one
	AdminPort           uint16         // passed to the Envoy readiness probe
	IPv6                bool           // selects the IPv6 loopback and upstream local address
	Probes              []ready.Prober // extra readiness probes appended after the built-in ones
	EnvoyPrometheusPort int            // localhost port Envoy metrics are scraped from
	Context             context.Context
	FetchDNS            func() *dnsProto.NameTable
	NoEnvoy             bool   // when true, no Envoy readiness probe or Envoy metrics scrape
	GRPCBootstrap       string // when non-empty, a gRPC readiness probe is added for it
	EnableProfiling     bool   // when true, the /debug/pprof/ handlers are registered
	// PrometheusRegistry to use. Just for testing.
	PrometheusRegistry prometheus.Gatherer
	Shutdown           context.CancelFunc // invoked by the quit handler
	TriggerDrain       func()             // invoked by the drain handler
}
// Server provides an endpoint for handling status probes.
//
// It also serves merged metrics (agent, Envoy and optionally application), the quit and
// drain endpoints, rewritten application probes and, when enabled, pprof handlers.
// mutex guards lastProbeSuccessful and the write of statusPort performed in Run when the
// port is allocated dynamically.
type Server struct {
	ready                 []ready.Prober                 // probes evaluated in order by isReady
	prometheus            *PrometheusScrapeConfiguration // nil when app metrics scraping is disabled
	mutex                 sync.RWMutex
	appProbersDestination string                  // host that application probes are sent to
	appKubeProbers        KubeAppProbers          // decoded from Options.KubeAppProbers
	appProbeClient        map[string]*http.Client // one client per HTTP prober path
	statusPort            uint16
	lastProbeSuccessful   bool // result of the previous readiness probe, used to limit logging
	envoyStatsPort        int
	fetchDNS              func() *dnsProto.NameTable
	upstreamLocalAddress  *net.TCPAddr // local address set on probe dialers
	config                Options
	http                  *http.Client // client used for metrics scraping
	enableProfiling       bool
	registry              prometheus.Gatherer // source of the agent's own metrics
	shutdown              context.CancelFunc
	drain                 func()
}
   162  func initializeMonitoring() (prometheus.Gatherer, error) {
   163  	registry := prometheus.NewRegistry()
   164  	wrapped := prometheus.WrapRegistererWithPrefix("istio_agent_", registry)
   165  	wrapped.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
   166  	wrapped.MustRegister(collectors.NewGoCollector())
   168  	_, err := monitoring.RegisterPrometheusExporter(wrapped, registry)
   169  	if err != nil {
   170  		return nil, fmt.Errorf("could not setup exporter: %v", err)
   171  	}
   172  	return registry, nil
   173  }
   175  // NewServer creates a new status server.
   176  func NewServer(config Options) (*Server, error) {
   177  	localhost := localHostIPv4
   178  	upstreamLocalAddress := UpstreamLocalAddressIPv4
   179  	if config.IPv6 {
   180  		localhost = localHostIPv6
   181  		upstreamLocalAddress = UpstreamLocalAddressIPv6
   182  	} else {
   183  		// if not ipv6-only, it can be ipv4-only or dual-stack
   184  		// let InstanceIP decide the localhost
   185  		netIP := net.ParseIP(config.PodIP)
   186  		if netIP.To4() == nil && netIP.To16() != nil && !netIP.IsLinkLocalUnicast() {
   187  			localhost = localHostIPv6
   188  			upstreamLocalAddress = UpstreamLocalAddressIPv6
   189  		}
   190  	}
   191  	probes := make([]ready.Prober, 0)
   192  	if !config.NoEnvoy {
   193  		probes = append(probes, &ready.Probe{
   194  			LocalHostAddr: localhost,
   195  			AdminPort:     config.AdminPort,
   196  			Context:       config.Context,
   197  			NoEnvoy:       config.NoEnvoy,
   198  		})
   199  	}
   201  	if config.GRPCBootstrap != "" {
   202  		probes = append(probes, grpcready.NewProbe(config.GRPCBootstrap))
   203  	}
   205  	probes = append(probes, config.Probes...)
   206  	registry := config.PrometheusRegistry
   207  	if registry == nil {
   208  		var err error
   209  		registry, err = initializeMonitoring()
   210  		if err != nil {
   211  			return nil, err
   212  		}
   213  	}
   214  	s := &Server{
   215  		statusPort:            config.StatusPort,
   216  		ready:                 probes,
   217  		http:                  &http.Client{},
   218  		appProbersDestination: config.PodIP,
   219  		envoyStatsPort:        config.EnvoyPrometheusPort,
   220  		fetchDNS:              config.FetchDNS,
   221  		upstreamLocalAddress:  upstreamLocalAddress,
   222  		config:                config,
   223  		enableProfiling:       config.EnableProfiling,
   224  		registry:              registry,
   225  		shutdown: func() {
   226  			config.Shutdown()
   227  		},
   228  		drain: config.TriggerDrain,
   229  	}
   230  	if LegacyLocalhostProbeDestination.Get() {
   231  		s.appProbersDestination = "localhost"
   232  	}
   234  	// Enable prometheus server if its configured and a sidecar
   235  	// Because port 15020 is exposed in the gateway Services, we cannot safely serve this endpoint
   236  	// If we need to do this in the future, we should use envoy to do routing or have another port to make this internal
   237  	// only. For now, its not needed for gateway, as we can just get Envoy stats directly, but if we
   238  	// want to expose istio-agent metrics we may want to revisit this.
   239  	if cfg, f := PrometheusScrapingConfig.Lookup(); config.NodeType == model.SidecarProxy && f {
   240  		var prom PrometheusScrapeConfiguration
   241  		if err := json.Unmarshal([]byte(cfg), &prom); err != nil {
   242  			return nil, fmt.Errorf("failed to unmarshal %s: %v", PrometheusScrapingConfig.Name, err)
   243  		}
   244  		log.Infof("Prometheus scraping configuration: %v", prom)
   245  		if prom.Scrape != "false" {
   246  			s.prometheus = &prom
   247  			if s.prometheus.Path == "" {
   248  				s.prometheus.Path = "/metrics"
   249  			}
   250  			if s.prometheus.Port == "" {
   251  				s.prometheus.Port = "80"
   252  			}
   253  			if s.prometheus.Port == strconv.Itoa(int(config.StatusPort)) {
   254  				return nil, fmt.Errorf("invalid prometheus scrape configuration: "+
   255  					"application port is the same as agent port, which may lead to a recursive loop. "+
   256  					"Ensure pod does not have label, or that injection is not happening multiple times", config.StatusPort)
   257  			}
   258  		}
   259  	}
   261  	if config.KubeAppProbers == "" {
   262  		return s, nil
   263  	}
   264  	if err := json.Unmarshal([]byte(config.KubeAppProbers), &s.appKubeProbers); err != nil {
   265  		return nil, fmt.Errorf("failed to decode app prober err = %v, json string = %v", err, config.KubeAppProbers)
   266  	}
   268  	s.appProbeClient = make(map[string]*http.Client, len(s.appKubeProbers))
   269  	// Validate the map key matching the regex pattern.
   270  	for path, prober := range s.appKubeProbers {
   271  		err := validateAppKubeProber(path, prober)
   272  		if err != nil {
   273  			return nil, err
   274  		}
   275  		if prober.HTTPGet != nil {
   276  			d := ProbeDialer()
   277  			d.LocalAddr = s.upstreamLocalAddress
   278  			// nolint: gosec
   279  			// This is matching Kubernetes. It is a reasonable usage of this, as it is just a health check over localhost.
   280  			transport, err := setTransportDefaults(&http.Transport{
   281  				TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
   282  				DialContext:     d.DialContext,
   283  				//
   284  				// Match Kubernetes logic. This also ensures idle timeouts do not trigger probe failures
   285  				DisableKeepAlives: !ProbeKeepaliveConnections,
   286  			})
   287  			if err != nil {
   288  				return nil, err
   289  			}
   290  			// Construct a http client and cache it in order to reuse the connection.
   291  			s.appProbeClient[path] = &http.Client{
   292  				Timeout: time.Duration(prober.TimeoutSeconds) * time.Second,
   293  				// We skip the verification since kubelet skips the verification for HTTPS prober as well
   294  				//
   295  				Transport:     transport,
   296  				CheckRedirect: redirectChecker(),
   297  			}
   298  		}
   299  	}
   301  	return s, nil
   302  }
   304  // Copies logic from
   305  func isRedirect(code int) bool {
   306  	return code >= http.StatusMultipleChoices && code < http.StatusBadRequest
   307  }
   309  // Using the same redirect logic that kubelet does:
   310  // This means that:
   311  // * If we exceed 10 redirects, the probe fails
   312  // * If we redirect somewhere external, the probe succeeds (
   313  // * If we redirect to the same address, the probe will follow the redirect
   314  func redirectChecker() func(*http.Request, []*http.Request) error {
   315  	return func(req *http.Request, via []*http.Request) error {
   316  		if req.URL.Hostname() != via[0].URL.Hostname() {
   317  			return http.ErrUseLastResponse
   318  		}
   319  		// Default behavior: stop after 10 redirects.
   320  		if len(via) >= 10 {
   321  			return errors.New("stopped after 10 redirects")
   322  		}
   323  		return nil
   324  	}
   325  }
   327  func validateAppKubeProber(path string, prober *Prober) error {
   328  	if !appProberPattern.MatchString(path) {
   329  		return fmt.Errorf(`invalid path, must be in form of regex pattern %v`, appProberPattern)
   330  	}
   331  	count := 0
   332  	if prober.HTTPGet != nil {
   333  		count++
   334  	}
   335  	if prober.TCPSocket != nil {
   336  		count++
   337  	}
   338  	if prober.GRPC != nil {
   339  		count++
   340  	}
   341  	if count != 1 {
   342  		return fmt.Errorf(`invalid prober type, must be one of type httpGet, tcpSocket or gRPC`)
   343  	}
   344  	if prober.HTTPGet != nil && prober.HTTPGet.Port.Type != intstr.Int {
   345  		return fmt.Errorf("invalid prober config for %v, the port must be int type", path)
   346  	}
   347  	if prober.TCPSocket != nil && prober.TCPSocket.Port.Type != intstr.Int {
   348  		return fmt.Errorf("invalid prober config for %v, the port must be int type", path)
   349  	}
   350  	return nil
   351  }
   353  // FormatProberURL returns a set of HTTP URLs that pilot agent will serve to take over Kubernetes
   354  // app probers.
   355  func FormatProberURL(container string) (string, string, string) {
   356  	return fmt.Sprintf("/app-health/%v/readyz", container),
   357  		fmt.Sprintf("/app-health/%v/livez", container),
   358  		fmt.Sprintf("/app-health/%v/startupz", container)
   359  }
// Run opens the status port and begins accepting probes.
//
// It registers the readiness, metrics, quit, drain, app-probe and ndsz handlers (plus the
// pprof handlers when profiling is enabled), listens on s.statusPort and serves requests
// from a background goroutine. Run blocks until ctx is done; the listener is closed when
// Run returns. If the port cannot be opened the error is logged and Run returns at once.
func (s *Server) Run(ctx context.Context) {
	log.Infof("Opening status port %d", s.statusPort)

	mux := http.NewServeMux()

	// Add the handler for ready probes.
	mux.HandleFunc(readyPath, s.handleReadyProbe)
	// Default path for prom
	mux.HandleFunc(`/metrics`, s.handleStats)
	// Envoy uses something else - and original agent used the same.
	// Keep for backward compat with configs.
	mux.HandleFunc(`/stats/prometheus`, s.handleStats)
	mux.HandleFunc(quitPath, s.handleQuit)
	mux.HandleFunc(drainPath, s.handleDrain)
	mux.HandleFunc("/app-health/", s.handleAppProbe)

	if s.enableProfiling {
		// Add the handler for pprof.
		mux.HandleFunc("/debug/pprof/", s.handlePprofIndex)
		mux.HandleFunc("/debug/pprof/cmdline", s.handlePprofCmdline)
		mux.HandleFunc("/debug/pprof/profile", s.handlePprofProfile)
		mux.HandleFunc("/debug/pprof/symbol", s.handlePprofSymbol)
		mux.HandleFunc("/debug/pprof/trace", s.handlePprofTrace)
	}
	mux.HandleFunc("/debug/ndsz", s.handleNdsz)

	l, err := net.Listen("tcp", fmt.Sprintf(":%d", s.statusPort))
	if err != nil {
		log.Errorf("Error listening on status port: %v", err.Error())
		return
	}
	// for testing.
	// Port 0 asks the OS for a free port; record the one that was actually allocated.
	if s.statusPort == 0 {
		_, hostPort, _ := net.SplitHostPort(l.Addr().String())
		allocatedPort, _ := strconv.Atoi(hostPort)
		s.mutex.Lock()
		s.statusPort = uint16(allocatedPort)
		s.mutex.Unlock()
	}
	defer l.Close()

	go func() {
		if err := http.Serve(l, mux); err != nil {
			// Only errors classified as unexpected by the network package are logged.
			if network.IsUnexpectedListenerError(err) {
				log.Error(err)
			}
			select {
			case <-ctx.Done():
				// We are shutting down already, don't trigger SIGTERM
				return
			default:
				// If the server errors then pilot-agent can never pass readiness or liveness probes
				// Therefore, trigger graceful termination by sending SIGTERM to the binary pid
				notifyExit()
			}
		}
	}()

	// Wait for the agent to be shut down.
	<-ctx.Done()
	log.Info("Status server has successfully terminated")
}
   425  func (s *Server) handlePprofIndex(w http.ResponseWriter, r *http.Request) {
   426  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   427  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   428  		return
   429  	}
   431  	pprof.Index(w, r)
   432  }
   434  func (s *Server) handlePprofCmdline(w http.ResponseWriter, r *http.Request) {
   435  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   436  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   437  		return
   438  	}
   440  	pprof.Cmdline(w, r)
   441  }
   443  func (s *Server) handlePprofSymbol(w http.ResponseWriter, r *http.Request) {
   444  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   445  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   446  		return
   447  	}
   449  	pprof.Symbol(w, r)
   450  }
   452  func (s *Server) handlePprofProfile(w http.ResponseWriter, r *http.Request) {
   453  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   454  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   455  		return
   456  	}
   458  	pprof.Profile(w, r)
   459  }
   461  func (s *Server) handlePprofTrace(w http.ResponseWriter, r *http.Request) {
   462  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   463  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   464  		return
   465  	}
   467  	pprof.Trace(w, r)
   468  }
   470  func (s *Server) handleReadyProbe(w http.ResponseWriter, _ *http.Request) {
   471  	err := s.isReady()
   472  	s.mutex.Lock()
   473  	if err != nil {
   474  		w.WriteHeader(http.StatusServiceUnavailable)
   476  		log.Warnf("Envoy proxy is NOT ready: %s", err.Error())
   477  		s.lastProbeSuccessful = false
   478  	} else {
   479  		w.WriteHeader(http.StatusOK)
   481  		if !s.lastProbeSuccessful {
   482  			log.Info("Envoy proxy is ready")
   483  		}
   484  		s.lastProbeSuccessful = true
   485  	}
   486  	s.mutex.Unlock()
   487  }
   489  func (s *Server) isReady() error {
   490  	for _, p := range s.ready {
   491  		if err := p.Check(); err != nil {
   492  			return err
   493  		}
   494  	}
   495  	return nil
   496  }
// PrometheusScrapeConfiguration is the application metrics scrape configuration decoded
// from the PrometheusScrapingConfig environment variable. All fields are strings:
// NewServer enables application scraping unless Scrape equals "false", and defaults Path
// to "/metrics" and Port to "80" when they are empty.
type PrometheusScrapeConfiguration struct {
	Scrape string `json:"scrape"`
	Path   string `json:"path"`
	Port   string `json:"port"`
}
// handleStats handles prometheus stats scraping. This will scrape envoy metrics, and, if configured,
// the application metrics and merge them together.
// The merge here is a simple string concatenation. This works for almost all cases, assuming the application
// is not exposing the same metrics as Envoy.
// This merging works for both FmtText and FmtOpenMetrics and will use the format of the application metrics
// Note that we do not return any errors here. If we do, we will drop metrics. For example, the app may be having issues,
// but we still want Envoy metrics. Instead, errors are tracked in the failed scrape metrics/logs.
// Output order is: agent metrics, then Envoy metrics, then application metrics.
func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) {
	if commonFeatures.MetricsLocalhostAccessOnly && !istioNetUtil.IsRequestFromLocalhost(r) {
		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
		return
	}
	metrics.ScrapeTotals.Increment()
	var err error
	var envoy, application io.ReadCloser
	var envoyCancel, appCancel context.CancelFunc
	// Release the scraped bodies and any timeout contexts once the response has been written.
	defer func() {
		if envoy != nil {
			err = envoy.Close()
			if err != nil {
				log.Infof("envoy connection is not closed: %v", err)
			}
		}
		if application != nil {
			err = application.Close()
			if err != nil {
				log.Infof("app connection is not closed: %v", err)
			}
		}
		if envoyCancel != nil {
			envoyCancel()
		}
		if appCancel != nil {
			appCancel()
		}
	}()

	// Gather all the metrics we will merge
	if !s.config.NoEnvoy {
		if envoy, envoyCancel, _, err = s.scrape(fmt.Sprintf("http://localhost:%d/stats/prometheus", s.envoyStatsPort), r.Header); err != nil {
			log.Errorf("failed scraping envoy metrics: %v", err)
			metrics.EnvoyScrapeErrors.Increment()
		}
	}

	// Scrape app metrics if defined and capture their format
	var format expfmt.Format
	if s.prometheus != nil {
		var contentType string
		url := fmt.Sprintf("http://localhost:%s%s", s.prometheus.Port, s.prometheus.Path)
		if application, appCancel, contentType, err = s.scrape(url, r.Header); err != nil {
			log.Errorf("failed scraping application metrics: %v", err)
			metrics.AppScrapeErrors.Increment()
		}
		// On a failed scrape contentType is empty, which negotiates to FmtText.
		format = negotiateMetricsFormat(contentType)
	} else {
		// Without app metrics format use a default
		format = FmtText
	}

	w.Header().Set("Content-Type", string(format))

	// Write out the metrics
	if err = scrapeAndWriteAgentMetrics(s.registry, io.Writer(w)); err != nil {
		log.Errorf("failed scraping and writing agent metrics: %v", err)
		metrics.AgentScrapeErrors.Increment()
	}

	if envoy != nil {
		_, err = io.Copy(w, envoy)
		if err != nil {
			log.Errorf("failed to scraping and writing envoy metrics: %v", err)
			metrics.EnvoyScrapeErrors.Increment()
		}
	}

	// App metrics must go last because if they are FmtOpenMetrics,
	// they will have a trailing "# EOF" which terminates the full exposition
	if application != nil {
		_, err = io.Copy(w, application)
		if err != nil {
			log.Errorf("failed to scraping and writing application metrics: %v", err)
			metrics.AppScrapeErrors.Increment()
		}
	}
}
// Content-Type values that handleStats may set on the merged metrics response; one of them
// is selected by negotiateMetricsFormat.
const (
	// FmtOpenMetrics_0_0_1 is the OpenMetrics content type for version 0.0.1.
	// nolint: revive, stylecheck
	FmtOpenMetrics_0_0_1 = expfmt.OpenMetricsType + `; version=` + expfmt.OpenMetricsVersion_0_0_1 + `; charset=utf-8`
	// FmtOpenMetrics_1_0_0 is the OpenMetrics content type for version 1.0.0.
	// nolint: revive, stylecheck
	FmtOpenMetrics_1_0_0 = expfmt.OpenMetricsType + `; version=` + expfmt.OpenMetricsVersion_1_0_0 + `; charset=utf-8`
	// FmtText is the plain-text exposition content type; it is the default format.
	FmtText              = `text/plain; version=` + expfmt.TextVersion + `; charset=utf-8`
)
   599  func negotiateMetricsFormat(contentType string) expfmt.Format {
   600  	mediaType, params, err := mime.ParseMediaType(contentType)
   601  	if err == nil && mediaType == expfmt.OpenMetricsType {
   602  		switch params["version"] {
   603  		case expfmt.OpenMetricsVersion_1_0_0:
   604  			return FmtOpenMetrics_1_0_0
   605  		case expfmt.OpenMetricsVersion_0_0_1, "":
   606  			return FmtOpenMetrics_0_0_1
   607  		}
   608  	}
   609  	return FmtText
   610  }
   612  func scrapeAndWriteAgentMetrics(registry prometheus.Gatherer, w io.Writer) error {
   613  	mfs, err := registry.Gather()
   614  	enc := expfmt.NewEncoder(w, FmtText)
   615  	if err != nil {
   616  		return err
   617  	}
   618  	for _, mf := range mfs {
   619  		if err := enc.Encode(mf); err != nil {
   620  			return err
   621  		}
   622  	}
   623  	return nil
   624  }
   626  func applyHeaders(into http.Header, from http.Header, keys ...string) {
   627  	for _, key := range keys {
   628  		val := from.Get(key)
   629  		if val != "" {
   630  			into.Set(key, val)
   631  		}
   632  	}
   633  }
   635  // getHeaderTimeout parse a string like (1.234) representing number of seconds
   636  func getHeaderTimeout(timeout string) (time.Duration, error) {
   637  	timeoutSeconds, err := strconv.ParseFloat(timeout, 64)
   638  	if err != nil {
   639  		return 0 * time.Second, err
   640  	}
   642  	return time.Duration(timeoutSeconds * 1e9), nil
   643  }
   645  // scrape will send a request to the provided url to scrape metrics from
   646  // This will attempt to mimic some of Prometheus functionality by passing some of the headers through
   647  // such as accept, timeout, and user agent
   648  // Returns the scraped metrics reader as well as the response's "Content-Type" header to determine the metrics format
   649  func (s *Server) scrape(url string, header http.Header) (io.ReadCloser, context.CancelFunc, string, error) {
   650  	var cancel context.CancelFunc
   651  	ctx := context.Background()
   652  	if timeoutString := header.Get("X-Prometheus-Scrape-Timeout-Seconds"); timeoutString != "" {
   653  		timeout, err := getHeaderTimeout(timeoutString)
   654  		if err != nil {
   655  			log.Warnf("Failed to parse timeout header %v: %v", timeoutString, err)
   656  		} else {
   657  			ctx, cancel = context.WithTimeout(ctx, timeout)
   658  		}
   659  	}
   660  	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
   661  	if err != nil {
   662  		return nil, cancel, "", err
   663  	}
   664  	applyHeaders(req.Header, header, "Accept",
   665  		"User-Agent",
   666  		"X-Prometheus-Scrape-Timeout-Seconds",
   667  	)
   669  	resp, err := s.http.Do(req)
   670  	if err != nil {
   671  		return nil, cancel, "", fmt.Errorf("error scraping %s: %v", url, err)
   672  	}
   673  	if resp.StatusCode != http.StatusOK {
   674  		resp.Body.Close()
   675  		return nil, cancel, "", fmt.Errorf("error scraping %s, status code: %v", url, resp.StatusCode)
   676  	}
   677  	format := resp.Header.Get("Content-Type")
   678  	return resp.Body, cancel, format, nil
   679  }
   681  func (s *Server) handleQuit(w http.ResponseWriter, r *http.Request) {
   682  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   683  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   684  		return
   685  	}
   686  	if r.Method != http.MethodPost {
   687  		http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
   688  		return
   689  	}
   690  	w.WriteHeader(http.StatusOK)
   691  	_, _ = w.Write([]byte("OK"))
   692  	log.Infof("handling %s, notifying pilot-agent to exit", quitPath)
   693  	s.shutdown()
   694  }
   696  func (s *Server) handleDrain(w http.ResponseWriter, r *http.Request) {
   697  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   698  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   699  		return
   700  	}
   701  	if r.Method != http.MethodPost {
   702  		http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
   703  		return
   704  	}
   705  	w.WriteHeader(http.StatusOK)
   706  	_, _ = w.Write([]byte("OK"))
   707  	log.Infof("handling %s, starting drain", drainPath)
   708  	s.drain()
   709  }
   711  func (s *Server) handleAppProbe(w http.ResponseWriter, req *http.Request) {
   712  	// Validate the request first.
   713  	path := req.URL.Path
   714  	if !strings.HasPrefix(path, "/") {
   715  		path = "/" + req.URL.Path
   716  	}
   717  	prober, exists := s.appKubeProbers[path]
   718  	if !exists {
   719  		log.Errorf("Prober does not exists url %v", path)
   720  		w.WriteHeader(http.StatusBadRequest)
   721  		_, _ = w.Write([]byte(fmt.Sprintf("app prober config does not exists for %v", path)))
   722  		return
   723  	}
   725  	switch {
   726  	case prober.HTTPGet != nil:
   727  		s.handleAppProbeHTTPGet(w, req, prober, path)
   728  	case prober.TCPSocket != nil:
   729  		s.handleAppProbeTCPSocket(w, prober)
   730  	case prober.GRPC != nil:
   731  		s.handleAppProbeGRPC(w, req, prober)
   732  	}
   733  }
// handleAppProbeHTTPGet performs the HTTP GET probe configured for path against the
// application and relays the outcome to w.
//
// The request goes to s.appProbersDestination on the prober's port, with the prober's path
// and scheme (https only when the prober asks for it). Incoming headers are forwarded.
// A redirect response from the application is reported as 200; any other status code is
// relayed together with at most maxRespBodyLength bytes of the body. Failures to build or
// send the request are reported as 500.
func (s *Server) handleAppProbeHTTPGet(w http.ResponseWriter, req *http.Request, prober *Prober, path string) {
	proberPath := prober.HTTPGet.Path
	if !strings.HasPrefix(proberPath, "/") {
		proberPath = "/" + proberPath
	}
	var url string

	hostPort := net.JoinHostPort(s.appProbersDestination, strconv.Itoa(prober.HTTPGet.Port.IntValue()))
	if prober.HTTPGet.Scheme == apimirror.URISchemeHTTPS {
		url = fmt.Sprintf("https://%s%s", hostPort, proberPath)
	} else {
		url = fmt.Sprintf("http://%s%s", hostPort, proberPath)
	}
	appReq, err := http.NewRequest(http.MethodGet, url, nil)
	if err != nil {
		log.Errorf("Failed to create request to probe app %v, original url %v", err, path)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}

	// Start from the Host the prober sent; it may be adjusted or overridden below.
	appReq.Host = req.Host
	if host, port, err := net.SplitHostPort(req.Host); err == nil {
		port, _ := strconv.Atoi(port)
		// the port is same as the status port, then we need to replace the port in the host with the real one
		if port == int(s.statusPort) {
			realPort := strconv.Itoa(prober.HTTPGet.Port.IntValue())
			appReq.Host = net.JoinHostPort(host, realPort)
		}
	}
	// Forward incoming headers to the application.
	for name, values := range req.Header {
		appReq.Header[name] = slices.Clone(values)
		if len(values) > 0 && (name == "Host") {
			// Probe has specific host header override; honor it
			appReq.Host = values[0]
		}
	}

	// A client exists for this path because NewServer creates one for every prober
	// that has an HTTPGet action.
	httpClient := s.appProbeClient[path]

	// Send the request.
	response, err := httpClient.Do(appReq)
	if err != nil {
		log.Errorf("Request to probe app failed: %v, original URL path = %v\napp URL path = %v", err, path, proberPath)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}
	defer func() {
		// Drain and close the body to let the Transport reuse the connection
		_, _ = io.Copy(io.Discard, response.Body)
		_ = response.Body.Close()
	}()

	if isRedirect(response.StatusCode) { // Redirect
		// In other cases, we return the original status code. For redirects, it is illegal to
		// not have Location header, so we need to switch to just 200.
		w.WriteHeader(http.StatusOK)
		return
	}
	// Relay the application's status code unchanged.
	w.WriteHeader(response.StatusCode)
	// Return the body from probe as well
	b, _ := k8sUtilIo.ReadAtMost(response.Body, maxRespBodyLength)
	_, _ = w.Write(b)
}
   802  func (s *Server) handleAppProbeTCPSocket(w http.ResponseWriter, prober *Prober) {
   803  	timeout := time.Duration(prober.TimeoutSeconds) * time.Second
   805  	d := ProbeDialer()
   806  	d.LocalAddr = s.upstreamLocalAddress
   807  	d.Timeout = timeout
   809  	conn, err := d.Dial("tcp", net.JoinHostPort(s.appProbersDestination, strconv.Itoa(prober.TCPSocket.Port.IntValue())))
   810  	if err != nil {
   811  		w.WriteHeader(http.StatusInternalServerError)
   812  	} else {
   813  		w.WriteHeader(http.StatusOK)
   814  		err = conn.Close()
   815  		if err != nil {
   816  			log.Infof("tcp connection is not closed: %v", err)
   817  		}
   818  	}
   819  }
   821  func (s *Server) handleAppProbeGRPC(w http.ResponseWriter, req *http.Request, prober *Prober) {
   822  	timeout := time.Duration(prober.TimeoutSeconds) * time.Second
   823  	// the DialOptions are referenced from
   824  	opts := []grpc.DialOption{
   825  		grpc.WithBlock(),
   826  		grpc.WithTransportCredentials(insecure.NewCredentials()), // credentials are currently not supported
   827  		grpc.WithContextDialer(func(ctx context.Context, addr string) (net.Conn, error) {
   828  			d := ProbeDialer()
   829  			d.LocalAddr = s.upstreamLocalAddress
   830  			d.Timeout = timeout
   831  			return d.DialContext(ctx, "tcp", addr)
   832  		}),
   833  	}
   834  	if userAgent := req.Header["User-Agent"]; len(userAgent) > 0 {
   835  		// simulate kubelet
   836  		// please refer to:
   837  		//
   838  		//
   839  		opts = append(opts, grpc.WithUserAgent(userAgent[0]))
   840  	}
   842  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
   843  	defer cancel()
   845  	addr := net.JoinHostPort(s.appProbersDestination, strconv.Itoa(int(prober.GRPC.Port)))
   846  	conn, err := grpc.DialContext(ctx, addr, opts...)
   847  	if err != nil {
   848  		log.Errorf("Failed to create grpc connection to probe app: %v", err)
   849  		w.WriteHeader(http.StatusInternalServerError)
   850  		return
   851  	}
   852  	defer conn.Close()
   854  	var svc string
   855  	if prober.GRPC.Service != nil {
   856  		svc = *prober.GRPC.Service
   857  	}
   858  	grpcClient := grpcHealth.NewHealthClient(conn)
   859  	resp, err := grpcClient.Check(ctx, &grpcHealth.HealthCheckRequest{
   860  		Service: svc,
   861  	})
   862  	// the error handling is referenced from
   863  	if err != nil {
   864  		status, ok := grpcStatus.FromError(err)
   865  		if ok {
   866  			switch status.Code() {
   867  			case codes.Unimplemented:
   868  				log.Errorf("server does not implement the grpc health protocol ( %v", err)
   869  			case codes.DeadlineExceeded:
   870  				log.Errorf("grpc request not finished within timeout: %v", err)
   871  			default:
   872  				log.Errorf("grpc probe failed: %v", err)
   873  			}
   874  		} else {
   875  			log.Errorf("grpc probe failed: %v", err)
   876  		}
   877  		w.WriteHeader(http.StatusInternalServerError)
   878  		return
   879  	}
   881  	if resp.GetStatus() == grpcHealth.HealthCheckResponse_SERVING {
   882  		w.WriteHeader(http.StatusOK)
   883  		return
   884  	}
   885  	w.WriteHeader(http.StatusInternalServerError)
   886  }
   888  func (s *Server) handleNdsz(w http.ResponseWriter, r *http.Request) {
   889  	if !istioNetUtil.IsRequestFromLocalhost(r) {
   890  		http.Error(w, "Only requests from localhost are allowed", http.StatusForbidden)
   891  		return
   892  	}
   893  	nametable := s.fetchDNS()
   894  	if nametable == nil {
   895  		// See for why writeJSONProto cannot handle this
   896  		w.WriteHeader(http.StatusNotFound)
   897  		_, _ = w.Write([]byte(`{}`))
   898  		return
   899  	}
   900  	writeJSONProto(w, nametable)
   901  }
   903  // writeJSONProto writes a protobuf to a json payload, handling content type, marshaling, and errors
   904  func writeJSONProto(w http.ResponseWriter, obj any) {
   905  	w.Header().Set("Content-Type", "application/json")
   906  	b, err := config.ToJSON(obj)
   907  	if err != nil {
   908  		w.WriteHeader(http.StatusInternalServerError)
   909  		_, _ = w.Write([]byte(err.Error()))
   910  		return
   911  	}
   912  	_, err = w.Write(b)
   913  	if err != nil {
   914  		w.WriteHeader(http.StatusInternalServerError)
   915  	}
   916  }
   918  // notifyExit sends SIGTERM to itself
   919  func notifyExit() {
   920  	p, err := os.FindProcess(os.Getpid())
   921  	if err != nil {
   922  		log.Error(err)
   923  	}
   924  	if err := p.Signal(syscall.SIGTERM); err != nil {
   925  		log.Errorf("failed to send SIGTERM to self: %v", err)
   926  	}
   927  }
   929  var defaultTransport = http.DefaultTransport.(*http.Transport)
   931  // SetTransportDefaults mirrors Kubernetes probe settings
   932  //
   933  func setTransportDefaults(t *http.Transport) (*http.Transport, error) {
   934  	if !EnableHTTP2Probing {
   935  		return t, nil
   936  	}
   937  	if t.TLSHandshakeTimeout == 0 {
   938  		t.TLSHandshakeTimeout = defaultTransport.TLSHandshakeTimeout
   939  	}
   940  	if t.IdleConnTimeout == 0 {
   941  		t.IdleConnTimeout = defaultTransport.IdleConnTimeout
   942  	}
   943  	t2, err := http2.ConfigureTransports(t)
   944  	if err != nil {
   945  		return nil, err
   946  	}
   947  	t2.ReadIdleTimeout = time.Duration(30) * time.Second
   948  	t2.PingTimeout = time.Duration(15) * time.Second
   949  	return t, nil
   950  }