k8s.io/apiserver@v0.31.1/pkg/server/healthz/healthz.go (about)

     1  /*
     2  Copyright 2014 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package healthz
    18  
    19  import (
    20  	"bytes"
    21  	"context"
    22  	"fmt"
    23  	"net/http"
    24  	"reflect"
    25  	"strings"
    26  	"sync"
    27  	"sync/atomic"
    28  	"time"
    29  
    30  	"k8s.io/apimachinery/pkg/util/sets"
    31  	"k8s.io/apimachinery/pkg/util/wait"
    32  	"k8s.io/apiserver/pkg/endpoints/metrics"
    33  	"k8s.io/apiserver/pkg/server/httplog"
    34  	"k8s.io/component-base/metrics/prometheus/slis"
    35  	"k8s.io/klog/v2"
    36  )
    37  
// HealthChecker is a named healthz checker.
type HealthChecker interface {
	// Name returns the check's name. The install functions in this package
	// use it as the last path segment of the per-check endpoint and in the
	// per-check lines of the aggregate endpoint's output.
	Name() string
	// Check runs the check for the given request. A nil error means the
	// check passed; a non-nil error means it failed.
	Check(req *http.Request) error
}
    43  
// PingHealthz is a HealthChecker named "ping" whose Check always succeeds.
var PingHealthz HealthChecker = ping{}
    46  
// ping implements the simplest possible healthz checker: it carries no state
// and its Check never returns an error.
type ping struct{}
    49  
// Name returns the fixed name of this check, "ping".
func (ping) Name() string {
	return "ping"
}
    53  
// Check always reports success by returning nil; the request is ignored.
func (ping) Check(_ *http.Request) error {
	return nil
}
    58  
// LogHealthz is a HealthChecker named "log" that passes as long as logging is
// not blocked, i.e. as long as periodic klog flushes keep completing.
var LogHealthz HealthChecker = &log{}
    61  
// log is the HealthChecker behind LogHealthz. startOnce guards the one-time
// start of the background flush loop; lastVerified holds the time.Time at
// which a flush last completed (or at which the loop was started, before the
// first flush has completed).
type log struct {
	startOnce    sync.Once
	lastVerified atomic.Value
}
    66  
// Name returns the fixed name of this check, "log".
func (l *log) Name() string {
	return "log"
}
    70  
    71  func (l *log) Check(_ *http.Request) error {
    72  	l.startOnce.Do(func() {
    73  		l.lastVerified.Store(time.Now())
    74  		go wait.Forever(func() {
    75  			klog.Flush()
    76  			l.lastVerified.Store(time.Now())
    77  		}, time.Minute)
    78  	})
    79  
    80  	lastVerified := l.lastVerified.Load().(time.Time)
    81  	if time.Since(lastVerified) < (2 * time.Minute) {
    82  		return nil
    83  	}
    84  	return fmt.Errorf("logging blocked")
    85  }
    86  
// cacheSyncWaiter is what informerSync queries. WaitForCacheSync takes a stop
// channel and returns one bool per informer type; informerSync.Check treats a
// false value as a failing informer.
type cacheSyncWaiter interface {
	WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool
}
    90  
// informerSync is a HealthChecker that passes only when the wrapped
// cacheSyncWaiter reports true for every informer type.
type informerSync struct {
	cacheSyncWaiter cacheSyncWaiter
}

// Compile-time check that *informerSync implements HealthChecker.
var _ HealthChecker = &informerSync{}
    96  
// NewInformerSyncHealthz returns a new HealthChecker, named "informer-sync",
// that will pass only if all informers in the given cacheSyncWaiter sync.
func NewInformerSyncHealthz(cacheSyncWaiter cacheSyncWaiter) HealthChecker {
	return &informerSync{
		cacheSyncWaiter: cacheSyncWaiter,
	}
}
   103  
// Name returns the fixed name of this check, "informer-sync".
func (i *informerSync) Name() string {
	return "informer-sync"
}
   107  
// shutdown is a HealthChecker that fails once stopCh is closed (or otherwise
// becomes readable).
type shutdown struct {
	stopCh <-chan struct{}
}
   111  
   112  // NewShutdownHealthz returns a new HealthChecker that will fail if the embedded channel is closed.
   113  // This is intended to allow for graceful shutdown sequences.
   114  func NewShutdownHealthz(stopCh <-chan struct{}) HealthChecker {
   115  	return &shutdown{stopCh}
   116  }
   117  
// Name returns the fixed name of this check, "shutdown".
func (s *shutdown) Name() string {
	return "shutdown"
}
   121  
   122  func (s *shutdown) Check(req *http.Request) error {
   123  	select {
   124  	case <-s.stopCh:
   125  		return fmt.Errorf("process is shutting down")
   126  	default:
   127  	}
   128  	return nil
   129  }
   130  
   131  func (i *informerSync) Check(_ *http.Request) error {
   132  	stopCh := make(chan struct{})
   133  	// Close stopCh to force checking if informers are synced now.
   134  	close(stopCh)
   135  
   136  	informersByStarted := make(map[bool][]string)
   137  	for informerType, started := range i.cacheSyncWaiter.WaitForCacheSync(stopCh) {
   138  		informersByStarted[started] = append(informersByStarted[started], informerType.String())
   139  	}
   140  
   141  	if notStarted := informersByStarted[false]; len(notStarted) > 0 {
   142  		return fmt.Errorf("%d informers not started yet: %v", len(notStarted), notStarted)
   143  	}
   144  	return nil
   145  }
   146  
   147  // NamedCheck returns a healthz checker for the given name and function.
   148  func NamedCheck(name string, check func(r *http.Request) error) HealthChecker {
   149  	return &healthzCheck{name, check}
   150  }
   151  
// InstallHandler registers handlers for health checking on the path
// "/healthz" to mux. *All checks* for that path must be specified in
// exactly one call to InstallHandler. Calling InstallHandler more
// than once for the same mux registers the same patterns again, which
// panics on muxes that reject duplicate patterns (as http.ServeMux does).
func InstallHandler(mux mux, checks ...HealthChecker) {
	InstallPathHandler(mux, "/healthz", checks...)
}
   159  
// InstallReadyzHandler registers handlers for health checking on the path
// "/readyz" to mux. *All checks* for that path must be specified in
// exactly one call to InstallReadyzHandler. Calling InstallReadyzHandler more
// than once for the same mux registers the same patterns again, which
// panics on muxes that reject duplicate patterns (as http.ServeMux does).
func InstallReadyzHandler(mux mux, checks ...HealthChecker) {
	InstallPathHandler(mux, "/readyz", checks...)
}
   167  
// InstallLivezHandler registers handlers for liveness checking on the path
// "/livez" to mux. *All checks* for that path must be specified in
// exactly one call to InstallLivezHandler. Calling InstallLivezHandler more
// than once for the same mux registers the same patterns again, which
// panics on muxes that reject duplicate patterns (as http.ServeMux does).
func InstallLivezHandler(mux mux, checks ...HealthChecker) {
	InstallPathHandler(mux, "/livez", checks...)
}
   175  
// InstallPathHandler registers handlers for health checking on
// a specific path to mux. *All checks* for the path must be
// specified in exactly one call to InstallPathHandler. Calling
// InstallPathHandler more than once for the same path and mux registers
// the same patterns again, which panics on muxes that reject duplicate
// patterns (as http.ServeMux does).
//
// It is InstallPathHandlerWithHealthyFunc with no first-time-healthy callback.
func InstallPathHandler(mux mux, path string, checks ...HealthChecker) {
	InstallPathHandlerWithHealthyFunc(mux, path, nil, checks...)
}
   184  
   185  // InstallPathHandlerWithHealthyFunc is like InstallPathHandler, but calls firstTimeHealthy exactly once
   186  // when the handler succeeds for the first time.
   187  func InstallPathHandlerWithHealthyFunc(mux mux, path string, firstTimeHealthy func(), checks ...HealthChecker) {
   188  	if len(checks) == 0 {
   189  		klog.V(5).Info("No default health checks specified. Installing the ping handler.")
   190  		checks = []HealthChecker{PingHealthz}
   191  	}
   192  
   193  	klog.V(5).Infof("Installing health checkers for (%v): %v", path, formatQuoted(checkerNames(checks...)...))
   194  
   195  	name := strings.Split(strings.TrimPrefix(path, "/"), "/")[0]
   196  	mux.Handle(path,
   197  		metrics.InstrumentHandlerFunc("GET",
   198  			/* group = */ "",
   199  			/* version = */ "",
   200  			/* resource = */ "",
   201  			/* subresource = */ path,
   202  			/* scope = */ "",
   203  			/* component = */ "",
   204  			/* deprecated */ false,
   205  			/* removedRelease */ "",
   206  			handleRootHealth(name, firstTimeHealthy, checks...)))
   207  	for _, check := range checks {
   208  		mux.Handle(fmt.Sprintf("%s/%v", path, check.Name()), adaptCheckToHandler(check.Check))
   209  	}
   210  }
   211  
// mux is an interface describing the methods InstallHandler requires: only
// the ability to register an http.Handler for a pattern.
type mux interface {
	Handle(pattern string, handler http.Handler)
}
   216  
// healthzCheck implements HealthChecker on an arbitrary name and check function.
// It is the type returned by NamedCheck.
type healthzCheck struct {
	name  string
	check func(r *http.Request) error
}

// Compile-time check that *healthzCheck implements HealthChecker.
var _ HealthChecker = &healthzCheck{}
   224  
// Name returns the name this check was constructed with.
func (c *healthzCheck) Name() string {
	return c.name
}
   228  
// Check invokes the wrapped check function with r and returns its result.
func (c *healthzCheck) Check(r *http.Request) error {
	return c.check(r)
}
   232  
   233  // getExcludedChecks extracts the health check names to be excluded from the query param
   234  func getExcludedChecks(r *http.Request) sets.String {
   235  	checks, found := r.URL.Query()["exclude"]
   236  	if found {
   237  		return sets.NewString(checks...)
   238  	}
   239  	return sets.NewString()
   240  }
   241  
   242  // handleRootHealth returns an http.HandlerFunc that serves the provided checks.
   243  func handleRootHealth(name string, firstTimeHealthy func(), checks ...HealthChecker) http.HandlerFunc {
   244  	var notifyOnce sync.Once
   245  	return func(w http.ResponseWriter, r *http.Request) {
   246  		excluded := getExcludedChecks(r)
   247  		// failedVerboseLogOutput is for output to the log.  It indicates detailed failed output information for the log.
   248  		var failedVerboseLogOutput bytes.Buffer
   249  		var failedChecks []string
   250  		var individualCheckOutput bytes.Buffer
   251  		for _, check := range checks {
   252  			// no-op the check if we've specified we want to exclude the check
   253  			if excluded.Has(check.Name()) {
   254  				excluded.Delete(check.Name())
   255  				fmt.Fprintf(&individualCheckOutput, "[+]%s excluded: ok\n", check.Name())
   256  				continue
   257  			}
   258  			if err := check.Check(r); err != nil {
   259  				slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Error)
   260  				// don't include the error since this endpoint is public.  If someone wants more detail
   261  				// they should have explicit permission to the detailed checks.
   262  				fmt.Fprintf(&individualCheckOutput, "[-]%s failed: reason withheld\n", check.Name())
   263  				// but we do want detailed information for our log
   264  				fmt.Fprintf(&failedVerboseLogOutput, "[-]%s failed: %v\n", check.Name(), err)
   265  				failedChecks = append(failedChecks, check.Name())
   266  			} else {
   267  				slis.ObserveHealthcheck(context.Background(), check.Name(), name, slis.Success)
   268  				fmt.Fprintf(&individualCheckOutput, "[+]%s ok\n", check.Name())
   269  			}
   270  		}
   271  		if excluded.Len() > 0 {
   272  			fmt.Fprintf(&individualCheckOutput, "warn: some health checks cannot be excluded: no matches for %s\n", formatQuoted(excluded.List()...))
   273  			klog.V(6).Infof("cannot exclude some health checks, no health checks are installed matching %s",
   274  				formatQuoted(excluded.List()...))
   275  		}
   276  		// always be verbose on failure
   277  		if len(failedChecks) > 0 {
   278  			klog.V(2).Infof("%s check failed: %s\n%v", strings.Join(failedChecks, ","), name, failedVerboseLogOutput.String())
   279  			httplog.SetStacktracePredicate(r.Context(), func(int) bool { return false })
   280  			http.Error(w, fmt.Sprintf("%s%s check failed", individualCheckOutput.String(), name), http.StatusInternalServerError)
   281  			return
   282  		}
   283  
   284  		// signal first time this is healthy
   285  		if firstTimeHealthy != nil {
   286  			notifyOnce.Do(firstTimeHealthy)
   287  		}
   288  
   289  		w.Header().Set("Content-Type", "text/plain; charset=utf-8")
   290  		w.Header().Set("X-Content-Type-Options", "nosniff")
   291  		if _, found := r.URL.Query()["verbose"]; !found {
   292  			fmt.Fprint(w, "ok")
   293  			return
   294  		}
   295  
   296  		individualCheckOutput.WriteTo(w)
   297  		fmt.Fprintf(w, "%s check passed\n", name)
   298  	}
   299  }
   300  
   301  // adaptCheckToHandler returns an http.HandlerFunc that serves the provided checks.
   302  func adaptCheckToHandler(c func(r *http.Request) error) http.HandlerFunc {
   303  	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   304  		err := c(r)
   305  		if err != nil {
   306  			http.Error(w, fmt.Sprintf("internal server error: %v", err), http.StatusInternalServerError)
   307  		} else {
   308  			fmt.Fprint(w, "ok")
   309  		}
   310  	})
   311  }
   312  
   313  // checkerNames returns the names of the checks in the same order as passed in.
   314  func checkerNames(checks ...HealthChecker) []string {
   315  	// accumulate the names of checks for printing them out.
   316  	checkerNames := make([]string, 0, len(checks))
   317  	for _, check := range checks {
   318  		checkerNames = append(checkerNames, check.Name())
   319  	}
   320  	return checkerNames
   321  }
   322  
   323  // formatQuoted returns a formatted string of the health check names,
   324  // preserving the order passed in.
   325  func formatQuoted(names ...string) string {
   326  	quoted := make([]string, 0, len(names))
   327  	for _, name := range names {
   328  		quoted = append(quoted, fmt.Sprintf("%q", name))
   329  	}
   330  	return strings.Join(quoted, ",")
   331  }