github.com/outbrain/consul@v1.4.5/agent/checks/check.go (about)

     1  package checks
     2  
     3  import (
     4  	"crypto/tls"
     5  	"fmt"
     6  	"io"
     7  	"io/ioutil"
     8  	"log"
     9  	"net"
    10  	"net/http"
    11  	"os"
    12  	osexec "os/exec"
    13  	"sync"
    14  	"syscall"
    15  	"time"
    16  
    17  	"github.com/armon/circbuf"
    18  	"github.com/hashicorp/consul/agent/exec"
    19  	"github.com/hashicorp/consul/api"
    20  	"github.com/hashicorp/consul/lib"
    21  	"github.com/hashicorp/consul/types"
    22  	"github.com/hashicorp/go-cleanhttp"
    23  )
    24  
    25  const (
    26  	// MinInterval is the minimal interval between
    27  	// two checks. Do not allow for a interval below this value.
    28  	// Otherwise we risk fork bombing a system.
    29  	MinInterval = time.Second
    30  
    31  	// BufSize is the maximum size of the captured
    32  	// check output. Prevents an enormous buffer
    33  	// from being captured
    34  	BufSize = 4 * 1024 // 4KB
    35  
    36  	// UserAgent is the value of the User-Agent header
    37  	// for HTTP health checks.
    38  	UserAgent = "Consul Health Check"
    39  )
    40  
    41  // RPC is an interface that an RPC client must implement. This is a helper
    42  // interface that is implemented by the agent delegate for checks that need
    43  // to make RPC calls.
    44  type RPC interface {
    45  	RPC(method string, args interface{}, reply interface{}) error
    46  }
    47  
    48  // CheckNotifier interface is used by the CheckMonitor
    49  // to notify when a check has a status update. The update
    50  // should take care to be idempotent.
    51  type CheckNotifier interface {
    52  	UpdateCheck(checkID types.CheckID, status, output string)
    53  }
    54  
    55  // CheckMonitor is used to periodically invoke a script to
    56  // determine the health of a given check. It is compatible with
    57  // nagios plugins and expects the output in the same format.
    58  type CheckMonitor struct {
    59  	Notify     CheckNotifier
    60  	CheckID    types.CheckID
    61  	Script     string
    62  	ScriptArgs []string
    63  	Interval   time.Duration
    64  	Timeout    time.Duration
    65  	Logger     *log.Logger
    66  
    67  	stop     bool
    68  	stopCh   chan struct{}
    69  	stopLock sync.Mutex
    70  }
    71  
    72  // Start is used to start a check monitor.
    73  // Monitor runs until stop is called
    74  func (c *CheckMonitor) Start() {
    75  	c.stopLock.Lock()
    76  	defer c.stopLock.Unlock()
    77  	c.stop = false
    78  	c.stopCh = make(chan struct{})
    79  	go c.run()
    80  }
    81  
    82  // Stop is used to stop a check monitor.
    83  func (c *CheckMonitor) Stop() {
    84  	c.stopLock.Lock()
    85  	defer c.stopLock.Unlock()
    86  	if !c.stop {
    87  		c.stop = true
    88  		close(c.stopCh)
    89  	}
    90  }
    91  
    92  // run is invoked by a goroutine to run until Stop() is called
    93  func (c *CheckMonitor) run() {
    94  	// Get the randomized initial pause time
    95  	initialPauseTime := lib.RandomStagger(c.Interval)
    96  	next := time.After(initialPauseTime)
    97  	for {
    98  		select {
    99  		case <-next:
   100  			c.check()
   101  			next = time.After(c.Interval)
   102  		case <-c.stopCh:
   103  			return
   104  		}
   105  	}
   106  }
   107  
   108  // check is invoked periodically to perform the script check
   109  func (c *CheckMonitor) check() {
   110  	// Create the command
   111  	var cmd *osexec.Cmd
   112  	var err error
   113  	if len(c.ScriptArgs) > 0 {
   114  		cmd, err = exec.Subprocess(c.ScriptArgs)
   115  	} else {
   116  		cmd, err = exec.Script(c.Script)
   117  	}
   118  	if err != nil {
   119  		c.Logger.Printf("[ERR] agent: Check %q failed to setup: %s", c.CheckID, err)
   120  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
   121  		return
   122  	}
   123  
   124  	// Collect the output
   125  	output, _ := circbuf.NewBuffer(BufSize)
   126  	cmd.Stdout = output
   127  	cmd.Stderr = output
   128  	exec.SetSysProcAttr(cmd)
   129  
   130  	truncateAndLogOutput := func() string {
   131  		outputStr := string(output.Bytes())
   132  		if output.TotalWritten() > output.Size() {
   133  			outputStr = fmt.Sprintf("Captured %d of %d bytes\n...\n%s",
   134  				output.Size(), output.TotalWritten(), outputStr)
   135  		}
   136  		c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, outputStr)
   137  		return outputStr
   138  	}
   139  
   140  	// Start the check
   141  	if err := cmd.Start(); err != nil {
   142  		c.Logger.Printf("[ERR] agent: Check %q failed to invoke: %s", c.CheckID, err)
   143  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
   144  		return
   145  	}
   146  
   147  	// Wait for the check to complete
   148  	waitCh := make(chan error, 1)
   149  	go func() {
   150  		waitCh <- cmd.Wait()
   151  	}()
   152  
   153  	timeout := 30 * time.Second
   154  	if c.Timeout > 0 {
   155  		timeout = c.Timeout
   156  	}
   157  	select {
   158  	case <-time.After(timeout):
   159  		if err := exec.KillCommandSubtree(cmd); err != nil {
   160  			c.Logger.Printf("[WARN] agent: Check %q failed to kill after timeout: %s", c.CheckID, err)
   161  		}
   162  
   163  		msg := fmt.Sprintf("Timed out (%s) running check", timeout.String())
   164  		c.Logger.Printf("[WARN] agent: Check %q: %s", c.CheckID, msg)
   165  
   166  		outputStr := truncateAndLogOutput()
   167  		if len(outputStr) > 0 {
   168  			msg += "\n\n" + outputStr
   169  		}
   170  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, msg)
   171  
   172  		// Now wait for the process to exit so we never start another
   173  		// instance concurrently.
   174  		<-waitCh
   175  		return
   176  
   177  	case err = <-waitCh:
   178  		// The process returned before the timeout, proceed normally
   179  	}
   180  
   181  	// Check if the check passed
   182  	outputStr := truncateAndLogOutput()
   183  	if err == nil {
   184  		c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
   185  		c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, outputStr)
   186  		return
   187  	}
   188  
   189  	// If the exit code is 1, set check as warning
   190  	exitErr, ok := err.(*osexec.ExitError)
   191  	if ok {
   192  		if status, ok := exitErr.Sys().(syscall.WaitStatus); ok {
   193  			code := status.ExitStatus()
   194  			if code == 1 {
   195  				c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
   196  				c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, outputStr)
   197  				return
   198  			}
   199  		}
   200  	}
   201  
   202  	// Set the health as critical
   203  	c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
   204  	c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, outputStr)
   205  }
   206  
// CheckTTL is used to apply a TTL to check status,
// and enables clients to set the status of a check
// but upon the TTL expiring, the check status is
// automatically set to critical.
type CheckTTL struct {
	// Notify receives both client-set statuses and the critical status
	// reported on expiry.
	Notify CheckNotifier
	// CheckID identifies the check in updates and log lines.
	CheckID types.CheckID
	// TTL is the duration the timer is armed with by Start and SetStatus.
	TTL time.Duration
	// Logger receives the check's log output.
	Logger *log.Logger

	// timer fires when the TTL elapses without a SetStatus call. It is
	// created by Start, so it is nil until Start has been called.
	timer *time.Timer

	// lastOutput holds the output from the most recent SetStatus call so
	// it can be included in the expiry message; guarded by lastOutputLock.
	lastOutput     string
	lastOutputLock sync.RWMutex

	// stop records whether Stop has already closed stopCh; stopCh signals
	// the run loop to exit. Start and Stop access both under stopLock.
	stop     bool
	stopCh   chan struct{}
	stopLock sync.Mutex
}
   226  
   227  // Start is used to start a check ttl, runs until Stop()
   228  func (c *CheckTTL) Start() {
   229  	c.stopLock.Lock()
   230  	defer c.stopLock.Unlock()
   231  	c.stop = false
   232  	c.stopCh = make(chan struct{})
   233  	c.timer = time.NewTimer(c.TTL)
   234  	go c.run()
   235  }
   236  
   237  // Stop is used to stop a check ttl.
   238  func (c *CheckTTL) Stop() {
   239  	c.stopLock.Lock()
   240  	defer c.stopLock.Unlock()
   241  	if !c.stop {
   242  		c.timer.Stop()
   243  		c.stop = true
   244  		close(c.stopCh)
   245  	}
   246  }
   247  
   248  // run is used to handle TTL expiration and to update the check status
   249  func (c *CheckTTL) run() {
   250  	for {
   251  		select {
   252  		case <-c.timer.C:
   253  			c.Logger.Printf("[WARN] agent: Check %q missed TTL, is now critical",
   254  				c.CheckID)
   255  			c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, c.getExpiredOutput())
   256  
   257  		case <-c.stopCh:
   258  			return
   259  		}
   260  	}
   261  }
   262  
   263  // getExpiredOutput formats the output for the case when the TTL is expired.
   264  func (c *CheckTTL) getExpiredOutput() string {
   265  	c.lastOutputLock.RLock()
   266  	defer c.lastOutputLock.RUnlock()
   267  
   268  	const prefix = "TTL expired"
   269  	if c.lastOutput == "" {
   270  		return prefix
   271  	}
   272  
   273  	return fmt.Sprintf("%s (last output before timeout follows): %s", prefix, c.lastOutput)
   274  }
   275  
   276  // SetStatus is used to update the status of the check,
   277  // and to renew the TTL. If expired, TTL is restarted.
   278  func (c *CheckTTL) SetStatus(status, output string) {
   279  	c.Logger.Printf("[DEBUG] agent: Check %q status is now %s", c.CheckID, status)
   280  	c.Notify.UpdateCheck(c.CheckID, status, output)
   281  
   282  	// Store the last output so we can retain it if the TTL expires.
   283  	c.lastOutputLock.Lock()
   284  	c.lastOutput = output
   285  	c.lastOutputLock.Unlock()
   286  
   287  	c.timer.Reset(c.TTL)
   288  }
   289  
   290  // CheckHTTP is used to periodically make an HTTP request to
   291  // determine the health of a given check.
   292  // The check is passing if the response code is 2XX.
   293  // The check is warning if the response code is 429.
   294  // The check is critical if the response code is anything else
   295  // or if the request returns an error
   296  type CheckHTTP struct {
   297  	Notify          CheckNotifier
   298  	CheckID         types.CheckID
   299  	HTTP            string
   300  	Header          map[string][]string
   301  	Method          string
   302  	Interval        time.Duration
   303  	Timeout         time.Duration
   304  	Logger          *log.Logger
   305  	TLSClientConfig *tls.Config
   306  
   307  	httpClient *http.Client
   308  	stop       bool
   309  	stopCh     chan struct{}
   310  	stopLock   sync.Mutex
   311  }
   312  
   313  // Start is used to start an HTTP check.
   314  // The check runs until stop is called
   315  func (c *CheckHTTP) Start() {
   316  	c.stopLock.Lock()
   317  	defer c.stopLock.Unlock()
   318  
   319  	if c.httpClient == nil {
   320  		// Create the transport. We disable HTTP Keep-Alive's to prevent
   321  		// failing checks due to the keepalive interval.
   322  		trans := cleanhttp.DefaultTransport()
   323  		trans.DisableKeepAlives = true
   324  
   325  		// Take on the supplied TLS client config.
   326  		trans.TLSClientConfig = c.TLSClientConfig
   327  
   328  		// Create the HTTP client.
   329  		c.httpClient = &http.Client{
   330  			Timeout:   10 * time.Second,
   331  			Transport: trans,
   332  		}
   333  
   334  		// For long (>10s) interval checks the http timeout is 10s, otherwise the
   335  		// timeout is the interval. This means that a check *should* return
   336  		// before the next check begins.
   337  		if c.Timeout > 0 && c.Timeout < c.Interval {
   338  			c.httpClient.Timeout = c.Timeout
   339  		} else if c.Interval < 10*time.Second {
   340  			c.httpClient.Timeout = c.Interval
   341  		}
   342  	}
   343  
   344  	c.stop = false
   345  	c.stopCh = make(chan struct{})
   346  	go c.run()
   347  }
   348  
   349  // Stop is used to stop an HTTP check.
   350  func (c *CheckHTTP) Stop() {
   351  	c.stopLock.Lock()
   352  	defer c.stopLock.Unlock()
   353  	if !c.stop {
   354  		c.stop = true
   355  		close(c.stopCh)
   356  	}
   357  }
   358  
   359  // run is invoked by a goroutine to run until Stop() is called
   360  func (c *CheckHTTP) run() {
   361  	// Get the randomized initial pause time
   362  	initialPauseTime := lib.RandomStagger(c.Interval)
   363  	next := time.After(initialPauseTime)
   364  	for {
   365  		select {
   366  		case <-next:
   367  			c.check()
   368  			next = time.After(c.Interval)
   369  		case <-c.stopCh:
   370  			return
   371  		}
   372  	}
   373  }
   374  
   375  // check is invoked periodically to perform the HTTP check
   376  func (c *CheckHTTP) check() {
   377  	method := c.Method
   378  	if method == "" {
   379  		method = "GET"
   380  	}
   381  
   382  	req, err := http.NewRequest(method, c.HTTP, nil)
   383  	if err != nil {
   384  		c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
   385  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
   386  		return
   387  	}
   388  
   389  	req.Header = http.Header(c.Header)
   390  
   391  	// this happens during testing but not in prod
   392  	if req.Header == nil {
   393  		req.Header = make(http.Header)
   394  	}
   395  
   396  	if host := req.Header.Get("Host"); host != "" {
   397  		req.Host = host
   398  	}
   399  
   400  	if req.Header.Get("User-Agent") == "" {
   401  		req.Header.Set("User-Agent", UserAgent)
   402  	}
   403  	if req.Header.Get("Accept") == "" {
   404  		req.Header.Set("Accept", "text/plain, text/*, */*")
   405  	}
   406  
   407  	resp, err := c.httpClient.Do(req)
   408  	if err != nil {
   409  		c.Logger.Printf("[WARN] agent: Check %q HTTP request failed: %s", c.CheckID, err)
   410  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
   411  		return
   412  	}
   413  	defer resp.Body.Close()
   414  
   415  	// Read the response into a circular buffer to limit the size
   416  	output, _ := circbuf.NewBuffer(BufSize)
   417  	if _, err := io.Copy(output, resp.Body); err != nil {
   418  		c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err)
   419  	}
   420  
   421  	// Format the response body
   422  	result := fmt.Sprintf("HTTP %s %s: %s Output: %s", method, c.HTTP, resp.Status, output.String())
   423  
   424  	if resp.StatusCode >= 200 && resp.StatusCode <= 299 {
   425  		// PASSING (2xx)
   426  		c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
   427  		c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, result)
   428  
   429  	} else if resp.StatusCode == 429 {
   430  		// WARNING
   431  		// 429 Too Many Requests (RFC 6585)
   432  		// The user has sent too many requests in a given amount of time.
   433  		c.Logger.Printf("[WARN] agent: Check %q is now warning", c.CheckID)
   434  		c.Notify.UpdateCheck(c.CheckID, api.HealthWarning, result)
   435  
   436  	} else {
   437  		// CRITICAL
   438  		c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
   439  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, result)
   440  	}
   441  }
   442  
   443  // CheckTCP is used to periodically make an TCP/UDP connection to
   444  // determine the health of a given check.
   445  // The check is passing if the connection succeeds
   446  // The check is critical if the connection returns an error
   447  type CheckTCP struct {
   448  	Notify   CheckNotifier
   449  	CheckID  types.CheckID
   450  	TCP      string
   451  	Interval time.Duration
   452  	Timeout  time.Duration
   453  	Logger   *log.Logger
   454  
   455  	dialer   *net.Dialer
   456  	stop     bool
   457  	stopCh   chan struct{}
   458  	stopLock sync.Mutex
   459  }
   460  
   461  // Start is used to start a TCP check.
   462  // The check runs until stop is called
   463  func (c *CheckTCP) Start() {
   464  	c.stopLock.Lock()
   465  	defer c.stopLock.Unlock()
   466  
   467  	if c.dialer == nil {
   468  		// Create the socket dialer
   469  		c.dialer = &net.Dialer{DualStack: true}
   470  
   471  		// For long (>10s) interval checks the socket timeout is 10s, otherwise
   472  		// the timeout is the interval. This means that a check *should* return
   473  		// before the next check begins.
   474  		if c.Timeout > 0 && c.Timeout < c.Interval {
   475  			c.dialer.Timeout = c.Timeout
   476  		} else if c.Interval < 10*time.Second {
   477  			c.dialer.Timeout = c.Interval
   478  		}
   479  	}
   480  
   481  	c.stop = false
   482  	c.stopCh = make(chan struct{})
   483  	go c.run()
   484  }
   485  
   486  // Stop is used to stop a TCP check.
   487  func (c *CheckTCP) Stop() {
   488  	c.stopLock.Lock()
   489  	defer c.stopLock.Unlock()
   490  	if !c.stop {
   491  		c.stop = true
   492  		close(c.stopCh)
   493  	}
   494  }
   495  
   496  // run is invoked by a goroutine to run until Stop() is called
   497  func (c *CheckTCP) run() {
   498  	// Get the randomized initial pause time
   499  	initialPauseTime := lib.RandomStagger(c.Interval)
   500  	next := time.After(initialPauseTime)
   501  	for {
   502  		select {
   503  		case <-next:
   504  			c.check()
   505  			next = time.After(c.Interval)
   506  		case <-c.stopCh:
   507  			return
   508  		}
   509  	}
   510  }
   511  
   512  // check is invoked periodically to perform the TCP check
   513  func (c *CheckTCP) check() {
   514  	conn, err := c.dialer.Dial(`tcp`, c.TCP)
   515  	if err != nil {
   516  		c.Logger.Printf("[WARN] agent: Check %q socket connection failed: %s", c.CheckID, err)
   517  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
   518  		return
   519  	}
   520  	conn.Close()
   521  	c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
   522  	c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("TCP connect %s: Success", c.TCP))
   523  }
   524  
// CheckDocker is used to periodically invoke a script to
// determine the health of an application running inside a
// Docker Container. We assume that the script is compatible
// with nagios plugins and expects the output in the same format.
type CheckDocker struct {
	// Notify receives every status update produced by the check.
	Notify CheckNotifier
	// CheckID identifies the check in updates and log lines.
	CheckID types.CheckID
	// Script is run as `Shell -c Script` when ScriptArgs is empty.
	Script string
	// ScriptArgs, when non-empty, is used as the command verbatim and
	// takes precedence over Script.
	ScriptArgs []string
	// DockerContainerID names the container the command is executed in.
	DockerContainerID string
	// Shell runs Script. When empty, Start fills it in from $SHELL and
	// falls back to /bin/sh.
	Shell string
	// Interval is the pause between the end of one run and the start of
	// the next.
	Interval time.Duration
	// Logger receives the check's log output. When nil, Start installs a
	// logger that discards everything.
	Logger *log.Logger
	// Client creates, starts and inspects the exec inside the container.
	// It is closed when the run loop exits.
	Client *DockerClient

	// stop is created by Start and closed by Stop to end the run loop.
	// It is nil until Start has been called.
	stop chan struct{}
}
   542  
   543  func (c *CheckDocker) Start() {
   544  	if c.stop != nil {
   545  		panic("Docker check already started")
   546  	}
   547  
   548  	if c.Logger == nil {
   549  		c.Logger = log.New(ioutil.Discard, "", 0)
   550  	}
   551  
   552  	if c.Shell == "" {
   553  		c.Shell = os.Getenv("SHELL")
   554  		if c.Shell == "" {
   555  			c.Shell = "/bin/sh"
   556  		}
   557  	}
   558  	c.stop = make(chan struct{})
   559  	go c.run()
   560  }
   561  
   562  func (c *CheckDocker) Stop() {
   563  	if c.stop == nil {
   564  		panic("Stop called before start")
   565  	}
   566  	close(c.stop)
   567  }
   568  
   569  func (c *CheckDocker) run() {
   570  	defer c.Client.Close()
   571  	firstWait := lib.RandomStagger(c.Interval)
   572  	next := time.After(firstWait)
   573  	for {
   574  		select {
   575  		case <-next:
   576  			c.check()
   577  			next = time.After(c.Interval)
   578  		case <-c.stop:
   579  			return
   580  		}
   581  	}
   582  }
   583  
   584  func (c *CheckDocker) check() {
   585  	var out string
   586  	status, b, err := c.doCheck()
   587  	if err != nil {
   588  		c.Logger.Printf("[DEBUG] agent: Check %q: %s", c.CheckID, err)
   589  		out = err.Error()
   590  	} else {
   591  		// out is already limited to CheckBufSize since we're getting a
   592  		// limited buffer. So we don't need to truncate it just report
   593  		// that it was truncated.
   594  		out = string(b.Bytes())
   595  		if int(b.TotalWritten()) > len(out) {
   596  			out = fmt.Sprintf("Captured %d of %d bytes\n...\n%s", len(out), b.TotalWritten(), out)
   597  		}
   598  		c.Logger.Printf("[TRACE] agent: Check %q output: %s", c.CheckID, out)
   599  	}
   600  
   601  	if status == api.HealthCritical {
   602  		c.Logger.Printf("[WARN] agent: Check %q is now critical", c.CheckID)
   603  	}
   604  
   605  	c.Notify.UpdateCheck(c.CheckID, status, out)
   606  }
   607  
   608  func (c *CheckDocker) doCheck() (string, *circbuf.Buffer, error) {
   609  	var cmd []string
   610  	if len(c.ScriptArgs) > 0 {
   611  		cmd = c.ScriptArgs
   612  	} else {
   613  		cmd = []string{c.Shell, "-c", c.Script}
   614  	}
   615  
   616  	execID, err := c.Client.CreateExec(c.DockerContainerID, cmd)
   617  	if err != nil {
   618  		return api.HealthCritical, nil, err
   619  	}
   620  
   621  	buf, err := c.Client.StartExec(c.DockerContainerID, execID)
   622  	if err != nil {
   623  		return api.HealthCritical, nil, err
   624  	}
   625  
   626  	exitCode, err := c.Client.InspectExec(c.DockerContainerID, execID)
   627  	if err != nil {
   628  		return api.HealthCritical, nil, err
   629  	}
   630  
   631  	switch exitCode {
   632  	case 0:
   633  		return api.HealthPassing, buf, nil
   634  	case 1:
   635  		c.Logger.Printf("[DEBUG] agent: Check %q failed with exit code: %d", c.CheckID, exitCode)
   636  		return api.HealthWarning, buf, nil
   637  	default:
   638  		c.Logger.Printf("[DEBUG] agent: Check %q failed with exit code: %d", c.CheckID, exitCode)
   639  		return api.HealthCritical, buf, nil
   640  	}
   641  }
   642  
   643  // CheckGRPC is used to periodically send request to a gRPC server
   644  // application that implements gRPC health-checking protocol.
   645  // The check is passing if returned status is SERVING.
   646  // The check is critical if connection fails or returned status is
   647  // not SERVING.
   648  type CheckGRPC struct {
   649  	Notify          CheckNotifier
   650  	CheckID         types.CheckID
   651  	GRPC            string
   652  	Interval        time.Duration
   653  	Timeout         time.Duration
   654  	TLSClientConfig *tls.Config
   655  	Logger          *log.Logger
   656  
   657  	probe    *GrpcHealthProbe
   658  	stop     bool
   659  	stopCh   chan struct{}
   660  	stopLock sync.Mutex
   661  }
   662  
   663  func (c *CheckGRPC) Start() {
   664  	c.stopLock.Lock()
   665  	defer c.stopLock.Unlock()
   666  	timeout := 10 * time.Second
   667  	if c.Timeout > 0 {
   668  		timeout = c.Timeout
   669  	}
   670  	c.probe = NewGrpcHealthProbe(c.GRPC, timeout, c.TLSClientConfig)
   671  	c.stop = false
   672  	c.stopCh = make(chan struct{})
   673  	go c.run()
   674  }
   675  
   676  func (c *CheckGRPC) run() {
   677  	// Get the randomized initial pause time
   678  	initialPauseTime := lib.RandomStagger(c.Interval)
   679  	next := time.After(initialPauseTime)
   680  	for {
   681  		select {
   682  		case <-next:
   683  			c.check()
   684  			next = time.After(c.Interval)
   685  		case <-c.stopCh:
   686  			return
   687  		}
   688  	}
   689  }
   690  
   691  func (c *CheckGRPC) check() {
   692  	err := c.probe.Check()
   693  	if err != nil {
   694  		c.Logger.Printf("[DEBUG] agent: Check %q failed: %s", c.CheckID, err.Error())
   695  		c.Notify.UpdateCheck(c.CheckID, api.HealthCritical, err.Error())
   696  	} else {
   697  		c.Logger.Printf("[DEBUG] agent: Check %q is passing", c.CheckID)
   698  		c.Notify.UpdateCheck(c.CheckID, api.HealthPassing, fmt.Sprintf("gRPC check %s: success", c.GRPC))
   699  	}
   700  }
   701  
   702  func (c *CheckGRPC) Stop() {
   703  	c.stopLock.Lock()
   704  	defer c.stopLock.Unlock()
   705  	if !c.stop {
   706  		c.stop = true
   707  		close(c.stopCh)
   708  	}
   709  }