github.com/google/cloudprober@v0.11.3/probes/external/external.go

github.com/google/cloudprober@v0.11.3/probes/external/external.go (about)

     1  // Copyright 2017-2020 The Cloudprober Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  /*
    16  Package external implements an external probe type for cloudprober.
    17  
    18  External probe type executes an external process for actual probing. These probes
    19  can have two modes: "once" and "server". In "once" mode, the external process is
    20  started for each probe run cycle, while in "server" mode, external process is
    21  started only if it's not running already and Cloudprober communicates with it
    22  over stdin/stdout for each probe cycle.
    23  
    24  TODO(manugarg): Add a way to test this program. Write another program that
    25  implements the probe server protocol and use that for testing.
    26  */
    27  package external
    28  
    29  import (
    30  	"bufio"
    31  	"context"
    32  	"fmt"
    33  	"io"
    34  	"os"
    35  	"os/exec"
    36  	"regexp"
    37  	"strconv"
    38  	"strings"
    39  	"sync"
    40  	"time"
    41  
    42  	"github.com/golang/protobuf/proto"
    43  	"github.com/google/cloudprober/logger"
    44  	"github.com/google/cloudprober/metrics"
    45  	"github.com/google/cloudprober/metrics/payload"
    46  	configpb "github.com/google/cloudprober/probes/external/proto"
    47  	serverpb "github.com/google/cloudprober/probes/external/proto"
    48  	"github.com/google/cloudprober/probes/external/serverutils"
    49  	"github.com/google/cloudprober/probes/options"
    50  	"github.com/google/cloudprober/targets/endpoint"
    51  	"github.com/google/cloudprober/validators"
    52  	"github.com/google/shlex"
    53  )
    54  
    55  var (
    56  	// TimeBetweenRequests is the time interval between probe requests for
    57  	// multiple targets. In server mode, probe requests for multiple targets are
    58  	// sent to the same external probe process. Sleeping between requests provides
    59  	// some time buffer for the probe process to dequeue the incoming requests and
    60  	// avoids filling up the communication pipe.
    61  	//
    62  	// Note that this value impacts the effective timeout for a target as timeout
    63  	// is applied for all the targets in aggregate. For example, 100th target in
    64  	// the targets list will have the effective timeout of (timeout - 1ms).
    65  	// TODO(manugarg): Make sure that the last target in the list has an impact of
    66  	// less than 1% on its timeout.
    67  	TimeBetweenRequests = 10 * time.Microsecond
    68  	validLabelRe        = regexp.MustCompile(`@(target|address|port|probe|target\.label\.[^@]+)@`)
    69  )
    70  
    71  type result struct {
    72  	total, success    int64
    73  	latency           metrics.Value
    74  	validationFailure *metrics.Map
    75  	payloadMetrics    *metrics.EventMetrics
    76  }
    77  
    78  // Probe holds aggregate information about all probe runs, per-target.
    79  type Probe struct {
    80  	name    string
    81  	mode    string
    82  	cmdName string
    83  	cmdArgs []string
    84  	opts    *options.Options
    85  	c       *configpb.ProbeConf
    86  	l       *logger.Logger
    87  
    88  	// book-keeping params
    89  	labelKeys  map[string]bool // Labels for substitution
    90  	requestID  int32
    91  	cmdRunning bool
    92  	cmdStdin   io.Writer
    93  	cmdStdout  io.ReadCloser
    94  	cmdStderr  io.ReadCloser
    95  	replyChan  chan *serverpb.ProbeReply
    96  	targets    []endpoint.Endpoint
    97  	results    map[string]*result // probe results keyed by targets
    98  	dataChan   chan *metrics.EventMetrics
    99  
   100  	// default payload metrics that we clone from to build per-target payload
   101  	// metrics.
   102  	payloadParser *payload.Parser
   103  }
   104  
   105  func (p *Probe) updateLabelKeys() {
   106  	p.labelKeys = make(map[string]bool)
   107  
   108  	updateLabelKeysFn := func(s string) {
   109  		matches := validLabelRe.FindAllStringSubmatch(s, -1)
   110  		for _, m := range matches {
   111  			if len(m) >= 2 {
   112  				// Pick the match within outer parentheses.
   113  				p.labelKeys[m[1]] = true
   114  			}
   115  		}
   116  	}
   117  
   118  	for _, opt := range p.c.GetOptions() {
   119  		updateLabelKeysFn(opt.GetValue())
   120  	}
   121  	for _, arg := range p.cmdArgs {
   122  		updateLabelKeysFn(arg)
   123  	}
   124  }
   125  
   126  // Init initializes the probe with the given params.
   127  func (p *Probe) Init(name string, opts *options.Options) error {
   128  	c, ok := opts.ProbeConf.(*configpb.ProbeConf)
   129  	if !ok {
   130  		return fmt.Errorf("not external probe config")
   131  	}
   132  	p.name = name
   133  	p.opts = opts
   134  	if p.l = opts.Logger; p.l == nil {
   135  		p.l = &logger.Logger{}
   136  	}
   137  	p.c = c
   138  	p.replyChan = make(chan *serverpb.ProbeReply)
   139  
   140  	cmdParts, err := shlex.Split(p.c.GetCommand())
   141  	if err != nil {
   142  		return fmt.Errorf("error parsing command line (%s): %v", p.c.GetCommand(), err)
   143  	}
   144  	p.cmdName = cmdParts[0]
   145  	p.cmdArgs = cmdParts[1:len(cmdParts)]
   146  
   147  	// Figure out labels we are interested in
   148  	p.updateLabelKeys()
   149  
   150  	switch p.c.GetMode() {
   151  	case configpb.ProbeConf_ONCE:
   152  		p.mode = "once"
   153  	case configpb.ProbeConf_SERVER:
   154  		p.mode = "server"
   155  	default:
   156  		return fmt.Errorf("invalid mode: %s", p.c.GetMode())
   157  	}
   158  
   159  	p.results = make(map[string]*result)
   160  
   161  	if !p.c.GetOutputAsMetrics() {
   162  		return nil
   163  	}
   164  
   165  	defaultKind := metrics.CUMULATIVE
   166  	if p.c.GetMode() == configpb.ProbeConf_ONCE {
   167  		defaultKind = metrics.GAUGE
   168  	}
   169  
   170  	p.payloadParser, err = payload.NewParser(p.c.GetOutputMetricsOptions(), "external", p.name, metrics.Kind(defaultKind), p.l)
   171  	if err != nil {
   172  		return fmt.Errorf("error initializing payload metrics: %v", err)
   173  	}
   174  
   175  	return nil
   176  }
   177  
   178  // substituteLabels replaces occurrences of @label@ with the values from
   179  // labels.  It returns the substituted string and a bool indicating if there
   180  // was a @label@ that did not exist in the labels map.
   181  func substituteLabels(in string, labels map[string]string) (string, bool) {
   182  	if len(labels) == 0 {
   183  		return in, !strings.Contains(in, "@")
   184  	}
   185  	delimiter := "@"
   186  	output := ""
   187  	words := strings.Split(in, delimiter)
   188  	count := len(words)
   189  	foundAll := true
   190  	for j, kwd := range words {
   191  		// Even number of words => just copy out.
   192  		if j%2 == 0 {
   193  			output += kwd
   194  			continue
   195  		}
   196  		// Special case: If there are an odd number of '@' (unbalanced), the last
   197  		// odd index doesn't actually have a closing '@', so we just append it as it
   198  		// is.
   199  		if j == count-1 {
   200  			output += delimiter
   201  			output += kwd
   202  			continue
   203  		}
   204  
   205  		// Special case: "@@" => "@"
   206  		if kwd == "" {
   207  			output += delimiter
   208  			continue
   209  		}
   210  
   211  		// Finally, the labels.
   212  		replace, ok := labels[kwd]
   213  		if ok {
   214  			output += replace
   215  			continue
   216  		}
   217  
   218  		// Nothing - put the token back in.
   219  		foundAll = false
   220  		output += delimiter
   221  		output += kwd
   222  		output += delimiter
   223  	}
   224  	return output, foundAll
   225  }
   226  
   227  type command interface {
   228  	Wait() error
   229  }
   230  
   231  // monitorCommand waits for the process to terminate and sets cmdRunning to
   232  // false when that happens.
   233  func (p *Probe) monitorCommand(startCtx context.Context, cmd command) error {
   234  	err := cmd.Wait()
   235  
   236  	// Spare logging error message if killed explicitly.
   237  	select {
   238  	case <-startCtx.Done():
   239  		return nil
   240  	default:
   241  	}
   242  
   243  	if exitErr, ok := err.(*exec.ExitError); ok {
   244  		return fmt.Errorf("external probe process died with the status: %s. Stderr: %s", exitErr.Error(), string(exitErr.Stderr))
   245  	}
   246  	return err
   247  }
   248  
   249  func (p *Probe) startCmdIfNotRunning(startCtx context.Context) error {
   250  	// Start external probe command if it's not running already. Note that here we
   251  	// are trusting the cmdRunning to be set correctly. It can be false for 3 reasons:
   252  	// 1) This is the first call and the process has actually never been started.
   253  	// 2) cmd.Start() started the process but still returned an error.
   254  	// 3) cmd.Wait() returned incorrectly, while the process was still running.
   255  	//
   256  	// 2 or 3 should never happen as per design, but managing processes can be tricky.
   257  	// Documenting here to help with debugging if we run into an issue.
   258  	if p.cmdRunning {
   259  		return nil
   260  	}
   261  	p.l.Infof("Starting external command: %s %s", p.cmdName, strings.Join(p.cmdArgs, " "))
   262  	cmd := exec.CommandContext(startCtx, p.cmdName, p.cmdArgs...)
   263  	var err error
   264  	if p.cmdStdin, err = cmd.StdinPipe(); err != nil {
   265  		return err
   266  	}
   267  	if p.cmdStdout, err = cmd.StdoutPipe(); err != nil {
   268  		return err
   269  	}
   270  	if p.cmdStderr, err = cmd.StderrPipe(); err != nil {
   271  		return err
   272  	}
   273  
   274  	go func() {
   275  		scanner := bufio.NewScanner(p.cmdStderr)
   276  		for scanner.Scan() {
   277  			p.l.Warningf("Stderr of %s: %s", cmd.Path, scanner.Text())
   278  		}
   279  	}()
   280  
   281  	if err = cmd.Start(); err != nil {
   282  		p.l.Errorf("error while starting the cmd: %s %s. Err: %v", cmd.Path, cmd.Args, err)
   283  		return fmt.Errorf("error while starting the cmd: %s %s. Err: %v", cmd.Path, cmd.Args, err)
   284  	}
   285  
   286  	doneChan := make(chan struct{})
   287  	// This goroutine waits for the process to terminate and sets cmdRunning to
   288  	// false when that happens.
   289  	go func() {
   290  		if err := p.monitorCommand(startCtx, cmd); err != nil {
   291  			p.l.Error(err.Error())
   292  		}
   293  		close(doneChan)
   294  		p.cmdRunning = false
   295  	}()
   296  	go p.readProbeReplies(doneChan)
   297  	p.cmdRunning = true
   298  	return nil
   299  }
   300  
   301  func (p *Probe) readProbeReplies(done chan struct{}) error {
   302  	bufReader := bufio.NewReader(p.cmdStdout)
   303  	// Start a background goroutine to read probe replies from the probe server
   304  	// process's stdout and put them on the probe's replyChan. Note that replyChan
   305  	// is a one element channel. Idea is that we won't need buffering other than
   306  	// the one provided by Unix pipes.
   307  	for {
   308  		select {
   309  		case <-done:
   310  			return nil
   311  		default:
   312  		}
   313  		rep, err := serverutils.ReadProbeReply(bufReader)
   314  		if err != nil {
   315  			// Return if external probe process pipe has closed. We get:
   316  			//  io.EOF: when other process has closed the pipe.
   317  			//  os.ErrClosed: when we have closed the pipe (through cmd.Wait()).
   318  			// *os.PathError: deferred close of the pipe.
   319  			_, isPathError := err.(*os.PathError)
   320  			if err == os.ErrClosed || err == io.EOF || isPathError {
   321  				p.l.Errorf("External probe process pipe is closed. Err: %s", err.Error())
   322  				return err
   323  			}
   324  			p.l.Errorf("Error reading probe reply: %s", err.Error())
   325  			continue
   326  		}
   327  		p.replyChan <- rep
   328  	}
   329  
   330  }
   331  
   332  func (p *Probe) withAdditionalLabels(em *metrics.EventMetrics, target string) *metrics.EventMetrics {
   333  	for _, al := range p.opts.AdditionalLabels {
   334  		em.AddLabel(al.KeyValueForTarget(target))
   335  	}
   336  	return em
   337  }
   338  
   339  func (p *Probe) defaultMetrics(target string, result *result) *metrics.EventMetrics {
   340  	em := metrics.NewEventMetrics(time.Now()).
   341  		AddMetric("success", metrics.NewInt(result.success)).
   342  		AddMetric("total", metrics.NewInt(result.total)).
   343  		AddMetric("latency", result.latency).
   344  		AddLabel("ptype", "external").
   345  		AddLabel("probe", p.name).
   346  		AddLabel("dst", target)
   347  
   348  	em.LatencyUnit = p.opts.LatencyUnit
   349  
   350  	if p.opts.Validators != nil {
   351  		em.AddMetric("validation_failure", result.validationFailure)
   352  	}
   353  
   354  	return p.withAdditionalLabels(em, target)
   355  }
   356  
   357  func (p *Probe) labels(ep endpoint.Endpoint) map[string]string {
   358  	labels := make(map[string]string)
   359  	if p.labelKeys["probe"] {
   360  		labels["probe"] = p.name
   361  	}
   362  	if p.labelKeys["target"] {
   363  		labels["target"] = ep.Name
   364  	}
   365  	if p.labelKeys["port"] {
   366  		labels["port"] = strconv.Itoa(ep.Port)
   367  	}
   368  	if p.labelKeys["address"] {
   369  		addr, err := p.opts.Targets.Resolve(ep.Name, p.opts.IPVersion)
   370  		if err != nil {
   371  			p.l.Warningf("Targets.Resolve(%v, %v) failed: %v ", ep.Name, p.opts.IPVersion, err)
   372  		} else if !addr.IsUnspecified() {
   373  			labels["address"] = addr.String()
   374  		}
   375  	}
   376  	for lk, lv := range ep.Labels {
   377  		k := "target.label." + lk
   378  		if p.labelKeys[k] {
   379  			labels[k] = lv
   380  		}
   381  	}
   382  	return labels
   383  }
   384  
   385  func (p *Probe) sendRequest(requestID int32, ep endpoint.Endpoint) error {
   386  	req := &serverpb.ProbeRequest{
   387  		RequestId: proto.Int32(requestID),
   388  		TimeLimit: proto.Int32(int32(p.opts.Timeout / time.Millisecond)),
   389  		Options:   []*serverpb.ProbeRequest_Option{},
   390  	}
   391  	for _, opt := range p.c.GetOptions() {
   392  		value, found := substituteLabels(opt.GetValue(), p.labels(ep))
   393  		if !found {
   394  			p.l.Warningf("Missing substitution in option %q", value)
   395  		}
   396  		req.Options = append(req.Options, &serverpb.ProbeRequest_Option{
   397  			Name:  opt.Name,
   398  			Value: proto.String(value),
   399  		})
   400  	}
   401  
   402  	p.l.Debugf("Sending a probe request %v to the external probe server for target %v", requestID, ep.Name)
   403  	return serverutils.WriteMessage(req, p.cmdStdin)
   404  }
   405  
   406  type requestInfo struct {
   407  	target    string
   408  	timestamp time.Time
   409  }
   410  
   411  // probeStatus captures the single probe status. It's only used by runProbe
   412  // functions to pass a probe's status to processProbeResult method.
   413  type probeStatus struct {
   414  	target  string
   415  	success bool
   416  	latency time.Duration
   417  	payload string
   418  }
   419  
   420  func (p *Probe) processProbeResult(ps *probeStatus, result *result) {
   421  	if ps.success && p.opts.Validators != nil {
   422  		failedValidations := validators.RunValidators(p.opts.Validators, &validators.Input{ResponseBody: []byte(ps.payload)}, result.validationFailure, p.l)
   423  
   424  		// If any validation failed, log and set success to false.
   425  		if len(failedValidations) > 0 {
   426  			p.l.Debug("Target:", ps.target, " failed validations: ", strings.Join(failedValidations, ","), ".")
   427  			ps.success = false
   428  		}
   429  	}
   430  
   431  	if ps.success {
   432  		result.success++
   433  		result.latency.AddFloat64(ps.latency.Seconds() / p.opts.LatencyUnit.Seconds())
   434  	}
   435  
   436  	em := p.defaultMetrics(ps.target, result)
   437  	p.opts.LogMetrics(em)
   438  	p.dataChan <- em
   439  
   440  	// If probe is configured to use the external process output (or reply payload
   441  	// in case of server probe) as metrics.
   442  	if p.c.GetOutputAsMetrics() {
   443  		if p.c.GetOutputMetricsOptions().GetAggregateInCloudprober() {
   444  			result.payloadMetrics = p.payloadParser.AggregatedPayloadMetrics(result.payloadMetrics, ps.payload, ps.target)
   445  			p.opts.LogMetrics(result.payloadMetrics)
   446  			p.dataChan <- p.withAdditionalLabels(result.payloadMetrics, ps.target)
   447  		} else {
   448  			for _, em := range p.payloadParser.PayloadMetrics(ps.payload, ps.target) {
   449  				p.opts.LogMetrics(em)
   450  				p.dataChan <- p.withAdditionalLabels(em, ps.target)
   451  			}
   452  		}
   453  	}
   454  }
   455  
   456  func (p *Probe) runServerProbe(ctx, startCtx context.Context) {
   457  	requests := make(map[int32]requestInfo)
   458  	var requestsMu sync.RWMutex
   459  	doneChan := make(chan struct{})
   460  
   461  	if err := p.startCmdIfNotRunning(startCtx); err != nil {
   462  		p.l.Error(err.Error())
   463  		return
   464  	}
   465  
   466  	var wg sync.WaitGroup
   467  	wg.Add(1)
   468  	go func() {
   469  		defer wg.Done()
   470  
   471  		// Read probe replies until we have no outstanding requests or context has
   472  		// run out.
   473  		for {
   474  			_, ok := <-doneChan
   475  			if !ok {
   476  				// It is safe to access requests without lock here as it won't be accessed
   477  				// by the send loop after doneChan is closed.
   478  				p.l.Debugf("Number of outstanding requests: %d", len(requests))
   479  				if len(requests) == 0 {
   480  					return
   481  				}
   482  			}
   483  			select {
   484  			case <-ctx.Done():
   485  				p.l.Error(ctx.Err().Error())
   486  				return
   487  			case rep := <-p.replyChan:
   488  				requestsMu.Lock()
   489  				reqInfo, ok := requests[rep.GetRequestId()]
   490  				if ok {
   491  					delete(requests, rep.GetRequestId())
   492  				}
   493  				requestsMu.Unlock()
   494  				if !ok {
   495  					// Not our reply, could be from the last timed out probe.
   496  					p.l.Warningf("Got a reply that doesn't match any outstading request: Request id from reply: %v. Ignoring.", rep.GetRequestId())
   497  					continue
   498  				}
   499  				success := true
   500  				if rep.GetErrorMessage() != "" {
   501  					p.l.Errorf("Probe for target %v failed with error message: %s", reqInfo.target, rep.GetErrorMessage())
   502  					success = false
   503  				}
   504  				p.processProbeResult(&probeStatus{
   505  					target:  reqInfo.target,
   506  					success: success,
   507  					latency: time.Since(reqInfo.timestamp),
   508  					payload: rep.GetPayload(),
   509  				}, p.results[reqInfo.target])
   510  			}
   511  		}
   512  	}()
   513  
   514  	// Send probe requests
   515  	for _, target := range p.targets {
   516  		p.requestID++
   517  		p.results[target.Name].total++
   518  		requestsMu.Lock()
   519  		requests[p.requestID] = requestInfo{
   520  			target:    target.Name,
   521  			timestamp: time.Now(),
   522  		}
   523  		requestsMu.Unlock()
   524  		p.sendRequest(p.requestID, target)
   525  		time.Sleep(TimeBetweenRequests)
   526  	}
   527  
   528  	// Send signal to receiver loop that we are done sending request.
   529  	close(doneChan)
   530  
   531  	// Wait for receiver goroutine to exit.
   532  	wg.Wait()
   533  
   534  	// Handle requests that we have not yet received replies for: "requests" will
   535  	// contain only outstanding requests by this point.
   536  	requestsMu.Lock()
   537  	defer requestsMu.Unlock()
   538  	for _, req := range requests {
   539  		p.processProbeResult(&probeStatus{
   540  			target:  req.target,
   541  			success: false,
   542  		}, p.results[req.target])
   543  	}
   544  }
   545  
   546  // runCommand encapsulates command executor in a variable so that we can
   547  // override it for testing.
   548  var runCommand = func(ctx context.Context, cmd string, args []string) ([]byte, error) {
   549  	return exec.CommandContext(ctx, cmd, args...).Output()
   550  }
   551  
   552  func (p *Probe) runOnceProbe(ctx context.Context) {
   553  	var wg sync.WaitGroup
   554  
   555  	for _, target := range p.targets {
   556  		wg.Add(1)
   557  		go func(target endpoint.Endpoint, result *result) {
   558  			defer wg.Done()
   559  			args := make([]string, len(p.cmdArgs))
   560  			for i, arg := range p.cmdArgs {
   561  				res, found := substituteLabels(arg, p.labels(target))
   562  				if !found {
   563  					p.l.Warningf("Substitution not found in %q", arg)
   564  				}
   565  				args[i] = res
   566  			}
   567  
   568  			p.l.Infof("Running external command: %s %s", p.cmdName, strings.Join(args, " "))
   569  			result.total++
   570  			startTime := time.Now()
   571  			b, err := runCommand(ctx, p.cmdName, args)
   572  
   573  			success := true
   574  			if err != nil {
   575  				success = false
   576  				if exitErr, ok := err.(*exec.ExitError); ok {
   577  					p.l.Errorf("external probe process died with the status: %s. Stderr: %s", exitErr.Error(), exitErr.Stderr)
   578  				} else {
   579  					p.l.Errorf("Error executing the external program. Err: %v", err)
   580  				}
   581  			}
   582  
   583  			p.processProbeResult(&probeStatus{
   584  				target:  target.Name,
   585  				success: success,
   586  				latency: time.Since(startTime),
   587  				payload: string(b),
   588  			}, result)
   589  		}(target, p.results[target.Name])
   590  	}
   591  	wg.Wait()
   592  }
   593  
   594  func (p *Probe) updateTargets() {
   595  	p.targets = p.opts.Targets.ListEndpoints()
   596  
   597  	for _, target := range p.targets {
   598  		if _, ok := p.results[target.Name]; ok {
   599  			continue
   600  		}
   601  
   602  		var latencyValue metrics.Value
   603  		if p.opts.LatencyDist != nil {
   604  			latencyValue = p.opts.LatencyDist.Clone()
   605  		} else {
   606  			latencyValue = metrics.NewFloat(0)
   607  		}
   608  
   609  		p.results[target.Name] = &result{
   610  			latency:           latencyValue,
   611  			validationFailure: validators.ValidationFailureMap(p.opts.Validators),
   612  		}
   613  
   614  		for _, al := range p.opts.AdditionalLabels {
   615  			al.UpdateForTarget(target)
   616  		}
   617  	}
   618  }
   619  
   620  func (p *Probe) runProbe(startCtx context.Context) {
   621  	probeCtx, cancelFunc := context.WithTimeout(startCtx, p.opts.Timeout)
   622  	defer cancelFunc()
   623  
   624  	p.updateTargets()
   625  
   626  	if p.mode == "server" {
   627  		p.runServerProbe(probeCtx, startCtx)
   628  	} else {
   629  		p.runOnceProbe(probeCtx)
   630  	}
   631  }
   632  
   633  // Start starts and runs the probe indefinitely.
   634  func (p *Probe) Start(startCtx context.Context, dataChan chan *metrics.EventMetrics) {
   635  	p.dataChan = dataChan
   636  
   637  	ticker := time.NewTicker(p.opts.Interval)
   638  	defer ticker.Stop()
   639  
   640  	for range ticker.C {
   641  		// Don't run another probe if context is canceled already.
   642  		select {
   643  		case <-startCtx.Done():
   644  			return
   645  		default:
   646  		}
   647  
   648  		p.runProbe(startCtx)
   649  	}
   650  }