github.com/xtls/xray-core@v1.8.12-0.20240518155711-3168d27b0bdb/app/observatory/burst/healthping.go (about)

     1  package burst
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/xtls/xray-core/common/dice"
    11  )
    12  
    13  // HealthPingSettings holds settings for health Checker
    14  type HealthPingSettings struct {
    15  	Destination   string        `json:"destination"`
    16  	Connectivity  string        `json:"connectivity"`
    17  	Interval      time.Duration `json:"interval"`
    18  	SamplingCount int           `json:"sampling"`
    19  	Timeout       time.Duration `json:"timeout"`
    20  }
    21  
    22  // HealthPing is the health checker for balancers
    23  type HealthPing struct {
    24  	ctx         context.Context
    25  	access      sync.Mutex
    26  	ticker      *time.Ticker
    27  	tickerClose chan struct{}
    28  
    29  	Settings *HealthPingSettings
    30  	Results  map[string]*HealthPingRTTS
    31  }
    32  
    33  // NewHealthPing creates a new HealthPing with settings
    34  func NewHealthPing(ctx context.Context, config *HealthPingConfig) *HealthPing {
    35  	settings := &HealthPingSettings{}
    36  	if config != nil {
    37  		settings = &HealthPingSettings{
    38  			Connectivity:  strings.TrimSpace(config.Connectivity),
    39  			Destination:   strings.TrimSpace(config.Destination),
    40  			Interval:      time.Duration(config.Interval),
    41  			SamplingCount: int(config.SamplingCount),
    42  			Timeout:       time.Duration(config.Timeout),
    43  		}
    44  	}
    45  	if settings.Destination == "" {
    46  		// Destination URL, need 204 for success return default to chromium
    47  		// https://github.com/chromium/chromium/blob/main/components/safety_check/url_constants.cc#L10
    48  		// https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/safety_check/url_constants.cc#10
    49  		settings.Destination = "https://connectivitycheck.gstatic.com/generate_204"
    50  	}
    51  	if settings.Interval == 0 {
    52  		settings.Interval = time.Duration(1) * time.Minute
    53  	} else if settings.Interval < 10 {
    54  		newError("health check interval is too small, 10s is applied").AtWarning().WriteToLog()
    55  		settings.Interval = time.Duration(10) * time.Second
    56  	}
    57  	if settings.SamplingCount <= 0 {
    58  		settings.SamplingCount = 10
    59  	}
    60  	if settings.Timeout <= 0 {
    61  		// results are saved after all health pings finish,
    62  		// a larger timeout could possibly makes checks run longer
    63  		settings.Timeout = time.Duration(5) * time.Second
    64  	}
    65  	return &HealthPing{
    66  		ctx:      ctx,
    67  		Settings: settings,
    68  		Results:  nil,
    69  	}
    70  }
    71  
    72  // StartScheduler implements the HealthChecker
    73  func (h *HealthPing) StartScheduler(selector func() ([]string, error)) {
    74  	if h.ticker != nil {
    75  		return
    76  	}
    77  	interval := h.Settings.Interval * time.Duration(h.Settings.SamplingCount)
    78  	ticker := time.NewTicker(interval)
    79  	tickerClose := make(chan struct{})
    80  	h.ticker = ticker
    81  	h.tickerClose = tickerClose
    82  	go func() {
    83  		tags, err := selector()
    84  		if err != nil {
    85  			newError("error select outbounds for initial health check: ", err).AtWarning().WriteToLog()
    86  			return
    87  		}
    88  		h.Check(tags)
    89  	}()
    90  
    91  	go func() {
    92  		for {
    93  			go func() {
    94  				tags, err := selector()
    95  				if err != nil {
    96  					newError("error select outbounds for scheduled health check: ", err).AtWarning().WriteToLog()
    97  					return
    98  				}
    99  				h.doCheck(tags, interval, h.Settings.SamplingCount)
   100  				h.Cleanup(tags)
   101  			}()
   102  			select {
   103  			case <-ticker.C:
   104  				continue
   105  			case <-tickerClose:
   106  				return
   107  			}
   108  		}
   109  	}()
   110  }
   111  
   112  // StopScheduler implements the HealthChecker
   113  func (h *HealthPing) StopScheduler() {
   114  	if h.ticker == nil {
   115  		return
   116  	}
   117  	h.ticker.Stop()
   118  	h.ticker = nil
   119  	close(h.tickerClose)
   120  	h.tickerClose = nil
   121  }
   122  
   123  // Check implements the HealthChecker
   124  func (h *HealthPing) Check(tags []string) error {
   125  	if len(tags) == 0 {
   126  		return nil
   127  	}
   128  	newError("perform one-time health check for tags ", tags).AtInfo().WriteToLog()
   129  	h.doCheck(tags, 0, 1)
   130  	return nil
   131  }
   132  
   133  type rtt struct {
   134  	handler string
   135  	value   time.Duration
   136  }
   137  
   138  // doCheck performs the 'rounds' amount checks in given 'duration'. You should make
   139  // sure all tags are valid for current balancer
   140  func (h *HealthPing) doCheck(tags []string, duration time.Duration, rounds int) {
   141  	count := len(tags) * rounds
   142  	if count == 0 {
   143  		return
   144  	}
   145  	ch := make(chan *rtt, count)
   146  
   147  	for _, tag := range tags {
   148  		handler := tag
   149  		client := newPingClient(
   150  			h.ctx,
   151  			h.Settings.Destination,
   152  			h.Settings.Timeout,
   153  			handler,
   154  		)
   155  		for i := 0; i < rounds; i++ {
   156  			delay := time.Duration(0)
   157  			if duration > 0 {
   158  				delay = time.Duration(dice.Roll(int(duration)))
   159  			}
   160  			time.AfterFunc(delay, func() {
   161  				newError("checking ", handler).AtDebug().WriteToLog()
   162  				delay, err := client.MeasureDelay()
   163  				if err == nil {
   164  					ch <- &rtt{
   165  						handler: handler,
   166  						value:   delay,
   167  					}
   168  					return
   169  				}
   170  				if !h.checkConnectivity() {
   171  					newError("network is down").AtWarning().WriteToLog()
   172  					ch <- &rtt{
   173  						handler: handler,
   174  						value:   0,
   175  					}
   176  					return
   177  				}
   178  				newError(fmt.Sprintf(
   179  					"error ping %s with %s: %s",
   180  					h.Settings.Destination,
   181  					handler,
   182  					err,
   183  				)).AtWarning().WriteToLog()
   184  				ch <- &rtt{
   185  					handler: handler,
   186  					value:   rttFailed,
   187  				}
   188  			})
   189  		}
   190  	}
   191  	for i := 0; i < count; i++ {
   192  		rtt := <-ch
   193  		if rtt.value > 0 {
   194  			// should not put results when network is down
   195  			h.PutResult(rtt.handler, rtt.value)
   196  		}
   197  	}
   198  }
   199  
   200  // PutResult put a ping rtt to results
   201  func (h *HealthPing) PutResult(tag string, rtt time.Duration) {
   202  	h.access.Lock()
   203  	defer h.access.Unlock()
   204  	if h.Results == nil {
   205  		h.Results = make(map[string]*HealthPingRTTS)
   206  	}
   207  	r, ok := h.Results[tag]
   208  	if !ok {
   209  		// validity is 2 times to sampling period, since the check are
   210  		// distributed in the time line randomly, in extreme cases,
   211  		// previous checks are distributed on the left, and latters
   212  		// on the right
   213  		validity := h.Settings.Interval * time.Duration(h.Settings.SamplingCount) * 2
   214  		r = NewHealthPingResult(h.Settings.SamplingCount, validity)
   215  		h.Results[tag] = r
   216  	}
   217  	r.Put(rtt)
   218  }
   219  
   220  // Cleanup removes results of removed handlers,
   221  // tags should be all valid tags of the Balancer now
   222  func (h *HealthPing) Cleanup(tags []string) {
   223  	h.access.Lock()
   224  	defer h.access.Unlock()
   225  	for tag := range h.Results {
   226  		found := false
   227  		for _, v := range tags {
   228  			if tag == v {
   229  				found = true
   230  				break
   231  			}
   232  		}
   233  		if !found {
   234  			delete(h.Results, tag)
   235  		}
   236  	}
   237  }
   238  
   239  // checkConnectivity checks the network connectivity, it returns
   240  // true if network is good or "connectivity check url" not set
   241  func (h *HealthPing) checkConnectivity() bool {
   242  	if h.Settings.Connectivity == "" {
   243  		return true
   244  	}
   245  	tester := newDirectPingClient(
   246  		h.Settings.Connectivity,
   247  		h.Settings.Timeout,
   248  	)
   249  	if _, err := tester.MeasureDelay(); err != nil {
   250  		return false
   251  	}
   252  	return true
   253  }