github.com/clly/consul@v1.4.5/agent/consul/stats_fetcher.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"log"
     6  	"sync"
     7  
     8  	"github.com/hashicorp/consul/agent/consul/autopilot"
     9  	"github.com/hashicorp/consul/agent/metadata"
    10  	"github.com/hashicorp/consul/agent/pool"
    11  	"github.com/hashicorp/serf/serf"
    12  )
    13  
    14  // StatsFetcher has two functions for autopilot. First, lets us fetch all the
    15  // stats in parallel so we are taking a sample as close to the same time as
    16  // possible, since we are comparing time-sensitive info for the health check.
    17  // Second, it bounds the time so that one slow RPC can't hold up the health
    18  // check loop; as a side effect of how it implements this, it also limits to
    19  // a single in-flight RPC to any given server, so goroutines don't accumulate
    20  // as we run the health check fairly frequently.
    21  type StatsFetcher struct {
    22  	logger       *log.Logger
    23  	pool         *pool.ConnPool
    24  	datacenter   string
    25  	inflight     map[string]struct{}
    26  	inflightLock sync.Mutex
    27  }
    28  
    29  // NewStatsFetcher returns a stats fetcher.
    30  func NewStatsFetcher(logger *log.Logger, pool *pool.ConnPool, datacenter string) *StatsFetcher {
    31  	return &StatsFetcher{
    32  		logger:     logger,
    33  		pool:       pool,
    34  		datacenter: datacenter,
    35  		inflight:   make(map[string]struct{}),
    36  	}
    37  }
    38  
    39  // fetch does the RPC to fetch the server stats from a single server. We don't
    40  // cancel this when the context is canceled because we only want one in-flight
    41  // RPC to each server, so we let it finish and then clean up the in-flight
    42  // tracking.
    43  func (f *StatsFetcher) fetch(server *metadata.Server, replyCh chan *autopilot.ServerStats) {
    44  	var args struct{}
    45  	var reply autopilot.ServerStats
    46  	err := f.pool.RPC(f.datacenter, server.Addr, server.Version, "Status.RaftStats", server.UseTLS, &args, &reply)
    47  	if err != nil {
    48  		f.logger.Printf("[WARN] consul: error getting server health from %q: %v",
    49  			server.Name, err)
    50  	} else {
    51  		replyCh <- &reply
    52  	}
    53  
    54  	f.inflightLock.Lock()
    55  	delete(f.inflight, server.ID)
    56  	f.inflightLock.Unlock()
    57  }
    58  
    59  // Fetch will attempt to query all the servers in parallel.
    60  func (f *StatsFetcher) Fetch(ctx context.Context, members []serf.Member) map[string]*autopilot.ServerStats {
    61  	type workItem struct {
    62  		server  *metadata.Server
    63  		replyCh chan *autopilot.ServerStats
    64  	}
    65  	var servers []*metadata.Server
    66  	for _, s := range members {
    67  		if ok, parts := metadata.IsConsulServer(s); ok {
    68  			servers = append(servers, parts)
    69  		}
    70  	}
    71  
    72  	// Skip any servers that have inflight requests.
    73  	var work []*workItem
    74  	f.inflightLock.Lock()
    75  	for _, server := range servers {
    76  		if _, ok := f.inflight[server.ID]; ok {
    77  			f.logger.Printf("[WARN] consul: error getting server health from %q: last request still outstanding",
    78  				server.Name)
    79  		} else {
    80  			workItem := &workItem{
    81  				server:  server,
    82  				replyCh: make(chan *autopilot.ServerStats, 1),
    83  			}
    84  			work = append(work, workItem)
    85  			f.inflight[server.ID] = struct{}{}
    86  			go f.fetch(workItem.server, workItem.replyCh)
    87  		}
    88  	}
    89  	f.inflightLock.Unlock()
    90  
    91  	// Now wait for the results to come in, or for the context to be
    92  	// canceled.
    93  	replies := make(map[string]*autopilot.ServerStats)
    94  	for _, workItem := range work {
    95  		// Drain the reply first if there is one.
    96  		select {
    97  		case reply := <-workItem.replyCh:
    98  			replies[workItem.server.ID] = reply
    99  			continue
   100  		default:
   101  		}
   102  
   103  		select {
   104  		case reply := <-workItem.replyCh:
   105  			replies[workItem.server.ID] = reply
   106  
   107  		case <-ctx.Done():
   108  			f.logger.Printf("[WARN] consul: error getting server health from %q: %v",
   109  				workItem.server.Name, ctx.Err())
   110  		}
   111  	}
   112  	return replies
   113  }