github.com/clly/consul@v1.4.5/agent/consul/stats_fetcher.go (about) 1 package consul 2 3 import ( 4 "context" 5 "log" 6 "sync" 7 8 "github.com/hashicorp/consul/agent/consul/autopilot" 9 "github.com/hashicorp/consul/agent/metadata" 10 "github.com/hashicorp/consul/agent/pool" 11 "github.com/hashicorp/serf/serf" 12 ) 13 14 // StatsFetcher has two functions for autopilot. First, lets us fetch all the 15 // stats in parallel so we are taking a sample as close to the same time as 16 // possible, since we are comparing time-sensitive info for the health check. 17 // Second, it bounds the time so that one slow RPC can't hold up the health 18 // check loop; as a side effect of how it implements this, it also limits to 19 // a single in-flight RPC to any given server, so goroutines don't accumulate 20 // as we run the health check fairly frequently. 21 type StatsFetcher struct { 22 logger *log.Logger 23 pool *pool.ConnPool 24 datacenter string 25 inflight map[string]struct{} 26 inflightLock sync.Mutex 27 } 28 29 // NewStatsFetcher returns a stats fetcher. 30 func NewStatsFetcher(logger *log.Logger, pool *pool.ConnPool, datacenter string) *StatsFetcher { 31 return &StatsFetcher{ 32 logger: logger, 33 pool: pool, 34 datacenter: datacenter, 35 inflight: make(map[string]struct{}), 36 } 37 } 38 39 // fetch does the RPC to fetch the server stats from a single server. We don't 40 // cancel this when the context is canceled because we only want one in-flight 41 // RPC to each server, so we let it finish and then clean up the in-flight 42 // tracking. 43 func (f *StatsFetcher) fetch(server *metadata.Server, replyCh chan *autopilot.ServerStats) { 44 var args struct{} 45 var reply autopilot.ServerStats 46 err := f.pool.RPC(f.datacenter, server.Addr, server.Version, "Status.RaftStats", server.UseTLS, &args, &reply) 47 if err != nil { 48 f.logger.Printf("[WARN] consul: error getting server health from %q: %v", 49 server.Name, err) 50 } else { 51 replyCh <- &reply 52 } 53 54 f.inflightLock.Lock() 55 delete(f.inflight, server.ID) 56 f.inflightLock.Unlock() 57 } 58 59 // Fetch will attempt to query all the servers in parallel. 60 func (f *StatsFetcher) Fetch(ctx context.Context, members []serf.Member) map[string]*autopilot.ServerStats { 61 type workItem struct { 62 server *metadata.Server 63 replyCh chan *autopilot.ServerStats 64 } 65 var servers []*metadata.Server 66 for _, s := range members { 67 if ok, parts := metadata.IsConsulServer(s); ok { 68 servers = append(servers, parts) 69 } 70 } 71 72 // Skip any servers that have inflight requests. 73 var work []*workItem 74 f.inflightLock.Lock() 75 for _, server := range servers { 76 if _, ok := f.inflight[server.ID]; ok { 77 f.logger.Printf("[WARN] consul: error getting server health from %q: last request still outstanding", 78 server.Name) 79 } else { 80 workItem := &workItem{ 81 server: server, 82 replyCh: make(chan *autopilot.ServerStats, 1), 83 } 84 work = append(work, workItem) 85 f.inflight[server.ID] = struct{}{} 86 go f.fetch(workItem.server, workItem.replyCh) 87 } 88 } 89 f.inflightLock.Unlock() 90 91 // Now wait for the results to come in, or for the context to be 92 // canceled. 93 replies := make(map[string]*autopilot.ServerStats) 94 for _, workItem := range work { 95 // Drain the reply first if there is one. 96 select { 97 case reply := <-workItem.replyCh: 98 replies[workItem.server.ID] = reply 99 continue 100 default: 101 } 102 103 select { 104 case reply := <-workItem.replyCh: 105 replies[workItem.server.ID] = reply 106 107 case <-ctx.Done(): 108 f.logger.Printf("[WARN] consul: error getting server health from %q: %v", 109 workItem.server.Name, ctx.Err()) 110 } 111 } 112 return replies 113 }