github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/drivers/docker/stats.go (about) 1 package docker 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "sync" 8 "time" 9 10 docker "github.com/fsouza/go-dockerclient" 11 cstructs "github.com/hashicorp/nomad/client/structs" 12 "github.com/hashicorp/nomad/drivers/docker/util" 13 nstructs "github.com/hashicorp/nomad/nomad/structs" 14 ) 15 16 const ( 17 // statsCollectorBackoffBaseline is the baseline time for exponential 18 // backoff while calling the docker stats api. 19 statsCollectorBackoffBaseline = 5 * time.Second 20 21 // statsCollectorBackoffLimit is the limit of the exponential backoff for 22 // calling the docker stats api. 23 statsCollectorBackoffLimit = 2 * time.Minute 24 ) 25 26 // usageSender wraps a TaskResourceUsage chan such that it supports concurrent 27 // sending and closing, and backpressures by dropping events if necessary. 28 type usageSender struct { 29 closed bool 30 destCh chan<- *cstructs.TaskResourceUsage 31 mu sync.Mutex 32 } 33 34 // newStatsChanPipe returns a chan wrapped in a struct that supports concurrent 35 // sending and closing, and the receiver end of the chan. 36 func newStatsChanPipe() (*usageSender, <-chan *cstructs.TaskResourceUsage) { 37 destCh := make(chan *cstructs.TaskResourceUsage, 1) 38 return &usageSender{ 39 destCh: destCh, 40 }, destCh 41 42 } 43 44 // send resource usage to the receiver unless the chan is already full or 45 // closed. 46 func (u *usageSender) send(tru *cstructs.TaskResourceUsage) { 47 u.mu.Lock() 48 defer u.mu.Unlock() 49 50 if u.closed { 51 return 52 } 53 54 select { 55 case u.destCh <- tru: 56 default: 57 // Backpressure caused missed interval 58 } 59 } 60 61 // close resource usage. Any further sends will be dropped. 62 func (u *usageSender) close() { 63 u.mu.Lock() 64 defer u.mu.Unlock() 65 66 if u.closed { 67 // already closed 68 return 69 } 70 71 u.closed = true 72 close(u.destCh) 73 } 74 75 // Stats starts collecting stats from the docker daemon and sends them on the 76 // returned channel. 77 func (h *taskHandle) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 78 select { 79 case <-h.doneCh: 80 return nil, nstructs.NewRecoverableError(fmt.Errorf("container stopped"), false) 81 default: 82 } 83 84 destCh, recvCh := newStatsChanPipe() 85 go h.collectStats(ctx, destCh, interval) 86 return recvCh, nil 87 } 88 89 // collectStats starts collecting resource usage stats of a docker container 90 func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, interval time.Duration) { 91 defer destCh.close() 92 93 // backoff and retry used if the docker stats API returns an error 94 var backoff time.Duration 95 var retry int 96 // loops until doneCh is closed 97 for { 98 if backoff > 0 { 99 select { 100 case <-time.After(backoff): 101 case <-ctx.Done(): 102 return 103 case <-h.doneCh: 104 return 105 } 106 } 107 // make a channel for docker stats structs and start a collector to 108 // receive stats from docker and emit nomad stats 109 // statsCh will always be closed by docker client. 110 statsCh := make(chan *docker.Stats) 111 go dockerStatsCollector(destCh, statsCh, interval) 112 113 statsOpts := docker.StatsOptions{ 114 ID: h.containerID, 115 Context: ctx, 116 Done: h.doneCh, 117 Stats: statsCh, 118 Stream: true, 119 } 120 121 // Stats blocks until an error has occurred, or doneCh has been closed 122 if err := h.client.Stats(statsOpts); err != nil && err != io.ErrClosedPipe { 123 // An error occurred during stats collection, retry with backoff 124 h.logger.Debug("error collecting stats from container", "error", err) 125 126 // Calculate the new backoff 127 backoff = (1 << (2 * uint64(retry))) * statsCollectorBackoffBaseline 128 if backoff > statsCollectorBackoffLimit { 129 backoff = statsCollectorBackoffLimit 130 } 131 // Increment retry counter 132 retry++ 133 continue 134 } 135 // Stats finished either because context was canceled, doneCh was closed 136 // or the container stopped. Stop stats collections. 137 return 138 } 139 } 140 141 func dockerStatsCollector(destCh *usageSender, statsCh <-chan *docker.Stats, interval time.Duration) { 142 var resourceUsage *cstructs.TaskResourceUsage 143 144 // hasSentInitialStats is used so as to emit the first stats received from 145 // the docker daemon 146 var hasSentInitialStats bool 147 148 // timer is used to send nomad status at the specified interval 149 timer := time.NewTimer(interval) 150 for { 151 select { 152 case <-timer.C: 153 // it is possible for the timer to go off before the first stats 154 // has been emitted from docker 155 if resourceUsage == nil { 156 continue 157 } 158 159 // sending to destCh could block, drop this interval if it does 160 destCh.send(resourceUsage) 161 162 timer.Reset(interval) 163 164 case s, ok := <-statsCh: 165 // if statsCh is closed stop collection 166 if !ok { 167 return 168 } 169 // s should always be set, but check and skip just in case 170 if s != nil { 171 resourceUsage = util.DockerStatsToTaskResourceUsage(s) 172 // send stats next interation if this is the first time received 173 // from docker 174 if !hasSentInitialStats { 175 timer.Reset(0) 176 hasSentInitialStats = true 177 } 178 } 179 } 180 } 181 }