github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/drivers/docker/stats.go (about) 1 package docker 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "sync" 8 "time" 9 10 docker "github.com/fsouza/go-dockerclient" 11 "github.com/hashicorp/nomad/client/structs" 12 cstructs "github.com/hashicorp/nomad/client/structs" 13 "github.com/hashicorp/nomad/drivers/docker/util" 14 nstructs "github.com/hashicorp/nomad/nomad/structs" 15 ) 16 17 const ( 18 // statsCollectorBackoffBaseline is the baseline time for exponential 19 // backoff while calling the docker stats api. 20 statsCollectorBackoffBaseline = 5 * time.Second 21 22 // statsCollectorBackoffLimit is the limit of the exponential backoff for 23 // calling the docker stats api. 24 statsCollectorBackoffLimit = 2 * time.Minute 25 ) 26 27 // usageSender wraps a TaskResourceUsage chan such that it supports concurrent 28 // sending and closing, and backpressures by dropping events if necessary. 29 type usageSender struct { 30 closed bool 31 destCh chan<- *structs.TaskResourceUsage 32 mu sync.Mutex 33 } 34 35 // newStatsChanPipe returns a chan wrapped in a struct that supports concurrent 36 // sending and closing, and the receiver end of the chan. 37 func newStatsChanPipe() (*usageSender, <-chan *structs.TaskResourceUsage) { 38 destCh := make(chan *cstructs.TaskResourceUsage, 1) 39 return &usageSender{ 40 destCh: destCh, 41 }, destCh 42 43 } 44 45 // send resource usage to the receiver unless the chan is already full or 46 // closed. 47 func (u *usageSender) send(tru *cstructs.TaskResourceUsage) { 48 u.mu.Lock() 49 defer u.mu.Unlock() 50 51 if u.closed { 52 return 53 } 54 55 select { 56 case u.destCh <- tru: 57 default: 58 // Backpressure caused missed interval 59 } 60 } 61 62 // close resource usage. Any further sends will be dropped. 63 func (u *usageSender) close() { 64 u.mu.Lock() 65 defer u.mu.Unlock() 66 67 if u.closed { 68 // already closed 69 return 70 } 71 72 u.closed = true 73 close(u.destCh) 74 } 75 76 // Stats starts collecting stats from the docker daemon and sends them on the 77 // returned channel. 78 func (h *taskHandle) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 79 select { 80 case <-h.doneCh: 81 return nil, nstructs.NewRecoverableError(fmt.Errorf("container stopped"), false) 82 default: 83 } 84 85 destCh, recvCh := newStatsChanPipe() 86 go h.collectStats(ctx, destCh, interval) 87 return recvCh, nil 88 } 89 90 // collectStats starts collecting resource usage stats of a docker container 91 func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, interval time.Duration) { 92 defer destCh.close() 93 94 // backoff and retry used if the docker stats API returns an error 95 var backoff time.Duration 96 var retry int 97 // loops until doneCh is closed 98 for { 99 if backoff > 0 { 100 select { 101 case <-time.After(backoff): 102 case <-ctx.Done(): 103 return 104 case <-h.doneCh: 105 return 106 } 107 } 108 // make a channel for docker stats structs and start a collector to 109 // receive stats from docker and emit nomad stats 110 // statsCh will always be closed by docker client. 111 statsCh := make(chan *docker.Stats) 112 go dockerStatsCollector(destCh, statsCh, interval) 113 114 statsOpts := docker.StatsOptions{ 115 ID: h.containerID, 116 Context: ctx, 117 Done: h.doneCh, 118 Stats: statsCh, 119 Stream: true, 120 } 121 122 // Stats blocks until an error has occurred, or doneCh has been closed 123 if err := h.client.Stats(statsOpts); err != nil && err != io.ErrClosedPipe { 124 // An error occurred during stats collection, retry with backoff 125 h.logger.Debug("error collecting stats from container", "error", err) 126 127 // Calculate the new backoff 128 backoff = (1 << (2 * uint64(retry))) * statsCollectorBackoffBaseline 129 if backoff > statsCollectorBackoffLimit { 130 backoff = statsCollectorBackoffLimit 131 } 132 // Increment retry counter 133 retry++ 134 continue 135 } 136 // Stats finished either because context was canceled, doneCh was closed 137 // or the container stopped. Stop stats collections. 138 return 139 } 140 } 141 142 func dockerStatsCollector(destCh *usageSender, statsCh <-chan *docker.Stats, interval time.Duration) { 143 var resourceUsage *cstructs.TaskResourceUsage 144 145 // hasSentInitialStats is used so as to emit the first stats received from 146 // the docker daemon 147 var hasSentInitialStats bool 148 149 // timer is used to send nomad status at the specified interval 150 timer := time.NewTimer(interval) 151 for { 152 select { 153 case <-timer.C: 154 // it is possible for the timer to go off before the first stats 155 // has been emitted from docker 156 if resourceUsage == nil { 157 continue 158 } 159 160 // sending to destCh could block, drop this interval if it does 161 destCh.send(resourceUsage) 162 163 timer.Reset(interval) 164 165 case s, ok := <-statsCh: 166 // if statsCh is closed stop collection 167 if !ok { 168 return 169 } 170 // s should always be set, but check and skip just in case 171 if s != nil { 172 resourceUsage = util.DockerStatsToTaskResourceUsage(s) 173 // send stats next interation if this is the first time received 174 // from docker 175 if !hasSentInitialStats { 176 timer.Reset(0) 177 hasSentInitialStats = true 178 } 179 } 180 } 181 } 182 }