github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/drivers/docker/stats.go (about)

     1  package docker
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"sync"
     8  	"time"
     9  
    10  	docker "github.com/fsouza/go-dockerclient"
    11  	cstructs "github.com/hashicorp/nomad/client/structs"
    12  	"github.com/hashicorp/nomad/drivers/docker/util"
    13  	nstructs "github.com/hashicorp/nomad/nomad/structs"
    14  )
    15  
    16  const (
    17  	// statsCollectorBackoffBaseline is the baseline time for exponential
    18  	// backoff while calling the docker stats api.
    19  	statsCollectorBackoffBaseline = 5 * time.Second
    20  
    21  	// statsCollectorBackoffLimit is the limit of the exponential backoff for
    22  	// calling the docker stats api.
    23  	statsCollectorBackoffLimit = 2 * time.Minute
    24  )
    25  
    26  // usageSender wraps a TaskResourceUsage chan such that it supports concurrent
    27  // sending and closing, and backpressures by dropping events if necessary.
    28  type usageSender struct {
    29  	closed bool
    30  	destCh chan<- *cstructs.TaskResourceUsage
    31  	mu     sync.Mutex
    32  }
    33  
    34  // newStatsChanPipe returns a chan wrapped in a struct that supports concurrent
    35  // sending and closing, and the receiver end of the chan.
    36  func newStatsChanPipe() (*usageSender, <-chan *cstructs.TaskResourceUsage) {
    37  	destCh := make(chan *cstructs.TaskResourceUsage, 1)
    38  	return &usageSender{
    39  		destCh: destCh,
    40  	}, destCh
    41  
    42  }
    43  
    44  // send resource usage to the receiver unless the chan is already full or
    45  // closed.
    46  func (u *usageSender) send(tru *cstructs.TaskResourceUsage) {
    47  	u.mu.Lock()
    48  	defer u.mu.Unlock()
    49  
    50  	if u.closed {
    51  		return
    52  	}
    53  
    54  	select {
    55  	case u.destCh <- tru:
    56  	default:
    57  		// Backpressure caused missed interval
    58  	}
    59  }
    60  
    61  // close resource usage. Any further sends will be dropped.
    62  func (u *usageSender) close() {
    63  	u.mu.Lock()
    64  	defer u.mu.Unlock()
    65  
    66  	if u.closed {
    67  		// already closed
    68  		return
    69  	}
    70  
    71  	u.closed = true
    72  	close(u.destCh)
    73  }
    74  
    75  // Stats starts collecting stats from the docker daemon and sends them on the
    76  // returned channel.
    77  func (h *taskHandle) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) {
    78  	select {
    79  	case <-h.doneCh:
    80  		return nil, nstructs.NewRecoverableError(fmt.Errorf("container stopped"), false)
    81  	default:
    82  	}
    83  
    84  	destCh, recvCh := newStatsChanPipe()
    85  	go h.collectStats(ctx, destCh, interval)
    86  	return recvCh, nil
    87  }
    88  
    89  // collectStats starts collecting resource usage stats of a docker container
    90  func (h *taskHandle) collectStats(ctx context.Context, destCh *usageSender, interval time.Duration) {
    91  	defer destCh.close()
    92  
    93  	// backoff and retry used if the docker stats API returns an error
    94  	var backoff time.Duration
    95  	var retry int
    96  	// loops until doneCh is closed
    97  	for {
    98  		if backoff > 0 {
    99  			select {
   100  			case <-time.After(backoff):
   101  			case <-ctx.Done():
   102  				return
   103  			case <-h.doneCh:
   104  				return
   105  			}
   106  		}
   107  		// make a channel for docker stats structs and start a collector to
   108  		// receive stats from docker and emit nomad stats
   109  		// statsCh will always be closed by docker client.
   110  		statsCh := make(chan *docker.Stats)
   111  		go dockerStatsCollector(destCh, statsCh, interval)
   112  
   113  		statsOpts := docker.StatsOptions{
   114  			ID:      h.containerID,
   115  			Context: ctx,
   116  			Done:    h.doneCh,
   117  			Stats:   statsCh,
   118  			Stream:  true,
   119  		}
   120  
   121  		// Stats blocks until an error has occurred, or doneCh has been closed
   122  		if err := h.client.Stats(statsOpts); err != nil && err != io.ErrClosedPipe {
   123  			// An error occurred during stats collection, retry with backoff
   124  			h.logger.Debug("error collecting stats from container", "error", err)
   125  
   126  			// Calculate the new backoff
   127  			backoff = (1 << (2 * uint64(retry))) * statsCollectorBackoffBaseline
   128  			if backoff > statsCollectorBackoffLimit {
   129  				backoff = statsCollectorBackoffLimit
   130  			}
   131  			// Increment retry counter
   132  			retry++
   133  			continue
   134  		}
   135  		// Stats finished either because context was canceled, doneCh was closed
   136  		// or the container stopped. Stop stats collections.
   137  		return
   138  	}
   139  }
   140  
   141  func dockerStatsCollector(destCh *usageSender, statsCh <-chan *docker.Stats, interval time.Duration) {
   142  	var resourceUsage *cstructs.TaskResourceUsage
   143  
   144  	// hasSentInitialStats is used so as to emit the first stats received from
   145  	// the docker daemon
   146  	var hasSentInitialStats bool
   147  
   148  	// timer is used to send nomad status at the specified interval
   149  	timer := time.NewTimer(interval)
   150  	for {
   151  		select {
   152  		case <-timer.C:
   153  			// it is possible for the timer to go off before the first stats
   154  			// has been emitted from docker
   155  			if resourceUsage == nil {
   156  				continue
   157  			}
   158  
   159  			// sending to destCh could block, drop this interval if it does
   160  			destCh.send(resourceUsage)
   161  
   162  			timer.Reset(interval)
   163  
   164  		case s, ok := <-statsCh:
   165  			// if statsCh is closed stop collection
   166  			if !ok {
   167  				return
   168  			}
   169  			// s should always be set, but check and skip just in case
   170  			if s != nil {
   171  				resourceUsage = util.DockerStatsToTaskResourceUsage(s)
   172  				// send stats next interation if this is the first time received
   173  				// from docker
   174  				if !hasSentInitialStats {
   175  					timer.Reset(0)
   176  					hasSentInitialStats = true
   177  				}
   178  			}
   179  		}
   180  	}
   181  }