github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/taskrunner/stats_hook_test.go

github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/taskrunner/stats_hook_test.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  	"sync/atomic"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    10  	cstructs "github.com/hashicorp/nomad/client/structs"
    11  	"github.com/hashicorp/nomad/helper/testlog"
    12  	"github.com/stretchr/testify/require"
    13  )
    14  
    15  // Statically assert the stats hook implements the expected interfaces
    16  var _ interfaces.TaskPoststartHook = (*statsHook)(nil)
    17  var _ interfaces.TaskExitedHook = (*statsHook)(nil)
    18  var _ interfaces.ShutdownHook = (*statsHook)(nil)
    19  
// mockStatsUpdater is a test double whose UpdateStats forwards every resource
// usage update to Ch so that tests can observe them.
type mockStatsUpdater struct {
	// Ch is sent task resource usage updates if not nil
	Ch chan *cstructs.TaskResourceUsage
}
    24  
    25  // newMockStatsUpdater returns a mockStatsUpdater that blocks on Ch for every
    26  // call to UpdateStats
    27  func newMockStatsUpdater() *mockStatsUpdater {
    28  	return &mockStatsUpdater{
    29  		Ch: make(chan *cstructs.TaskResourceUsage),
    30  	}
    31  }
    32  
    33  func (m *mockStatsUpdater) UpdateStats(ru *cstructs.TaskResourceUsage) {
    34  	if m.Ch != nil {
    35  		m.Ch <- ru
    36  	}
    37  }
    38  
// mockDriverStats is a test double passed as the DriverStats of a
// TaskPoststartRequest. It counts how many times Stats has been invoked.
type mockDriverStats struct {
	// called is the number of Stats invocations; it is only accessed through
	// sync/atomic (see Stats and Called).
	called uint32

	// err is returned by Stats if it is non-nil
	err error
}
    45  
    46  func (m *mockDriverStats) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) {
    47  	atomic.AddUint32(&m.called, 1)
    48  
    49  	if m.err != nil {
    50  		return nil, m.err
    51  	}
    52  	ru := &cstructs.TaskResourceUsage{
    53  		ResourceUsage: &cstructs.ResourceUsage{
    54  			MemoryStats: &cstructs.MemoryStats{
    55  				RSS:      1,
    56  				Measured: []string{"RSS"},
    57  			},
    58  			CpuStats: &cstructs.CpuStats{
    59  				SystemMode: 1,
    60  				Measured:   []string{"System Mode"},
    61  			},
    62  		},
    63  		Timestamp: time.Now().UnixNano(),
    64  		Pids:      map[string]*cstructs.ResourceUsage{},
    65  	}
    66  	ru.Pids["task"] = ru.ResourceUsage
    67  	ch := make(chan *cstructs.TaskResourceUsage)
    68  	go func() {
    69  		defer close(ch)
    70  		select {
    71  		case <-ctx.Done():
    72  		case ch <- ru:
    73  		}
    74  	}()
    75  	return ch, nil
    76  }
    77  
    78  func (m *mockDriverStats) Called() int {
    79  	return int(atomic.LoadUint32(&m.called))
    80  }
    81  
    82  // TestTaskRunner_StatsHook_PoststartExited asserts the stats hook starts and
    83  // stops.
    84  func TestTaskRunner_StatsHook_PoststartExited(t *testing.T) {
    85  	t.Parallel()
    86  
    87  	require := require.New(t)
    88  	logger := testlog.HCLogger(t)
    89  	su := newMockStatsUpdater()
    90  	ds := new(mockDriverStats)
    91  
    92  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
    93  
    94  	// Create hook
    95  	h := newStatsHook(su, time.Minute, logger)
    96  
    97  	// Always call Exited to cleanup goroutines
    98  	defer h.Exited(context.Background(), nil, nil)
    99  
   100  	// Run prestart
   101  	require.NoError(h.Poststart(context.Background(), poststartReq, nil))
   102  
   103  	// An initial stats collection should run and call the updater
   104  	select {
   105  	case ru := <-su.Ch:
   106  		require.Equal(uint64(1), ru.ResourceUsage.MemoryStats.RSS)
   107  	case <-time.After(10 * time.Second):
   108  		t.Fatalf("timeout waiting for initial stats collection")
   109  	}
   110  
   111  	require.NoError(h.Exited(context.Background(), nil, nil))
   112  }
   113  
   114  // TestTaskRunner_StatsHook_Periodic asserts the stats hook collects stats on
   115  // an interval.
   116  func TestTaskRunner_StatsHook_Periodic(t *testing.T) {
   117  	t.Parallel()
   118  
   119  	require := require.New(t)
   120  	logger := testlog.HCLogger(t)
   121  	su := newMockStatsUpdater()
   122  
   123  	ds := new(mockDriverStats)
   124  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
   125  
   126  	// interval needs to be high enough that even on a slow/busy VM
   127  	// Exited() can complete within the interval.
   128  	const interval = 500 * time.Millisecond
   129  
   130  	h := newStatsHook(su, interval, logger)
   131  	defer h.Exited(context.Background(), nil, nil)
   132  
   133  	// Run prestart
   134  	require.NoError(h.Poststart(context.Background(), poststartReq, nil))
   135  
   136  	// An initial stats collection should run and call the updater
   137  	var firstrun int64
   138  	select {
   139  	case ru := <-su.Ch:
   140  		if ru.Timestamp <= 0 {
   141  			t.Fatalf("expected nonzero timestamp (%v)", ru.Timestamp)
   142  		}
   143  		firstrun = ru.Timestamp
   144  	case <-time.After(10 * time.Second):
   145  		t.Fatalf("timeout waiting for initial stats collection")
   146  	}
   147  
   148  	// Should get another update in ~500ms (see interval above)
   149  	select {
   150  	case ru := <-su.Ch:
   151  		if ru.Timestamp <= firstrun {
   152  			t.Fatalf("expected timestamp (%v) after first run (%v)", ru.Timestamp, firstrun)
   153  		}
   154  	case <-time.After(10 * time.Second):
   155  		t.Fatalf("timeout waiting for second stats collection")
   156  	}
   157  
   158  	// Exiting should prevent further updates
   159  	require.NoError(h.Exited(context.Background(), nil, nil))
   160  
   161  	// Should *not* get another update in ~500ms (see interval above)
   162  	// we may get a single update due to race with exit
   163  	timeout := time.After(2 * interval)
   164  	firstUpdate := true
   165  
   166  WAITING:
   167  	select {
   168  	case ru := <-su.Ch:
   169  		if firstUpdate {
   170  			firstUpdate = false
   171  			goto WAITING
   172  		}
   173  		t.Fatalf("unexpected update after exit (firstrun=%v; update=%v", firstrun, ru.Timestamp)
   174  	case <-timeout:
   175  		// Ok! No update after exit as expected.
   176  	}
   177  }
   178  
   179  // TestTaskRunner_StatsHook_NotImplemented asserts the stats hook stops if the
   180  // driver returns NotImplemented.
   181  func TestTaskRunner_StatsHook_NotImplemented(t *testing.T) {
   182  	t.Parallel()
   183  
   184  	require := require.New(t)
   185  	logger := testlog.HCLogger(t)
   186  	su := newMockStatsUpdater()
   187  	ds := &mockDriverStats{
   188  		err: cstructs.DriverStatsNotImplemented,
   189  	}
   190  
   191  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
   192  
   193  	h := newStatsHook(su, 1, logger)
   194  	defer h.Exited(context.Background(), nil, nil)
   195  
   196  	// Run prestart
   197  	require.NoError(h.Poststart(context.Background(), poststartReq, nil))
   198  
   199  	// An initial stats collection should run and *not* call the updater
   200  	select {
   201  	case ru := <-su.Ch:
   202  		t.Fatalf("unexpected resource update (timestamp=%v)", ru.Timestamp)
   203  	case <-time.After(500 * time.Millisecond):
   204  		// Ok! No update received because error was returned
   205  	}
   206  }
   207  
   208  // TestTaskRunner_StatsHook_Backoff asserts that stats hook does some backoff
   209  // even if the driver doesn't support intervals well
   210  func TestTaskRunner_StatsHook_Backoff(t *testing.T) {
   211  	t.Parallel()
   212  
   213  	logger := testlog.HCLogger(t)
   214  	su := newMockStatsUpdater()
   215  	ds := &mockDriverStats{}
   216  
   217  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
   218  
   219  	h := newStatsHook(su, time.Minute, logger)
   220  	defer h.Exited(context.Background(), nil, nil)
   221  
   222  	// Run prestart
   223  	require.NoError(t, h.Poststart(context.Background(), poststartReq, nil))
   224  
   225  	timeout := time.After(500 * time.Millisecond)
   226  
   227  DRAIN:
   228  	for {
   229  		select {
   230  		case <-su.Ch:
   231  		case <-timeout:
   232  			break DRAIN
   233  		}
   234  	}
   235  
   236  	require.Equal(t, ds.Called(), 1)
   237  }