github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/stats_hook_test.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  	"sync/atomic"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/hashicorp/nomad/ci"
    10  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    11  	cstructs "github.com/hashicorp/nomad/client/structs"
    12  	"github.com/hashicorp/nomad/helper/testlog"
    13  	"github.com/stretchr/testify/require"
    14  )
    15  
    16  // Statically assert the stats hook implements the expected interfaces
    17  var _ interfaces.TaskPoststartHook = (*statsHook)(nil)
    18  var _ interfaces.TaskExitedHook = (*statsHook)(nil)
    19  var _ interfaces.ShutdownHook = (*statsHook)(nil)
    20  
// mockStatsUpdater is a test double whose UpdateStats method forwards each
// resource usage update to Ch so tests can observe it.
type mockStatsUpdater struct {
	// Ch is sent task resource usage updates if not nil
	Ch chan *cstructs.TaskResourceUsage
}
    25  
    26  // newMockStatsUpdater returns a mockStatsUpdater that blocks on Ch for every
    27  // call to UpdateStats
    28  func newMockStatsUpdater() *mockStatsUpdater {
    29  	return &mockStatsUpdater{
    30  		Ch: make(chan *cstructs.TaskResourceUsage),
    31  	}
    32  }
    33  
    34  func (m *mockStatsUpdater) UpdateStats(ru *cstructs.TaskResourceUsage) {
    35  	if m.Ch != nil {
    36  		m.Ch <- ru
    37  	}
    38  }
    39  
// mockDriverStats is a test double for a driver's stats source: Stats counts
// its invocations and either fails with err or emits a single canned sample.
type mockDriverStats struct {
	// called counts Stats invocations; it is accessed only through
	// sync/atomic (see Stats and Called).
	called uint32

	// err is returned by Stats if it is non-nil
	err error
}
    46  
    47  func (m *mockDriverStats) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) {
    48  	atomic.AddUint32(&m.called, 1)
    49  
    50  	if m.err != nil {
    51  		return nil, m.err
    52  	}
    53  	ru := &cstructs.TaskResourceUsage{
    54  		ResourceUsage: &cstructs.ResourceUsage{
    55  			MemoryStats: &cstructs.MemoryStats{
    56  				RSS:      1,
    57  				Measured: []string{"RSS"},
    58  			},
    59  			CpuStats: &cstructs.CpuStats{
    60  				SystemMode: 1,
    61  				Measured:   []string{"System Mode"},
    62  			},
    63  		},
    64  		Timestamp: time.Now().UnixNano(),
    65  		Pids:      map[string]*cstructs.ResourceUsage{},
    66  	}
    67  	ru.Pids["task"] = ru.ResourceUsage
    68  	ch := make(chan *cstructs.TaskResourceUsage)
    69  	go func() {
    70  		defer close(ch)
    71  		select {
    72  		case <-ctx.Done():
    73  		case ch <- ru:
    74  		}
    75  	}()
    76  	return ch, nil
    77  }
    78  
    79  func (m *mockDriverStats) Called() int {
    80  	return int(atomic.LoadUint32(&m.called))
    81  }
    82  
    83  // TestTaskRunner_StatsHook_PoststartExited asserts the stats hook starts and
    84  // stops.
    85  func TestTaskRunner_StatsHook_PoststartExited(t *testing.T) {
    86  	ci.Parallel(t)
    87  
    88  	require := require.New(t)
    89  	logger := testlog.HCLogger(t)
    90  	su := newMockStatsUpdater()
    91  	ds := new(mockDriverStats)
    92  
    93  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
    94  
    95  	// Create hook
    96  	h := newStatsHook(su, time.Minute, logger)
    97  
    98  	// Always call Exited to cleanup goroutines
    99  	defer h.Exited(context.Background(), nil, nil)
   100  
   101  	// Run prestart
   102  	require.NoError(h.Poststart(context.Background(), poststartReq, nil))
   103  
   104  	// An initial stats collection should run and call the updater
   105  	select {
   106  	case ru := <-su.Ch:
   107  		require.Equal(uint64(1), ru.ResourceUsage.MemoryStats.RSS)
   108  	case <-time.After(10 * time.Second):
   109  		t.Fatalf("timeout waiting for initial stats collection")
   110  	}
   111  
   112  	require.NoError(h.Exited(context.Background(), nil, nil))
   113  }
   114  
   115  // TestTaskRunner_StatsHook_Periodic asserts the stats hook collects stats on
   116  // an interval.
   117  func TestTaskRunner_StatsHook_Periodic(t *testing.T) {
   118  	ci.Parallel(t)
   119  
   120  	require := require.New(t)
   121  	logger := testlog.HCLogger(t)
   122  	su := newMockStatsUpdater()
   123  
   124  	ds := new(mockDriverStats)
   125  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
   126  
   127  	// interval needs to be high enough that even on a slow/busy VM
   128  	// Exited() can complete within the interval.
   129  	const interval = 500 * time.Millisecond
   130  
   131  	h := newStatsHook(su, interval, logger)
   132  	defer h.Exited(context.Background(), nil, nil)
   133  
   134  	// Run prestart
   135  	require.NoError(h.Poststart(context.Background(), poststartReq, nil))
   136  
   137  	// An initial stats collection should run and call the updater
   138  	var firstrun int64
   139  	select {
   140  	case ru := <-su.Ch:
   141  		if ru.Timestamp <= 0 {
   142  			t.Fatalf("expected nonzero timestamp (%v)", ru.Timestamp)
   143  		}
   144  		firstrun = ru.Timestamp
   145  	case <-time.After(10 * time.Second):
   146  		t.Fatalf("timeout waiting for initial stats collection")
   147  	}
   148  
   149  	// Should get another update in ~500ms (see interval above)
   150  	select {
   151  	case ru := <-su.Ch:
   152  		if ru.Timestamp <= firstrun {
   153  			t.Fatalf("expected timestamp (%v) after first run (%v)", ru.Timestamp, firstrun)
   154  		}
   155  	case <-time.After(10 * time.Second):
   156  		t.Fatalf("timeout waiting for second stats collection")
   157  	}
   158  
   159  	// Exiting should prevent further updates
   160  	require.NoError(h.Exited(context.Background(), nil, nil))
   161  
   162  	// Should *not* get another update in ~500ms (see interval above)
   163  	// we may get a single update due to race with exit
   164  	timeout := time.After(2 * interval)
   165  	firstUpdate := true
   166  
   167  WAITING:
   168  	select {
   169  	case ru := <-su.Ch:
   170  		if firstUpdate {
   171  			firstUpdate = false
   172  			goto WAITING
   173  		}
   174  		t.Fatalf("unexpected update after exit (firstrun=%v; update=%v", firstrun, ru.Timestamp)
   175  	case <-timeout:
   176  		// Ok! No update after exit as expected.
   177  	}
   178  }
   179  
   180  // TestTaskRunner_StatsHook_NotImplemented asserts the stats hook stops if the
   181  // driver returns NotImplemented.
   182  func TestTaskRunner_StatsHook_NotImplemented(t *testing.T) {
   183  	ci.Parallel(t)
   184  
   185  	require := require.New(t)
   186  	logger := testlog.HCLogger(t)
   187  	su := newMockStatsUpdater()
   188  	ds := &mockDriverStats{
   189  		err: cstructs.DriverStatsNotImplemented,
   190  	}
   191  
   192  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
   193  
   194  	h := newStatsHook(su, 1, logger)
   195  	defer h.Exited(context.Background(), nil, nil)
   196  
   197  	// Run prestart
   198  	require.NoError(h.Poststart(context.Background(), poststartReq, nil))
   199  
   200  	// An initial stats collection should run and *not* call the updater
   201  	select {
   202  	case ru := <-su.Ch:
   203  		t.Fatalf("unexpected resource update (timestamp=%v)", ru.Timestamp)
   204  	case <-time.After(500 * time.Millisecond):
   205  		// Ok! No update received because error was returned
   206  	}
   207  }
   208  
   209  // TestTaskRunner_StatsHook_Backoff asserts that stats hook does some backoff
   210  // even if the driver doesn't support intervals well
   211  func TestTaskRunner_StatsHook_Backoff(t *testing.T) {
   212  	ci.Parallel(t)
   213  
   214  	logger := testlog.HCLogger(t)
   215  	su := newMockStatsUpdater()
   216  	ds := &mockDriverStats{}
   217  
   218  	poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds}
   219  
   220  	h := newStatsHook(su, time.Minute, logger)
   221  	defer h.Exited(context.Background(), nil, nil)
   222  
   223  	// Run prestart
   224  	require.NoError(t, h.Poststart(context.Background(), poststartReq, nil))
   225  
   226  	timeout := time.After(500 * time.Millisecond)
   227  
   228  DRAIN:
   229  	for {
   230  		select {
   231  		case <-su.Ch:
   232  		case <-timeout:
   233  			break DRAIN
   234  		}
   235  	}
   236  
   237  	require.Equal(t, ds.Called(), 1)
   238  }