github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/stats_hook_test.go (about) 1 package taskrunner 2 3 import ( 4 "context" 5 "sync/atomic" 6 "testing" 7 "time" 8 9 "github.com/hashicorp/nomad/ci" 10 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 11 cstructs "github.com/hashicorp/nomad/client/structs" 12 "github.com/hashicorp/nomad/helper/testlog" 13 "github.com/stretchr/testify/require" 14 ) 15 16 // Statically assert the stats hook implements the expected interfaces 17 var _ interfaces.TaskPoststartHook = (*statsHook)(nil) 18 var _ interfaces.TaskExitedHook = (*statsHook)(nil) 19 var _ interfaces.ShutdownHook = (*statsHook)(nil) 20 21 type mockStatsUpdater struct { 22 // Ch is sent task resource usage updates if not nil 23 Ch chan *cstructs.TaskResourceUsage 24 } 25 26 // newMockStatsUpdater returns a mockStatsUpdater that blocks on Ch for every 27 // call to UpdateStats 28 func newMockStatsUpdater() *mockStatsUpdater { 29 return &mockStatsUpdater{ 30 Ch: make(chan *cstructs.TaskResourceUsage), 31 } 32 } 33 34 func (m *mockStatsUpdater) UpdateStats(ru *cstructs.TaskResourceUsage) { 35 if m.Ch != nil { 36 m.Ch <- ru 37 } 38 } 39 40 type mockDriverStats struct { 41 called uint32 42 43 // err is returned by Stats if it is non-nil 44 err error 45 } 46 47 func (m *mockDriverStats) Stats(ctx context.Context, interval time.Duration) (<-chan *cstructs.TaskResourceUsage, error) { 48 atomic.AddUint32(&m.called, 1) 49 50 if m.err != nil { 51 return nil, m.err 52 } 53 ru := &cstructs.TaskResourceUsage{ 54 ResourceUsage: &cstructs.ResourceUsage{ 55 MemoryStats: &cstructs.MemoryStats{ 56 RSS: 1, 57 Measured: []string{"RSS"}, 58 }, 59 CpuStats: &cstructs.CpuStats{ 60 SystemMode: 1, 61 Measured: []string{"System Mode"}, 62 }, 63 }, 64 Timestamp: time.Now().UnixNano(), 65 Pids: map[string]*cstructs.ResourceUsage{}, 66 } 67 ru.Pids["task"] = ru.ResourceUsage 68 ch := make(chan *cstructs.TaskResourceUsage) 69 go func() { 70 defer close(ch) 71 select { 72 case <-ctx.Done(): 73 case ch <- ru: 74 } 75 }() 76 return ch, nil 77 } 78 79 func (m *mockDriverStats) Called() int { 80 return int(atomic.LoadUint32(&m.called)) 81 } 82 83 // TestTaskRunner_StatsHook_PoststartExited asserts the stats hook starts and 84 // stops. 85 func TestTaskRunner_StatsHook_PoststartExited(t *testing.T) { 86 ci.Parallel(t) 87 88 require := require.New(t) 89 logger := testlog.HCLogger(t) 90 su := newMockStatsUpdater() 91 ds := new(mockDriverStats) 92 93 poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds} 94 95 // Create hook 96 h := newStatsHook(su, time.Minute, logger) 97 98 // Always call Exited to cleanup goroutines 99 defer h.Exited(context.Background(), nil, nil) 100 101 // Run prestart 102 require.NoError(h.Poststart(context.Background(), poststartReq, nil)) 103 104 // An initial stats collection should run and call the updater 105 select { 106 case ru := <-su.Ch: 107 require.Equal(uint64(1), ru.ResourceUsage.MemoryStats.RSS) 108 case <-time.After(10 * time.Second): 109 t.Fatalf("timeout waiting for initial stats collection") 110 } 111 112 require.NoError(h.Exited(context.Background(), nil, nil)) 113 } 114 115 // TestTaskRunner_StatsHook_Periodic asserts the stats hook collects stats on 116 // an interval. 117 func TestTaskRunner_StatsHook_Periodic(t *testing.T) { 118 ci.Parallel(t) 119 120 require := require.New(t) 121 logger := testlog.HCLogger(t) 122 su := newMockStatsUpdater() 123 124 ds := new(mockDriverStats) 125 poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds} 126 127 // interval needs to be high enough that even on a slow/busy VM 128 // Exited() can complete within the interval. 129 const interval = 500 * time.Millisecond 130 131 h := newStatsHook(su, interval, logger) 132 defer h.Exited(context.Background(), nil, nil) 133 134 // Run prestart 135 require.NoError(h.Poststart(context.Background(), poststartReq, nil)) 136 137 // An initial stats collection should run and call the updater 138 var firstrun int64 139 select { 140 case ru := <-su.Ch: 141 if ru.Timestamp <= 0 { 142 t.Fatalf("expected nonzero timestamp (%v)", ru.Timestamp) 143 } 144 firstrun = ru.Timestamp 145 case <-time.After(10 * time.Second): 146 t.Fatalf("timeout waiting for initial stats collection") 147 } 148 149 // Should get another update in ~500ms (see interval above) 150 select { 151 case ru := <-su.Ch: 152 if ru.Timestamp <= firstrun { 153 t.Fatalf("expected timestamp (%v) after first run (%v)", ru.Timestamp, firstrun) 154 } 155 case <-time.After(10 * time.Second): 156 t.Fatalf("timeout waiting for second stats collection") 157 } 158 159 // Exiting should prevent further updates 160 require.NoError(h.Exited(context.Background(), nil, nil)) 161 162 // Should *not* get another update in ~500ms (see interval above) 163 // we may get a single update due to race with exit 164 timeout := time.After(2 * interval) 165 firstUpdate := true 166 167 WAITING: 168 select { 169 case ru := <-su.Ch: 170 if firstUpdate { 171 firstUpdate = false 172 goto WAITING 173 } 174 t.Fatalf("unexpected update after exit (firstrun=%v; update=%v", firstrun, ru.Timestamp) 175 case <-timeout: 176 // Ok! No update after exit as expected. 177 } 178 } 179 180 // TestTaskRunner_StatsHook_NotImplemented asserts the stats hook stops if the 181 // driver returns NotImplemented. 182 func TestTaskRunner_StatsHook_NotImplemented(t *testing.T) { 183 ci.Parallel(t) 184 185 require := require.New(t) 186 logger := testlog.HCLogger(t) 187 su := newMockStatsUpdater() 188 ds := &mockDriverStats{ 189 err: cstructs.DriverStatsNotImplemented, 190 } 191 192 poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds} 193 194 h := newStatsHook(su, 1, logger) 195 defer h.Exited(context.Background(), nil, nil) 196 197 // Run prestart 198 require.NoError(h.Poststart(context.Background(), poststartReq, nil)) 199 200 // An initial stats collection should run and *not* call the updater 201 select { 202 case ru := <-su.Ch: 203 t.Fatalf("unexpected resource update (timestamp=%v)", ru.Timestamp) 204 case <-time.After(500 * time.Millisecond): 205 // Ok! No update received because error was returned 206 } 207 } 208 209 // TestTaskRunner_StatsHook_Backoff asserts that stats hook does some backoff 210 // even if the driver doesn't support intervals well 211 func TestTaskRunner_StatsHook_Backoff(t *testing.T) { 212 ci.Parallel(t) 213 214 logger := testlog.HCLogger(t) 215 su := newMockStatsUpdater() 216 ds := &mockDriverStats{} 217 218 poststartReq := &interfaces.TaskPoststartRequest{DriverStats: ds} 219 220 h := newStatsHook(su, time.Minute, logger) 221 defer h.Exited(context.Background(), nil, nil) 222 223 // Run prestart 224 require.NoError(t, h.Poststart(context.Background(), poststartReq, nil)) 225 226 timeout := time.After(500 * time.Millisecond) 227 228 DRAIN: 229 for { 230 select { 231 case <-su.Ch: 232 case <-timeout: 233 break DRAIN 234 } 235 } 236 237 require.Equal(t, ds.Called(), 1) 238 }