github.com/bigcommerce/nomad@v0.9.3-bc/scheduler/device_test.go (about) 1 package scheduler 2 3 import ( 4 "testing" 5 6 "github.com/hashicorp/nomad/helper/uuid" 7 "github.com/hashicorp/nomad/nomad/mock" 8 "github.com/hashicorp/nomad/nomad/structs" 9 psstructs "github.com/hashicorp/nomad/plugins/shared/structs" 10 "github.com/stretchr/testify/require" 11 ) 12 13 // deviceRequest takes the name, count and potential constraints and affinities 14 // and returns a device request. 15 func deviceRequest(name string, count uint64, 16 constraints []*structs.Constraint, affinities []*structs.Affinity) *structs.RequestedDevice { 17 return &structs.RequestedDevice{ 18 Name: name, 19 Count: count, 20 Constraints: constraints, 21 Affinities: affinities, 22 } 23 } 24 25 // devNode returns a node containing two devices, an nvidia gpu and an intel 26 // FPGA. 27 func devNode() *structs.Node { 28 n := mock.NvidiaNode() 29 n.NodeResources.Devices = append(n.NodeResources.Devices, &structs.NodeDeviceResource{ 30 Type: "fpga", 31 Vendor: "intel", 32 Name: "F100", 33 Attributes: map[string]*psstructs.Attribute{ 34 "memory": psstructs.NewIntAttribute(4, psstructs.UnitGiB), 35 }, 36 Instances: []*structs.NodeDevice{ 37 { 38 ID: uuid.Generate(), 39 Healthy: true, 40 }, 41 { 42 ID: uuid.Generate(), 43 Healthy: false, 44 }, 45 }, 46 }) 47 return n 48 } 49 50 // multipleNvidiaNode returns a node containing multiple nvidia device types. 51 func multipleNvidiaNode() *structs.Node { 52 n := mock.NvidiaNode() 53 n.NodeResources.Devices = append(n.NodeResources.Devices, &structs.NodeDeviceResource{ 54 Type: "gpu", 55 Vendor: "nvidia", 56 Name: "2080ti", 57 Attributes: map[string]*psstructs.Attribute{ 58 "memory": psstructs.NewIntAttribute(11, psstructs.UnitGiB), 59 "cuda_cores": psstructs.NewIntAttribute(4352, ""), 60 "graphics_clock": psstructs.NewIntAttribute(1350, psstructs.UnitMHz), 61 "memory_bandwidth": psstructs.NewIntAttribute(14, psstructs.UnitGBPerS), 62 }, 63 Instances: []*structs.NodeDevice{ 64 { 65 ID: uuid.Generate(), 66 Healthy: true, 67 }, 68 { 69 ID: uuid.Generate(), 70 Healthy: true, 71 }, 72 }, 73 }) 74 return n 75 76 } 77 78 // collectInstanceIDs returns the IDs of the device instances 79 func collectInstanceIDs(devices ...*structs.NodeDeviceResource) []string { 80 var out []string 81 for _, d := range devices { 82 for _, i := range d.Instances { 83 out = append(out, i.ID) 84 } 85 } 86 return out 87 } 88 89 // Test that asking for a device that isn't fully specified works. 90 func TestDeviceAllocator_Allocate_GenericRequest(t *testing.T) { 91 require := require.New(t) 92 _, ctx := testContext(t) 93 n := devNode() 94 d := newDeviceAllocator(ctx, n) 95 require.NotNil(d) 96 97 // Build the request 98 ask := deviceRequest("gpu", 1, nil, nil) 99 100 out, score, err := d.AssignDevice(ask) 101 require.NotNil(out) 102 require.Zero(score) 103 require.NoError(err) 104 105 // Check that we got the nvidia device 106 require.Len(out.DeviceIDs, 1) 107 require.Contains(collectInstanceIDs(n.NodeResources.Devices[0]), out.DeviceIDs[0]) 108 } 109 110 // Test that asking for a device that is fully specified works. 111 func TestDeviceAllocator_Allocate_FullyQualifiedRequest(t *testing.T) { 112 require := require.New(t) 113 _, ctx := testContext(t) 114 n := devNode() 115 d := newDeviceAllocator(ctx, n) 116 require.NotNil(d) 117 118 // Build the request 119 ask := deviceRequest("intel/fpga/F100", 1, nil, nil) 120 121 out, score, err := d.AssignDevice(ask) 122 require.NotNil(out) 123 require.Zero(score) 124 require.NoError(err) 125 126 // Check that we got the nvidia device 127 require.Len(out.DeviceIDs, 1) 128 require.Contains(collectInstanceIDs(n.NodeResources.Devices[1]), out.DeviceIDs[0]) 129 } 130 131 // Test that asking for a device with too much count doesn't place 132 func TestDeviceAllocator_Allocate_NotEnoughInstances(t *testing.T) { 133 require := require.New(t) 134 _, ctx := testContext(t) 135 n := devNode() 136 d := newDeviceAllocator(ctx, n) 137 require.NotNil(d) 138 139 // Build the request 140 ask := deviceRequest("gpu", 4, nil, nil) 141 142 out, _, err := d.AssignDevice(ask) 143 require.Nil(out) 144 require.Error(err) 145 require.Contains(err.Error(), "no devices match request") 146 } 147 148 // Test that asking for a device with constraints works 149 func TestDeviceAllocator_Allocate_Constraints(t *testing.T) { 150 n := multipleNvidiaNode() 151 nvidia0 := n.NodeResources.Devices[0] 152 nvidia1 := n.NodeResources.Devices[1] 153 154 cases := []struct { 155 Name string 156 Constraints []*structs.Constraint 157 ExpectedDevice *structs.NodeDeviceResource 158 NoPlacement bool 159 }{ 160 { 161 Name: "gpu", 162 Constraints: []*structs.Constraint{ 163 { 164 LTarget: "${device.attr.cuda_cores}", 165 Operand: ">", 166 RTarget: "4000", 167 }, 168 }, 169 ExpectedDevice: nvidia1, 170 }, 171 { 172 Name: "gpu", 173 Constraints: []*structs.Constraint{ 174 { 175 LTarget: "${device.attr.cuda_cores}", 176 Operand: "<", 177 RTarget: "4000", 178 }, 179 }, 180 ExpectedDevice: nvidia0, 181 }, 182 { 183 Name: "nvidia/gpu", 184 Constraints: []*structs.Constraint{ 185 // First two are shared across both devices 186 { 187 LTarget: "${device.attr.memory_bandwidth}", 188 Operand: ">", 189 RTarget: "10 GB/s", 190 }, 191 { 192 LTarget: "${device.attr.memory}", 193 Operand: "is", 194 RTarget: "11264 MiB", 195 }, 196 { 197 LTarget: "${device.attr.graphics_clock}", 198 Operand: ">", 199 RTarget: "1.4 GHz", 200 }, 201 }, 202 ExpectedDevice: nvidia0, 203 }, 204 { 205 Name: "intel/gpu", 206 NoPlacement: true, 207 }, 208 { 209 Name: "nvidia/gpu", 210 Constraints: []*structs.Constraint{ 211 { 212 LTarget: "${device.attr.memory_bandwidth}", 213 Operand: ">", 214 RTarget: "10 GB/s", 215 }, 216 { 217 LTarget: "${device.attr.memory}", 218 Operand: "is", 219 RTarget: "11264 MiB", 220 }, 221 // Rules both out 222 { 223 LTarget: "${device.attr.graphics_clock}", 224 Operand: ">", 225 RTarget: "2.4 GHz", 226 }, 227 }, 228 NoPlacement: true, 229 }, 230 } 231 232 for _, c := range cases { 233 t.Run(c.Name, func(t *testing.T) { 234 require := require.New(t) 235 _, ctx := testContext(t) 236 d := newDeviceAllocator(ctx, n) 237 require.NotNil(d) 238 239 // Build the request 240 ask := deviceRequest(c.Name, 1, c.Constraints, nil) 241 242 out, score, err := d.AssignDevice(ask) 243 if c.NoPlacement { 244 require.Nil(out) 245 } else { 246 require.NotNil(out) 247 require.Zero(score) 248 require.NoError(err) 249 250 // Check that we got the nvidia device 251 require.Len(out.DeviceIDs, 1) 252 require.Contains(collectInstanceIDs(c.ExpectedDevice), out.DeviceIDs[0]) 253 } 254 }) 255 } 256 } 257 258 // Test that asking for a device with affinities works 259 func TestDeviceAllocator_Allocate_Affinities(t *testing.T) { 260 n := multipleNvidiaNode() 261 nvidia0 := n.NodeResources.Devices[0] 262 nvidia1 := n.NodeResources.Devices[1] 263 264 cases := []struct { 265 Name string 266 Affinities []*structs.Affinity 267 ExpectedDevice *structs.NodeDeviceResource 268 ZeroScore bool 269 }{ 270 { 271 Name: "gpu", 272 Affinities: []*structs.Affinity{ 273 { 274 LTarget: "${device.attr.cuda_cores}", 275 Operand: ">", 276 RTarget: "4000", 277 Weight: 60, 278 }, 279 }, 280 ExpectedDevice: nvidia1, 281 }, 282 { 283 Name: "gpu", 284 Affinities: []*structs.Affinity{ 285 { 286 LTarget: "${device.attr.cuda_cores}", 287 Operand: "<", 288 RTarget: "4000", 289 Weight: 10, 290 }, 291 }, 292 ExpectedDevice: nvidia0, 293 }, 294 { 295 Name: "gpu", 296 Affinities: []*structs.Affinity{ 297 { 298 LTarget: "${device.attr.cuda_cores}", 299 Operand: ">", 300 RTarget: "4000", 301 Weight: -20, 302 }, 303 }, 304 ZeroScore: true, 305 ExpectedDevice: nvidia0, 306 }, 307 { 308 Name: "nvidia/gpu", 309 Affinities: []*structs.Affinity{ 310 // First two are shared across both devices 311 { 312 LTarget: "${device.attr.memory_bandwidth}", 313 Operand: ">", 314 RTarget: "10 GB/s", 315 Weight: 20, 316 }, 317 { 318 LTarget: "${device.attr.memory}", 319 Operand: "is", 320 RTarget: "11264 MiB", 321 Weight: 20, 322 }, 323 { 324 LTarget: "${device.attr.graphics_clock}", 325 Operand: ">", 326 RTarget: "1.4 GHz", 327 Weight: 90, 328 }, 329 }, 330 ExpectedDevice: nvidia0, 331 }, 332 } 333 334 for _, c := range cases { 335 t.Run(c.Name, func(t *testing.T) { 336 require := require.New(t) 337 _, ctx := testContext(t) 338 d := newDeviceAllocator(ctx, n) 339 require.NotNil(d) 340 341 // Build the request 342 ask := deviceRequest(c.Name, 1, nil, c.Affinities) 343 344 out, score, err := d.AssignDevice(ask) 345 require.NotNil(out) 346 require.NoError(err) 347 if c.ZeroScore { 348 require.Zero(score) 349 } else { 350 require.NotZero(score) 351 } 352 353 // Check that we got the nvidia device 354 require.Len(out.DeviceIDs, 1) 355 require.Contains(collectInstanceIDs(c.ExpectedDevice), out.DeviceIDs[0]) 356 }) 357 } 358 }