github.com/uchennaokeke444/nomad@v0.11.8/nomad/structs/devices_test.go (about) 1 package structs 2 3 import ( 4 "testing" 5 6 "github.com/hashicorp/nomad/helper/uuid" 7 psstructs "github.com/hashicorp/nomad/plugins/shared/structs" 8 "github.com/stretchr/testify/require" 9 ) 10 11 // nvidiaAllocatedDevice returns an allocated nvidia device 12 func nvidiaAllocatedDevice() *AllocatedDeviceResource { 13 return &AllocatedDeviceResource{ 14 Type: "gpu", 15 Vendor: "nvidia", 16 Name: "1080ti", 17 DeviceIDs: []string{uuid.Generate()}, 18 } 19 } 20 21 // nvidiaAlloc returns an allocation that has been assigned an nvidia device. 22 func nvidiaAlloc() *Allocation { 23 a := MockAlloc() 24 a.AllocatedResources.Tasks["web"].Devices = []*AllocatedDeviceResource{ 25 nvidiaAllocatedDevice(), 26 } 27 return a 28 } 29 30 // devNode returns a node containing two devices, an nvidia gpu and an intel 31 // FPGA. 32 func devNode() *Node { 33 n := MockNvidiaNode() 34 n.NodeResources.Devices = append(n.NodeResources.Devices, &NodeDeviceResource{ 35 Type: "fpga", 36 Vendor: "intel", 37 Name: "F100", 38 Attributes: map[string]*psstructs.Attribute{ 39 "memory": psstructs.NewIntAttribute(4, psstructs.UnitGiB), 40 }, 41 Instances: []*NodeDevice{ 42 { 43 ID: uuid.Generate(), 44 Healthy: true, 45 }, 46 { 47 ID: uuid.Generate(), 48 Healthy: false, 49 }, 50 }, 51 }) 52 return n 53 } 54 55 // Make sure that the device accounter works even if the node has no devices 56 func TestDeviceAccounter_AddAllocs_NoDeviceNode(t *testing.T) { 57 require := require.New(t) 58 n := MockNode() 59 d := NewDeviceAccounter(n) 60 require.NotNil(d) 61 62 // Create three allocations, one with a device, one without, and one 63 // terminal 64 a1, a2, a3 := MockAlloc(), nvidiaAlloc(), MockAlloc() 65 allocs := []*Allocation{a1, a2, a3} 66 a3.DesiredStatus = AllocDesiredStatusStop 67 68 require.False(d.AddAllocs(allocs)) 69 require.Len(d.Devices, 0) 70 } 71 72 // Add allocs to a node with a device 73 func TestDeviceAccounter_AddAllocs(t *testing.T) { 74 require := require.New(t) 75 n := devNode() 76 d := NewDeviceAccounter(n) 77 require.NotNil(d) 78 79 // Create three allocations, one with a device, one without, and one 80 // terminal 81 a1, a2, a3 := MockAlloc(), nvidiaAlloc(), MockAlloc() 82 83 nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID 84 intelDev0ID := n.NodeResources.Devices[1].Instances[0].ID 85 a2.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} 86 87 allocs := []*Allocation{a1, a2, a3} 88 a3.DesiredStatus = AllocDesiredStatusStop 89 90 require.False(d.AddAllocs(allocs)) 91 require.Len(d.Devices, 2) 92 93 // Check that we have two devices for nvidia and that one of them is used 94 nvidiaDevice, ok := d.Devices[*n.NodeResources.Devices[0].ID()] 95 require.True(ok) 96 require.Len(nvidiaDevice.Instances, 2) 97 require.Contains(nvidiaDevice.Instances, nvidiaDev0ID) 98 require.Equal(1, nvidiaDevice.Instances[nvidiaDev0ID]) 99 100 // Check only one instance of the intel device is set up since the other is 101 // unhealthy 102 intelDevice, ok := d.Devices[*n.NodeResources.Devices[1].ID()] 103 require.True(ok) 104 require.Len(intelDevice.Instances, 1) 105 require.Equal(0, intelDevice.Instances[intelDev0ID]) 106 } 107 108 // Add alloc with unknown ID to a node with devices. This tests that we can 109 // operate on previous allocs even if the device has changed to unhealthy and we 110 // don't track it 111 func TestDeviceAccounter_AddAllocs_UnknownID(t *testing.T) { 112 require := require.New(t) 113 n := devNode() 114 d := NewDeviceAccounter(n) 115 require.NotNil(d) 116 117 // Create three allocations, one with a device, one without, and one 118 // terminal 119 a1, a2, a3 := MockAlloc(), nvidiaAlloc(), MockAlloc() 120 121 // a2 will have a random ID since it is generated 122 123 allocs := []*Allocation{a1, a2, a3} 124 a3.DesiredStatus = AllocDesiredStatusStop 125 126 require.False(d.AddAllocs(allocs)) 127 require.Len(d.Devices, 2) 128 129 // Check that we have two devices for nvidia and that one of them is used 130 nvidiaDevice, ok := d.Devices[*n.NodeResources.Devices[0].ID()] 131 require.True(ok) 132 require.Len(nvidiaDevice.Instances, 2) 133 for _, v := range nvidiaDevice.Instances { 134 require.Equal(0, v) 135 } 136 } 137 138 // Test that collision detection works 139 func TestDeviceAccounter_AddAllocs_Collision(t *testing.T) { 140 require := require.New(t) 141 n := devNode() 142 d := NewDeviceAccounter(n) 143 require.NotNil(d) 144 145 // Create two allocations, both with the same device 146 a1, a2 := nvidiaAlloc(), nvidiaAlloc() 147 148 nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID 149 a1.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} 150 a2.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} 151 152 allocs := []*Allocation{a1, a2} 153 require.True(d.AddAllocs(allocs)) 154 } 155 156 // Make sure that the device allocator works even if the node has no devices 157 func TestDeviceAccounter_AddReserved_NoDeviceNode(t *testing.T) { 158 require := require.New(t) 159 n := MockNode() 160 d := NewDeviceAccounter(n) 161 require.NotNil(d) 162 163 require.False(d.AddReserved(nvidiaAllocatedDevice())) 164 require.Len(d.Devices, 0) 165 } 166 167 // Add reserved to a node with a device 168 func TestDeviceAccounter_AddReserved(t *testing.T) { 169 require := require.New(t) 170 n := devNode() 171 d := NewDeviceAccounter(n) 172 require.NotNil(d) 173 174 nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID 175 intelDev0ID := n.NodeResources.Devices[1].Instances[0].ID 176 177 res := nvidiaAllocatedDevice() 178 res.DeviceIDs = []string{nvidiaDev0ID} 179 180 require.False(d.AddReserved(res)) 181 require.Len(d.Devices, 2) 182 183 // Check that we have two devices for nvidia and that one of them is used 184 nvidiaDevice, ok := d.Devices[*n.NodeResources.Devices[0].ID()] 185 require.True(ok) 186 require.Len(nvidiaDevice.Instances, 2) 187 require.Contains(nvidiaDevice.Instances, nvidiaDev0ID) 188 require.Equal(1, nvidiaDevice.Instances[nvidiaDev0ID]) 189 190 // Check only one instance of the intel device is set up since the other is 191 // unhealthy 192 intelDevice, ok := d.Devices[*n.NodeResources.Devices[1].ID()] 193 require.True(ok) 194 require.Len(intelDevice.Instances, 1) 195 require.Equal(0, intelDevice.Instances[intelDev0ID]) 196 } 197 198 // Test that collision detection works 199 func TestDeviceAccounter_AddReserved_Collision(t *testing.T) { 200 require := require.New(t) 201 n := devNode() 202 d := NewDeviceAccounter(n) 203 require.NotNil(d) 204 205 nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID 206 207 // Create an alloc with nvidia 208 a1 := nvidiaAlloc() 209 a1.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID} 210 require.False(d.AddAllocs([]*Allocation{a1})) 211 212 // Reserve the same device 213 res := nvidiaAllocatedDevice() 214 res.DeviceIDs = []string{nvidiaDev0ID} 215 require.True(d.AddReserved(res)) 216 }