github.com/uchennaokeke444/nomad@v0.11.8/nomad/structs/devices_test.go (about)

     1  package structs
     2  
     3  import (
     4  	"testing"
     5  
     6  	"github.com/hashicorp/nomad/helper/uuid"
     7  	psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
     8  	"github.com/stretchr/testify/require"
     9  )
    10  
    11  // nvidiaAllocatedDevice returns an allocated nvidia device
    12  func nvidiaAllocatedDevice() *AllocatedDeviceResource {
    13  	return &AllocatedDeviceResource{
    14  		Type:      "gpu",
    15  		Vendor:    "nvidia",
    16  		Name:      "1080ti",
    17  		DeviceIDs: []string{uuid.Generate()},
    18  	}
    19  }
    20  
    21  // nvidiaAlloc returns an allocation that has been assigned an nvidia device.
    22  func nvidiaAlloc() *Allocation {
    23  	a := MockAlloc()
    24  	a.AllocatedResources.Tasks["web"].Devices = []*AllocatedDeviceResource{
    25  		nvidiaAllocatedDevice(),
    26  	}
    27  	return a
    28  }
    29  
    30  // devNode returns a node containing two devices, an nvidia gpu and an intel
    31  // FPGA.
    32  func devNode() *Node {
    33  	n := MockNvidiaNode()
    34  	n.NodeResources.Devices = append(n.NodeResources.Devices, &NodeDeviceResource{
    35  		Type:   "fpga",
    36  		Vendor: "intel",
    37  		Name:   "F100",
    38  		Attributes: map[string]*psstructs.Attribute{
    39  			"memory": psstructs.NewIntAttribute(4, psstructs.UnitGiB),
    40  		},
    41  		Instances: []*NodeDevice{
    42  			{
    43  				ID:      uuid.Generate(),
    44  				Healthy: true,
    45  			},
    46  			{
    47  				ID:      uuid.Generate(),
    48  				Healthy: false,
    49  			},
    50  		},
    51  	})
    52  	return n
    53  }
    54  
    55  // Make sure that the device accounter works even if the node has no devices
    56  func TestDeviceAccounter_AddAllocs_NoDeviceNode(t *testing.T) {
    57  	require := require.New(t)
    58  	n := MockNode()
    59  	d := NewDeviceAccounter(n)
    60  	require.NotNil(d)
    61  
    62  	// Create three allocations, one with a device, one without, and one
    63  	// terminal
    64  	a1, a2, a3 := MockAlloc(), nvidiaAlloc(), MockAlloc()
    65  	allocs := []*Allocation{a1, a2, a3}
    66  	a3.DesiredStatus = AllocDesiredStatusStop
    67  
    68  	require.False(d.AddAllocs(allocs))
    69  	require.Len(d.Devices, 0)
    70  }
    71  
    72  // Add allocs to a node with a device
    73  func TestDeviceAccounter_AddAllocs(t *testing.T) {
    74  	require := require.New(t)
    75  	n := devNode()
    76  	d := NewDeviceAccounter(n)
    77  	require.NotNil(d)
    78  
    79  	// Create three allocations, one with a device, one without, and one
    80  	// terminal
    81  	a1, a2, a3 := MockAlloc(), nvidiaAlloc(), MockAlloc()
    82  
    83  	nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID
    84  	intelDev0ID := n.NodeResources.Devices[1].Instances[0].ID
    85  	a2.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID}
    86  
    87  	allocs := []*Allocation{a1, a2, a3}
    88  	a3.DesiredStatus = AllocDesiredStatusStop
    89  
    90  	require.False(d.AddAllocs(allocs))
    91  	require.Len(d.Devices, 2)
    92  
    93  	// Check that we have two devices for nvidia and that one of them is used
    94  	nvidiaDevice, ok := d.Devices[*n.NodeResources.Devices[0].ID()]
    95  	require.True(ok)
    96  	require.Len(nvidiaDevice.Instances, 2)
    97  	require.Contains(nvidiaDevice.Instances, nvidiaDev0ID)
    98  	require.Equal(1, nvidiaDevice.Instances[nvidiaDev0ID])
    99  
   100  	// Check only one instance of the intel device is set up since the other is
   101  	// unhealthy
   102  	intelDevice, ok := d.Devices[*n.NodeResources.Devices[1].ID()]
   103  	require.True(ok)
   104  	require.Len(intelDevice.Instances, 1)
   105  	require.Equal(0, intelDevice.Instances[intelDev0ID])
   106  }
   107  
   108  // Add alloc with unknown ID to a node with devices. This tests that we can
   109  // operate on previous allocs even if the device has changed to unhealthy and we
   110  // don't track it
   111  func TestDeviceAccounter_AddAllocs_UnknownID(t *testing.T) {
   112  	require := require.New(t)
   113  	n := devNode()
   114  	d := NewDeviceAccounter(n)
   115  	require.NotNil(d)
   116  
   117  	// Create three allocations, one with a device, one without, and one
   118  	// terminal
   119  	a1, a2, a3 := MockAlloc(), nvidiaAlloc(), MockAlloc()
   120  
   121  	// a2 will have a random ID since it is generated
   122  
   123  	allocs := []*Allocation{a1, a2, a3}
   124  	a3.DesiredStatus = AllocDesiredStatusStop
   125  
   126  	require.False(d.AddAllocs(allocs))
   127  	require.Len(d.Devices, 2)
   128  
   129  	// Check that we have two devices for nvidia and that one of them is used
   130  	nvidiaDevice, ok := d.Devices[*n.NodeResources.Devices[0].ID()]
   131  	require.True(ok)
   132  	require.Len(nvidiaDevice.Instances, 2)
   133  	for _, v := range nvidiaDevice.Instances {
   134  		require.Equal(0, v)
   135  	}
   136  }
   137  
   138  // Test that collision detection works
   139  func TestDeviceAccounter_AddAllocs_Collision(t *testing.T) {
   140  	require := require.New(t)
   141  	n := devNode()
   142  	d := NewDeviceAccounter(n)
   143  	require.NotNil(d)
   144  
   145  	// Create two allocations, both with the same device
   146  	a1, a2 := nvidiaAlloc(), nvidiaAlloc()
   147  
   148  	nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID
   149  	a1.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID}
   150  	a2.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID}
   151  
   152  	allocs := []*Allocation{a1, a2}
   153  	require.True(d.AddAllocs(allocs))
   154  }
   155  
   156  // Make sure that the device allocator works even if the node has no devices
   157  func TestDeviceAccounter_AddReserved_NoDeviceNode(t *testing.T) {
   158  	require := require.New(t)
   159  	n := MockNode()
   160  	d := NewDeviceAccounter(n)
   161  	require.NotNil(d)
   162  
   163  	require.False(d.AddReserved(nvidiaAllocatedDevice()))
   164  	require.Len(d.Devices, 0)
   165  }
   166  
   167  // Add reserved to a node with a device
   168  func TestDeviceAccounter_AddReserved(t *testing.T) {
   169  	require := require.New(t)
   170  	n := devNode()
   171  	d := NewDeviceAccounter(n)
   172  	require.NotNil(d)
   173  
   174  	nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID
   175  	intelDev0ID := n.NodeResources.Devices[1].Instances[0].ID
   176  
   177  	res := nvidiaAllocatedDevice()
   178  	res.DeviceIDs = []string{nvidiaDev0ID}
   179  
   180  	require.False(d.AddReserved(res))
   181  	require.Len(d.Devices, 2)
   182  
   183  	// Check that we have two devices for nvidia and that one of them is used
   184  	nvidiaDevice, ok := d.Devices[*n.NodeResources.Devices[0].ID()]
   185  	require.True(ok)
   186  	require.Len(nvidiaDevice.Instances, 2)
   187  	require.Contains(nvidiaDevice.Instances, nvidiaDev0ID)
   188  	require.Equal(1, nvidiaDevice.Instances[nvidiaDev0ID])
   189  
   190  	// Check only one instance of the intel device is set up since the other is
   191  	// unhealthy
   192  	intelDevice, ok := d.Devices[*n.NodeResources.Devices[1].ID()]
   193  	require.True(ok)
   194  	require.Len(intelDevice.Instances, 1)
   195  	require.Equal(0, intelDevice.Instances[intelDev0ID])
   196  }
   197  
   198  // Test that collision detection works
   199  func TestDeviceAccounter_AddReserved_Collision(t *testing.T) {
   200  	require := require.New(t)
   201  	n := devNode()
   202  	d := NewDeviceAccounter(n)
   203  	require.NotNil(d)
   204  
   205  	nvidiaDev0ID := n.NodeResources.Devices[0].Instances[0].ID
   206  
   207  	// Create an alloc with nvidia
   208  	a1 := nvidiaAlloc()
   209  	a1.AllocatedResources.Tasks["web"].Devices[0].DeviceIDs = []string{nvidiaDev0ID}
   210  	require.False(d.AddAllocs([]*Allocation{a1}))
   211  
   212  	// Reserve the same device
   213  	res := nvidiaAllocatedDevice()
   214  	res.DeviceIDs = []string{nvidiaDev0ID}
   215  	require.True(d.AddReserved(res))
   216  }