github.com/adityamillind98/nomad@v0.11.8/scheduler/device_test.go (about)

     1  package scheduler
     2  
     3  import (
     4  	"testing"
     5  
     6  	"github.com/hashicorp/nomad/helper/uuid"
     7  	"github.com/hashicorp/nomad/nomad/mock"
     8  	"github.com/hashicorp/nomad/nomad/structs"
     9  	psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    10  	"github.com/stretchr/testify/require"
    11  )
    12  
    13  // deviceRequest takes the name, count and potential constraints and affinities
    14  // and returns a device request.
    15  func deviceRequest(name string, count uint64,
    16  	constraints []*structs.Constraint, affinities []*structs.Affinity) *structs.RequestedDevice {
    17  	return &structs.RequestedDevice{
    18  		Name:        name,
    19  		Count:       count,
    20  		Constraints: constraints,
    21  		Affinities:  affinities,
    22  	}
    23  }
    24  
    25  // devNode returns a node containing two devices, an nvidia gpu and an intel
    26  // FPGA.
    27  func devNode() *structs.Node {
    28  	n := mock.NvidiaNode()
    29  	n.NodeResources.Devices = append(n.NodeResources.Devices, &structs.NodeDeviceResource{
    30  		Type:   "fpga",
    31  		Vendor: "intel",
    32  		Name:   "F100",
    33  		Attributes: map[string]*psstructs.Attribute{
    34  			"memory": psstructs.NewIntAttribute(4, psstructs.UnitGiB),
    35  		},
    36  		Instances: []*structs.NodeDevice{
    37  			{
    38  				ID:      uuid.Generate(),
    39  				Healthy: true,
    40  			},
    41  			{
    42  				ID:      uuid.Generate(),
    43  				Healthy: false,
    44  			},
    45  		},
    46  	})
    47  	return n
    48  }
    49  
    50  // multipleNvidiaNode returns a node containing multiple nvidia device types.
    51  func multipleNvidiaNode() *structs.Node {
    52  	n := mock.NvidiaNode()
    53  	n.NodeResources.Devices = append(n.NodeResources.Devices, &structs.NodeDeviceResource{
    54  		Type:   "gpu",
    55  		Vendor: "nvidia",
    56  		Name:   "2080ti",
    57  		Attributes: map[string]*psstructs.Attribute{
    58  			"memory":           psstructs.NewIntAttribute(11, psstructs.UnitGiB),
    59  			"cuda_cores":       psstructs.NewIntAttribute(4352, ""),
    60  			"graphics_clock":   psstructs.NewIntAttribute(1350, psstructs.UnitMHz),
    61  			"memory_bandwidth": psstructs.NewIntAttribute(14, psstructs.UnitGBPerS),
    62  		},
    63  		Instances: []*structs.NodeDevice{
    64  			{
    65  				ID:      uuid.Generate(),
    66  				Healthy: true,
    67  			},
    68  			{
    69  				ID:      uuid.Generate(),
    70  				Healthy: true,
    71  			},
    72  		},
    73  	})
    74  	return n
    75  
    76  }
    77  
    78  // collectInstanceIDs returns the IDs of the device instances
    79  func collectInstanceIDs(devices ...*structs.NodeDeviceResource) []string {
    80  	var out []string
    81  	for _, d := range devices {
    82  		for _, i := range d.Instances {
    83  			out = append(out, i.ID)
    84  		}
    85  	}
    86  	return out
    87  }
    88  
    89  // Test that asking for a device that isn't fully specified works.
    90  func TestDeviceAllocator_Allocate_GenericRequest(t *testing.T) {
    91  	require := require.New(t)
    92  	_, ctx := testContext(t)
    93  	n := devNode()
    94  	d := newDeviceAllocator(ctx, n)
    95  	require.NotNil(d)
    96  
    97  	// Build the request
    98  	ask := deviceRequest("gpu", 1, nil, nil)
    99  
   100  	out, score, err := d.AssignDevice(ask)
   101  	require.NotNil(out)
   102  	require.Zero(score)
   103  	require.NoError(err)
   104  
   105  	// Check that we got the nvidia device
   106  	require.Len(out.DeviceIDs, 1)
   107  	require.Contains(collectInstanceIDs(n.NodeResources.Devices[0]), out.DeviceIDs[0])
   108  }
   109  
   110  // Test that asking for a device that is fully specified works.
   111  func TestDeviceAllocator_Allocate_FullyQualifiedRequest(t *testing.T) {
   112  	require := require.New(t)
   113  	_, ctx := testContext(t)
   114  	n := devNode()
   115  	d := newDeviceAllocator(ctx, n)
   116  	require.NotNil(d)
   117  
   118  	// Build the request
   119  	ask := deviceRequest("intel/fpga/F100", 1, nil, nil)
   120  
   121  	out, score, err := d.AssignDevice(ask)
   122  	require.NotNil(out)
   123  	require.Zero(score)
   124  	require.NoError(err)
   125  
   126  	// Check that we got the nvidia device
   127  	require.Len(out.DeviceIDs, 1)
   128  	require.Contains(collectInstanceIDs(n.NodeResources.Devices[1]), out.DeviceIDs[0])
   129  }
   130  
   131  // Test that asking for a device with too much count doesn't place
   132  func TestDeviceAllocator_Allocate_NotEnoughInstances(t *testing.T) {
   133  	require := require.New(t)
   134  	_, ctx := testContext(t)
   135  	n := devNode()
   136  	d := newDeviceAllocator(ctx, n)
   137  	require.NotNil(d)
   138  
   139  	// Build the request
   140  	ask := deviceRequest("gpu", 4, nil, nil)
   141  
   142  	out, _, err := d.AssignDevice(ask)
   143  	require.Nil(out)
   144  	require.Error(err)
   145  	require.Contains(err.Error(), "no devices match request")
   146  }
   147  
   148  // Test that asking for a device with constraints works
   149  func TestDeviceAllocator_Allocate_Constraints(t *testing.T) {
   150  	n := multipleNvidiaNode()
   151  	nvidia0 := n.NodeResources.Devices[0]
   152  	nvidia1 := n.NodeResources.Devices[1]
   153  
   154  	cases := []struct {
   155  		Name           string
   156  		Constraints    []*structs.Constraint
   157  		ExpectedDevice *structs.NodeDeviceResource
   158  		NoPlacement    bool
   159  	}{
   160  		{
   161  			Name: "gpu",
   162  			Constraints: []*structs.Constraint{
   163  				{
   164  					LTarget: "${device.attr.cuda_cores}",
   165  					Operand: ">",
   166  					RTarget: "4000",
   167  				},
   168  			},
   169  			ExpectedDevice: nvidia1,
   170  		},
   171  		{
   172  			Name: "gpu",
   173  			Constraints: []*structs.Constraint{
   174  				{
   175  					LTarget: "${device.attr.cuda_cores}",
   176  					Operand: "<",
   177  					RTarget: "4000",
   178  				},
   179  			},
   180  			ExpectedDevice: nvidia0,
   181  		},
   182  		{
   183  			Name: "nvidia/gpu",
   184  			Constraints: []*structs.Constraint{
   185  				// First two are shared across both devices
   186  				{
   187  					LTarget: "${device.attr.memory_bandwidth}",
   188  					Operand: ">",
   189  					RTarget: "10 GB/s",
   190  				},
   191  				{
   192  					LTarget: "${device.attr.memory}",
   193  					Operand: "is",
   194  					RTarget: "11264 MiB",
   195  				},
   196  				{
   197  					LTarget: "${device.attr.graphics_clock}",
   198  					Operand: ">",
   199  					RTarget: "1.4 GHz",
   200  				},
   201  			},
   202  			ExpectedDevice: nvidia0,
   203  		},
   204  		{
   205  			Name:        "intel/gpu",
   206  			NoPlacement: true,
   207  		},
   208  		{
   209  			Name: "nvidia/gpu",
   210  			Constraints: []*structs.Constraint{
   211  				{
   212  					LTarget: "${device.attr.memory_bandwidth}",
   213  					Operand: ">",
   214  					RTarget: "10 GB/s",
   215  				},
   216  				{
   217  					LTarget: "${device.attr.memory}",
   218  					Operand: "is",
   219  					RTarget: "11264 MiB",
   220  				},
   221  				// Rules both out
   222  				{
   223  					LTarget: "${device.attr.graphics_clock}",
   224  					Operand: ">",
   225  					RTarget: "2.4 GHz",
   226  				},
   227  			},
   228  			NoPlacement: true,
   229  		},
   230  	}
   231  
   232  	for _, c := range cases {
   233  		t.Run(c.Name, func(t *testing.T) {
   234  			require := require.New(t)
   235  			_, ctx := testContext(t)
   236  			d := newDeviceAllocator(ctx, n)
   237  			require.NotNil(d)
   238  
   239  			// Build the request
   240  			ask := deviceRequest(c.Name, 1, c.Constraints, nil)
   241  
   242  			out, score, err := d.AssignDevice(ask)
   243  			if c.NoPlacement {
   244  				require.Nil(out)
   245  			} else {
   246  				require.NotNil(out)
   247  				require.Zero(score)
   248  				require.NoError(err)
   249  
   250  				// Check that we got the nvidia device
   251  				require.Len(out.DeviceIDs, 1)
   252  				require.Contains(collectInstanceIDs(c.ExpectedDevice), out.DeviceIDs[0])
   253  			}
   254  		})
   255  	}
   256  }
   257  
   258  // Test that asking for a device with affinities works
   259  func TestDeviceAllocator_Allocate_Affinities(t *testing.T) {
   260  	n := multipleNvidiaNode()
   261  	nvidia0 := n.NodeResources.Devices[0]
   262  	nvidia1 := n.NodeResources.Devices[1]
   263  
   264  	cases := []struct {
   265  		Name           string
   266  		Affinities     []*structs.Affinity
   267  		ExpectedDevice *structs.NodeDeviceResource
   268  		ZeroScore      bool
   269  	}{
   270  		{
   271  			Name: "gpu",
   272  			Affinities: []*structs.Affinity{
   273  				{
   274  					LTarget: "${device.attr.cuda_cores}",
   275  					Operand: ">",
   276  					RTarget: "4000",
   277  					Weight:  60,
   278  				},
   279  			},
   280  			ExpectedDevice: nvidia1,
   281  		},
   282  		{
   283  			Name: "gpu",
   284  			Affinities: []*structs.Affinity{
   285  				{
   286  					LTarget: "${device.attr.cuda_cores}",
   287  					Operand: "<",
   288  					RTarget: "4000",
   289  					Weight:  10,
   290  				},
   291  			},
   292  			ExpectedDevice: nvidia0,
   293  		},
   294  		{
   295  			Name: "gpu",
   296  			Affinities: []*structs.Affinity{
   297  				{
   298  					LTarget: "${device.attr.cuda_cores}",
   299  					Operand: ">",
   300  					RTarget: "4000",
   301  					Weight:  -20,
   302  				},
   303  			},
   304  			ZeroScore:      true,
   305  			ExpectedDevice: nvidia0,
   306  		},
   307  		{
   308  			Name: "nvidia/gpu",
   309  			Affinities: []*structs.Affinity{
   310  				// First two are shared across both devices
   311  				{
   312  					LTarget: "${device.attr.memory_bandwidth}",
   313  					Operand: ">",
   314  					RTarget: "10 GB/s",
   315  					Weight:  20,
   316  				},
   317  				{
   318  					LTarget: "${device.attr.memory}",
   319  					Operand: "is",
   320  					RTarget: "11264 MiB",
   321  					Weight:  20,
   322  				},
   323  				{
   324  					LTarget: "${device.attr.graphics_clock}",
   325  					Operand: ">",
   326  					RTarget: "1.4 GHz",
   327  					Weight:  90,
   328  				},
   329  			},
   330  			ExpectedDevice: nvidia0,
   331  		},
   332  	}
   333  
   334  	for _, c := range cases {
   335  		t.Run(c.Name, func(t *testing.T) {
   336  			require := require.New(t)
   337  			_, ctx := testContext(t)
   338  			d := newDeviceAllocator(ctx, n)
   339  			require.NotNil(d)
   340  
   341  			// Build the request
   342  			ask := deviceRequest(c.Name, 1, nil, c.Affinities)
   343  
   344  			out, score, err := d.AssignDevice(ask)
   345  			require.NotNil(out)
   346  			require.NoError(err)
   347  			if c.ZeroScore {
   348  				require.Zero(score)
   349  			} else {
   350  				require.NotZero(score)
   351  			}
   352  
   353  			// Check that we got the nvidia device
   354  			require.Len(out.DeviceIDs, 1)
   355  			require.Contains(collectInstanceIDs(c.ExpectedDevice), out.DeviceIDs[0])
   356  		})
   357  	}
   358  }