github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/alloc_test.go (about)

     1  // Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License.
     2  
     3  package runner
     4  
     5  import (
     6  	"testing"
     7  
     8  	"github.com/go-stack/stack"
     9  	"github.com/jjeffery/kv" // MIT License
    10  	"github.com/rs/xid"
    11  )
    12  
    13  // This file contains the implementations of tests
    14  // related to resource allocation logic
    15  
    16  // TestCUDATrivialAlloc implements the barest minimum success and failure cases with
    17  // a single resource
    18  //
    19  func TestCUDATrivialAlloc(t *testing.T) {
    20  	id := xid.New().String()
    21  	testAlloc := gpuTracker{
    22  		Allocs: map[string]*GPUTrack{
    23  			id: {
    24  				UUID:       id,
    25  				Slots:      1,
    26  				Mem:        1,
    27  				FreeSlots:  1,
    28  				FreeMem:    1,
    29  				EccFailure: nil,
    30  				Tracking:   map[string]struct{}{},
    31  			},
    32  		},
    33  	}
    34  
    35  	goodAllocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true)
    36  	if err != nil {
    37  		t.Fatal(err)
    38  	}
    39  	// Make sure we have the expected allocation passed back
    40  	if len(goodAllocs) != 1 {
    41  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(goodAllocs)).With("stack", stack.Trace().TrimRuntime()))
    42  	}
    43  
    44  	// Try to allocate a new GPU and make sure it fails
    45  	badAllocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true)
    46  	if len(badAllocs) != 0 {
    47  		t.Fatal(kv.NewError("allocation result should be empty").With("expected_devices", 0).With("actual_devices", len(badAllocs)).With("stack", stack.Trace().TrimRuntime()))
    48  	}
    49  	if err == nil {
    50  		t.Fatal(kv.NewError("allocation result should have failed").With("stack", stack.Trace().TrimRuntime()))
    51  	}
    52  }
    53  
    54  // TestCUDAAggregateAlloc implements the minimal 2 card allocation test
    55  //
    56  func TestCUDAAggregateAlloc(t *testing.T) {
    57  	card1 := xid.New().String()
    58  	card2 := xid.New().String()
    59  
    60  	testAlloc := gpuTracker{
    61  		Allocs: map[string]*GPUTrack{
    62  			card1: {
    63  				UUID:       card1,
    64  				Slots:      1,
    65  				Mem:        1,
    66  				FreeSlots:  1,
    67  				FreeMem:    1,
    68  				EccFailure: nil,
    69  				Tracking:   map[string]struct{}{},
    70  			},
    71  			card2: {
    72  				UUID:       card2,
    73  				Slots:      1,
    74  				Mem:        1,
    75  				FreeSlots:  1,
    76  				FreeMem:    1,
    77  				EccFailure: nil,
    78  				Tracking:   map[string]struct{}{},
    79  			},
    80  		},
    81  	}
    82  
    83  	good1Allocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true)
    84  	if err != nil {
    85  		t.Fatal(err)
    86  	}
    87  	// Make sure we have the expected allocation passed back
    88  	if len(good1Allocs) != 1 {
    89  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime()))
    90  	}
    91  
    92  	good2Allocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true)
    93  	if err != nil {
    94  		t.Fatal(err)
    95  	}
    96  	// Make sure we have the expected allocation passed back
    97  	if len(good2Allocs) != 1 {
    98  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good2Allocs)).With("stack", stack.Trace().TrimRuntime()))
    99  	}
   100  
   101  	for _, anAlloc := range good1Allocs {
   102  		err = testAlloc.ReturnGPU(anAlloc)
   103  		if err != nil {
   104  			t.Fatal(err)
   105  		}
   106  	}
   107  	for _, anAlloc := range good2Allocs {
   108  		err = testAlloc.ReturnGPU(anAlloc)
   109  		if err != nil {
   110  			t.Fatal(err)
   111  		}
   112  	}
   113  
   114  	// maxGPU, maxGPUMem, unit of allocation
   115  	goodAllAllocs, err := testAlloc.AllocGPU(2, 1, []uint{1, 2}, true)
   116  	if err != nil {
   117  		t.Fatal(err)
   118  	}
   119  	// Make sure we have the expected allocation passed back
   120  	if len(goodAllAllocs) != 2 {
   121  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 2).With("actual_devices", len(goodAllAllocs)).With("stack", stack.Trace().TrimRuntime()))
   122  	}
   123  
   124  	for _, anAlloc := range goodAllAllocs {
   125  		err = testAlloc.ReturnGPU(anAlloc)
   126  		if err != nil {
   127  			t.Fatal(err)
   128  		}
   129  	}
   130  
   131  	// Now try an alloc that has already been released to make sure we get an error
   132  	for _, anAlloc := range goodAllAllocs {
   133  		err = testAlloc.ReturnGPU(anAlloc)
   134  		if err == nil {
   135  			t.Fatal(kv.NewError("double release did not fail").With("stack", stack.Trace().TrimRuntime()))
   136  		}
   137  	}
   138  }
   139  
   140  // TestCUDATypicalAlloc implements the multi slot 2 card allocation test
   141  //
   142  func TestCUDATypicalAlloc(t *testing.T) {
   143  	card1 := xid.New().String()
   144  	card2 := xid.New().String()
   145  
   146  	// Test the case of two four slot cards and fit perfectedly into the requested
   147  	// 8 slots
   148  	testAlloc := gpuTracker{
   149  		Allocs: map[string]*GPUTrack{
   150  			card1: {
   151  				UUID:       card1,
   152  				Slots:      4,
   153  				Mem:        2,
   154  				FreeSlots:  4,
   155  				FreeMem:    2,
   156  				EccFailure: nil,
   157  				Tracking:   map[string]struct{}{},
   158  			},
   159  			card2: {
   160  				UUID:       card2,
   161  				Slots:      4,
   162  				Mem:        2,
   163  				FreeSlots:  4,
   164  				FreeMem:    2,
   165  				EccFailure: nil,
   166  				Tracking:   map[string]struct{}{},
   167  			},
   168  		},
   169  	}
   170  
   171  	good1Allocs, err := testAlloc.AllocGPU(8, 2, []uint{8, 4, 2, 1}, true)
   172  	if err != nil {
   173  		t.Fatal(err)
   174  	}
   175  	// Make sure we have the expected allocation passed back
   176  	if len(good1Allocs) != 2 {
   177  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 2).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime()))
   178  	}
   179  
   180  	for _, anAlloc := range good1Allocs {
   181  		err = testAlloc.ReturnGPU(anAlloc)
   182  		if err != nil {
   183  			t.Fatal(err)
   184  		}
   185  	}
   186  
   187  	// Add an 8 slot card to the two 4 slot cards and then do an 8 slot allocation to
   188  	// ensure it finds the most efficient single card allocation
   189  	//
   190  	card3 := &GPUTrack{
   191  		UUID:       xid.New().String(),
   192  		Slots:      8,
   193  		Mem:        2,
   194  		FreeSlots:  8,
   195  		FreeMem:    2,
   196  		EccFailure: nil,
   197  		Tracking:   map[string]struct{}{},
   198  	}
   199  	testAlloc.Allocs[card3.UUID] = card3
   200  
   201  	efficentAllocs, err := testAlloc.AllocGPU(8, 2, []uint{8, 4, 2, 1}, true)
   202  	if err != nil {
   203  		t.Fatal(err)
   204  	}
   205  
   206  	// Make sure we have the expected allocation passed back
   207  	if len(efficentAllocs) != 1 {
   208  		t.Fatal(kv.NewError("multi-allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(efficentAllocs)).With("stack", stack.Trace().TrimRuntime()))
   209  	}
   210  
   211  	for _, anAlloc := range efficentAllocs {
   212  		err = testAlloc.ReturnGPU(anAlloc)
   213  		if err != nil {
   214  			t.Fatal(err)
   215  		}
   216  	}
   217  
   218  	// Take the 8 slot allocation and only allow 4 slot pieces and see what happens
   219  	//
   220  	inefficentAllocs, err := testAlloc.AllocGPU(8, 2, []uint{4, 2, 1}, true)
   221  	if err != nil {
   222  		t.Fatal(err)
   223  	}
   224  
   225  	// Make sure we have the expected allocation passed back
   226  	if len(inefficentAllocs) != 2 {
   227  		t.Fatal(kv.NewError("multi-allocation result was unexpected").With("expected_devices", 2).With("actual_devices", len(inefficentAllocs)).With("stack", stack.Trace().TrimRuntime()))
   228  	}
   229  
   230  	for _, anAlloc := range inefficentAllocs {
   231  		err = testAlloc.ReturnGPU(anAlloc)
   232  		if err != nil {
   233  			t.Fatal(err)
   234  		}
   235  	}
   236  
   237  	// Take the 8 slot allocation and only allow 8 slot pieces and make sure it fails
   238  	// after taking the 8 slot card out of the allocator
   239  	//
   240  	delete(testAlloc.Allocs, card3.UUID)
   241  	inefficentAllocs, err = testAlloc.AllocGPU(8, 2, []uint{8}, true)
   242  	if err == nil {
   243  		t.Fatal(kv.NewError("allocation success was unexpected").With("expected_devices", 0).With("actual_devices", len(inefficentAllocs)).With("stack", stack.Trace().TrimRuntime()))
   244  	}
   245  
   246  	// Make sure we have the expected allocation passed back
   247  	if len(inefficentAllocs) != 0 {
   248  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 0).With("actual_devices", len(inefficentAllocs)).With("stack", stack.Trace().TrimRuntime()))
   249  	}
   250  }
   251  
   252  // TestCUDALargeAlloc implements the multi slot single card allocation test
   253  //
   254  func TestCUDALargeAlloc(t *testing.T) {
   255  	card1 := xid.New().String()
   256  
   257  	// Test the case of one 16 slot card and fit perfectedly into the requested
   258  	// 16 slots
   259  	testAlloc := gpuTracker{
   260  		Allocs: map[string]*GPUTrack{
   261  			card1: {
   262  				UUID:       card1,
   263  				Slots:      16,
   264  				Mem:        2,
   265  				FreeSlots:  16,
   266  				FreeMem:    2,
   267  				EccFailure: nil,
   268  				Tracking:   map[string]struct{}{},
   269  			},
   270  		},
   271  	}
   272  
   273  	good1Allocs, err := testAlloc.AllocGPU(16, 2, []uint{16, 8, 4, 2, 1}, true)
   274  	if err != nil {
   275  		t.Fatal(err)
   276  	}
   277  	// Make sure we have the expected allocation passed back
   278  	if len(good1Allocs) != 1 {
   279  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime()))
   280  	}
   281  
   282  	for _, anAlloc := range good1Allocs {
   283  		err = testAlloc.ReturnGPU(anAlloc)
   284  		if err != nil {
   285  			t.Fatal(err)
   286  		}
   287  	}
   288  
   289  	if good1Allocs, err = testAlloc.AllocGPU(8, 2, []uint{16, 8, 4, 2, 1}, true); err != nil {
   290  		t.Fatal(err)
   291  	}
   292  
   293  	// Make sure we have the expected allocation passed back
   294  	if len(good1Allocs) != 1 {
   295  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime()))
   296  	}
   297  
   298  	for _, anAlloc := range good1Allocs {
   299  		err = testAlloc.ReturnGPU(anAlloc)
   300  		if err != nil {
   301  			t.Fatal(err)
   302  		}
   303  	}
   304  }
   305  
   306  // TestCUDATrivialTestAlloc implements the barest minimum success and failure cases with
   307  // a single resource
   308  //
   309  func TestCUDATrivialTrialAlloc(t *testing.T) {
   310  	id := xid.New().String()
   311  	testAlloc := gpuTracker{
   312  		Allocs: map[string]*GPUTrack{
   313  			id: {
   314  				UUID:       id,
   315  				Slots:      1,
   316  				Mem:        1,
   317  				FreeSlots:  1,
   318  				FreeMem:    1,
   319  				EccFailure: nil,
   320  				Tracking:   map[string]struct{}{},
   321  			},
   322  		},
   323  	}
   324  
   325  	for i := 0; i < 4; i++ {
   326  		if _, err := testAlloc.AllocGPU(1, 1, []uint{1}, false); err != nil {
   327  			t.Fatal(err)
   328  		}
   329  	}
   330  	if _, err := testAlloc.AllocGPU(2, 2, []uint{1}, false); err == nil {
   331  		t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", 2).With("stack", stack.Trace().TrimRuntime()))
   332  	}
   333  }