github.com/sentienttechnologies/studio-go-runner@v0.0.0-20201118202441-6d21f2ced8ee/internal/runner/alloc_test.go (about) 1 // Copyright 2018-2020 (c) Cognizant Digital Business, Evolutionary AI. All rights reserved. Issued under the Apache 2.0 License. 2 3 package runner 4 5 import ( 6 "testing" 7 8 "github.com/go-stack/stack" 9 "github.com/jjeffery/kv" // MIT License 10 "github.com/rs/xid" 11 ) 12 13 // This file contains the implementations of tests 14 // related to resource allocation logic 15 16 // TestCUDATrivialAlloc implements the barest minimum success and failure cases with 17 // a single resource 18 // 19 func TestCUDATrivialAlloc(t *testing.T) { 20 id := xid.New().String() 21 testAlloc := gpuTracker{ 22 Allocs: map[string]*GPUTrack{ 23 id: { 24 UUID: id, 25 Slots: 1, 26 Mem: 1, 27 FreeSlots: 1, 28 FreeMem: 1, 29 EccFailure: nil, 30 Tracking: map[string]struct{}{}, 31 }, 32 }, 33 } 34 35 goodAllocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true) 36 if err != nil { 37 t.Fatal(err) 38 } 39 // Make sure we have the expected allocation passed back 40 if len(goodAllocs) != 1 { 41 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(goodAllocs)).With("stack", stack.Trace().TrimRuntime())) 42 } 43 44 // Try to allocate a new GPU and make sure it fails 45 badAllocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true) 46 if len(badAllocs) != 0 { 47 t.Fatal(kv.NewError("allocation result should be empty").With("expected_devices", 0).With("actual_devices", len(badAllocs)).With("stack", stack.Trace().TrimRuntime())) 48 } 49 if err == nil { 50 t.Fatal(kv.NewError("allocation result should have failed").With("stack", stack.Trace().TrimRuntime())) 51 } 52 } 53 54 // TestCUDAAggregateAlloc implements the minimal 2 card allocation test 55 // 56 func TestCUDAAggregateAlloc(t *testing.T) { 57 card1 := xid.New().String() 58 card2 := xid.New().String() 59 60 testAlloc := gpuTracker{ 61 Allocs: map[string]*GPUTrack{ 62 card1: { 63 UUID: card1, 64 Slots: 1, 65 Mem: 1, 66 FreeSlots: 1, 67 FreeMem: 1, 68 EccFailure: nil, 69 Tracking: map[string]struct{}{}, 70 }, 71 card2: { 72 UUID: card2, 73 Slots: 1, 74 Mem: 1, 75 FreeSlots: 1, 76 FreeMem: 1, 77 EccFailure: nil, 78 Tracking: map[string]struct{}{}, 79 }, 80 }, 81 } 82 83 good1Allocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true) 84 if err != nil { 85 t.Fatal(err) 86 } 87 // Make sure we have the expected allocation passed back 88 if len(good1Allocs) != 1 { 89 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime())) 90 } 91 92 good2Allocs, err := testAlloc.AllocGPU(1, 1, []uint{1}, true) 93 if err != nil { 94 t.Fatal(err) 95 } 96 // Make sure we have the expected allocation passed back 97 if len(good2Allocs) != 1 { 98 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good2Allocs)).With("stack", stack.Trace().TrimRuntime())) 99 } 100 101 for _, anAlloc := range good1Allocs { 102 err = testAlloc.ReturnGPU(anAlloc) 103 if err != nil { 104 t.Fatal(err) 105 } 106 } 107 for _, anAlloc := range good2Allocs { 108 err = testAlloc.ReturnGPU(anAlloc) 109 if err != nil { 110 t.Fatal(err) 111 } 112 } 113 114 // maxGPU, maxGPUMem, unit of allocation 115 goodAllAllocs, err := testAlloc.AllocGPU(2, 1, []uint{1, 2}, true) 116 if err != nil { 117 t.Fatal(err) 118 } 119 // Make sure we have the expected allocation passed back 120 if len(goodAllAllocs) != 2 { 121 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 2).With("actual_devices", len(goodAllAllocs)).With("stack", stack.Trace().TrimRuntime())) 122 } 123 124 for _, anAlloc := range goodAllAllocs { 125 err = testAlloc.ReturnGPU(anAlloc) 126 if err != nil { 127 t.Fatal(err) 128 } 129 } 130 131 // Now try an alloc that has already been released to make sure we get an error 132 for _, anAlloc := range goodAllAllocs { 133 err = testAlloc.ReturnGPU(anAlloc) 134 if err == nil { 135 t.Fatal(kv.NewError("double release did not fail").With("stack", stack.Trace().TrimRuntime())) 136 } 137 } 138 } 139 140 // TestCUDATypicalAlloc implements the multi slot 2 card allocation test 141 // 142 func TestCUDATypicalAlloc(t *testing.T) { 143 card1 := xid.New().String() 144 card2 := xid.New().String() 145 146 // Test the case of two four slot cards and fit perfectedly into the requested 147 // 8 slots 148 testAlloc := gpuTracker{ 149 Allocs: map[string]*GPUTrack{ 150 card1: { 151 UUID: card1, 152 Slots: 4, 153 Mem: 2, 154 FreeSlots: 4, 155 FreeMem: 2, 156 EccFailure: nil, 157 Tracking: map[string]struct{}{}, 158 }, 159 card2: { 160 UUID: card2, 161 Slots: 4, 162 Mem: 2, 163 FreeSlots: 4, 164 FreeMem: 2, 165 EccFailure: nil, 166 Tracking: map[string]struct{}{}, 167 }, 168 }, 169 } 170 171 good1Allocs, err := testAlloc.AllocGPU(8, 2, []uint{8, 4, 2, 1}, true) 172 if err != nil { 173 t.Fatal(err) 174 } 175 // Make sure we have the expected allocation passed back 176 if len(good1Allocs) != 2 { 177 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 2).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime())) 178 } 179 180 for _, anAlloc := range good1Allocs { 181 err = testAlloc.ReturnGPU(anAlloc) 182 if err != nil { 183 t.Fatal(err) 184 } 185 } 186 187 // Add an 8 slot card to the two 4 slot cards and then do an 8 slot allocation to 188 // ensure it finds the most efficient single card allocation 189 // 190 card3 := &GPUTrack{ 191 UUID: xid.New().String(), 192 Slots: 8, 193 Mem: 2, 194 FreeSlots: 8, 195 FreeMem: 2, 196 EccFailure: nil, 197 Tracking: map[string]struct{}{}, 198 } 199 testAlloc.Allocs[card3.UUID] = card3 200 201 efficentAllocs, err := testAlloc.AllocGPU(8, 2, []uint{8, 4, 2, 1}, true) 202 if err != nil { 203 t.Fatal(err) 204 } 205 206 // Make sure we have the expected allocation passed back 207 if len(efficentAllocs) != 1 { 208 t.Fatal(kv.NewError("multi-allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(efficentAllocs)).With("stack", stack.Trace().TrimRuntime())) 209 } 210 211 for _, anAlloc := range efficentAllocs { 212 err = testAlloc.ReturnGPU(anAlloc) 213 if err != nil { 214 t.Fatal(err) 215 } 216 } 217 218 // Take the 8 slot allocation and only allow 4 slot pieces and see what happens 219 // 220 inefficentAllocs, err := testAlloc.AllocGPU(8, 2, []uint{4, 2, 1}, true) 221 if err != nil { 222 t.Fatal(err) 223 } 224 225 // Make sure we have the expected allocation passed back 226 if len(inefficentAllocs) != 2 { 227 t.Fatal(kv.NewError("multi-allocation result was unexpected").With("expected_devices", 2).With("actual_devices", len(inefficentAllocs)).With("stack", stack.Trace().TrimRuntime())) 228 } 229 230 for _, anAlloc := range inefficentAllocs { 231 err = testAlloc.ReturnGPU(anAlloc) 232 if err != nil { 233 t.Fatal(err) 234 } 235 } 236 237 // Take the 8 slot allocation and only allow 8 slot pieces and make sure it fails 238 // after taking the 8 slot card out of the allocator 239 // 240 delete(testAlloc.Allocs, card3.UUID) 241 inefficentAllocs, err = testAlloc.AllocGPU(8, 2, []uint{8}, true) 242 if err == nil { 243 t.Fatal(kv.NewError("allocation success was unexpected").With("expected_devices", 0).With("actual_devices", len(inefficentAllocs)).With("stack", stack.Trace().TrimRuntime())) 244 } 245 246 // Make sure we have the expected allocation passed back 247 if len(inefficentAllocs) != 0 { 248 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 0).With("actual_devices", len(inefficentAllocs)).With("stack", stack.Trace().TrimRuntime())) 249 } 250 } 251 252 // TestCUDALargeAlloc implements the multi slot single card allocation test 253 // 254 func TestCUDALargeAlloc(t *testing.T) { 255 card1 := xid.New().String() 256 257 // Test the case of one 16 slot card and fit perfectedly into the requested 258 // 16 slots 259 testAlloc := gpuTracker{ 260 Allocs: map[string]*GPUTrack{ 261 card1: { 262 UUID: card1, 263 Slots: 16, 264 Mem: 2, 265 FreeSlots: 16, 266 FreeMem: 2, 267 EccFailure: nil, 268 Tracking: map[string]struct{}{}, 269 }, 270 }, 271 } 272 273 good1Allocs, err := testAlloc.AllocGPU(16, 2, []uint{16, 8, 4, 2, 1}, true) 274 if err != nil { 275 t.Fatal(err) 276 } 277 // Make sure we have the expected allocation passed back 278 if len(good1Allocs) != 1 { 279 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime())) 280 } 281 282 for _, anAlloc := range good1Allocs { 283 err = testAlloc.ReturnGPU(anAlloc) 284 if err != nil { 285 t.Fatal(err) 286 } 287 } 288 289 if good1Allocs, err = testAlloc.AllocGPU(8, 2, []uint{16, 8, 4, 2, 1}, true); err != nil { 290 t.Fatal(err) 291 } 292 293 // Make sure we have the expected allocation passed back 294 if len(good1Allocs) != 1 { 295 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", len(good1Allocs)).With("stack", stack.Trace().TrimRuntime())) 296 } 297 298 for _, anAlloc := range good1Allocs { 299 err = testAlloc.ReturnGPU(anAlloc) 300 if err != nil { 301 t.Fatal(err) 302 } 303 } 304 } 305 306 // TestCUDATrivialTestAlloc implements the barest minimum success and failure cases with 307 // a single resource 308 // 309 func TestCUDATrivialTrialAlloc(t *testing.T) { 310 id := xid.New().String() 311 testAlloc := gpuTracker{ 312 Allocs: map[string]*GPUTrack{ 313 id: { 314 UUID: id, 315 Slots: 1, 316 Mem: 1, 317 FreeSlots: 1, 318 FreeMem: 1, 319 EccFailure: nil, 320 Tracking: map[string]struct{}{}, 321 }, 322 }, 323 } 324 325 for i := 0; i < 4; i++ { 326 if _, err := testAlloc.AllocGPU(1, 1, []uint{1}, false); err != nil { 327 t.Fatal(err) 328 } 329 } 330 if _, err := testAlloc.AllocGPU(2, 2, []uint{1}, false); err == nil { 331 t.Fatal(kv.NewError("allocation result was unexpected").With("expected_devices", 1).With("actual_devices", 2).With("stack", stack.Trace().TrimRuntime())) 332 } 333 }