github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/devices/gpu/nvidia/fingerprint_test.go (about) 1 package nvidia 2 3 import ( 4 "context" 5 "errors" 6 "sort" 7 "testing" 8 9 hclog "github.com/hashicorp/go-hclog" 10 "github.com/hashicorp/nomad/devices/gpu/nvidia/nvml" 11 "github.com/hashicorp/nomad/helper" 12 "github.com/hashicorp/nomad/plugins/device" 13 "github.com/hashicorp/nomad/plugins/shared/structs" 14 "github.com/stretchr/testify/require" 15 ) 16 17 func TestIgnoreFingerprintedDevices(t *testing.T) { 18 for _, testCase := range []struct { 19 Name string 20 DeviceData []*nvml.FingerprintDeviceData 21 IgnoredGPUIds map[string]struct{} 22 ExpectedResult []*nvml.FingerprintDeviceData 23 }{ 24 { 25 Name: "Odd ignored", 26 DeviceData: []*nvml.FingerprintDeviceData{ 27 { 28 DeviceData: &nvml.DeviceData{ 29 DeviceName: helper.StringToPtr("DeviceName1"), 30 UUID: "UUID1", 31 MemoryMiB: helper.Uint64ToPtr(1000), 32 }, 33 }, 34 { 35 DeviceData: &nvml.DeviceData{ 36 DeviceName: helper.StringToPtr("DeviceName2"), 37 UUID: "UUID2", 38 MemoryMiB: helper.Uint64ToPtr(1000), 39 }, 40 }, 41 { 42 DeviceData: &nvml.DeviceData{ 43 DeviceName: helper.StringToPtr("DeviceName3"), 44 UUID: "UUID3", 45 MemoryMiB: helper.Uint64ToPtr(1000), 46 }, 47 }, 48 }, 49 IgnoredGPUIds: map[string]struct{}{ 50 "UUID2": {}, 51 }, 52 ExpectedResult: []*nvml.FingerprintDeviceData{ 53 { 54 DeviceData: &nvml.DeviceData{ 55 DeviceName: helper.StringToPtr("DeviceName1"), 56 UUID: "UUID1", 57 MemoryMiB: helper.Uint64ToPtr(1000), 58 }, 59 }, 60 { 61 DeviceData: &nvml.DeviceData{ 62 DeviceName: helper.StringToPtr("DeviceName3"), 63 UUID: "UUID3", 64 MemoryMiB: helper.Uint64ToPtr(1000), 65 }, 66 }, 67 }, 68 }, 69 { 70 Name: "Even ignored", 71 DeviceData: []*nvml.FingerprintDeviceData{ 72 { 73 DeviceData: &nvml.DeviceData{ 74 DeviceName: helper.StringToPtr("DeviceName1"), 75 UUID: "UUID1", 76 MemoryMiB: helper.Uint64ToPtr(1000), 77 }, 78 }, 79 { 80 DeviceData: &nvml.DeviceData{ 81 DeviceName: helper.StringToPtr("DeviceName2"), 82 UUID: "UUID2", 83 MemoryMiB: helper.Uint64ToPtr(1000), 84 }, 85 }, 86 { 87 DeviceData: &nvml.DeviceData{ 88 DeviceName: helper.StringToPtr("DeviceName3"), 89 UUID: "UUID3", 90 MemoryMiB: helper.Uint64ToPtr(1000), 91 }, 92 }, 93 }, 94 IgnoredGPUIds: map[string]struct{}{ 95 "UUID1": {}, 96 "UUID3": {}, 97 }, 98 ExpectedResult: []*nvml.FingerprintDeviceData{ 99 { 100 DeviceData: &nvml.DeviceData{ 101 DeviceName: helper.StringToPtr("DeviceName2"), 102 UUID: "UUID2", 103 MemoryMiB: helper.Uint64ToPtr(1000), 104 }, 105 }, 106 }, 107 }, 108 { 109 Name: "All ignored", 110 DeviceData: []*nvml.FingerprintDeviceData{ 111 { 112 DeviceData: &nvml.DeviceData{ 113 DeviceName: helper.StringToPtr("DeviceName1"), 114 UUID: "UUID1", 115 MemoryMiB: helper.Uint64ToPtr(1000), 116 }, 117 }, 118 { 119 DeviceData: &nvml.DeviceData{ 120 DeviceName: helper.StringToPtr("DeviceName2"), 121 UUID: "UUID2", 122 MemoryMiB: helper.Uint64ToPtr(1000), 123 }, 124 }, 125 { 126 DeviceData: &nvml.DeviceData{ 127 DeviceName: helper.StringToPtr("DeviceName3"), 128 UUID: "UUID3", 129 MemoryMiB: helper.Uint64ToPtr(1000), 130 }, 131 }, 132 }, 133 IgnoredGPUIds: map[string]struct{}{ 134 "UUID1": {}, 135 "UUID2": {}, 136 "UUID3": {}, 137 }, 138 ExpectedResult: nil, 139 }, 140 { 141 Name: "No ignored", 142 DeviceData: []*nvml.FingerprintDeviceData{ 143 { 144 DeviceData: &nvml.DeviceData{ 145 DeviceName: helper.StringToPtr("DeviceName1"), 146 UUID: "UUID1", 147 MemoryMiB: helper.Uint64ToPtr(1000), 148 }, 149 }, 150 { 151 DeviceData: &nvml.DeviceData{ 152 DeviceName: helper.StringToPtr("DeviceName2"), 153 UUID: "UUID2", 154 MemoryMiB: helper.Uint64ToPtr(1000), 155 }, 156 }, 157 { 158 DeviceData: &nvml.DeviceData{ 159 DeviceName: helper.StringToPtr("DeviceName3"), 160 UUID: "UUID3", 161 MemoryMiB: helper.Uint64ToPtr(1000), 162 }, 163 }, 164 }, 165 IgnoredGPUIds: map[string]struct{}{}, 166 ExpectedResult: []*nvml.FingerprintDeviceData{ 167 { 168 DeviceData: &nvml.DeviceData{ 169 DeviceName: helper.StringToPtr("DeviceName1"), 170 UUID: "UUID1", 171 MemoryMiB: helper.Uint64ToPtr(1000), 172 }, 173 }, 174 { 175 DeviceData: &nvml.DeviceData{ 176 DeviceName: helper.StringToPtr("DeviceName2"), 177 UUID: "UUID2", 178 MemoryMiB: helper.Uint64ToPtr(1000), 179 }, 180 }, 181 { 182 DeviceData: &nvml.DeviceData{ 183 DeviceName: helper.StringToPtr("DeviceName3"), 184 UUID: "UUID3", 185 MemoryMiB: helper.Uint64ToPtr(1000), 186 }, 187 }, 188 }, 189 }, 190 { 191 Name: "No DeviceData provided", 192 DeviceData: nil, 193 IgnoredGPUIds: map[string]struct{}{ 194 "UUID1": {}, 195 "UUID2": {}, 196 "UUID3": {}, 197 }, 198 ExpectedResult: nil, 199 }, 200 } { 201 t.Run(testCase.Name, func(t *testing.T) { 202 actualResult := ignoreFingerprintedDevices(testCase.DeviceData, testCase.IgnoredGPUIds) 203 require.New(t).Equal(testCase.ExpectedResult, actualResult) 204 }) 205 } 206 } 207 208 func TestCheckFingerprintUpdates(t *testing.T) { 209 for _, testCase := range []struct { 210 Name string 211 Device *NvidiaDevice 212 AllDevices []*nvml.FingerprintDeviceData 213 DeviceMapAfterMethodCall map[string]struct{} 214 ExpectedResult bool 215 }{ 216 { 217 Name: "No updates", 218 Device: &NvidiaDevice{devices: map[string]struct{}{ 219 "1": {}, 220 "2": {}, 221 "3": {}, 222 }}, 223 AllDevices: []*nvml.FingerprintDeviceData{ 224 { 225 DeviceData: &nvml.DeviceData{ 226 UUID: "1", 227 }, 228 }, 229 { 230 DeviceData: &nvml.DeviceData{ 231 UUID: "2", 232 }, 233 }, 234 { 235 DeviceData: &nvml.DeviceData{ 236 UUID: "3", 237 }, 238 }, 239 }, 240 ExpectedResult: false, 241 DeviceMapAfterMethodCall: map[string]struct{}{ 242 "1": {}, 243 "2": {}, 244 "3": {}, 245 }, 246 }, 247 { 248 Name: "New Device Appeared", 249 Device: &NvidiaDevice{devices: map[string]struct{}{ 250 "1": {}, 251 "2": {}, 252 "3": {}, 253 }}, 254 AllDevices: []*nvml.FingerprintDeviceData{ 255 { 256 DeviceData: &nvml.DeviceData{ 257 UUID: "1", 258 }, 259 }, 260 { 261 DeviceData: &nvml.DeviceData{ 262 UUID: "2", 263 }, 264 }, 265 { 266 DeviceData: &nvml.DeviceData{ 267 UUID: "3", 268 }, 269 }, 270 { 271 DeviceData: &nvml.DeviceData{ 272 UUID: "I am new", 273 }, 274 }, 275 }, 276 ExpectedResult: true, 277 DeviceMapAfterMethodCall: map[string]struct{}{ 278 "1": {}, 279 "2": {}, 280 "3": {}, 281 "I am new": {}, 282 }, 283 }, 284 { 285 Name: "Device disappeared", 286 Device: &NvidiaDevice{devices: map[string]struct{}{ 287 "1": {}, 288 "2": {}, 289 "3": {}, 290 }}, 291 AllDevices: []*nvml.FingerprintDeviceData{ 292 { 293 DeviceData: &nvml.DeviceData{ 294 UUID: "1", 295 }, 296 }, 297 { 298 DeviceData: &nvml.DeviceData{ 299 UUID: "2", 300 }, 301 }, 302 }, 303 ExpectedResult: true, 304 DeviceMapAfterMethodCall: map[string]struct{}{ 305 "1": {}, 306 "2": {}, 307 }, 308 }, 309 { 310 Name: "No devices in NvidiaDevice map", 311 Device: &NvidiaDevice{}, 312 AllDevices: []*nvml.FingerprintDeviceData{ 313 { 314 DeviceData: &nvml.DeviceData{ 315 UUID: "1", 316 }, 317 }, 318 { 319 DeviceData: &nvml.DeviceData{ 320 UUID: "2", 321 }, 322 }, 323 { 324 DeviceData: &nvml.DeviceData{ 325 UUID: "3", 326 }, 327 }, 328 }, 329 ExpectedResult: true, 330 DeviceMapAfterMethodCall: map[string]struct{}{ 331 "1": {}, 332 "2": {}, 333 "3": {}, 334 }, 335 }, 336 { 337 Name: "No devices detected", 338 Device: &NvidiaDevice{devices: map[string]struct{}{ 339 "1": {}, 340 "2": {}, 341 "3": {}, 342 }}, 343 AllDevices: nil, 344 ExpectedResult: true, 345 DeviceMapAfterMethodCall: map[string]struct{}{}, 346 }, 347 } { 348 t.Run(testCase.Name, func(t *testing.T) { 349 actualResult := testCase.Device.fingerprintChanged(testCase.AllDevices) 350 req := require.New(t) 351 // check that function returns valid "updated / not updated" state 352 req.Equal(testCase.ExpectedResult, actualResult) 353 // check that function propely updates devices map 354 req.Equal(testCase.Device.devices, testCase.DeviceMapAfterMethodCall) 355 }) 356 } 357 } 358 359 func TestAttributesFromFingerprintDeviceData(t *testing.T) { 360 for _, testCase := range []struct { 361 Name string 362 FingerprintDeviceData *nvml.FingerprintDeviceData 363 ExpectedResult map[string]*structs.Attribute 364 }{ 365 { 366 Name: "All attributes are not nil", 367 FingerprintDeviceData: &nvml.FingerprintDeviceData{ 368 DeviceData: &nvml.DeviceData{ 369 UUID: "1", 370 DeviceName: helper.StringToPtr("Type1"), 371 MemoryMiB: helper.Uint64ToPtr(256), 372 PowerW: helper.UintToPtr(2), 373 BAR1MiB: helper.Uint64ToPtr(256), 374 }, 375 PCIBusID: "pciBusID1", 376 PCIBandwidthMBPerS: helper.UintToPtr(1), 377 CoresClockMHz: helper.UintToPtr(1), 378 MemoryClockMHz: helper.UintToPtr(1), 379 DisplayState: "Enabled", 380 PersistenceMode: "Enabled", 381 }, 382 ExpectedResult: map[string]*structs.Attribute{ 383 MemoryAttr: { 384 Int: helper.Int64ToPtr(256), 385 Unit: structs.UnitMiB, 386 }, 387 PowerAttr: { 388 Int: helper.Int64ToPtr(2), 389 Unit: structs.UnitW, 390 }, 391 BAR1Attr: { 392 Int: helper.Int64ToPtr(256), 393 Unit: structs.UnitMiB, 394 }, 395 PCIBandwidthAttr: { 396 Int: helper.Int64ToPtr(1), 397 Unit: structs.UnitMBPerS, 398 }, 399 CoresClockAttr: { 400 Int: helper.Int64ToPtr(1), 401 Unit: structs.UnitMHz, 402 }, 403 MemoryClockAttr: { 404 Int: helper.Int64ToPtr(1), 405 Unit: structs.UnitMHz, 406 }, 407 DisplayStateAttr: { 408 String: helper.StringToPtr("Enabled"), 409 }, 410 PersistenceModeAttr: { 411 String: helper.StringToPtr("Enabled"), 412 }, 413 }, 414 }, 415 { 416 Name: "nil values are omitted", 417 FingerprintDeviceData: &nvml.FingerprintDeviceData{ 418 DeviceData: &nvml.DeviceData{ 419 UUID: "1", 420 DeviceName: helper.StringToPtr("Type1"), 421 MemoryMiB: nil, 422 PowerW: helper.UintToPtr(2), 423 BAR1MiB: helper.Uint64ToPtr(256), 424 }, 425 PCIBusID: "pciBusID1", 426 DisplayState: "Enabled", 427 PersistenceMode: "Enabled", 428 }, 429 ExpectedResult: map[string]*structs.Attribute{ 430 PowerAttr: { 431 Int: helper.Int64ToPtr(2), 432 Unit: structs.UnitW, 433 }, 434 BAR1Attr: { 435 Int: helper.Int64ToPtr(256), 436 Unit: structs.UnitMiB, 437 }, 438 DisplayStateAttr: { 439 String: helper.StringToPtr("Enabled"), 440 }, 441 PersistenceModeAttr: { 442 String: helper.StringToPtr("Enabled"), 443 }, 444 }, 445 }, 446 } { 447 t.Run(testCase.Name, func(t *testing.T) { 448 actualResult := attributesFromFingerprintDeviceData(testCase.FingerprintDeviceData) 449 require.Equal(t, testCase.ExpectedResult, actualResult) 450 }) 451 } 452 } 453 454 func TestDeviceGroupFromFingerprintData(t *testing.T) { 455 for _, testCase := range []struct { 456 Name string 457 GroupName string 458 Devices []*nvml.FingerprintDeviceData 459 CommonAttributes map[string]*structs.Attribute 460 ExpectedResult *device.DeviceGroup 461 }{ 462 { 463 Name: "Devices are provided", 464 GroupName: "Type1", 465 Devices: []*nvml.FingerprintDeviceData{ 466 { 467 DeviceData: &nvml.DeviceData{ 468 UUID: "1", 469 DeviceName: helper.StringToPtr("Type1"), 470 MemoryMiB: helper.Uint64ToPtr(100), 471 PowerW: helper.UintToPtr(2), 472 BAR1MiB: helper.Uint64ToPtr(256), 473 }, 474 PCIBusID: "pciBusID1", 475 PCIBandwidthMBPerS: helper.UintToPtr(1), 476 CoresClockMHz: helper.UintToPtr(1), 477 MemoryClockMHz: helper.UintToPtr(1), 478 DisplayState: "Enabled", 479 PersistenceMode: "Enabled", 480 }, 481 { 482 DeviceData: &nvml.DeviceData{ 483 UUID: "2", 484 DeviceName: helper.StringToPtr("Type1"), 485 MemoryMiB: helper.Uint64ToPtr(100), 486 PowerW: helper.UintToPtr(2), 487 BAR1MiB: helper.Uint64ToPtr(256), 488 }, 489 PCIBusID: "pciBusID2", 490 PCIBandwidthMBPerS: helper.UintToPtr(1), 491 CoresClockMHz: helper.UintToPtr(1), 492 MemoryClockMHz: helper.UintToPtr(1), 493 DisplayState: "Enabled", 494 PersistenceMode: "Enabled", 495 }, 496 }, 497 ExpectedResult: &device.DeviceGroup{ 498 Vendor: vendor, 499 Type: deviceType, 500 Name: "Type1", 501 Devices: []*device.Device{ 502 { 503 ID: "1", 504 Healthy: true, 505 HwLocality: &device.DeviceLocality{ 506 PciBusID: "pciBusID1", 507 }, 508 }, 509 { 510 ID: "2", 511 Healthy: true, 512 HwLocality: &device.DeviceLocality{ 513 PciBusID: "pciBusID2", 514 }, 515 }, 516 }, 517 Attributes: map[string]*structs.Attribute{ 518 MemoryAttr: { 519 Int: helper.Int64ToPtr(100), 520 Unit: structs.UnitMiB, 521 }, 522 PowerAttr: { 523 Int: helper.Int64ToPtr(2), 524 Unit: structs.UnitW, 525 }, 526 BAR1Attr: { 527 Int: helper.Int64ToPtr(256), 528 Unit: structs.UnitMiB, 529 }, 530 PCIBandwidthAttr: { 531 Int: helper.Int64ToPtr(1), 532 Unit: structs.UnitMBPerS, 533 }, 534 CoresClockAttr: { 535 Int: helper.Int64ToPtr(1), 536 Unit: structs.UnitMHz, 537 }, 538 MemoryClockAttr: { 539 Int: helper.Int64ToPtr(1), 540 Unit: structs.UnitMHz, 541 }, 542 DisplayStateAttr: { 543 String: helper.StringToPtr("Enabled"), 544 }, 545 PersistenceModeAttr: { 546 String: helper.StringToPtr("Enabled"), 547 }, 548 }, 549 }, 550 }, 551 { 552 Name: "Devices and common attributes are provided", 553 GroupName: "Type1", 554 Devices: []*nvml.FingerprintDeviceData{ 555 { 556 DeviceData: &nvml.DeviceData{ 557 UUID: "1", 558 DeviceName: helper.StringToPtr("Type1"), 559 MemoryMiB: helper.Uint64ToPtr(100), 560 PowerW: helper.UintToPtr(2), 561 BAR1MiB: helper.Uint64ToPtr(256), 562 }, 563 PCIBusID: "pciBusID1", 564 PCIBandwidthMBPerS: helper.UintToPtr(1), 565 CoresClockMHz: helper.UintToPtr(1), 566 MemoryClockMHz: helper.UintToPtr(1), 567 DisplayState: "Enabled", 568 PersistenceMode: "Enabled", 569 }, 570 { 571 DeviceData: &nvml.DeviceData{ 572 UUID: "2", 573 DeviceName: helper.StringToPtr("Type1"), 574 MemoryMiB: helper.Uint64ToPtr(100), 575 PowerW: helper.UintToPtr(2), 576 BAR1MiB: helper.Uint64ToPtr(256), 577 }, 578 PCIBusID: "pciBusID2", 579 PCIBandwidthMBPerS: helper.UintToPtr(1), 580 CoresClockMHz: helper.UintToPtr(1), 581 MemoryClockMHz: helper.UintToPtr(1), 582 DisplayState: "Enabled", 583 PersistenceMode: "Enabled", 584 }, 585 }, 586 CommonAttributes: map[string]*structs.Attribute{ 587 DriverVersionAttr: { 588 String: helper.StringToPtr("1"), 589 }, 590 }, 591 ExpectedResult: &device.DeviceGroup{ 592 Vendor: vendor, 593 Type: deviceType, 594 Name: "Type1", 595 Devices: []*device.Device{ 596 { 597 ID: "1", 598 Healthy: true, 599 HwLocality: &device.DeviceLocality{ 600 PciBusID: "pciBusID1", 601 }, 602 }, 603 { 604 ID: "2", 605 Healthy: true, 606 HwLocality: &device.DeviceLocality{ 607 PciBusID: "pciBusID2", 608 }, 609 }, 610 }, 611 Attributes: map[string]*structs.Attribute{ 612 MemoryAttr: { 613 Int: helper.Int64ToPtr(100), 614 Unit: structs.UnitMiB, 615 }, 616 PowerAttr: { 617 Int: helper.Int64ToPtr(2), 618 Unit: structs.UnitW, 619 }, 620 BAR1Attr: { 621 Int: helper.Int64ToPtr(256), 622 Unit: structs.UnitMiB, 623 }, 624 PCIBandwidthAttr: { 625 Int: helper.Int64ToPtr(1), 626 Unit: structs.UnitMBPerS, 627 }, 628 CoresClockAttr: { 629 Int: helper.Int64ToPtr(1), 630 Unit: structs.UnitMHz, 631 }, 632 MemoryClockAttr: { 633 Int: helper.Int64ToPtr(1), 634 Unit: structs.UnitMHz, 635 }, 636 DisplayStateAttr: { 637 String: helper.StringToPtr("Enabled"), 638 }, 639 PersistenceModeAttr: { 640 String: helper.StringToPtr("Enabled"), 641 }, 642 DriverVersionAttr: { 643 String: helper.StringToPtr("1"), 644 }, 645 }, 646 }, 647 }, 648 { 649 Name: "Devices are not provided", 650 GroupName: "Type1", 651 CommonAttributes: map[string]*structs.Attribute{ 652 DriverVersionAttr: { 653 String: helper.StringToPtr("1"), 654 }, 655 }, 656 Devices: nil, 657 ExpectedResult: nil, 658 }, 659 } { 660 t.Run(testCase.Name, func(t *testing.T) { 661 actualResult := deviceGroupFromFingerprintData(testCase.GroupName, testCase.Devices, testCase.CommonAttributes) 662 require.New(t).Equal(testCase.ExpectedResult, actualResult) 663 }) 664 } 665 } 666 667 func TestWriteFingerprintToChannel(t *testing.T) { 668 for _, testCase := range []struct { 669 Name string 670 Device *NvidiaDevice 671 ExpectedWriteToChannel *device.FingerprintResponse 672 }{ 673 { 674 Name: "Check that FingerprintError is handled properly", 675 Device: &NvidiaDevice{ 676 nvmlClient: &MockNvmlClient{ 677 FingerprintError: errors.New(""), 678 }, 679 logger: hclog.NewNullLogger(), 680 }, 681 ExpectedWriteToChannel: &device.FingerprintResponse{ 682 Error: errors.New(""), 683 }, 684 }, 685 { 686 Name: "Check ignore devices works correctly", 687 Device: &NvidiaDevice{ 688 nvmlClient: &MockNvmlClient{ 689 FingerprintResponseReturned: &nvml.FingerprintData{ 690 DriverVersion: "1", 691 Devices: []*nvml.FingerprintDeviceData{ 692 { 693 DeviceData: &nvml.DeviceData{ 694 UUID: "1", 695 DeviceName: helper.StringToPtr("Name"), 696 MemoryMiB: helper.Uint64ToPtr(10), 697 PowerW: helper.UintToPtr(100), 698 BAR1MiB: helper.Uint64ToPtr(256), 699 }, 700 PCIBusID: "pciBusID1", 701 PCIBandwidthMBPerS: helper.UintToPtr(1), 702 CoresClockMHz: helper.UintToPtr(1), 703 MemoryClockMHz: helper.UintToPtr(1), 704 DisplayState: "Enabled", 705 PersistenceMode: "Enabled", 706 }, 707 { 708 DeviceData: &nvml.DeviceData{ 709 UUID: "2", 710 DeviceName: helper.StringToPtr("Name"), 711 MemoryMiB: helper.Uint64ToPtr(10), 712 PowerW: helper.UintToPtr(100), 713 BAR1MiB: helper.Uint64ToPtr(256), 714 }, 715 PCIBusID: "pciBusID2", 716 PCIBandwidthMBPerS: helper.UintToPtr(1), 717 CoresClockMHz: helper.UintToPtr(1), 718 MemoryClockMHz: helper.UintToPtr(1), 719 DisplayState: "Enabled", 720 PersistenceMode: "Enabled", 721 }, 722 }, 723 }, 724 }, 725 ignoredGPUIDs: map[string]struct{}{ 726 "1": {}, 727 }, 728 logger: hclog.NewNullLogger(), 729 }, 730 ExpectedWriteToChannel: &device.FingerprintResponse{ 731 Devices: []*device.DeviceGroup{ 732 { 733 Vendor: vendor, 734 Type: deviceType, 735 Name: "Name", 736 Devices: []*device.Device{ 737 { 738 ID: "2", 739 Healthy: true, 740 HwLocality: &device.DeviceLocality{ 741 PciBusID: "pciBusID2", 742 }, 743 }, 744 }, 745 Attributes: map[string]*structs.Attribute{ 746 MemoryAttr: { 747 Int: helper.Int64ToPtr(10), 748 Unit: structs.UnitMiB, 749 }, 750 PowerAttr: { 751 Int: helper.Int64ToPtr(100), 752 Unit: structs.UnitW, 753 }, 754 BAR1Attr: { 755 Int: helper.Int64ToPtr(256), 756 Unit: structs.UnitMiB, 757 }, 758 PCIBandwidthAttr: { 759 Int: helper.Int64ToPtr(1), 760 Unit: structs.UnitMBPerS, 761 }, 762 CoresClockAttr: { 763 Int: helper.Int64ToPtr(1), 764 Unit: structs.UnitMHz, 765 }, 766 MemoryClockAttr: { 767 Int: helper.Int64ToPtr(1), 768 Unit: structs.UnitMHz, 769 }, 770 DisplayStateAttr: { 771 String: helper.StringToPtr("Enabled"), 772 }, 773 PersistenceModeAttr: { 774 String: helper.StringToPtr("Enabled"), 775 }, 776 DriverVersionAttr: { 777 String: helper.StringToPtr("1"), 778 }, 779 }, 780 }, 781 }, 782 }, 783 }, 784 { 785 Name: "Check devices are split to multiple device groups 1", 786 Device: &NvidiaDevice{ 787 nvmlClient: &MockNvmlClient{ 788 FingerprintResponseReturned: &nvml.FingerprintData{ 789 DriverVersion: "1", 790 Devices: []*nvml.FingerprintDeviceData{ 791 { 792 DeviceData: &nvml.DeviceData{ 793 UUID: "1", 794 DeviceName: helper.StringToPtr("Name1"), 795 MemoryMiB: helper.Uint64ToPtr(10), 796 PowerW: helper.UintToPtr(100), 797 BAR1MiB: helper.Uint64ToPtr(256), 798 }, 799 PCIBusID: "pciBusID1", 800 PCIBandwidthMBPerS: helper.UintToPtr(1), 801 CoresClockMHz: helper.UintToPtr(1), 802 MemoryClockMHz: helper.UintToPtr(1), 803 DisplayState: "Enabled", 804 PersistenceMode: "Enabled", 805 }, 806 { 807 DeviceData: &nvml.DeviceData{ 808 UUID: "2", 809 DeviceName: helper.StringToPtr("Name2"), 810 MemoryMiB: helper.Uint64ToPtr(11), 811 PowerW: helper.UintToPtr(100), 812 BAR1MiB: helper.Uint64ToPtr(256), 813 }, 814 PCIBusID: "pciBusID2", 815 PCIBandwidthMBPerS: helper.UintToPtr(1), 816 CoresClockMHz: helper.UintToPtr(1), 817 MemoryClockMHz: helper.UintToPtr(1), 818 DisplayState: "Enabled", 819 PersistenceMode: "Enabled", 820 }, 821 { 822 DeviceData: &nvml.DeviceData{ 823 UUID: "3", 824 DeviceName: helper.StringToPtr("Name3"), 825 MemoryMiB: helper.Uint64ToPtr(12), 826 PowerW: helper.UintToPtr(100), 827 BAR1MiB: helper.Uint64ToPtr(256), 828 }, 829 PCIBusID: "pciBusID3", 830 PCIBandwidthMBPerS: helper.UintToPtr(1), 831 CoresClockMHz: helper.UintToPtr(1), 832 MemoryClockMHz: helper.UintToPtr(1), 833 DisplayState: "Enabled", 834 PersistenceMode: "Enabled", 835 }, 836 }, 837 }, 838 }, 839 logger: hclog.NewNullLogger(), 840 }, 841 ExpectedWriteToChannel: &device.FingerprintResponse{ 842 Devices: []*device.DeviceGroup{ 843 { 844 Vendor: vendor, 845 Type: deviceType, 846 Name: "Name1", 847 Devices: []*device.Device{ 848 { 849 ID: "1", 850 Healthy: true, 851 HwLocality: &device.DeviceLocality{ 852 PciBusID: "pciBusID1", 853 }, 854 }, 855 }, 856 Attributes: map[string]*structs.Attribute{ 857 MemoryAttr: { 858 Int: helper.Int64ToPtr(10), 859 Unit: structs.UnitMiB, 860 }, 861 PowerAttr: { 862 Int: helper.Int64ToPtr(100), 863 Unit: structs.UnitW, 864 }, 865 BAR1Attr: { 866 Int: helper.Int64ToPtr(256), 867 Unit: structs.UnitMiB, 868 }, 869 PCIBandwidthAttr: { 870 Int: helper.Int64ToPtr(1), 871 Unit: structs.UnitMBPerS, 872 }, 873 CoresClockAttr: { 874 Int: helper.Int64ToPtr(1), 875 Unit: structs.UnitMHz, 876 }, 877 MemoryClockAttr: { 878 Int: helper.Int64ToPtr(1), 879 Unit: structs.UnitMHz, 880 }, 881 DisplayStateAttr: { 882 String: helper.StringToPtr("Enabled"), 883 }, 884 PersistenceModeAttr: { 885 String: helper.StringToPtr("Enabled"), 886 }, 887 DriverVersionAttr: { 888 String: helper.StringToPtr("1"), 889 }, 890 }, 891 }, 892 { 893 Vendor: vendor, 894 Type: deviceType, 895 Name: "Name2", 896 Devices: []*device.Device{ 897 { 898 ID: "2", 899 Healthy: true, 900 HwLocality: &device.DeviceLocality{ 901 PciBusID: "pciBusID2", 902 }, 903 }, 904 }, 905 Attributes: map[string]*structs.Attribute{ 906 MemoryAttr: { 907 Int: helper.Int64ToPtr(11), 908 Unit: structs.UnitMiB, 909 }, 910 PowerAttr: { 911 Int: helper.Int64ToPtr(100), 912 Unit: structs.UnitW, 913 }, 914 BAR1Attr: { 915 Int: helper.Int64ToPtr(256), 916 Unit: structs.UnitMiB, 917 }, 918 PCIBandwidthAttr: { 919 Int: helper.Int64ToPtr(1), 920 Unit: structs.UnitMBPerS, 921 }, 922 CoresClockAttr: { 923 Int: helper.Int64ToPtr(1), 924 Unit: structs.UnitMHz, 925 }, 926 MemoryClockAttr: { 927 Int: helper.Int64ToPtr(1), 928 Unit: structs.UnitMHz, 929 }, 930 DisplayStateAttr: { 931 String: helper.StringToPtr("Enabled"), 932 }, 933 PersistenceModeAttr: { 934 String: helper.StringToPtr("Enabled"), 935 }, 936 DriverVersionAttr: { 937 String: helper.StringToPtr("1"), 938 }, 939 }, 940 }, 941 { 942 Vendor: vendor, 943 Type: deviceType, 944 Name: "Name3", 945 Devices: []*device.Device{ 946 { 947 ID: "3", 948 Healthy: true, 949 HwLocality: &device.DeviceLocality{ 950 PciBusID: "pciBusID3", 951 }, 952 }, 953 }, 954 Attributes: map[string]*structs.Attribute{ 955 MemoryAttr: { 956 Int: helper.Int64ToPtr(12), 957 Unit: structs.UnitMiB, 958 }, 959 PowerAttr: { 960 Int: helper.Int64ToPtr(100), 961 Unit: structs.UnitW, 962 }, 963 BAR1Attr: { 964 Int: helper.Int64ToPtr(256), 965 Unit: structs.UnitMiB, 966 }, 967 PCIBandwidthAttr: { 968 Int: helper.Int64ToPtr(1), 969 Unit: structs.UnitMBPerS, 970 }, 971 CoresClockAttr: { 972 Int: helper.Int64ToPtr(1), 973 Unit: structs.UnitMHz, 974 }, 975 MemoryClockAttr: { 976 Int: helper.Int64ToPtr(1), 977 Unit: structs.UnitMHz, 978 }, 979 DisplayStateAttr: { 980 String: helper.StringToPtr("Enabled"), 981 }, 982 PersistenceModeAttr: { 983 String: helper.StringToPtr("Enabled"), 984 }, 985 DriverVersionAttr: { 986 String: helper.StringToPtr("1"), 987 }, 988 }, 989 }, 990 }, 991 }, 992 }, 993 { 994 Name: "Check devices are split to multiple device groups 2", 995 Device: &NvidiaDevice{ 996 nvmlClient: &MockNvmlClient{ 997 FingerprintResponseReturned: &nvml.FingerprintData{ 998 DriverVersion: "1", 999 Devices: []*nvml.FingerprintDeviceData{ 1000 { 1001 DeviceData: &nvml.DeviceData{ 1002 UUID: "1", 1003 DeviceName: helper.StringToPtr("Name1"), 1004 MemoryMiB: helper.Uint64ToPtr(10), 1005 PowerW: helper.UintToPtr(100), 1006 BAR1MiB: helper.Uint64ToPtr(256), 1007 }, 1008 PCIBusID: "pciBusID1", 1009 PCIBandwidthMBPerS: helper.UintToPtr(1), 1010 CoresClockMHz: helper.UintToPtr(1), 1011 MemoryClockMHz: helper.UintToPtr(1), 1012 DisplayState: "Enabled", 1013 PersistenceMode: "Enabled", 1014 }, 1015 { 1016 DeviceData: &nvml.DeviceData{ 1017 UUID: "2", 1018 DeviceName: helper.StringToPtr("Name2"), 1019 MemoryMiB: helper.Uint64ToPtr(11), 1020 PowerW: helper.UintToPtr(100), 1021 BAR1MiB: helper.Uint64ToPtr(256), 1022 }, 1023 PCIBusID: "pciBusID2", 1024 PCIBandwidthMBPerS: helper.UintToPtr(1), 1025 CoresClockMHz: helper.UintToPtr(1), 1026 MemoryClockMHz: helper.UintToPtr(1), 1027 DisplayState: "Enabled", 1028 PersistenceMode: "Enabled", 1029 }, 1030 { 1031 DeviceData: &nvml.DeviceData{ 1032 UUID: "3", 1033 DeviceName: helper.StringToPtr("Name2"), 1034 MemoryMiB: helper.Uint64ToPtr(12), 1035 PowerW: helper.UintToPtr(100), 1036 BAR1MiB: helper.Uint64ToPtr(256), 1037 }, 1038 PCIBusID: "pciBusID3", 1039 PCIBandwidthMBPerS: helper.UintToPtr(1), 1040 CoresClockMHz: helper.UintToPtr(1), 1041 MemoryClockMHz: helper.UintToPtr(1), 1042 DisplayState: "Enabled", 1043 PersistenceMode: "Enabled", 1044 }, 1045 }, 1046 }, 1047 }, 1048 logger: hclog.NewNullLogger(), 1049 }, 1050 ExpectedWriteToChannel: &device.FingerprintResponse{ 1051 Devices: []*device.DeviceGroup{ 1052 { 1053 Vendor: vendor, 1054 Type: deviceType, 1055 Name: "Name1", 1056 Devices: []*device.Device{ 1057 { 1058 ID: "1", 1059 Healthy: true, 1060 HwLocality: &device.DeviceLocality{ 1061 PciBusID: "pciBusID1", 1062 }, 1063 }, 1064 }, 1065 Attributes: map[string]*structs.Attribute{ 1066 MemoryAttr: { 1067 Int: helper.Int64ToPtr(10), 1068 Unit: structs.UnitMiB, 1069 }, 1070 PowerAttr: { 1071 Int: helper.Int64ToPtr(100), 1072 Unit: structs.UnitW, 1073 }, 1074 BAR1Attr: { 1075 Int: helper.Int64ToPtr(256), 1076 Unit: structs.UnitMiB, 1077 }, 1078 PCIBandwidthAttr: { 1079 Int: helper.Int64ToPtr(1), 1080 Unit: structs.UnitMBPerS, 1081 }, 1082 CoresClockAttr: { 1083 Int: helper.Int64ToPtr(1), 1084 Unit: structs.UnitMHz, 1085 }, 1086 MemoryClockAttr: { 1087 Int: helper.Int64ToPtr(1), 1088 Unit: structs.UnitMHz, 1089 }, 1090 DisplayStateAttr: { 1091 String: helper.StringToPtr("Enabled"), 1092 }, 1093 PersistenceModeAttr: { 1094 String: helper.StringToPtr("Enabled"), 1095 }, 1096 DriverVersionAttr: { 1097 String: helper.StringToPtr("1"), 1098 }, 1099 }, 1100 }, 1101 { 1102 Vendor: vendor, 1103 Type: deviceType, 1104 Name: "Name2", 1105 Devices: []*device.Device{ 1106 { 1107 ID: "2", 1108 Healthy: true, 1109 HwLocality: &device.DeviceLocality{ 1110 PciBusID: "pciBusID2", 1111 }, 1112 }, 1113 { 1114 ID: "3", 1115 Healthy: true, 1116 HwLocality: &device.DeviceLocality{ 1117 PciBusID: "pciBusID3", 1118 }, 1119 }, 1120 }, 1121 Attributes: map[string]*structs.Attribute{ 1122 MemoryAttr: { 1123 Int: helper.Int64ToPtr(11), 1124 Unit: structs.UnitMiB, 1125 }, 1126 PowerAttr: { 1127 Int: helper.Int64ToPtr(100), 1128 Unit: structs.UnitW, 1129 }, 1130 BAR1Attr: { 1131 Int: helper.Int64ToPtr(256), 1132 Unit: structs.UnitMiB, 1133 }, 1134 PCIBandwidthAttr: { 1135 Int: helper.Int64ToPtr(1), 1136 Unit: structs.UnitMBPerS, 1137 }, 1138 CoresClockAttr: { 1139 Int: helper.Int64ToPtr(1), 1140 Unit: structs.UnitMHz, 1141 }, 1142 MemoryClockAttr: { 1143 Int: helper.Int64ToPtr(1), 1144 Unit: structs.UnitMHz, 1145 }, 1146 DisplayStateAttr: { 1147 String: helper.StringToPtr("Enabled"), 1148 }, 1149 PersistenceModeAttr: { 1150 String: helper.StringToPtr("Enabled"), 1151 }, 1152 DriverVersionAttr: { 1153 String: helper.StringToPtr("1"), 1154 }, 1155 }, 1156 }, 1157 }, 1158 }, 1159 }, 1160 } { 1161 t.Run(testCase.Name, func(t *testing.T) { 1162 channel := make(chan *device.FingerprintResponse, 1) 1163 testCase.Device.writeFingerprintToChannel(channel) 1164 actualResult := <-channel 1165 // writeFingerprintToChannel iterates over map keys 1166 // and insterts results to an array, so order of elements in output array 1167 // may be different 1168 // actualResult, expectedResult arrays has to be sorted firsted 1169 sort.Slice(actualResult.Devices, func(i, j int) bool { 1170 return actualResult.Devices[i].Name < actualResult.Devices[j].Name 1171 }) 1172 sort.Slice(testCase.ExpectedWriteToChannel.Devices, func(i, j int) bool { 1173 return testCase.ExpectedWriteToChannel.Devices[i].Name < testCase.ExpectedWriteToChannel.Devices[j].Name 1174 }) 1175 require.Equal(t, testCase.ExpectedWriteToChannel, actualResult) 1176 }) 1177 } 1178 } 1179 1180 // Test if nonworking driver returns empty fingerprint data 1181 func TestFingerprint(t *testing.T) { 1182 for _, testCase := range []struct { 1183 Name string 1184 Device *NvidiaDevice 1185 ExpectedWriteToChannel *device.FingerprintResponse 1186 }{ 1187 { 1188 Name: "Check that working driver returns valid fingeprint data", 1189 Device: &NvidiaDevice{ 1190 initErr: nil, 1191 nvmlClient: &MockNvmlClient{ 1192 FingerprintResponseReturned: &nvml.FingerprintData{ 1193 DriverVersion: "1", 1194 Devices: []*nvml.FingerprintDeviceData{ 1195 { 1196 DeviceData: &nvml.DeviceData{ 1197 UUID: "1", 1198 DeviceName: helper.StringToPtr("Name1"), 1199 MemoryMiB: helper.Uint64ToPtr(10), 1200 PowerW: helper.UintToPtr(100), 1201 BAR1MiB: helper.Uint64ToPtr(256), 1202 }, 1203 PCIBusID: "pciBusID1", 1204 PCIBandwidthMBPerS: helper.UintToPtr(1), 1205 CoresClockMHz: helper.UintToPtr(1), 1206 MemoryClockMHz: helper.UintToPtr(1), 1207 DisplayState: "Enabled", 1208 PersistenceMode: "Enabled", 1209 }, 1210 { 1211 DeviceData: &nvml.DeviceData{ 1212 UUID: "2", 1213 DeviceName: helper.StringToPtr("Name1"), 1214 MemoryMiB: helper.Uint64ToPtr(10), 1215 PowerW: helper.UintToPtr(100), 1216 BAR1MiB: helper.Uint64ToPtr(256), 1217 }, 1218 PCIBusID: "pciBusID2", 1219 PCIBandwidthMBPerS: helper.UintToPtr(1), 1220 CoresClockMHz: helper.UintToPtr(1), 1221 MemoryClockMHz: helper.UintToPtr(1), 1222 DisplayState: "Enabled", 1223 PersistenceMode: "Enabled", 1224 }, 1225 { 1226 DeviceData: &nvml.DeviceData{ 1227 UUID: "3", 1228 DeviceName: helper.StringToPtr("Name1"), 1229 MemoryMiB: helper.Uint64ToPtr(10), 1230 PowerW: helper.UintToPtr(100), 1231 BAR1MiB: helper.Uint64ToPtr(256), 1232 }, 1233 PCIBusID: "pciBusID3", 1234 PCIBandwidthMBPerS: helper.UintToPtr(1), 1235 CoresClockMHz: helper.UintToPtr(1), 1236 MemoryClockMHz: helper.UintToPtr(1), 1237 DisplayState: "Enabled", 1238 PersistenceMode: "Enabled", 1239 }, 1240 }, 1241 }, 1242 }, 1243 logger: hclog.NewNullLogger(), 1244 }, 1245 ExpectedWriteToChannel: &device.FingerprintResponse{ 1246 Devices: []*device.DeviceGroup{ 1247 { 1248 Vendor: vendor, 1249 Type: deviceType, 1250 Name: "Name1", 1251 Devices: []*device.Device{ 1252 { 1253 ID: "1", 1254 Healthy: true, 1255 HwLocality: &device.DeviceLocality{ 1256 PciBusID: "pciBusID1", 1257 }, 1258 }, 1259 { 1260 ID: "2", 1261 Healthy: true, 1262 HwLocality: &device.DeviceLocality{ 1263 PciBusID: "pciBusID2", 1264 }, 1265 }, 1266 { 1267 ID: "3", 1268 Healthy: true, 1269 HwLocality: &device.DeviceLocality{ 1270 PciBusID: "pciBusID3", 1271 }, 1272 }, 1273 }, 1274 Attributes: map[string]*structs.Attribute{ 1275 MemoryAttr: { 1276 Int: helper.Int64ToPtr(10), 1277 Unit: structs.UnitMiB, 1278 }, 1279 PowerAttr: { 1280 Int: helper.Int64ToPtr(100), 1281 Unit: structs.UnitW, 1282 }, 1283 BAR1Attr: { 1284 Int: helper.Int64ToPtr(256), 1285 Unit: structs.UnitMiB, 1286 }, 1287 PCIBandwidthAttr: { 1288 Int: helper.Int64ToPtr(1), 1289 Unit: structs.UnitMBPerS, 1290 }, 1291 CoresClockAttr: { 1292 Int: helper.Int64ToPtr(1), 1293 Unit: structs.UnitMHz, 1294 }, 1295 MemoryClockAttr: { 1296 Int: helper.Int64ToPtr(1), 1297 Unit: structs.UnitMHz, 1298 }, 1299 DisplayStateAttr: { 1300 String: helper.StringToPtr("Enabled"), 1301 }, 1302 PersistenceModeAttr: { 1303 String: helper.StringToPtr("Enabled"), 1304 }, 1305 DriverVersionAttr: { 1306 String: helper.StringToPtr("1"), 1307 }, 1308 }, 1309 }, 1310 }, 1311 }, 1312 }, 1313 { 1314 Name: "Check that not working driver returns error fingeprint data", 1315 Device: &NvidiaDevice{ 1316 initErr: errors.New("foo"), 1317 nvmlClient: &MockNvmlClient{ 1318 FingerprintResponseReturned: &nvml.FingerprintData{ 1319 DriverVersion: "1", 1320 Devices: []*nvml.FingerprintDeviceData{ 1321 { 1322 DeviceData: &nvml.DeviceData{ 1323 UUID: "1", 1324 DeviceName: helper.StringToPtr("Name1"), 1325 MemoryMiB: helper.Uint64ToPtr(10), 1326 }, 1327 }, 1328 { 1329 DeviceData: &nvml.DeviceData{ 1330 UUID: "2", 1331 DeviceName: helper.StringToPtr("Name1"), 1332 MemoryMiB: helper.Uint64ToPtr(10), 1333 }, 1334 }, 1335 { 1336 DeviceData: &nvml.DeviceData{ 1337 UUID: "3", 1338 DeviceName: helper.StringToPtr("Name1"), 1339 MemoryMiB: helper.Uint64ToPtr(10), 1340 }, 1341 }, 1342 }, 1343 }, 1344 }, 1345 logger: hclog.NewNullLogger(), 1346 }, 1347 ExpectedWriteToChannel: &device.FingerprintResponse{ 1348 Error: errors.New("foo"), 1349 }, 1350 }, 1351 } { 1352 t.Run(testCase.Name, func(t *testing.T) { 1353 outCh := make(chan *device.FingerprintResponse) 1354 ctx, cancel := context.WithCancel(context.Background()) 1355 go testCase.Device.fingerprint(ctx, outCh) 1356 result := <-outCh 1357 cancel() 1358 require.New(t).Equal(result, testCase.ExpectedWriteToChannel) 1359 }) 1360 } 1361 }