github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/deploy/helm/dashboards/nvidia-gpu-exporter.json (about) 1 { 2 "annotations": { 3 "list": [ 4 { 5 "builtIn": 1, 6 "datasource": { 7 "type": "datasource", 8 "uid": "grafana" 9 }, 10 "enable": true, 11 "hide": true, 12 "iconColor": "rgba(0, 211, 255, 1)", 13 "name": "Annotations & Alerts", 14 "target": { 15 "limit": 100, 16 "matchAny": false, 17 "tags": [], 18 "type": "dashboard" 19 }, 20 "type": "dashboard" 21 } 22 ] 23 }, 24 "description": "Nvidia GPU Metrics based on the prometheus metrics from github.com/utkuozdemir/nvidia_gpu_exporter", 25 "editable": true, 26 "fiscalYearStartMonth": 0, 27 "gnetId": 14574, 28 "graphTooltip": 0, 29 "id": 11, 30 "links": [], 31 "liveNow": false, 32 "panels": [ 33 { 34 "datasource": { 35 "type": "prometheus", 36 "uid": "prometheus" 37 }, 38 "description": "The official product name of the GPU. This is an alphanumeric string. For all products.", 39 "fieldConfig": { 40 "defaults": { 41 "color": { 42 "mode": "thresholds" 43 }, 44 "decimals": 2, 45 "mappings": [], 46 "thresholds": { 47 "mode": "absolute", 48 "steps": [ 49 { 50 "color": "green", 51 "value": null 52 } 53 ] 54 }, 55 "unit": "none" 56 }, 57 "overrides": [] 58 }, 59 "gridPos": { 60 "h": 3, 61 "w": 4, 62 "x": 0, 63 "y": 0 64 }, 65 "id": 23, 66 "options": { 67 "colorMode": "value", 68 "graphMode": "none", 69 "justifyMode": "auto", 70 "orientation": "auto", 71 "reduceOptions": { 72 "calcs": [ 73 "last" 74 ], 75 "fields": "", 76 "values": false 77 }, 78 "text": {}, 79 "textMode": "name" 80 }, 81 "pluginVersion": "9.2.4", 82 "targets": [ 83 { 84 "datasource": { 85 "type": "prometheus", 86 "uid": "prometheus" 87 }, 88 "exemplar": true, 89 "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}", 90 "instant": true, 91 "interval": "", 92 "legendFormat": "{{name}}", 93 "refId": "A" 94 } 95 ], 96 "title": "Name", 97 "type": "stat" 98 }, 99 { 100 "datasource": { 101 "type": "prometheus", 102 "uid": "prometheus" 103 }, 104 
"description": "The current performance state for the GPU. States range from P0 (maximum performance) to P12 (minimum performance).", 105 "fieldConfig": { 106 "defaults": { 107 "color": { 108 "mode": "thresholds" 109 }, 110 "decimals": 0, 111 "mappings": [ 112 { 113 "options": { 114 "": { 115 "text": "" 116 } 117 }, 118 "type": "value" 119 } 120 ], 121 "thresholds": { 122 "mode": "absolute", 123 "steps": [ 124 { 125 "color": "green", 126 "value": null 127 } 128 ] 129 }, 130 "unit": "prefix:P" 131 }, 132 "overrides": [] 133 }, 134 "gridPos": { 135 "h": 3, 136 "w": 2, 137 "x": 4, 138 "y": 0 139 }, 140 "id": 22, 141 "options": { 142 "colorMode": "value", 143 "graphMode": "none", 144 "justifyMode": "auto", 145 "orientation": "auto", 146 "reduceOptions": { 147 "calcs": [ 148 "last" 149 ], 150 "fields": "", 151 "values": false 152 }, 153 "text": {}, 154 "textMode": "value" 155 }, 156 "pluginVersion": "9.2.4", 157 "targets": [ 158 { 159 "datasource": { 160 "type": "prometheus", 161 "uid": "prometheus" 162 }, 163 "exemplar": true, 164 "expr": "nvidia_smi_pstate{uuid=\"$gpu\"}", 165 "interval": "", 166 "legendFormat": "", 167 "refId": "A" 168 } 169 ], 170 "title": "P-State", 171 "type": "stat" 172 }, 173 { 174 "datasource": { 175 "type": "prometheus", 176 "uid": "prometheus" 177 }, 178 "description": "Percent of time over the past sample period during which one or more kernels was executing on the GPU.\nThe sample period may be between 1 second and 1/6 second depending on the product.", 179 "fieldConfig": { 180 "defaults": { 181 "color": { 182 "mode": "thresholds" 183 }, 184 "mappings": [], 185 "max": 1, 186 "min": 0, 187 "thresholds": { 188 "mode": "percentage", 189 "steps": [ 190 { 191 "color": "green", 192 "value": null 193 }, 194 { 195 "color": "#EAB839", 196 "value": 70 197 }, 198 { 199 "color": "red", 200 "value": 90 201 } 202 ] 203 }, 204 "unit": "percentunit" 205 }, 206 "overrides": [] 207 }, 208 "gridPos": { 209 "h": 5, 210 "w": 3, 211 "x": 6, 212 "y": 0 213 }, 214 
"id": 6, 215 "options": { 216 "orientation": "auto", 217 "reduceOptions": { 218 "calcs": [ 219 "last" 220 ], 221 "fields": "", 222 "values": false 223 }, 224 "showThresholdLabels": false, 225 "showThresholdMarkers": true, 226 "text": {} 227 }, 228 "pluginVersion": "9.2.4", 229 "targets": [ 230 { 231 "datasource": { 232 "type": "prometheus", 233 "uid": "prometheus" 234 }, 235 "exemplar": true, 236 "expr": "nvidia_smi_utilization_gpu_ratio{uuid=\"$gpu\"}", 237 "interval": "", 238 "legendFormat": "{{uuid}}", 239 "refId": "A" 240 } 241 ], 242 "title": "GPU Utilization %", 243 "transformations": [], 244 "type": "gauge" 245 }, 246 { 247 "datasource": { 248 "type": "prometheus", 249 "uid": "prometheus" 250 }, 251 "description": "The last measured power draw for the entire board, in watts. Only available if power management is supported. This reading is accurate to within +/- 5 watts / The software power limit in watts.", 252 "fieldConfig": { 253 "defaults": { 254 "color": { 255 "mode": "thresholds" 256 }, 257 "mappings": [], 258 "max": 1, 259 "min": 0, 260 "thresholds": { 261 "mode": "percentage", 262 "steps": [ 263 { 264 "color": "green", 265 "value": null 266 }, 267 { 268 "color": "#EAB839", 269 "value": 70 270 }, 271 { 272 "color": "red", 273 "value": 90 274 } 275 ] 276 }, 277 "unit": "percentunit" 278 }, 279 "overrides": [] 280 }, 281 "gridPos": { 282 "h": 5, 283 "w": 3, 284 "x": 9, 285 "y": 0 286 }, 287 "id": 21, 288 "options": { 289 "orientation": "auto", 290 "reduceOptions": { 291 "calcs": [ 292 "last" 293 ], 294 "fields": "", 295 "values": false 296 }, 297 "showThresholdLabels": false, 298 "showThresholdMarkers": true, 299 "text": {} 300 }, 301 "pluginVersion": "9.2.4", 302 "targets": [ 303 { 304 "datasource": { 305 "type": "prometheus", 306 "uid": "prometheus" 307 }, 308 "exemplar": true, 309 "expr": "nvidia_smi_power_draw_watts{uuid=\"$gpu\"} / nvidia_smi_power_default_limit_watts{uuid=\"$gpu\"}", 310 "interval": "", 311 "legendFormat": "", 312 "refId": "A" 313 
} 314 ], 315 "title": "Power Draw %", 316 "type": "gauge" 317 }, 318 { 319 "datasource": { 320 "type": "prometheus", 321 "uid": "prometheus" 322 }, 323 "description": "The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. This value may exceed 100% in certain cases. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.\n", 324 "fieldConfig": { 325 "defaults": { 326 "color": { 327 "mode": "thresholds" 328 }, 329 "mappings": [], 330 "max": 1, 331 "min": 0, 332 "thresholds": { 333 "mode": "percentage", 334 "steps": [ 335 { 336 "color": "green", 337 "value": null 338 }, 339 { 340 "color": "#EAB839", 341 "value": 70 342 }, 343 { 344 "color": "red", 345 "value": 90 346 } 347 ] 348 }, 349 "unit": "percentunit" 350 }, 351 "overrides": [] 352 }, 353 "gridPos": { 354 "h": 5, 355 "w": 3, 356 "x": 12, 357 "y": 0 358 }, 359 "id": 4, 360 "options": { 361 "orientation": "auto", 362 "reduceOptions": { 363 "calcs": [ 364 "last" 365 ], 366 "fields": "", 367 "values": false 368 }, 369 "showThresholdLabels": false, 370 "showThresholdMarkers": true, 371 "text": {} 372 }, 373 "pluginVersion": "9.2.4", 374 "targets": [ 375 { 376 "datasource": { 377 "type": "prometheus", 378 "uid": "prometheus" 379 }, 380 "exemplar": true, 381 "expr": "nvidia_smi_fan_speed_ratio{uuid=\"$gpu\"}", 382 "interval": "", 383 "legendFormat": "", 384 "refId": "A" 385 } 386 ], 387 "title": "Fan Speed %", 388 "type": "gauge" 389 }, 390 { 391 "datasource": { 392 "type": "prometheus", 393 "uid": "prometheus" 394 }, 395 "description": "Core GPU temperature. 
In degrees C.", 396 "fieldConfig": { 397 "defaults": { 398 "color": { 399 "mode": "thresholds" 400 }, 401 "mappings": [], 402 "max": 100, 403 "min": 0, 404 "thresholds": { 405 "mode": "absolute", 406 "steps": [ 407 { 408 "color": "green", 409 "value": null 410 }, 411 { 412 "color": "#EAB839", 413 "value": 70 414 }, 415 { 416 "color": "red", 417 "value": 80 418 } 419 ] 420 }, 421 "unit": "celsius" 422 }, 423 "overrides": [] 424 }, 425 "gridPos": { 426 "h": 5, 427 "w": 3, 428 "x": 15, 429 "y": 0 430 }, 431 "id": 16, 432 "options": { 433 "orientation": "auto", 434 "reduceOptions": { 435 "calcs": [ 436 "last" 437 ], 438 "fields": "", 439 "values": false 440 }, 441 "showThresholdLabels": false, 442 "showThresholdMarkers": true, 443 "text": {} 444 }, 445 "pluginVersion": "9.2.4", 446 "targets": [ 447 { 448 "datasource": { 449 "type": "prometheus", 450 "uid": "prometheus" 451 }, 452 "exemplar": true, 453 "expr": "nvidia_smi_temperature_gpu{uuid=\"$gpu\"}", 454 "interval": "", 455 "legendFormat": "{{uuid}}", 456 "refId": "A" 457 } 458 ], 459 "title": "Temperature", 460 "type": "gauge" 461 }, 462 { 463 "aliasColors": {}, 464 "bars": false, 465 "dashLength": 10, 466 "dashes": false, 467 "datasource": { 468 "type": "prometheus", 469 "uid": "prometheus" 470 }, 471 "description": "Percent of time over the past sample period during which global (device) memory was being read or written.\nThe sample period may be between 1 second and 1/6 second depending on the product.", 472 "fieldConfig": { 473 "defaults": { 474 "unit": "percentunit" 475 }, 476 "overrides": [] 477 }, 478 "fill": 1, 479 "fillGradient": 0, 480 "gridPos": { 481 "h": 5, 482 "w": 6, 483 "x": 18, 484 "y": 0 485 }, 486 "hiddenSeries": false, 487 "id": 11, 488 "legend": { 489 "avg": false, 490 "current": false, 491 "max": false, 492 "min": false, 493 "show": false, 494 "total": false, 495 "values": false 496 }, 497 "lines": true, 498 "linewidth": 1, 499 "nullPointMode": "null", 500 "options": { 501 "alertThreshold": 
true 502 }, 503 "percentage": false, 504 "pluginVersion": "9.2.4", 505 "pointradius": 2, 506 "points": false, 507 "renderer": "flot", 508 "seriesOverrides": [], 509 "spaceLength": 10, 510 "stack": false, 511 "steppedLine": false, 512 "targets": [ 513 { 514 "datasource": { 515 "type": "prometheus", 516 "uid": "prometheus" 517 }, 518 "exemplar": true, 519 "expr": "nvidia_smi_utilization_memory_ratio{uuid=\"$gpu\"}", 520 "interval": "", 521 "legendFormat": "{{uuid}}", 522 "refId": "A" 523 } 524 ], 525 "thresholds": [ 526 { 527 "$$hashKey": "object:1370", 528 "colorMode": "warning", 529 "fill": true, 530 "line": true, 531 "op": "gt", 532 "value": 0.7, 533 "yaxis": "left" 534 }, 535 { 536 "$$hashKey": "object:1376", 537 "colorMode": "critical", 538 "fill": true, 539 "line": true, 540 "op": "gt", 541 "value": 0.9, 542 "yaxis": "left" 543 } 544 ], 545 "timeRegions": [], 546 "title": "Memory Utilization %", 547 "tooltip": { 548 "shared": true, 549 "sort": 0, 550 "value_type": "individual" 551 }, 552 "type": "graph", 553 "xaxis": { 554 "mode": "time", 555 "show": true, 556 "values": [] 557 }, 558 "yaxes": [ 559 { 560 "$$hashKey": "object:1352", 561 "format": "percentunit", 562 "label": "", 563 "logBase": 1, 564 "max": "1", 565 "min": "0", 566 "show": true 567 }, 568 { 569 "$$hashKey": "object:1353", 570 "format": "short", 571 "logBase": 1, 572 "show": true 573 } 574 ], 575 "yaxis": { 576 "align": false 577 } 578 }, 579 { 580 "datasource": { 581 "type": "prometheus", 582 "uid": "prometheus" 583 }, 584 "description": "The version of the installed NVIDIA display driver. 
This is an alphanumeric string.", 585 "fieldConfig": { 586 "defaults": { 587 "color": { 588 "mode": "thresholds" 589 }, 590 "decimals": 2, 591 "mappings": [], 592 "thresholds": { 593 "mode": "absolute", 594 "steps": [ 595 { 596 "color": "green", 597 "value": null 598 } 599 ] 600 }, 601 "unit": "none" 602 }, 603 "overrides": [] 604 }, 605 "gridPos": { 606 "h": 2, 607 "w": 3, 608 "x": 0, 609 "y": 3 610 }, 611 "id": 14, 612 "options": { 613 "colorMode": "value", 614 "graphMode": "none", 615 "justifyMode": "auto", 616 "orientation": "auto", 617 "reduceOptions": { 618 "calcs": [ 619 "last" 620 ], 621 "fields": "", 622 "values": false 623 }, 624 "text": {}, 625 "textMode": "name" 626 }, 627 "pluginVersion": "9.2.4", 628 "targets": [ 629 { 630 "datasource": { 631 "type": "prometheus", 632 "uid": "prometheus" 633 }, 634 "exemplar": true, 635 "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}", 636 "instant": true, 637 "interval": "", 638 "legendFormat": "{{driver_version}}", 639 "refId": "A" 640 } 641 ], 642 "title": "Driver Version", 643 "type": "stat" 644 }, 645 { 646 "datasource": { 647 "type": "prometheus", 648 "uid": "prometheus" 649 }, 650 "description": "The BIOS of the GPU board.", 651 "fieldConfig": { 652 "defaults": { 653 "color": { 654 "mode": "thresholds" 655 }, 656 "decimals": 2, 657 "mappings": [], 658 "thresholds": { 659 "mode": "absolute", 660 "steps": [ 661 { 662 "color": "green", 663 "value": null 664 } 665 ] 666 }, 667 "unit": "none" 668 }, 669 "overrides": [] 670 }, 671 "gridPos": { 672 "h": 2, 673 "w": 3, 674 "x": 3, 675 "y": 3 676 }, 677 "id": 34, 678 "options": { 679 "colorMode": "value", 680 "graphMode": "none", 681 "justifyMode": "auto", 682 "orientation": "auto", 683 "reduceOptions": { 684 "calcs": [ 685 "last" 686 ], 687 "fields": "", 688 "values": false 689 }, 690 "text": {}, 691 "textMode": "name" 692 }, 693 "pluginVersion": "9.2.4", 694 "targets": [ 695 { 696 "datasource": { 697 "type": "prometheus", 698 "uid": "prometheus" 699 }, 700 "exemplar": 
true, 701 "expr": "nvidia_smi_gpu_info{uuid=\"$gpu\"}", 702 "instant": true, 703 "interval": "", 704 "legendFormat": "{{vbios_version}}", 705 "refId": "A" 706 } 707 ], 708 "title": "Vbios Version", 709 "type": "stat" 710 }, 711 { 712 "datasource": { 713 "type": "prometheus", 714 "uid": "prometheus" 715 }, 716 "description": "Information about factors that are reducing the frequency of clocks. If all throttle reasons are returned as \"Not Active\" it means that clocks are running as high as possible.", 717 "fieldConfig": { 718 "defaults": { 719 "color": { 720 "mode": "thresholds" 721 }, 722 "decimals": 0, 723 "mappings": [ 724 { 725 "options": { 726 "0": { 727 "text": "Not Active" 728 }, 729 "1": { 730 "text": "Active" 731 } 732 }, 733 "type": "value" 734 } 735 ], 736 "thresholds": { 737 "mode": "absolute", 738 "steps": [ 739 { 740 "color": "green", 741 "value": null 742 } 743 ] 744 }, 745 "unit": "none" 746 }, 747 "overrides": [] 748 }, 749 "gridPos": { 750 "h": 5, 751 "w": 6, 752 "x": 0, 753 "y": 5 754 }, 755 "id": 32, 756 "links": [], 757 "options": { 758 "displayMode": "gradient", 759 "minVizHeight": 10, 760 "minVizWidth": 0, 761 "orientation": "horizontal", 762 "reduceOptions": { 763 "calcs": [ 764 "last" 765 ], 766 "fields": "", 767 "values": false 768 }, 769 "showUnfilled": true, 770 "text": {} 771 }, 772 "pluginVersion": "9.2.4", 773 "targets": [ 774 { 775 "datasource": { 776 "type": "prometheus", 777 "uid": "prometheus" 778 }, 779 "exemplar": true, 780 "expr": "nvidia_smi_clocks_throttle_reasons_gpu_idle{uuid=\"$gpu\"}", 781 "instant": false, 782 "interval": "", 783 "legendFormat": "Idle", 784 "refId": "A" 785 }, 786 { 787 "datasource": { 788 "type": "prometheus", 789 "uid": "prometheus" 790 }, 791 "exemplar": true, 792 "expr": "nvidia_smi_clocks_throttle_reasons_hw_thermal_slowdown{uuid=\"$gpu\"}", 793 "hide": false, 794 "interval": "", 795 "legendFormat": "HW Thermal Slowdown", 796 "refId": "B" 797 }, 798 { 799 "datasource": { 800 "type": "prometheus", 
801 "uid": "prometheus" 802 }, 803 "exemplar": true, 804 "expr": "nvidia_smi_clocks_throttle_reasons_sw_power_cap{uuid=\"$gpu\"}", 805 "hide": false, 806 "interval": "", 807 "legendFormat": "SW Power Cap", 808 "refId": "C" 809 }, 810 { 811 "datasource": { 812 "type": "prometheus", 813 "uid": "prometheus" 814 }, 815 "exemplar": true, 816 "expr": "nvidia_smi_clocks_throttle_reasons_applications_clocks_setting{uuid=\"$gpu\"}", 817 "hide": false, 818 "interval": "", 819 "legendFormat": "App Clocks Setting", 820 "refId": "D" 821 }, 822 { 823 "datasource": { 824 "type": "prometheus", 825 "uid": "prometheus" 826 }, 827 "exemplar": true, 828 "expr": "nvidia_smi_clocks_throttle_reasons_hw_power_brake_slowdown{uuid=\"$gpu\"}", 829 "hide": false, 830 "interval": "", 831 "legendFormat": "HW Power Brake", 832 "refId": "E" 833 }, 834 { 835 "datasource": { 836 "type": "prometheus", 837 "uid": "prometheus" 838 }, 839 "exemplar": true, 840 "expr": "nvidia_smi_clocks_throttle_reasons_sw_thermal_slowdown{uuid=\"$gpu\"}", 841 "hide": false, 842 "interval": "", 843 "legendFormat": "SW Thermal Slowdown", 844 "refId": "F" 845 }, 846 { 847 "datasource": { 848 "type": "prometheus", 849 "uid": "prometheus" 850 }, 851 "exemplar": true, 852 "expr": "nvidia_smi_clocks_throttle_reasons_sync_boost{uuid=\"$gpu\"}", 853 "hide": false, 854 "interval": "", 855 "legendFormat": "Sync Boost", 856 "refId": "G" 857 } 858 ], 859 "title": "Throttle Reasons", 860 "type": "bargauge" 861 }, 862 { 863 "datasource": { 864 "type": "prometheus", 865 "uid": "prometheus" 866 }, 867 "description": "Current frequency of graphics (shader) clock\n/\nMaximum frequency of graphics (shader) clock.\n", 868 "fieldConfig": { 869 "defaults": { 870 "color": { 871 "mode": "thresholds" 872 }, 873 "mappings": [], 874 "max": 1, 875 "min": 0, 876 "thresholds": { 877 "mode": "percentage", 878 "steps": [ 879 { 880 "color": "green", 881 "value": null 882 }, 883 { 884 "color": "#EAB839", 885 "value": 70 886 }, 887 { 888 "color": "red", 
889 "value": 90 890 } 891 ] 892 }, 893 "unit": "percentunit" 894 }, 895 "overrides": [] 896 }, 897 "gridPos": { 898 "h": 5, 899 "w": 3, 900 "x": 6, 901 "y": 5 902 }, 903 "id": 20, 904 "options": { 905 "orientation": "auto", 906 "reduceOptions": { 907 "calcs": [ 908 "last" 909 ], 910 "fields": "", 911 "values": false 912 }, 913 "showThresholdLabels": false, 914 "showThresholdMarkers": true, 915 "text": {} 916 }, 917 "pluginVersion": "9.2.4", 918 "targets": [ 919 { 920 "datasource": { 921 "type": "prometheus", 922 "uid": "prometheus" 923 }, 924 "exemplar": true, 925 "expr": "nvidia_smi_clocks_current_graphics_clock_hz{uuid=\"$gpu\"} / nvidia_smi_clocks_max_graphics_clock_hz{uuid=\"$gpu\"}", 926 "interval": "", 927 "legendFormat": "", 928 "refId": "A" 929 } 930 ], 931 "title": "GPU Clock Speed %", 932 "type": "gauge" 933 }, 934 { 935 "datasource": { 936 "type": "prometheus", 937 "uid": "prometheus" 938 }, 939 "description": "Current frequency of memory clock / Maximum frequency of memory clock", 940 "fieldConfig": { 941 "defaults": { 942 "color": { 943 "mode": "thresholds" 944 }, 945 "mappings": [], 946 "max": 1, 947 "min": 0, 948 "thresholds": { 949 "mode": "percentage", 950 "steps": [ 951 { 952 "color": "green", 953 "value": null 954 }, 955 { 956 "color": "#EAB839", 957 "value": 70 958 }, 959 { 960 "color": "red", 961 "value": 90 962 } 963 ] 964 }, 965 "unit": "percentunit" 966 }, 967 "overrides": [] 968 }, 969 "gridPos": { 970 "h": 5, 971 "w": 3, 972 "x": 9, 973 "y": 5 974 }, 975 "id": 33, 976 "options": { 977 "orientation": "auto", 978 "reduceOptions": { 979 "calcs": [ 980 "last" 981 ], 982 "fields": "", 983 "values": false 984 }, 985 "showThresholdLabels": false, 986 "showThresholdMarkers": true, 987 "text": {} 988 }, 989 "pluginVersion": "9.2.4", 990 "targets": [ 991 { 992 "datasource": { 993 "type": "prometheus", 994 "uid": "prometheus" 995 }, 996 "exemplar": true, 997 "expr": "nvidia_smi_clocks_current_memory_clock_hz{uuid=\"$gpu\"} / 
nvidia_smi_clocks_max_memory_clock_hz{uuid=\"$gpu\"}", 998 "interval": "", 999 "legendFormat": "", 1000 "refId": "A" 1001 } 1002 ], 1003 "title": "Memory Clock Speed %", 1004 "type": "gauge" 1005 }, 1006 { 1007 "datasource": { 1008 "type": "prometheus", 1009 "uid": "prometheus" 1010 }, 1011 "description": "Total memory allocated by active contexts / Total installed GPU memory.", 1012 "fieldConfig": { 1013 "defaults": { 1014 "color": { 1015 "mode": "thresholds" 1016 }, 1017 "mappings": [], 1018 "max": 1, 1019 "min": 0, 1020 "thresholds": { 1021 "mode": "percentage", 1022 "steps": [ 1023 { 1024 "color": "green", 1025 "value": null 1026 }, 1027 { 1028 "color": "#EAB839", 1029 "value": 70 1030 }, 1031 { 1032 "color": "red", 1033 "value": 90 1034 } 1035 ] 1036 }, 1037 "unit": "percentunit" 1038 }, 1039 "overrides": [] 1040 }, 1041 "gridPos": { 1042 "h": 5, 1043 "w": 3, 1044 "x": 12, 1045 "y": 5 1046 }, 1047 "id": 25, 1048 "options": { 1049 "orientation": "auto", 1050 "reduceOptions": { 1051 "calcs": [ 1052 "last" 1053 ], 1054 "fields": "", 1055 "values": false 1056 }, 1057 "showThresholdLabels": false, 1058 "showThresholdMarkers": true, 1059 "text": {} 1060 }, 1061 "pluginVersion": "9.2.4", 1062 "targets": [ 1063 { 1064 "datasource": { 1065 "type": "prometheus", 1066 "uid": "prometheus" 1067 }, 1068 "exemplar": true, 1069 "expr": "nvidia_smi_memory_used_bytes{uuid=\"$gpu\"} / nvidia_smi_memory_total_bytes{uuid=\"$gpu\"}", 1070 "interval": "", 1071 "legendFormat": "", 1072 "refId": "A" 1073 } 1074 ], 1075 "title": "Memory Allocation %", 1076 "type": "gauge" 1077 }, 1078 { 1079 "datasource": { 1080 "type": "prometheus", 1081 "uid": "prometheus" 1082 }, 1083 "description": "Percent of time over the past sample period during which global (device) memory was being read or written.\nThe sample period may be between 1 second and 1/6 second depending on the product.", 1084 "fieldConfig": { 1085 "defaults": { 1086 "color": { 1087 "mode": "thresholds" 1088 }, 1089 "mappings": [], 
1090 "max": 1, 1091 "min": 0, 1092 "thresholds": { 1093 "mode": "percentage", 1094 "steps": [ 1095 { 1096 "color": "green", 1097 "value": null 1098 }, 1099 { 1100 "color": "#EAB839", 1101 "value": 70 1102 }, 1103 { 1104 "color": "red", 1105 "value": 90 1106 } 1107 ] 1108 }, 1109 "unit": "percentunit" 1110 }, 1111 "overrides": [] 1112 }, 1113 "gridPos": { 1114 "h": 5, 1115 "w": 3, 1116 "x": 15, 1117 "y": 5 1118 }, 1119 "id": 7, 1120 "options": { 1121 "orientation": "auto", 1122 "reduceOptions": { 1123 "calcs": [ 1124 "last" 1125 ], 1126 "fields": "", 1127 "values": false 1128 }, 1129 "showThresholdLabels": false, 1130 "showThresholdMarkers": true, 1131 "text": {} 1132 }, 1133 "pluginVersion": "9.2.4", 1134 "targets": [ 1135 { 1136 "datasource": { 1137 "type": "prometheus", 1138 "uid": "prometheus" 1139 }, 1140 "exemplar": true, 1141 "expr": "nvidia_smi_utilization_memory_ratio{uuid=\"$gpu\"}", 1142 "interval": "", 1143 "legendFormat": "", 1144 "refId": "A" 1145 } 1146 ], 1147 "title": "Memory Utilization %", 1148 "type": "gauge" 1149 }, 1150 { 1151 "aliasColors": {}, 1152 "bars": false, 1153 "dashLength": 10, 1154 "dashes": false, 1155 "datasource": { 1156 "type": "prometheus", 1157 "uid": "prometheus" 1158 }, 1159 "description": "Percent of time over the past sample period during which one or more kernels was executing on the GPU.\nThe sample period may be between 1 second and 1/6 second depending on the product.", 1160 "fieldConfig": { 1161 "defaults": { 1162 "unit": "percentunit" 1163 }, 1164 "overrides": [] 1165 }, 1166 "fill": 1, 1167 "fillGradient": 0, 1168 "gridPos": { 1169 "h": 5, 1170 "w": 6, 1171 "x": 18, 1172 "y": 5 1173 }, 1174 "hiddenSeries": false, 1175 "id": 10, 1176 "legend": { 1177 "avg": false, 1178 "current": false, 1179 "max": false, 1180 "min": false, 1181 "show": false, 1182 "total": false, 1183 "values": false 1184 }, 1185 "lines": true, 1186 "linewidth": 1, 1187 "nullPointMode": "null", 1188 "options": { 1189 "alertThreshold": true 1190 }, 
1191 "percentage": false, 1192 "pluginVersion": "9.2.4", 1193 "pointradius": 2, 1194 "points": false, 1195 "renderer": "flot", 1196 "seriesOverrides": [], 1197 "spaceLength": 10, 1198 "stack": false, 1199 "steppedLine": false, 1200 "targets": [ 1201 { 1202 "datasource": { 1203 "type": "prometheus", 1204 "uid": "prometheus" 1205 }, 1206 "exemplar": true, 1207 "expr": "nvidia_smi_utilization_gpu_ratio{uuid=\"$gpu\"}", 1208 "interval": "", 1209 "legendFormat": "", 1210 "refId": "A" 1211 } 1212 ], 1213 "thresholds": [ 1214 { 1215 "$$hashKey": "object:1370", 1216 "colorMode": "warning", 1217 "fill": true, 1218 "line": true, 1219 "op": "gt", 1220 "value": 0.7, 1221 "yaxis": "left" 1222 }, 1223 { 1224 "$$hashKey": "object:1376", 1225 "colorMode": "critical", 1226 "fill": true, 1227 "line": true, 1228 "op": "gt", 1229 "value": 0.9, 1230 "yaxis": "left" 1231 } 1232 ], 1233 "timeRegions": [], 1234 "title": "GPU Utilization %", 1235 "tooltip": { 1236 "shared": true, 1237 "sort": 0, 1238 "value_type": "individual" 1239 }, 1240 "type": "graph", 1241 "xaxis": { 1242 "mode": "time", 1243 "show": true, 1244 "values": [] 1245 }, 1246 "yaxes": [ 1247 { 1248 "$$hashKey": "object:1352", 1249 "format": "percentunit", 1250 "label": "", 1251 "logBase": 1, 1252 "max": "1", 1253 "min": "0", 1254 "show": true 1255 }, 1256 { 1257 "$$hashKey": "object:1353", 1258 "format": "short", 1259 "logBase": 1, 1260 "show": true 1261 } 1262 ], 1263 "yaxis": { 1264 "align": false 1265 } 1266 }, 1267 { 1268 "aliasColors": {}, 1269 "bars": false, 1270 "dashLength": 10, 1271 "dashes": false, 1272 "datasource": { 1273 "type": "prometheus", 1274 "uid": "prometheus" 1275 }, 1276 "description": "Total memory allocated by active contexts.", 1277 "fieldConfig": { 1278 "defaults": { 1279 "unit": "decbytes" 1280 }, 1281 "overrides": [] 1282 }, 1283 "fill": 1, 1284 "fillGradient": 0, 1285 "gridPos": { 1286 "h": 5, 1287 "w": 6, 1288 "x": 0, 1289 "y": 10 1290 }, 1291 "hiddenSeries": false, 1292 "id": 17, 1293 
"legend": { 1294 "avg": false, 1295 "current": false, 1296 "max": false, 1297 "min": false, 1298 "show": false, 1299 "total": false, 1300 "values": false 1301 }, 1302 "lines": true, 1303 "linewidth": 1, 1304 "nullPointMode": "null", 1305 "options": { 1306 "alertThreshold": true 1307 }, 1308 "percentage": false, 1309 "pluginVersion": "9.2.4", 1310 "pointradius": 2, 1311 "points": false, 1312 "renderer": "flot", 1313 "seriesOverrides": [], 1314 "spaceLength": 10, 1315 "stack": false, 1316 "steppedLine": false, 1317 "targets": [ 1318 { 1319 "datasource": { 1320 "type": "prometheus", 1321 "uid": "prometheus" 1322 }, 1323 "exemplar": true, 1324 "expr": "nvidia_smi_memory_used_bytes{uuid=\"$gpu\"}", 1325 "interval": "", 1326 "legendFormat": "{{uuid}}", 1327 "refId": "A" 1328 } 1329 ], 1330 "thresholds": [], 1331 "timeRegions": [], 1332 "title": "Memory Allocation", 1333 "tooltip": { 1334 "shared": true, 1335 "sort": 0, 1336 "value_type": "individual" 1337 }, 1338 "type": "graph", 1339 "xaxis": { 1340 "mode": "time", 1341 "show": true, 1342 "values": [] 1343 }, 1344 "yaxes": [ 1345 { 1346 "$$hashKey": "object:1352", 1347 "format": "decbytes", 1348 "label": "", 1349 "logBase": 1, 1350 "min": "0", 1351 "show": true 1352 }, 1353 { 1354 "$$hashKey": "object:1353", 1355 "format": "short", 1356 "logBase": 1, 1357 "show": true 1358 } 1359 ], 1360 "yaxis": { 1361 "align": false 1362 } 1363 }, 1364 { 1365 "aliasColors": {}, 1366 "bars": false, 1367 "dashLength": 10, 1368 "dashes": false, 1369 "datasource": { 1370 "type": "prometheus", 1371 "uid": "prometheus" 1372 }, 1373 "description": "Core GPU temperature. 
In degrees C.", 1374 "fieldConfig": { 1375 "defaults": { 1376 "unit": "celsius" 1377 }, 1378 "overrides": [] 1379 }, 1380 "fill": 1, 1381 "fillGradient": 0, 1382 "gridPos": { 1383 "h": 5, 1384 "w": 6, 1385 "x": 6, 1386 "y": 10 1387 }, 1388 "hiddenSeries": false, 1389 "id": 15, 1390 "legend": { 1391 "avg": false, 1392 "current": false, 1393 "max": false, 1394 "min": false, 1395 "show": false, 1396 "total": false, 1397 "values": false 1398 }, 1399 "lines": true, 1400 "linewidth": 1, 1401 "nullPointMode": "null", 1402 "options": { 1403 "alertThreshold": true 1404 }, 1405 "percentage": false, 1406 "pluginVersion": "9.2.4", 1407 "pointradius": 2, 1408 "points": false, 1409 "renderer": "flot", 1410 "seriesOverrides": [], 1411 "spaceLength": 10, 1412 "stack": false, 1413 "steppedLine": false, 1414 "targets": [ 1415 { 1416 "datasource": { 1417 "type": "prometheus", 1418 "uid": "prometheus" 1419 }, 1420 "exemplar": true, 1421 "expr": "nvidia_smi_temperature_gpu{uuid=\"$gpu\"}", 1422 "interval": "", 1423 "legendFormat": "{{uuid}}", 1424 "refId": "A" 1425 } 1426 ], 1427 "thresholds": [ 1428 { 1429 "$$hashKey": "object:1805", 1430 "colorMode": "warning", 1431 "fill": true, 1432 "line": true, 1433 "op": "gt", 1434 "value": 70, 1435 "yaxis": "left" 1436 }, 1437 { 1438 "$$hashKey": "object:1811", 1439 "colorMode": "critical", 1440 "fill": true, 1441 "line": true, 1442 "op": "gt", 1443 "value": 80, 1444 "yaxis": "left" 1445 } 1446 ], 1447 "timeRegions": [], 1448 "title": "Temperature", 1449 "tooltip": { 1450 "shared": true, 1451 "sort": 0, 1452 "value_type": "individual" 1453 }, 1454 "type": "graph", 1455 "xaxis": { 1456 "mode": "time", 1457 "show": true, 1458 "values": [] 1459 }, 1460 "yaxes": [ 1461 { 1462 "$$hashKey": "object:1761", 1463 "format": "celsius", 1464 "label": "", 1465 "logBase": 1, 1466 "max": "100", 1467 "min": "0", 1468 "show": true 1469 }, 1470 { 1471 "$$hashKey": "object:1762", 1472 "format": "short", 1473 "logBase": 1, 1474 "show": true 1475 } 1476 ], 1477 
"yaxis": { 1478 "align": false 1479 } 1480 }, 1481 { 1482 "aliasColors": {}, 1483 "bars": false, 1484 "dashLength": 10, 1485 "dashes": false, 1486 "datasource": { 1487 "type": "prometheus", 1488 "uid": "prometheus" 1489 }, 1490 "description": "The last measured power draw for the entire board, in watts. Only available if power management is supported. This reading is accurate to within +/- 5 watts", 1491 "fieldConfig": { 1492 "defaults": { 1493 "unit": "watt" 1494 }, 1495 "overrides": [] 1496 }, 1497 "fill": 1, 1498 "fillGradient": 0, 1499 "gridPos": { 1500 "h": 5, 1501 "w": 6, 1502 "x": 12, 1503 "y": 10 1504 }, 1505 "hiddenSeries": false, 1506 "id": 8, 1507 "legend": { 1508 "avg": false, 1509 "current": false, 1510 "max": false, 1511 "min": false, 1512 "show": false, 1513 "total": false, 1514 "values": false 1515 }, 1516 "lines": true, 1517 "linewidth": 1, 1518 "nullPointMode": "null", 1519 "options": { 1520 "alertThreshold": true 1521 }, 1522 "percentage": false, 1523 "pluginVersion": "9.2.4", 1524 "pointradius": 2, 1525 "points": false, 1526 "renderer": "flot", 1527 "seriesOverrides": [], 1528 "spaceLength": 10, 1529 "stack": false, 1530 "steppedLine": false, 1531 "targets": [ 1532 { 1533 "datasource": { 1534 "type": "prometheus", 1535 "uid": "prometheus" 1536 }, 1537 "exemplar": true, 1538 "expr": "nvidia_smi_power_draw_watts{uuid=\"$gpu\"}", 1539 "interval": "", 1540 "legendFormat": "{{uuid}}", 1541 "refId": "A" 1542 } 1543 ], 1544 "thresholds": [], 1545 "timeRegions": [], 1546 "title": "Power Draw", 1547 "tooltip": { 1548 "shared": true, 1549 "sort": 0, 1550 "value_type": "individual" 1551 }, 1552 "type": "graph", 1553 "xaxis": { 1554 "mode": "time", 1555 "show": true, 1556 "values": [] 1557 }, 1558 "yaxes": [ 1559 { 1560 "$$hashKey": "object:658", 1561 "format": "watt", 1562 "logBase": 1, 1563 "min": "0", 1564 "show": true 1565 }, 1566 { 1567 "$$hashKey": "object:659", 1568 "format": "short", 1569 "logBase": 1, 1570 "show": true 1571 } 1572 ], 1573 "yaxis": 
{ 1574 "align": false 1575 } 1576 }, 1577 { 1578 "aliasColors": {}, 1579 "bars": false, 1580 "dashLength": 10, 1581 "dashes": false, 1582 "datasource": { 1583 "type": "prometheus", 1584 "uid": "prometheus" 1585 }, 1586 "description": "The fan speed value is the percent of the product's maximum noise tolerance fan speed that the device's fan is currently intended to run at. This value may exceed 100% in certain cases. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure.\n", 1587 "fieldConfig": { 1588 "defaults": { 1589 "unit": "percentunit" 1590 }, 1591 "overrides": [] 1592 }, 1593 "fill": 1, 1594 "fillGradient": 0, 1595 "gridPos": { 1596 "h": 5, 1597 "w": 6, 1598 "x": 18, 1599 "y": 10 1600 }, 1601 "hiddenSeries": false, 1602 "id": 9, 1603 "legend": { 1604 "avg": false, 1605 "current": false, 1606 "max": false, 1607 "min": false, 1608 "show": false, 1609 "total": false, 1610 "values": false 1611 }, 1612 "lines": true, 1613 "linewidth": 1, 1614 "nullPointMode": "null", 1615 "options": { 1616 "alertThreshold": true 1617 }, 1618 "percentage": false, 1619 "pluginVersion": "9.2.4", 1620 "pointradius": 2, 1621 "points": false, 1622 "renderer": "flot", 1623 "seriesOverrides": [], 1624 "spaceLength": 10, 1625 "stack": false, 1626 "steppedLine": false, 1627 "targets": [ 1628 { 1629 "datasource": { 1630 "type": "prometheus", 1631 "uid": "prometheus" 1632 }, 1633 "exemplar": true, 1634 "expr": "nvidia_smi_fan_speed_ratio{uuid=\"$gpu\"}", 1635 "interval": "", 1636 "legendFormat": "{{uuid}}", 1637 "refId": "A" 1638 } 1639 ], 1640 "thresholds": [ 1641 { 1642 "$$hashKey": "object:1168", 1643 "colorMode": "critical", 1644 "fill": true, 1645 "line": true, 1646 "op": "gt", 1647 "value": 0.9, 1648 "yaxis": "left" 1649 }, 1650 { 1651 "$$hashKey": "object:1174", 1652 "colorMode": 
"warning", 1653 "fill": true, 1654 "line": true, 1655 "op": "gt", 1656 "value": 0.7, 1657 "yaxis": "left" 1658 } 1659 ], 1660 "timeRegions": [], 1661 "title": "Fan Speed %", 1662 "tooltip": { 1663 "shared": true, 1664 "sort": 0, 1665 "value_type": "individual" 1666 }, 1667 "type": "graph", 1668 "xaxis": { 1669 "mode": "time", 1670 "show": true, 1671 "values": [] 1672 }, 1673 "yaxes": [ 1674 { 1675 "$$hashKey": "object:1275", 1676 "format": "percentunit", 1677 "logBase": 1, 1678 "max": "1", 1679 "min": "0", 1680 "show": true 1681 }, 1682 { 1683 "$$hashKey": "object:1276", 1684 "format": "short", 1685 "logBase": 1, 1686 "show": true 1687 } 1688 ], 1689 "yaxis": { 1690 "align": false 1691 } 1692 }, 1693 { 1694 "aliasColors": {}, 1695 "bars": false, 1696 "dashLength": 10, 1697 "dashes": false, 1698 "datasource": { 1699 "type": "prometheus", 1700 "uid": "prometheus" 1701 }, 1702 "description": "Current frequency of graphics (shader) clock.", 1703 "fieldConfig": { 1704 "defaults": { 1705 "unit": "hertz" 1706 }, 1707 "overrides": [] 1708 }, 1709 "fill": 1, 1710 "fillGradient": 0, 1711 "gridPos": { 1712 "h": 5, 1713 "w": 6, 1714 "x": 0, 1715 "y": 15 1716 }, 1717 "hiddenSeries": false, 1718 "id": 12, 1719 "legend": { 1720 "avg": false, 1721 "current": false, 1722 "max": false, 1723 "min": false, 1724 "show": false, 1725 "total": false, 1726 "values": false 1727 }, 1728 "lines": true, 1729 "linewidth": 1, 1730 "nullPointMode": "null", 1731 "options": { 1732 "alertThreshold": true 1733 }, 1734 "percentage": false, 1735 "pluginVersion": "9.2.4", 1736 "pointradius": 2, 1737 "points": false, 1738 "renderer": "flot", 1739 "seriesOverrides": [], 1740 "spaceLength": 10, 1741 "stack": false, 1742 "steppedLine": false, 1743 "targets": [ 1744 { 1745 "datasource": { 1746 "type": "prometheus", 1747 "uid": "prometheus" 1748 }, 1749 "exemplar": true, 1750 "expr": "nvidia_smi_clocks_current_graphics_clock_hz{uuid=\"$gpu\"}", 1751 "format": "time_series", 1752 "interval": "", 1753 
"legendFormat": "{{uuid}}", 1754 "refId": "A" 1755 } 1756 ], 1757 "thresholds": [], 1758 "timeRegions": [], 1759 "title": "Graphics Clock Speed", 1760 "tooltip": { 1761 "shared": true, 1762 "sort": 0, 1763 "value_type": "individual" 1764 }, 1765 "type": "graph", 1766 "xaxis": { 1767 "mode": "time", 1768 "show": true, 1769 "values": [] 1770 }, 1771 "yaxes": [ 1772 { 1773 "$$hashKey": "object:1642", 1774 "format": "hertz", 1775 "logBase": 1, 1776 "min": "0", 1777 "show": true 1778 }, 1779 { 1780 "$$hashKey": "object:1643", 1781 "format": "short", 1782 "logBase": 1, 1783 "show": true 1784 } 1785 ], 1786 "yaxis": { 1787 "align": false 1788 } 1789 }, 1790 { 1791 "aliasColors": {}, 1792 "bars": false, 1793 "dashLength": 10, 1794 "dashes": false, 1795 "datasource": { 1796 "type": "prometheus", 1797 "uid": "prometheus" 1798 }, 1799 "description": "Current frequency of video encoder/decoder clock.", 1800 "fieldConfig": { 1801 "defaults": { 1802 "unit": "hertz" 1803 }, 1804 "overrides": [] 1805 }, 1806 "fill": 1, 1807 "fillGradient": 0, 1808 "gridPos": { 1809 "h": 5, 1810 "w": 6, 1811 "x": 6, 1812 "y": 15 1813 }, 1814 "hiddenSeries": false, 1815 "id": 19, 1816 "legend": { 1817 "avg": false, 1818 "current": false, 1819 "max": false, 1820 "min": false, 1821 "show": false, 1822 "total": false, 1823 "values": false 1824 }, 1825 "lines": true, 1826 "linewidth": 1, 1827 "nullPointMode": "null", 1828 "options": { 1829 "alertThreshold": true 1830 }, 1831 "percentage": false, 1832 "pluginVersion": "9.2.4", 1833 "pointradius": 2, 1834 "points": false, 1835 "renderer": "flot", 1836 "seriesOverrides": [], 1837 "spaceLength": 10, 1838 "stack": false, 1839 "steppedLine": false, 1840 "targets": [ 1841 { 1842 "datasource": { 1843 "type": "prometheus", 1844 "uid": "prometheus" 1845 }, 1846 "exemplar": true, 1847 "expr": "nvidia_smi_clocks_current_video_clock_hz{uuid=\"$gpu\"}", 1848 "format": "time_series", 1849 "interval": "", 1850 "legendFormat": "{{uuid}}", 1851 "refId": "A" 1852 } 1853 
], 1854 "thresholds": [], 1855 "timeRegions": [], 1856 "title": "Video Clock Speed", 1857 "tooltip": { 1858 "shared": true, 1859 "sort": 0, 1860 "value_type": "individual" 1861 }, 1862 "type": "graph", 1863 "xaxis": { 1864 "mode": "time", 1865 "show": true, 1866 "values": [] 1867 }, 1868 "yaxes": [ 1869 { 1870 "$$hashKey": "object:1642", 1871 "format": "hertz", 1872 "logBase": 1, 1873 "min": "0", 1874 "show": true 1875 }, 1876 { 1877 "$$hashKey": "object:1643", 1878 "format": "short", 1879 "logBase": 1, 1880 "show": true 1881 } 1882 ], 1883 "yaxis": { 1884 "align": false 1885 } 1886 }, 1887 { 1888 "aliasColors": {}, 1889 "bars": false, 1890 "dashLength": 10, 1891 "dashes": false, 1892 "datasource": { 1893 "type": "prometheus", 1894 "uid": "prometheus" 1895 }, 1896 "description": "Current frequency of SM (Streaming Multiprocessor) clock.", 1897 "fieldConfig": { 1898 "defaults": { 1899 "unit": "hertz" 1900 }, 1901 "overrides": [] 1902 }, 1903 "fill": 1, 1904 "fillGradient": 0, 1905 "gridPos": { 1906 "h": 5, 1907 "w": 6, 1908 "x": 12, 1909 "y": 15 1910 }, 1911 "hiddenSeries": false, 1912 "id": 24, 1913 "legend": { 1914 "avg": false, 1915 "current": false, 1916 "max": false, 1917 "min": false, 1918 "show": false, 1919 "total": false, 1920 "values": false 1921 }, 1922 "lines": true, 1923 "linewidth": 1, 1924 "nullPointMode": "null", 1925 "options": { 1926 "alertThreshold": true 1927 }, 1928 "percentage": false, 1929 "pluginVersion": "9.2.4", 1930 "pointradius": 2, 1931 "points": false, 1932 "renderer": "flot", 1933 "seriesOverrides": [], 1934 "spaceLength": 10, 1935 "stack": false, 1936 "steppedLine": false, 1937 "targets": [ 1938 { 1939 "datasource": { 1940 "type": "prometheus", 1941 "uid": "prometheus" 1942 }, 1943 "exemplar": true, 1944 "expr": "nvidia_smi_clocks_current_sm_clock_hz{uuid=\"$gpu\"}", 1945 "format": "time_series", 1946 "interval": "", 1947 "legendFormat": "{{uuid}}", 1948 "refId": "A" 1949 } 1950 ], 1951 "thresholds": [], 1952 "timeRegions": [], 1953 
"title": "SM Clock Speed", 1954 "tooltip": { 1955 "shared": true, 1956 "sort": 0, 1957 "value_type": "individual" 1958 }, 1959 "type": "graph", 1960 "xaxis": { 1961 "mode": "time", 1962 "show": true, 1963 "values": [] 1964 }, 1965 "yaxes": [ 1966 { 1967 "$$hashKey": "object:1642", 1968 "format": "hertz", 1969 "logBase": 1, 1970 "min": "0", 1971 "show": true 1972 }, 1973 { 1974 "$$hashKey": "object:1643", 1975 "format": "short", 1976 "logBase": 1, 1977 "show": true 1978 } 1979 ], 1980 "yaxis": { 1981 "align": false 1982 } 1983 }, 1984 { 1985 "aliasColors": {}, 1986 "bars": false, 1987 "dashLength": 10, 1988 "dashes": false, 1989 "datasource": { 1990 "type": "prometheus", 1991 "uid": "prometheus" 1992 }, 1993 "description": "Current frequency of memory clock.", 1994 "fieldConfig": { 1995 "defaults": { 1996 "unit": "hertz" 1997 }, 1998 "overrides": [] 1999 }, 2000 "fill": 1, 2001 "fillGradient": 0, 2002 "gridPos": { 2003 "h": 5, 2004 "w": 6, 2005 "x": 18, 2006 "y": 15 2007 }, 2008 "hiddenSeries": false, 2009 "id": 18, 2010 "legend": { 2011 "avg": false, 2012 "current": false, 2013 "max": false, 2014 "min": false, 2015 "show": false, 2016 "total": false, 2017 "values": false 2018 }, 2019 "lines": true, 2020 "linewidth": 1, 2021 "nullPointMode": "null", 2022 "options": { 2023 "alertThreshold": true 2024 }, 2025 "percentage": false, 2026 "pluginVersion": "9.2.4", 2027 "pointradius": 2, 2028 "points": false, 2029 "renderer": "flot", 2030 "seriesOverrides": [], 2031 "spaceLength": 10, 2032 "stack": false, 2033 "steppedLine": false, 2034 "targets": [ 2035 { 2036 "datasource": { 2037 "type": "prometheus", 2038 "uid": "prometheus" 2039 }, 2040 "exemplar": true, 2041 "expr": "nvidia_smi_clocks_current_memory_clock_hz{uuid=\"$gpu\"}", 2042 "format": "time_series", 2043 "interval": "", 2044 "legendFormat": "{{uuid}}", 2045 "refId": "A" 2046 } 2047 ], 2048 "thresholds": [], 2049 "timeRegions": [], 2050 "title": "Memory Clock Speed", 2051 "tooltip": { 2052 "shared": true, 2053 
"sort": 0, 2054 "value_type": "individual" 2055 }, 2056 "type": "graph", 2057 "xaxis": { 2058 "mode": "time", 2059 "show": true, 2060 "values": [] 2061 }, 2062 "yaxes": [ 2063 { 2064 "$$hashKey": "object:1642", 2065 "format": "hertz", 2066 "logBase": 1, 2067 "min": "0", 2068 "show": true 2069 }, 2070 { 2071 "$$hashKey": "object:1643", 2072 "format": "short", 2073 "logBase": 1, 2074 "show": true 2075 } 2076 ], 2077 "yaxis": { 2078 "align": false 2079 } 2080 } 2081 ], 2082 "refresh": "10s", 2083 "schemaVersion": 37, 2084 "style": "dark", 2085 "tags": [ 2086 "nvidia", 2087 "nvidia-smi", 2088 "nvidia_gpu_exporter", 2089 "prometheus" 2090 ], 2091 "templating": { 2092 "list": [ 2093 { 2094 "current": { 2095 "isNone": true, 2096 "selected": false, 2097 "text": "None", 2098 "value": "" 2099 }, 2100 "datasource": { 2101 "type": "prometheus", 2102 "uid": "prometheus" 2103 }, 2104 "definition": "label_values(nvidia_smi_index, node)", 2105 "hide": 0, 2106 "includeAll": false, 2107 "label": "node", 2108 "multi": false, 2109 "name": "node", 2110 "options": [], 2111 "query": { 2112 "query": "label_values(nvidia_smi_index, node)", 2113 "refId": "StandardVariableQuery" 2114 }, 2115 "refresh": 1, 2116 "regex": "", 2117 "skipUrlSync": false, 2118 "sort": 1, 2119 "tagValuesQuery": "", 2120 "tagsQuery": "", 2121 "type": "query", 2122 "useTags": false 2123 }, 2124 { 2125 "current": { 2126 "isNone": true, 2127 "selected": false, 2128 "text": "None", 2129 "value": "" 2130 }, 2131 "datasource": { 2132 "type": "prometheus", 2133 "uid": "prometheus" 2134 }, 2135 "definition": "label_values(nvidia_smi_index{node=\"$node\"}, uuid)", 2136 "hide": 0, 2137 "includeAll": false, 2138 "label": "GPU", 2139 "multi": false, 2140 "name": "gpu", 2141 "options": [], 2142 "query": { 2143 "query": "label_values(nvidia_smi_index{node=\"$node\"}, uuid)", 2144 "refId": "StandardVariableQuery" 2145 }, 2146 "refresh": 1, 2147 "regex": "", 2148 "skipUrlSync": false, 2149 "sort": 1, 2150 "tagValuesQuery": "", 2151 
"tagsQuery": "", 2152 "type": "query", 2153 "useTags": false 2154 } 2155 ] 2156 }, 2157 "time": { 2158 "from": "now-30m", 2159 "to": "now" 2160 }, 2161 "timepicker": {}, 2162 "timezone": "", 2163 "title": "Nvidia GPU Metrics", 2164 "uid": "vlvPlrgnk", 2165 "version": 7, 2166 "weekStart": "" 2167 }