github.com/netdata/go.d.plugin@v0.58.1/modules/nvidia_smi/metadata.yaml (about)

     1  plugin_name: go.d.plugin
     2  modules:
     3    - meta:
     4        id: collector-go.d.plugin-nvidia_smi
     5        plugin_name: go.d.plugin
     6        module_name: nvidia_smi
     7        monitored_instance:
     8          name: Nvidia GPU
     9          link: https://www.nvidia.com/en-us/
    10          icon_filename: nvidia.svg
    11          categories:
    12            - data-collection.hardware-devices-and-sensors
    13        keywords:
    14          - nvidia
    15          - gpu
    16          - hardware
    17        related_resources:
    18          integrations:
    19            list: []
    20        info_provided_to_referring_integrations:
    21          description: ""
    22        most_popular: false
    23      overview:
    24        data_collection:
    25          metrics_description: |
    26            This collector monitors GPU performance metrics using
    27            the [nvidia-smi](https://developer.nvidia.com/nvidia-system-management-interface) CLI tool.
    28            
    29            > **Warning**: This collector is under development; [loop mode](https://github.com/netdata/netdata/issues/14522) is not implemented yet.
    30          method_description: ""
    31        supported_platforms:
    32          include: []
    33          exclude: []
    34        multi_instance: true
    35        additional_permissions:
    36          description: ""
    37        default_behavior:
    38          auto_detection:
    39            description: ""
    40          limits:
    41            description: ""
    42          performance_impact:
    43            description: ""
    44      setup:
    45        prerequisites:
    46          list:
    47            - title: Enable in go.d.conf.
    48              description: |
    49                This collector is disabled by default. You need to explicitly enable it in the `go.d.conf` file.
    50        configuration:
    51          file:
    52            name: go.d/nvidia_smi.conf
    53          options:
    54            description: |
    55              The following options can be defined globally: update_every, autodetection_retry.
    56            folding:
    57              title: Config options
    58              enabled: true
    59            list:
    60              - name: update_every
    61                description: Data collection frequency.
    62                default_value: 10
    63                required: false
    64              - name: autodetection_retry
    65                description: Recheck interval in seconds. Zero means no recheck will be scheduled.
    66                default_value: 0
    67                required: false
    68              - name: binary_path
    69                description: Path to nvidia_smi binary. The default is "nvidia_smi" and the executable is looked for in the directories specified in the PATH environment variable.
    70                default_value: nvidia_smi
    71                required: false
    72              - name: timeout
    73                description: nvidia_smi binary execution timeout.
    74                default_value: 2
    75                required: false
    76              - name: use_csv_format
    77                description: Format used when requesting GPU information. XML is used if set to 'no'.
    78                default_value: true
    79                required: false
    80                details: |
    81                  This module supports data collection in CSV and XML formats. The default is CSV.
    82                  
    83                  - XML provides more metrics, but requesting GPU information consumes more CPU, especially if there are multiple GPUs in the system.
    84                  - CSV provides fewer metrics, but is much lighter than XML in terms of CPU usage.
    85          examples:
    86            folding:
    87              title: Config
    88              enabled: true
    89            list:
    90              - name: XML format
    91                description: Use XML format when requesting GPU information.
    92                config: |
    93                  jobs:
    94                    - name: nvidia_smi
    95                      use_csv_format: no
    96              - name: Custom binary path
    97                description: The executable is not in the directories specified in the PATH environment variable.
    98                config: |
    99                  jobs:
   100                    - name: nvidia_smi
   101                      binary_path: /usr/local/sbin/nvidia_smi
   102      troubleshooting:
   103        problems:
   104          list: []
   105      alerts: []
   106      metrics:
   107        folding:
   108          title: Metrics
   109          enabled: false
   110        description: ""
   111        availability:
   112          - XML
   113          - CSV
   114        scopes:
   115          - name: gpu
   116            description: These metrics refer to the GPU.
   117            labels:
   118              - name: uuid
   119                description: GPU id (e.g. 00000000:00:04.0)
   120              - name: product_name
   121                description: GPU product name (e.g. NVIDIA A100-SXM4-40GB)
   122            metrics:
   123              - name: nvidia_smi.gpu_pcie_bandwidth_usage
   124                availability:
   125                  - XML
   126                description: PCI Express Bandwidth Usage
   127                unit: B/s
   128                chart_type: line
   129                dimensions:
   130                  - name: rx
   131                  - name: tx
   132              - name: nvidia_smi.gpu_pcie_bandwidth_utilization
   133                availability:
   134                  - XML
   135                description: PCI Express Bandwidth Utilization
   136                unit: '%'
   137                chart_type: line
   138                dimensions:
   139                  - name: rx
   140                  - name: tx
   141              - name: nvidia_smi.gpu_fan_speed_perc
   142                availability:
   143                  - XML
   144                  - CSV
   145                description: Fan speed
   146                unit: '%'
   147                chart_type: line
   148                dimensions:
   149                  - name: fan_speed
   150              - name: nvidia_smi.gpu_utilization
   151                availability:
   152                  - XML
   153                  - CSV
   154                description: GPU utilization
   155                unit: '%'
   156                chart_type: line
   157                dimensions:
   158                  - name: gpu
   159              - name: nvidia_smi.gpu_memory_utilization
   160                availability:
   161                  - XML
   162                  - CSV
   163                description: Memory utilization
   164                unit: '%'
   165                chart_type: line
   166                dimensions:
   167                  - name: memory
   168              - name: nvidia_smi.gpu_decoder_utilization
   169                availability:
   170                  - XML
   171                description: Decoder utilization
   172                unit: '%'
   173                chart_type: line
   174                dimensions:
   175                  - name: decoder
   176              - name: nvidia_smi.gpu_encoder_utilization
   177                availability:
   178                  - XML
   179                description: Encoder utilization
   180                unit: '%'
   181                chart_type: line
   182                dimensions:
   183                  - name: encoder
   184              - name: nvidia_smi.gpu_frame_buffer_memory_usage
   185                availability:
   186                  - XML
   187                  - CSV
   188                description: Frame buffer memory usage
   189                unit: B
   190                chart_type: stacked
   191                dimensions:
   192                  - name: free
   193                  - name: used
   194                  - name: reserved
   195              - name: nvidia_smi.gpu_bar1_memory_usage
   196                availability:
   197                  - XML
   198                description: BAR1 memory usage
   199                unit: B
   200                chart_type: stacked
   201                dimensions:
   202                  - name: free
   203                  - name: used
   204              - name: nvidia_smi.gpu_temperature
   205                availability:
   206                  - XML
   207                  - CSV
   208                description: Temperature
   209                unit: Celsius
   210                chart_type: line
   211                dimensions:
   212                  - name: temperature
   213              - name: nvidia_smi.gpu_voltage
   214                availability:
   215                  - XML
   216                description: Voltage
   217                unit: V
   218                chart_type: line
   219                dimensions:
   220                  - name: voltage
   221              - name: nvidia_smi.gpu_clock_freq
   222                availability:
   223                  - XML
   224                  - CSV
   225                description: Clock current frequency
   226                unit: MHz
   227                chart_type: line
   228                dimensions:
   229                  - name: graphics
   230                  - name: video
   231                  - name: sm
   232                  - name: mem
   233              - name: nvidia_smi.gpu_power_draw
   234                availability:
   235                  - XML
   236                  - CSV
   237                description: Power draw
   238                unit: Watts
   239                chart_type: line
   240                dimensions:
   241                  - name: power_draw
   242              - name: nvidia_smi.gpu_performance_state
   243                availability:
   244                  - XML
   245                  - CSV
   246                description: Performance state
   247                unit: state
   248                chart_type: line
   249                dimensions:
   250                  - name: P0-P15
   251              - name: nvidia_smi.gpu_mig_mode_current_status
   252                availability:
   253                  - XML
   254                description: MIG current mode
   255                unit: status
   256                chart_type: line
   257                dimensions:
   258                  - name: enabled
   259                  - name: disabled
   260              - name: nvidia_smi.gpu_mig_devices_count
   261                availability:
   262                  - XML
   263                description: MIG devices
   264                unit: devices
   265                chart_type: line
   266                dimensions:
   267                  - name: mig
   268          - name: mig
   269            description: These metrics refer to the Multi-Instance GPU (MIG).
   270            labels:
   271              - name: uuid
   272                description: GPU id (e.g. 00000000:00:04.0)
   273              - name: product_name
   274                description: GPU product name (e.g. NVIDIA A100-SXM4-40GB)
   275              - name: gpu_instance_id
   276                description: GPU instance id (e.g. 1)
   277            metrics:
   278              - name: nvidia_smi.gpu_mig_frame_buffer_memory_usage
   279                availability:
   280                  - XML
   281                description: Frame buffer memory usage
   282                unit: B
   283                chart_type: stacked
   284                dimensions:
   285                  - name: free
   286                  - name: used
   287                  - name: reserved
   288              - name: nvidia_smi.gpu_mig_bar1_memory_usage
   289                availability:
   290                  - XML
   291                description: BAR1 memory usage
   292                unit: B
   293                chart_type: stacked
   294                dimensions:
   295                  - name: free
   296                  - name: used