github.com/netdata/go.d.plugin@v0.58.1/modules/hdfs/metadata.yaml (about)

     1  plugin_name: go.d.plugin
     2  modules:
     3    - meta:  # identity of this integration: ids, display name, category, search keywords
     4        id: collector-go.d.plugin-hdfs  # here: collector-<plugin_name>-<module_name>
     5        plugin_name: go.d.plugin
     6        module_name: hdfs  # same name as the go.d/hdfs.conf config file and the hdfs.* metrics in this file
     7        monitored_instance:
     8          name: Hadoop Distributed File System (HDFS)
     9          link: https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html
    10          icon_filename: hadoop.svg
    11          categories:
    12            - data-collection.storage-mount-points-and-filesystems
    13        keywords:
    14          - hdfs
    15          - hadoop
    16        related_resources:
    17          integrations:
    18            list: []
    19        info_provided_to_referring_integrations:
    20          description: ""
    21        most_popular: true
    22      overview:  # free-text documentation sections; most descriptions are left empty ("") for this module
    23        data_collection:
    24          metrics_description: |
    25            This collector monitors HDFS nodes.
    26  
    27            Netdata accesses HDFS metrics over `Java Management Extensions` (JMX) through the web interface of an HDFS daemon.
    28          method_description: ""
    29        supported_platforms:
    30          include: []
    31          exclude: []
    32        multi_instance: true  # the "Multi-instance" example under setup shows two jobs in one config
    33        additional_permissions:
    34          description: ""
    35        default_behavior:
    36          auto_detection:
    37            description: ""
    38          limits:
    39            description: ""
    40          performance_impact:
    41            description: ""
    42      setup:  # prerequisites, config file, option reference and example job configs
    43        prerequisites:
    44          list: []
    45        configuration:
    46          file:
    47            name: go.d/hdfs.conf
    48          options:
    49            description: |
    50              The following options can be defined globally: update_every, autodetection_retry.
    51            folding:
    52              title: Config options
    53              enabled: true
    54            list:  # url is the only option marked required: true
    55              - name: update_every
    56                description: Data collection frequency.
    57                default_value: 1
    58                required: false
    59              - name: autodetection_retry
    60                description: Recheck interval in seconds. Zero means no recheck will be scheduled.
    61                default_value: 0
    62                required: false
    63              - name: url
    64                description: Server URL.
    65                default_value: http://127.0.0.1:9870/jmx
    66                required: true
    67              - name: timeout
    68                description: HTTP request timeout.
    69                default_value: 1
    70                required: false
    71              - name: username
    72                description: Username for basic HTTP authentication.
    73                default_value: ""
    74                required: false
    75              - name: password
    76                description: Password for basic HTTP authentication.
    77                default_value: ""
    78                required: false
    79              - name: proxy_url
    80                description: Proxy URL.
    81                default_value: ""
    82                required: false
    83              - name: proxy_username
    84                description: Username for proxy basic HTTP authentication.
    85                default_value: ""
    86                required: false
    87              - name: proxy_password
    88                description: Password for proxy basic HTTP authentication.
    89                default_value: ""
    90                required: false
    91              - name: method
    92                description: HTTP request method.
    93                default_value: "GET"
    94                required: false
    95              - name: body
    96                description: HTTP request body.
    97                default_value: ""
    98                required: false
    99              - name: headers
   100                description: HTTP request headers.
   101                default_value: ""
   102                required: false
   103              - name: not_follow_redirects
   104                description: Redirect handling policy. Controls whether the client follows redirects.
   105                default_value: "no"  # quoted: a bare no loads as boolean false under YAML 1.1
   106                required: false
   107              - name: tls_skip_verify
   108                description: Server certificate chain and hostname validation policy. Controls whether the client performs this check.
   109                default_value: "no"  # quoted: a bare no loads as boolean false under YAML 1.1
   110                required: false
   111              - name: tls_ca
   112                description: Certification authority that the client uses when verifying the server's certificates.
   113                default_value: ""
   114                required: false
   115              - name: tls_cert
   116                description: Client TLS certificate.
   117                default_value: ""
   118                required: false
   119              - name: tls_key
   120                description: Client TLS key.
   121                default_value: ""
   122                required: false
   123          examples:  # each config value below is a literal snippet for go.d/hdfs.conf
   124            folding:
   125              title: Config
   126              enabled: true
   127            list:
   128              - name: Basic
   129                folding:
   130                  enabled: false
   131                description: A basic example configuration.
   132                config: |
   133                  jobs:
   134                    - name: local
   135                      url: http://127.0.0.1:9870/jmx
   136              - name: HTTP authentication
   137                description: Basic HTTP authentication.
   138                config: |
   139                  jobs:
   140                    - name: local
   141                      url: http://127.0.0.1:9870/jmx
   142                      username: username
   143                      password: password
   144              - name: HTTPS with self-signed certificate
   145                description: |
   146                  Do not validate server certificate chain and hostname.
   147                config: |
   148                  jobs:
   149                    - name: local
   150                      url: https://127.0.0.1:9870/jmx
   151                      tls_skip_verify: yes
   152              - name: Multi-instance
   153                description: |
   154                  > **Note**: When you define multiple jobs, their names must be unique.
   155                  
   156                  Collecting metrics from local and remote instances.
   157                config: |
   158                  jobs:
   159                    - name: local
   160                      url: http://127.0.0.1:9870/jmx
   161                  
   162                    - name: remote
   163                      url: http://192.0.2.1:9870/jmx
   164      troubleshooting:  # no known problems are documented for this module (empty list)
   165        problems:
   166          list: []
   167      alerts:  # each entry gives an alert name, the metric it refers to, a one-line summary and a link
   168        - name: hdfs_capacity_usage
   169          metric: hdfs.capacity
   170          info: summary datanodes space capacity utilization
   171          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
   172        - name: hdfs_missing_blocks
   173          metric: hdfs.blocks
   174          info: number of missing blocks
   175          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
   176        - name: hdfs_stale_nodes
   177          metric: hdfs.data_nodes
   178          info: number of datanodes marked stale due to delayed heartbeat
   179          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
   180        - name: hdfs_dead_nodes
   181          metric: hdfs.data_nodes
   182          info: number of datanodes which are currently dead
   183          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
   184        - name: hdfs_num_failed_volumes
   185          metric: hdfs.num_failed_volumes  # NOTE(review): no metric with this name appears in the metrics list visible here (closest: hdfs.datanode_failed_volumes) - confirm against the linked health config
   186          info: number of failed volumes
   187          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/hdfs.conf
   188      metrics:  # catalogue of metrics: name, description, unit, chart type and dimensions per metric
   189        folding:
   190          title: Metrics
   191          enabled: false
   192        description: ""
   193        availability:  # the node roles that per-metric availability lists below refer to
   194          - DataNode
   195          - NameNode
   196        scopes:
   197          - name: global
   198            description: These metrics refer to the entire monitored application.
   199            labels: []
   200            metrics:
   201              - name: hdfs.heap_memory  # metrics from here through hdfs.avg_processing_time declare no availability list of their own
   202                description: Heap Memory
   203                unit: MiB
   204                chart_type: area
   205                dimensions:
   206                  - name: committed
   207                  - name: used
   208              - name: hdfs.gc_count_total
   209                description: GC Events
   210                unit: events/s
   211                chart_type: line
   212                dimensions:
   213                  - name: gc
   214              - name: hdfs.gc_time_total
   215                description: GC Time
   216                unit: ms
   217                chart_type: line
   218                dimensions:
   219                  - name: ms
   220              - name: hdfs.gc_threshold
   221                description: Number of Times That the GC Threshold is Exceeded
   222                unit: events/s
   223                chart_type: line
   224                dimensions:
   225                  - name: info
   226                  - name: warn
   227              - name: hdfs.threads
   228                description: Number of Threads
   229                unit: num
   230                chart_type: stacked
   231                dimensions:
   232                  - name: new
   233                  - name: runnable
   234                  - name: blocked
   235                  - name: waiting
   236                  - name: timed_waiting
   237                  - name: terminated
   238              - name: hdfs.logs_total
   239                description: Number of Logs
   240                unit: logs/s
   241                chart_type: stacked
   242                dimensions:
   243                  - name: info
   244                  - name: error
   245                  - name: warn
   246                  - name: fatal
   247              - name: hdfs.rpc_bandwidth
   248                description: RPC Bandwidth
   249                unit: kilobits/s
   250                chart_type: area
   251                dimensions:
   252                  - name: received
   253                  - name: sent
   254              - name: hdfs.rpc_calls
   255                description: RPC Calls
   256                unit: calls/s
   257                chart_type: line
   258                dimensions:
   259                  - name: calls
   260              - name: hdfs.open_connections
   261                description: RPC Open Connections
   262                unit: connections
   263                chart_type: line
   264                dimensions:
   265                  - name: open
   266              - name: hdfs.call_queue_length
   267                description: RPC Call Queue Length
   268                unit: num
   269                chart_type: line
   270                dimensions:
   271                  - name: length
   272              - name: hdfs.avg_queue_time
   273                description: RPC Avg Queue Time
   274                unit: ms
   275                chart_type: line
   276                dimensions:
   277                  - name: time
   278              - name: hdfs.avg_processing_time
   279                description: RPC Avg Processing Time
   280                unit: ms
   281                chart_type: line
   282                dimensions:
   283                  - name: time
   284              - name: hdfs.capacity  # first of the metrics marked availability: NameNode; named by the hdfs_capacity_usage alert
   285                description: Capacity Across All Datanodes
   286                unit: KiB
   287                chart_type: stacked
   288                availability:
   289                  - NameNode
   290                dimensions:
   291                  - name: remaining
   292                  - name: used
   293              - name: hdfs.used_capacity
   294                description: Used Capacity Across All Datanodes
   295                unit: KiB
   296                chart_type: stacked
   297                availability:
   298                  - NameNode
   299                dimensions:
   300                  - name: dfs
   301                  - name: non_dfs
   302              - name: hdfs.load
   303                description: Number of Concurrent File Accesses (read/write) Across All DataNodes
   304                unit: load
   305                chart_type: line
   306                availability:
   307                  - NameNode
   308                dimensions:
   309                  - name: load
   310              - name: hdfs.volume_failures_total
   311                description: Number of Volume Failures Across All Datanodes
   312                unit: events/s
   313                chart_type: line
   314                availability:
   315                  - NameNode
   316                dimensions:
   317                  - name: failures
   318              - name: hdfs.files_total
   319                description: Number of Tracked Files
   320                unit: num
   321                chart_type: line
   322                availability:
   323                  - NameNode
   324                dimensions:
   325                  - name: files
   326              - name: hdfs.blocks_total
   327                description: Number of Allocated Blocks in the System
   328                unit: num
   329                chart_type: line
   330                availability:
   331                  - NameNode
   332                dimensions:
   333                  - name: blocks
   334              - name: hdfs.blocks  # named by the hdfs_missing_blocks alert
   335                description: Number of Problem Blocks (can point to an unhealthy cluster)
   336                unit: num
   337                chart_type: line
   338                availability:
   339                  - NameNode
   340                dimensions:
   341                  - name: corrupt
   342                  - name: missing
   343                  - name: under_replicated
   344              - name: hdfs.data_nodes  # named by the hdfs_stale_nodes and hdfs_dead_nodes alerts
   345                description: Number of Data Nodes By Status
   346                unit: num
   347                chart_type: stacked
   348                availability:
   349                  - NameNode
   350                dimensions:
   351                  - name: live
   352                  - name: dead
   353                  - name: stale
   354              - name: hdfs.datanode_capacity  # first of the metrics marked availability: DataNode (through hdfs.datanode_bandwidth)
   355                description: Capacity
   356                unit: KiB
   357                chart_type: stacked
   358                availability:
   359                  - DataNode
   360                dimensions:
   361                  - name: remaining
   362                  - name: used
   363              - name: hdfs.datanode_used_capacity
   364                description: Used Capacity
   365                unit: KiB
   366                chart_type: stacked
   367                availability:
   368                  - DataNode
   369                dimensions:
   370                  - name: dfs
   371                  - name: non_dfs
   372              - name: hdfs.datanode_failed_volumes
   373                description: Number of Failed Volumes
   374                unit: num
   375                chart_type: line
   376                availability:
   377                  - DataNode
   378                dimensions:
   379                  - name: failed volumes  # NOTE(review): other multi-word dimensions here use underscores (timed_waiting, under_replicated, non_dfs) - confirm the space is intended
   380              - name: hdfs.datanode_bandwidth
   381                description: Bandwidth
   382                unit: KiB/s
   383                chart_type: area
   384                availability:
   385                  - DataNode
   386                dimensions:
   387                  - name: reads
   388                  - name: writes