github.com/netdata/go.d.plugin@v0.58.1/modules/consul/metadata.yaml (about)

     1  plugin_name: go.d.plugin
     2  modules:
     3    - meta:
     4        id: collector-go.d.plugin-consul
     5        plugin_name: go.d.plugin
     6        module_name: consul
     7        monitored_instance:
     8          name: Consul
     9          link: https://www.consul.io/
    10          categories:
    11            - data-collection.service-discovery-registry
    12          icon_filename: consul.svg
    13        alternative_monitored_instances: []
    14        related_resources:
    15          integrations:
    16            list: []
    17        info_provided_to_referring_integrations:
    18          description: ""
    19        keywords:
    20          - service networking platform
    21          - hashicorp
    22        most_popular: true
    23      overview:
    24        data_collection:
    25          metrics_description: |
    26            This collector monitors [key metrics](https://developer.hashicorp.com/consul/docs/agent/telemetry#key-metrics) of Consul Agents: transaction timings, leadership changes, memory usage and more.
    27          method_description: |
    28            It periodically sends HTTP requests to the [Consul REST API](https://developer.hashicorp.com/consul/api-docs).
    29            
    30            Used endpoints:
    31            
    32            - [/operator/autopilot/health](https://developer.hashicorp.com/consul/api-docs/operator/autopilot#read-health)
    33            - [/agent/checks](https://developer.hashicorp.com/consul/api-docs/agent/check#list-checks)
    34            - [/agent/self](https://developer.hashicorp.com/consul/api-docs/agent#read-configuration)
    35            - [/agent/metrics](https://developer.hashicorp.com/consul/api-docs/agent#view-metrics)
    36            - [/coordinate/nodes](https://developer.hashicorp.com/consul/api-docs/coordinate#read-lan-coordinates-for-all-nodes)
    37        supported_platforms:
    38          include: []
    39          exclude: []
    40        multi_instance: true
    41        additional_permissions:
    42          description: ""
    43        default_behavior:
    44          auto_detection:
    45            description: |
    46              This collector discovers instances running on the local host that provide metrics on port 8500.
    47              
    48              On startup, it tries to collect metrics from:
    49              
    50              - http://localhost:8500
    51              - http://127.0.0.1:8500
    52          limits:
    53            description: ""
    54          performance_impact:
    55            description: ""
    56      setup:
    57        prerequisites:
    58          list:
    59            - title: Enable Prometheus telemetry
    60              description: |
    61                [Enable](https://developer.hashicorp.com/consul/docs/agent/config/config-files#telemetry-prometheus_retention_time) telemetry on your Consul agent by setting `prometheus_retention_time` to a value greater than `0`.
    62            - title: Add required ACLs to Token
    63              description: |
    64                Required **only if authentication is enabled**.
    65                
    66                |       ACL       | Endpoint                                                                                                                                                                                                                                                                                       |
    67                |:---------------:|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
    68                | `operator:read` | [autopilot health status](https://developer.hashicorp.com/consul/api-docs/operator/autopilot#read-health)                                                                                                                                                                                      |
    69                |   `node:read`   | [checks](https://developer.hashicorp.com/consul/api-docs/agent/check#list-checks)                                                                                                                                                                                                              |
    70                |  `agent:read`   | [configuration](https://developer.hashicorp.com/consul/api-docs/agent#read-configuration), [metrics](https://developer.hashicorp.com/consul/api-docs/agent#view-metrics), and [lan coordinates](https://developer.hashicorp.com/consul/api-docs/coordinate#read-lan-coordinates-for-all-nodes) |
    71        configuration:
    72          file:
    73            name: go.d/consul.conf
    74          options:
    75            description: |
    76              The following options can be defined globally: update_every, autodetection_retry.
    77            folding:
    78              title: All options
    79              enabled: true
    80            list:
    81              - name: update_every
    82                description: Data collection frequency.
    83                default_value: 1
    84                required: false
    85              - name: autodetection_retry
    86                description: Recheck interval in seconds. Zero means no recheck will be scheduled.
    87                default_value: 0
    88                required: false
    89              - name: url
    90                description: Server URL.
    91                default_value: http://localhost:8500
    92                required: true
    93              - name: acl_token
    94                description: ACL token used in every request.
    95                default_value: ""
    96                required: false
    97              - name: max_checks
    98                description: Checks processing/charting limit.
    99                default_value: ""
   100                required: false
   101              - name: max_filter
   102                description: Checks processing/charting filter. Uses [simple patterns](https://github.com/netdata/netdata/blob/master/src/libnetdata/simple_pattern/README.md).
   103                default_value: ""
   104                required: false
   105              - name: username
   106                description: Username for basic HTTP authentication.
   107                default_value: ""
   108                required: false
   109              - name: password
   110                description: Password for basic HTTP authentication.
   111                default_value: ""
   112                required: false
   113              - name: proxy_url
   114                description: Proxy URL.
   115                default_value: ""
   116                required: false
   117              - name: proxy_username
   118                description: Username for proxy basic HTTP authentication.
   119                default_value: ""
   120                required: false
   121              - name: proxy_password
   122                description: Password for proxy basic HTTP authentication.
   123                default_value: ""
   124                required: false
   125              - name: timeout
   126                description: HTTP request timeout.
   127                default_value: 1
   128                required: false
   129              - name: method
   130                description: HTTP request method.
   131                default_value: GET
   132                required: false
   133              - name: body
   134                description: HTTP request body.
   135                default_value: ""
   136                required: false
   137              - name: headers
   138                description: HTTP request headers.
   139                default_value: ""
   140                required: false
   141              - name: not_follow_redirects
   142                description: Redirect handling policy. Controls whether the client follows redirects.
   143                default_value: false
   144                required: false
   145              - name: tls_skip_verify
   146                description: Server certificate chain and hostname validation policy. Controls whether the client performs this check.
   147                default_value: false
   148                required: false
   149              - name: tls_ca
   150                description: Certification authority that the client uses when verifying the server's certificates.
   151                default_value: ""
   152                required: false
   153              - name: tls_cert
   154                description: Client TLS certificate.
   155                default_value: ""
   156                required: false
   157              - name: tls_key
   158                description: Client TLS key.
   159                default_value: ""
   160                required: false
   161          examples:
   162            folding:
   163              title: Config
   164              enabled: true
   165            list:
   166              - name: Basic
   167                description: An example configuration.
   168                folding:
   169                  enabled: false
   170                config: |
   171                  jobs:
   172                    - name: local
   173                      url: http://127.0.0.1:8500
   174                      acl_token: "ec15675e-2999-d789-832e-8c4794daa8d7"
   175              - name: Basic HTTP auth
   176                description: Local server with basic HTTP authentication.
   177                config: |
   178                  jobs:
   179                    - name: local
   180                      url: http://127.0.0.1:8500
   181                      acl_token: "ec15675e-2999-d789-832e-8c4794daa8d7"
   182                      username: foo
   183                      password: bar
   184              - name: Multi-instance
   185                description: |
   186                  > **Note**: When you define multiple jobs, their names must be unique.
   187                  
   188                  Collecting metrics from local and remote instances.
   189                config: |
   190                  jobs:
   191                    - name: local
   192                      url: http://127.0.0.1:8500
   193                      acl_token: "ec15675e-2999-d789-832e-8c4794daa8d7"
   194                  
   195                    - name: remote
   196                      url: http://203.0.113.10:8500
   197                      acl_token: "ada7f751-f654-8872-7f93-498e799158b6"
   198      troubleshooting:
   199        problems:
   200          list: []
   201      alerts:
   202        - name: consul_node_health_check_status
   203          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   204          metric: consul.node_health_check_status
   205          info: node health check ${label:check_name} has failed on server ${label:node_name} datacenter ${label:datacenter}
   206        - name: consul_service_health_check_status
   207          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   208          metric: consul.service_health_check_status
   209          info: service health check ${label:check_name} for service ${label:service_name} has failed on server ${label:node_name} datacenter ${label:datacenter}
   210        - name: consul_client_rpc_requests_exceeded
   211          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   212          metric: consul.client_rpc_requests_exceeded_rate
   213          info: number of rate-limited RPC requests made by server ${label:node_name} datacenter ${label:datacenter}
   214        - name: consul_client_rpc_requests_failed
   215          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   216          metric: consul.client_rpc_requests_failed_rate
   217          info: number of failed RPC requests made by server ${label:node_name} datacenter ${label:datacenter}
   218        - name: consul_gc_pause_time
   219          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   220          metric: consul.gc_pause_time
   221          info: time spent in stop-the-world garbage collection pauses on server ${label:node_name} datacenter ${label:datacenter}
   222        - name: consul_autopilot_health_status
   223          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   224          metric: consul.autopilot_health_status
   225          info: datacenter ${label:datacenter} cluster is unhealthy as reported by server ${label:node_name}
   226        - name: consul_autopilot_server_health_status
   227          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   228          metric: consul.autopilot_server_health_status
   229          info: server ${label:node_name} from datacenter ${label:datacenter} is unhealthy
   230        - name: consul_raft_leader_last_contact_time
   231          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   232          metric: consul.raft_leader_last_contact_time
   233          info: median time elapsed since leader server ${label:node_name} datacenter ${label:datacenter} was last able to contact the follower nodes
   234        - name: consul_raft_leadership_transitions
   235          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   236          metric: consul.raft_leadership_transitions_rate
   237          info: there has been a leadership change and server ${label:node_name} datacenter ${label:datacenter} has become the leader
   238        - name: consul_raft_thread_main_saturation
   239          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   240          metric: consul.raft_thread_main_saturation_perc
   241          info: average saturation of the main Raft goroutine on server ${label:node_name} datacenter ${label:datacenter}
   242        - name: consul_raft_thread_fsm_saturation
   243          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   244          metric: consul.raft_thread_fsm_saturation_perc
   245          info: average saturation of the FSM Raft goroutine on server ${label:node_name} datacenter ${label:datacenter}
   246        - name: consul_license_expiration_time
   247          link: https://github.com/netdata/netdata/blob/master/src/health/health.d/consul.conf
   248          metric: consul.license_expiration_time
   249          info: Consul Enterprise license expiration time on node ${label:node_name} datacenter ${label:datacenter}
   250      metrics:
   251        folding:
   252          title: Metrics
   253          enabled: false
   254        description: |
   255          The set of metrics depends on the [Consul Agent mode](https://developer.hashicorp.com/consul/docs/install/glossary#agent).
   256        availability:
   257          - Leader
   258          - Follower
   259          - Client
   260        scopes:
   261          - name: global
   262            description: These metrics refer to the entire monitored application.
   263            labels: []
   264            metrics:
   265              - name: consul.client_rpc_requests_rate
   266                description: Client RPC requests
   267                unit: requests/s
   268                chart_type: line
   269                dimensions:
   270                  - name: rpc
   271              - name: consul.client_rpc_requests_exceeded_rate
   272                description: Client rate-limited RPC requests
   273                unit: requests/s
   274                chart_type: line
   275                dimensions:
   276                  - name: exceeded
   277              - name: consul.client_rpc_requests_failed_rate
   278                description: Client failed RPC requests
   279                unit: requests/s
   280                chart_type: line
   281                dimensions:
   282                  - name: failed
   283              - name: consul.memory_allocated
   284                description: Memory allocated by the Consul process
   285                unit: bytes
   286                chart_type: line
   287                dimensions:
   288                  - name: allocated
   289              - name: consul.memory_sys
   290                description: Memory obtained from the OS
   291                unit: bytes
   292                chart_type: line
   293                dimensions:
   294                  - name: sys
   295              - name: consul.gc_pause_time
   296                description: Garbage collection stop-the-world pause time
   297                unit: seconds
   298                chart_type: line
   299                dimensions:
   300                  - name: gc_pause
   301              - name: consul.kvs_apply_time
   302                description: KVS apply time
   303                unit: ms
   304                chart_type: line
   305                availability:
   306                  - Leader
   307                  - Follower
   308                dimensions:
   309                  - name: quantile_0.5
   310                  - name: quantile_0.9
   311                  - name: quantile_0.99
   312              - name: consul.kvs_apply_operations_rate
   313                description: KVS apply operations
   314                unit: ops/s
   315                chart_type: line
   316                availability:
   317                  - Leader
   318                  - Follower
   319                dimensions:
   320                  - name: kvs_apply
   321              - name: consul.txn_apply_time
   322                description: Transaction apply time
   323                unit: ms
   324                chart_type: line
   325                availability:
   326                  - Leader
   327                  - Follower
   328                dimensions:
   329                  - name: quantile_0.5
   330                  - name: quantile_0.9
   331                  - name: quantile_0.99
   332              - name: consul.txn_apply_operations_rate
   333                description: Transaction apply operations
   334                unit: ops/s
   335                chart_type: line
   336                availability:
   337                  - Leader
   338                  - Follower
   339                dimensions:
   340                  - name: txn_apply
   341              - name: consul.autopilot_health_status
   342                description: Autopilot cluster health status
   343                unit: status
   344                chart_type: line
   345                availability:
   346                  - Leader
   347                  - Follower
   348                dimensions:
   349                  - name: healthy
   350                  - name: unhealthy
   351              - name: consul.autopilot_failure_tolerance
   352                description: Autopilot cluster failure tolerance
   353                unit: servers
   354                chart_type: line
   355                availability:
   356                  - Leader
   357                  - Follower
   358                dimensions:
   359                  - name: failure_tolerance
   360              - name: consul.autopilot_server_health_status
   361                description: Autopilot server health status
   362                unit: status
   363                chart_type: line
   364                availability:
   365                  - Leader
   366                  - Follower
   367                dimensions:
   368                  - name: healthy
   369                  - name: unhealthy
   370              - name: consul.autopilot_server_stable_time
   371                description: Autopilot server stable time
   372                unit: seconds
   373                chart_type: line
   374                availability:
   375                  - Leader
   376                  - Follower
   377                dimensions:
   378                  - name: stable
   379              - name: consul.autopilot_server_serf_status
   380                description: Autopilot server Serf status
   381                unit: status
   382                chart_type: line
   383                availability:
   384                  - Leader
   385                  - Follower
   386                dimensions:
   387                  - name: active
   388                  - name: failed
   389                  - name: left
   390                  - name: none
   391              - name: consul.autopilot_server_voter_status
   392                description: Autopilot server Raft voting membership
   393                unit: status
   394                chart_type: line
   395                availability:
   396                  - Leader
   397                  - Follower
   398                dimensions:
   399                  - name: voter
   400                  - name: not_voter
   401              - name: consul.network_lan_rtt
   402                description: Network lan RTT
   403                unit: ms
   404                chart_type: line
   405                availability:
   406                  - Leader
   407                  - Follower
   408                dimensions:
   409                  - name: min
   410                  - name: max
   411                  - name: avg
   412              - name: consul.raft_commit_time
   413                description: Raft commit time
   414                unit: ms
   415                chart_type: line
   416                availability:
   417                  - Leader
   418                dimensions:
   419                  - name: quantile_0.5
   420                  - name: quantile_0.9
   421                  - name: quantile_0.99
   422              - name: consul.raft_commits_rate
   423                description: Raft commits rate
   424                unit: commits/s
   425                chart_type: line
   426                availability:
   427                  - Leader
   428                dimensions:
   429                  - name: commits
   430              - name: consul.raft_leader_last_contact_time
   431                description: Raft leader last contact time
   432                unit: ms
   433                chart_type: line
   434                availability:
   435                  - Leader
   436                dimensions:
   437                  - name: quantile_0.5
   438                  - name: quantile_0.9
   439                  - name: quantile_0.99
   440              - name: consul.raft_leader_oldest_log_age
   441                description: Raft leader oldest log age
   442                unit: seconds
   443                chart_type: line
   444                availability:
   445                  - Leader
   446                dimensions:
   447                  - name: oldest_log_age
   448              - name: consul.raft_follower_last_contact_leader_time
   449                description: Raft follower last contact with the leader time
   450                unit: ms
   451                chart_type: line
   452                availability:
   453                  - Follower
   454                dimensions:
   455                  - name: leader_last_contact
   456              - name: consul.raft_rpc_install_snapshot_time
   457                description: Raft RPC install snapshot time
   458                unit: ms
   459                chart_type: line
   460                availability:
   461                  - Follower
   462                dimensions:
   463                  - name: quantile_0.5
   464                  - name: quantile_0.9
   465                  - name: quantile_0.99
   466              - name: consul.raft_leader_elections_rate
   467                description: Raft leader elections rate
   468                unit: elections/s
   469                chart_type: line
   470                availability:
   471                  - Leader
   472                  - Follower
   473                dimensions:
   474                  - name: leader
   475              - name: consul.raft_leadership_transitions_rate
   476                description: Raft leadership transitions rate
   477                unit: transitions/s
   478                chart_type: line
   479                availability:
   480                  - Leader
   481                  - Follower
   482                dimensions:
   483                  - name: leadership
   484              - name: consul.server_leadership_status
   485                description: Server leadership status
   486                unit: status
   487                chart_type: line
   488                availability:
   489                  - Leader
   490                  - Follower
   491                dimensions:
   492                  - name: leader
   493                  - name: not_leader
   494              - name: consul.raft_thread_main_saturation_perc
   495                description: Raft main thread saturation
   496                unit: percentage
   497                chart_type: line
   498                availability:
   499                  - Leader
   500                  - Follower
   501                dimensions:
   502                  - name: quantile_0.5
   503                  - name: quantile_0.9
   504                  - name: quantile_0.99
   505              - name: consul.raft_thread_fsm_saturation_perc
   506                description: Raft FSM thread saturation
   507                unit: percentage
   508                chart_type: line
   509                availability:
   510                  - Leader
   511                  - Follower
   512                dimensions:
   513                  - name: quantile_0.5
   514                  - name: quantile_0.9
   515                  - name: quantile_0.99
   516              - name: consul.raft_fsm_last_restore_duration
   517                description: Raft last restore duration
   518                unit: ms
   519                chart_type: line
   520                availability:
   521                  - Leader
   522                  - Follower
   523                dimensions:
   524                  - name: last_restore_duration
   525              - name: consul.raft_boltdb_freelist_bytes
   526                description: Raft BoltDB freelist
   527                unit: bytes
   528                chart_type: line
   529                availability:
   530                  - Leader
   531                  - Follower
   532                dimensions:
   533                  - name: freelist
   534              - name: consul.raft_boltdb_logs_per_batch_rate
   535                description: Raft BoltDB logs written per batch
   536                unit: logs/s
   537                chart_type: line
   538                availability:
   539                  - Leader
   540                  - Follower
   541                dimensions:
   542                  - name: written
   543              - name: consul.raft_boltdb_store_logs_time
   544                description: Raft BoltDB store logs time
   545                unit: ms
   546                chart_type: line
   547                availability:
   548                  - Leader
   549                  - Follower
   550                dimensions:
   551                  - name: quantile_0.5
   552                  - name: quantile_0.9
   553                  - name: quantile_0.99
   554              - name: consul.license_expiration_time
   555                description: License expiration time
   556                unit: seconds
   557                chart_type: line
   558                dimensions:
   559                  - name: license_expiration
   560          - name: node check
   561            description: Metrics about checks at the node level.
   562            labels:
   563              - name: datacenter
   564                description: Datacenter Identifier
   565              - name: node_name
   566                description: The node's name
   567              - name: check_name
   568                description: The check's name
   569            metrics:
   570              - name: consul.node_health_check_status
   571                description: Node health check status
   572                unit: status
   573                chart_type: line
   574                dimensions:
   575                  - name: passing
   576                  - name: maintenance
   577                  - name: warning
   578                  - name: critical
   579          - name: service check
   580            description: Metrics about checks at the service level.
   581            labels:
   582              - name: datacenter
   583                description: Datacenter Identifier
   584              - name: node_name
   585                description: The node's name
   586              - name: check_name
   587                description: The check's name
   588              - name: service_name
   589                description: The service's name
   590            metrics:
   591              - name: consul.service_health_check_status
   592                description: Service health check status
   593                unit: status
   594                chart_type: line
   595                dimensions:
   596                  - name: passing
   597                  - name: maintenance
   598                  - name: warning
   599                  - name: critical