go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/cmd/statsd-to-tsmon/docker/envoy/statsd-to-tsmon.cfg (about)

     1  # Cluster stats.
     2  # https://www.envoyproxy.io/docs/envoy/latest/configuration/upstream/cluster_manager/cluster_stats
     3  
     4  
     5  metrics {
          # Gauge with a single "cluster" field, fed by one statsd name pattern.
          # The ${cluster} placeholder that appears in the pattern is reused as the
          # value of the "cluster" field.
          # NOTE(review): the leading "*" presumably matches an arbitrary stats
          # prefix component -- confirm against the pattern syntax documentation.
     6    metric: "luci/envoy/cluster/cx/active"
     7    kind: GAUGE
     8    desc: "Number of currently active upstream connections"
     9  
    10    fields: "cluster"
    11  
    12    rules {
    13      pattern: "*.cluster.${cluster}.upstream_cx_active"
    14      fields {
    15        key: "cluster"
    16        value: "${cluster}"
    17      }
    18    }
    19  }
    20  
    21  
    22  metrics {
          # Counter with fields "cluster" and "protocol". Each rule folds one
          # per-protocol statsd name into this single metric: "cluster" comes from
          # the ${cluster} placeholder, "protocol" is a fixed literal per rule.
    23    metric: "luci/envoy/cluster/cx/total"
    24    kind: COUNTER
    25    desc: "Total number of upstream connections made"
    26  
    27    fields: "cluster"
    28    fields: "protocol"
    29  
          # upstream_cx_http1_total -> protocol "http1".
    30    rules {
    31      pattern: "*.cluster.${cluster}.upstream_cx_http1_total"
    32      fields {
    33        key: "cluster"
    34        value: "${cluster}"
    35      }
    36      fields {
    37        key: "protocol"
    38        value: "http1"
    39      }
    40    }
          # upstream_cx_http2_total -> protocol "http2".
    41    rules {
    42      pattern: "*.cluster.${cluster}.upstream_cx_http2_total"
    43      fields {
    44        key: "cluster"
    45        value: "${cluster}"
    46      }
    47      fields {
    48        key: "protocol"
    49        value: "http2"
    50      }
    51    }
    52  }
    53  
    54  
    55  metrics {
          # Counter with fields "cluster" and "kind" that gathers many separate
          # upstream connection problem counters under one metric name.
          # In every rule below, "cluster" is taken from the ${cluster} placeholder
          # and "kind" is a fixed literal equal to the part of the statsd name that
          # follows "upstream_cx_" in that rule's pattern.
    56    metric: "luci/envoy/cluster/cx/issues"
    57    kind: COUNTER
    58    desc: "Total number of upstream connection failures/timeouts/disconnects"
    59  
    60    fields: "cluster"
    61    fields: "kind"
    62  
    63    rules {
    64      pattern: "*.cluster.${cluster}.upstream_cx_connect_fail"
    65      fields {
    66        key: "cluster"
    67        value: "${cluster}"
    68      }
    69      fields {
    70        key: "kind"
    71        value: "connect_fail"
    72      }
    73    }
    74    rules {
    75      pattern: "*.cluster.${cluster}.upstream_cx_connect_timeout"
    76      fields {
    77        key: "cluster"
    78        value: "${cluster}"
    79      }
    80      fields {
    81        key: "kind"
    82        value: "connect_timeout"
    83      }
    84    }
    85    rules {
    86      pattern: "*.cluster.${cluster}.upstream_cx_idle_timeout"
    87      fields {
    88        key: "cluster"
    89        value: "${cluster}"
    90      }
    91      fields {
    92        key: "kind"
    93        value: "idle_timeout"
    94      }
    95    }
    96    rules {
    97      pattern: "*.cluster.${cluster}.upstream_cx_connect_attempts_exceeded"
    98      fields {
    99        key: "cluster"
   100        value: "${cluster}"
   101      }
   102      fields {
   103        key: "kind"
   104        value: "connect_attempts_exceeded"
   105      }
   106    }
   107    rules {
   108      pattern: "*.cluster.${cluster}.upstream_cx_overflow"
   109      fields {
   110        key: "cluster"
   111        value: "${cluster}"
   112      }
   113      fields {
   114        key: "kind"
   115        value: "overflow"
   116      }
   117    }
   118    rules {
   119      pattern: "*.cluster.${cluster}.upstream_cx_close_notify"
   120      fields {
   121        key: "cluster"
   122        value: "${cluster}"
   123      }
   124      fields {
   125        key: "kind"
   126        value: "close_notify"
   127      }
   128    }
   129    rules {
   130      pattern: "*.cluster.${cluster}.upstream_cx_pool_overflow"
   131      fields {
   132        key: "cluster"
   133        value: "${cluster}"
   134      }
   135      fields {
   136        key: "kind"
   137        value: "pool_overflow"
   138      }
   139    }
   140    rules {
   141      pattern: "*.cluster.${cluster}.upstream_cx_protocol_error"
   142      fields {
   143        key: "cluster"
   144        value: "${cluster}"
   145      }
   146      fields {
   147        key: "kind"
   148        value: "protocol_error"
   149      }
   150    }
   151    rules {
   152      pattern: "*.cluster.${cluster}.upstream_cx_max_requests"
   153      fields {
   154        key: "cluster"
   155        value: "${cluster}"
   156      }
   157      fields {
   158        key: "kind"
   159        value: "max_requests"
   160      }
   161    }
   162    rules {
   163      pattern: "*.cluster.${cluster}.upstream_cx_none_healthy"
   164      fields {
   165        key: "cluster"
   166        value: "${cluster}"
   167      }
   168      fields {
   169        key: "kind"
   170        value: "none_healthy"
   171      }
   172    }
   173  }
   174  
   175  
   176  metrics {
          # Gauge in BYTES with fields "cluster" and "direction". The two rules map
          # the rx/tx statsd names onto one metric, distinguished by a fixed
          # "direction" literal; "cluster" comes from the ${cluster} placeholder.
   177    metric: "luci/envoy/cluster/cx/bytes/buffered"
   178    kind: GAUGE
   179    desc: "Currently buffered connection bytes (per direction)"
   180    units: BYTES
   181  
   182    fields: "cluster"
   183    fields: "direction"
   184  
          # "rx" in the statsd name is reported as direction "received".
   185    rules {
   186      pattern: "*.cluster.${cluster}.upstream_cx_rx_bytes_buffered"
   187      fields {
   188        key: "cluster"
   189        value: "${cluster}"
   190      }
   191      fields {
   192        key: "direction"
   193        value: "received"
   194      }
   195    }
          # "tx" in the statsd name is reported as direction "sent".
   196    rules {
   197      pattern: "*.cluster.${cluster}.upstream_cx_tx_bytes_buffered"
   198      fields {
   199        key: "cluster"
   200        value: "${cluster}"
   201      }
   202      fields {
   203        key: "direction"
   204        value: "sent"
   205      }
   206    }
   207  }
   208  
   209  
   210  metrics {
          # Counter in BYTES with fields "cluster" and "direction". The two rules
          # map the rx/tx statsd names onto one metric, distinguished by a fixed
          # "direction" literal; "cluster" comes from the ${cluster} placeholder.
   211    metric: "luci/envoy/cluster/cx/bytes/total"
   212    kind: COUNTER
   213    desc: "Total connection bytes (per direction)"
   214    units: BYTES
   215  
   216    fields: "cluster"
   217    fields: "direction"
   218  
          # "rx" in the statsd name is reported as direction "received".
   219    rules {
   220      pattern: "*.cluster.${cluster}.upstream_cx_rx_bytes_total"
   221      fields {
   222        key: "cluster"
   223        value: "${cluster}"
   224      }
   225      fields {
   226        key: "direction"
   227        value: "received"
   228      }
   229    }
          # "tx" in the statsd name is reported as direction "sent".
   230    rules {
   231      pattern: "*.cluster.${cluster}.upstream_cx_tx_bytes_total"
   232      fields {
   233        key: "cluster"
   234        value: "${cluster}"
   235      }
   236      fields {
   237        key: "direction"
   238        value: "sent"
   239      }
   240    }
   241  }
   242  
   243  
   244  metrics {
          # Distribution metric (CUMULATIVE_DISTRIBUTION) in MILLISECONDS with a
          # single "cluster" field, fed by the upstream_cx_connect_ms statsd name.
          # The ${cluster} placeholder from the pattern becomes the field value.
   245    metric: "luci/envoy/cluster/cx/connect"
   246    kind: CUMULATIVE_DISTRIBUTION
   247    desc: "How long it takes to connect to an upstream"
   248    units: MILLISECONDS
   249  
   250    fields: "cluster"
   251  
   252    rules {
   253      pattern: "*.cluster.${cluster}.upstream_cx_connect_ms"
   254      fields {
   255        key: "cluster"
   256        value: "${cluster}"
   257      }
   258    }
   259  }
   260  
   261  
   262  metrics {
          # Distribution metric (CUMULATIVE_DISTRIBUTION) in MILLISECONDS with a
          # single "cluster" field, fed by the upstream_cx_length_ms statsd name.
          # The ${cluster} placeholder from the pattern becomes the field value.
   263    metric: "luci/envoy/cluster/cx/length"
   264    kind: CUMULATIVE_DISTRIBUTION
   265    desc: "Total lifetime of a connection"
   266    units: MILLISECONDS
   267  
   268    fields: "cluster"
   269  
   270    rules {
   271      pattern: "*.cluster.${cluster}.upstream_cx_length_ms"
   272      fields {
   273        key: "cluster"
   274        value: "${cluster}"
   275      }
   276    }
   277  }
   278  
   279  
   280  metrics {
          # Gauge with a single "cluster" field, fed by the upstream_rq_active
          # statsd name. The ${cluster} placeholder from the pattern becomes the
          # field value.
   281    metric: "luci/envoy/cluster/rq/active"
   282    kind: GAUGE
   283    desc: "Number of currently active upstream requests"
   284  
   285    fields: "cluster"
   286  
   287    rules {
   288      pattern: "*.cluster.${cluster}.upstream_rq_active"
   289      fields {
   290        key: "cluster"
   291        value: "${cluster}"
   292      }
   293    }
   294  }
   295  
   296  
   297  metrics {
          # Gauge with a single "cluster" field, fed by the
          # upstream_rq_pending_active statsd name. The ${cluster} placeholder from
          # the pattern becomes the field value.
   298    metric: "luci/envoy/cluster/rq/pending/active"
   299    kind: GAUGE
   300    desc: "Number of active requests pending a connection pool connection"
   301  
   302    fields: "cluster"
   303  
   304    rules {
   305      pattern: "*.cluster.${cluster}.upstream_rq_pending_active"
   306      fields {
   307        key: "cluster"
   308        value: "${cluster}"
   309      }
   310    }
   311  }
   312  
   313  
   314  metrics {
          # Counter with a single "cluster" field, fed by the
          # upstream_rq_pending_total statsd name. The ${cluster} placeholder from
          # the pattern becomes the field value.
   315    metric: "luci/envoy/cluster/rq/pending/total"
   316    kind: COUNTER
   317    desc: "Total number of requests that were delayed waiting for a connection"
   318  
   319    fields: "cluster"
   320  
   321    rules {
   322      pattern: "*.cluster.${cluster}.upstream_rq_pending_total"
   323      fields {
   324        key: "cluster"
   325        value: "${cluster}"
   326      }
   327    }
   328  }
   329  
   330  
   331  metrics {
          # Counter with fields "cluster" and "codes". Each rule maps one
          # upstream_rq_<suffix> statsd name onto this metric; the "codes" field is
          # a fixed literal equal to that suffix (1xx..5xx, or "unknown"), and
          # "cluster" comes from the ${cluster} placeholder.
   332    metric: "luci/envoy/cluster/rq/completed"
   333    kind: COUNTER
   334    desc: "Total number of requests completed"
   335  
   336    fields: "cluster"
   337    fields: "codes"
   338  
   339    rules {
   340      pattern: "*.cluster.${cluster}.upstream_rq_1xx"
   341      fields {
   342        key: "cluster"
   343        value: "${cluster}"
   344      }
   345      fields {
   346        key: "codes"
   347        value: "1xx"
   348      }
   349    }
   350    rules {
   351      pattern: "*.cluster.${cluster}.upstream_rq_2xx"
   352      fields {
   353        key: "cluster"
   354        value: "${cluster}"
   355      }
   356      fields {
   357        key: "codes"
   358        value: "2xx"
   359      }
   360    }
   361    rules {
   362      pattern: "*.cluster.${cluster}.upstream_rq_3xx"
   363      fields {
   364        key: "cluster"
   365        value: "${cluster}"
   366      }
   367      fields {
   368        key: "codes"
   369        value: "3xx"
   370      }
   371    }
   372    rules {
   373      pattern: "*.cluster.${cluster}.upstream_rq_4xx"
   374      fields {
   375        key: "cluster"
   376        value: "${cluster}"
   377      }
   378      fields {
   379        key: "codes"
   380        value: "4xx"
   381      }
   382    }
   383    rules {
   384      pattern: "*.cluster.${cluster}.upstream_rq_5xx"
   385      fields {
   386        key: "cluster"
   387        value: "${cluster}"
   388      }
   389      fields {
   390        key: "codes"
   391        value: "5xx"
   392      }
   393    }
          # Catch-all bucket for the upstream_rq_unknown statsd name.
   394    rules {
   395      pattern: "*.cluster.${cluster}.upstream_rq_unknown"
   396      fields {
   397        key: "cluster"
   398        value: "${cluster}"
   399      }
   400      fields {
   401        key: "codes"
   402        value: "unknown"
   403      }
   404    }
   405  }
   406  
   407  
   408  metrics {
          # Distribution metric (CUMULATIVE_DISTRIBUTION) in MILLISECONDS with a
          # single "cluster" field, fed by the upstream_rq_time statsd name.
          # The ${cluster} placeholder from the pattern becomes the field value.
   409    metric: "luci/envoy/cluster/rq/duration"
   410    kind: CUMULATIVE_DISTRIBUTION
   411    desc: "Request time"
   412    units: MILLISECONDS
   413  
   414    fields: "cluster"
   415  
   416    rules {
   417      pattern: "*.cluster.${cluster}.upstream_rq_time"
   418      fields {
   419        key: "cluster"
   420        value: "${cluster}"
   421      }
   422    }
   423  }
   424  
   425  
   426  metrics {
          # Counter with fields "cluster" and "kind" that gathers many separate
          # upstream request failure counters under one metric name.
          # In every rule below, "cluster" is taken from the ${cluster} placeholder
          # and "kind" is a fixed literal equal to the part of the statsd name that
          # follows "upstream_rq_" in that rule's pattern.
   427    metric: "luci/envoy/cluster/rq/issues"
   428    kind: COUNTER
   429    desc: "Total number of requests failed due to cluster issues"
   430  
   431    fields: "cluster"
   432    fields: "kind"
   433  
   434    rules {
   435      pattern: "*.cluster.${cluster}.upstream_rq_pending_overflow"
   436      fields {
   437        key: "cluster"
   438        value: "${cluster}"
   439      }
   440      fields {
   441        key: "kind"
   442        value: "pending_overflow"
   443      }
   444    }
   445    rules {
   446      pattern: "*.cluster.${cluster}.upstream_rq_pending_failure_eject"
   447      fields {
   448        key: "cluster"
   449        value: "${cluster}"
   450      }
   451      fields {
   452        key: "kind"
   453        value: "pending_failure_eject"
   454      }
   455    }
   456    rules {
   457      pattern: "*.cluster.${cluster}.upstream_rq_cancelled"
   458      fields {
   459        key: "cluster"
   460        value: "${cluster}"
   461      }
   462      fields {
   463        key: "kind"
   464        value: "cancelled"
   465      }
   466    }
   467    rules {
   468      pattern: "*.cluster.${cluster}.upstream_rq_maintenance_mode"
   469      fields {
   470        key: "cluster"
   471        value: "${cluster}"
   472      }
   473      fields {
   474        key: "kind"
   475        value: "maintenance_mode"
   476      }
   477    }
   478    rules {
   479      pattern: "*.cluster.${cluster}.upstream_rq_timeout"
   480      fields {
   481        key: "cluster"
   482        value: "${cluster}"
   483      }
   484      fields {
   485        key: "kind"
   486        value: "timeout"
   487      }
   488    }
   489    rules {
   490      pattern: "*.cluster.${cluster}.upstream_rq_max_duration_reached"
   491      fields {
   492        key: "cluster"
   493        value: "${cluster}"
   494      }
   495      fields {
   496        key: "kind"
   497        value: "max_duration_reached"
   498      }
   499    }
   500    rules {
   501      pattern: "*.cluster.${cluster}.upstream_rq_per_try_timeout"
   502      fields {
   503        key: "cluster"
   504        value: "${cluster}"
   505      }
   506      fields {
   507        key: "kind"
   508        value: "per_try_timeout"
   509      }
   510    }
   511    rules {
   512      pattern: "*.cluster.${cluster}.upstream_rq_rx_reset"
   513      fields {
   514        key: "cluster"
   515        value: "${cluster}"
   516      }
   517      fields {
   518        key: "kind"
   519        value: "rx_reset"
   520      }
   521    }
   522    rules {
   523      pattern: "*.cluster.${cluster}.upstream_rq_tx_reset"
   524      fields {
   525        key: "cluster"
   526        value: "${cluster}"
   527      }
   528      fields {
   529        key: "kind"
   530        value: "tx_reset"
   531      }
   532    }
   533  }
   534  
   535  
   536  metrics {
          # Counter with fields "cluster" and "outcome". Each rule maps one
          # upstream_rq_retry_<suffix> statsd name onto this metric; "outcome" is a
          # fixed literal equal to that suffix and "cluster" comes from the
          # ${cluster} placeholder in the pattern.
   537    metric: "luci/envoy/cluster/rq/retries"
   538    kind: COUNTER
   539    desc: "Total number of request retries per outcome"
   540  
   541    fields: "cluster"
   542    fields: "outcome"
   543  
   544    rules {
   545      pattern: "*.cluster.${cluster}.upstream_rq_retry_success"
   546      fields {
   547        key: "cluster"
   548        value: "${cluster}"
   549      }
   550      fields {
   551        key: "outcome"
   552        value: "success"
   553      }
   554    }
   555    rules {
   556      pattern: "*.cluster.${cluster}.upstream_rq_retry_overflow"
   557      fields {
   558        key: "cluster"
   559        value: "${cluster}"
   560      }
   561      fields {
   562        key: "outcome"
   563        value: "overflow"
   564      }
   565    }
   566    rules {
   567      pattern: "*.cluster.${cluster}.upstream_rq_retry_limit_exceeded"
   568      fields {
   569        key: "cluster"
   570        value: "${cluster}"
   571      }
   572      fields {
   573        key: "outcome"
   574        value: "limit_exceeded"
   575      }
   576    }
   577  }
   578  
   579  
   580  metrics {
          # Gauge with fields "cluster", "priority" and "name"; per the description
          # the value is 0 for a closed breaker and 1 for an open one.
          # Patterns here carry two placeholders: ${cluster} and ${priority}, each
          # reused as the value of the field with the same name.
          # "name" is a fixed literal per rule: the last component of the statsd
          # name with its "_open" suffix removed.
   581    metric: "luci/envoy/cluster/circuit_breakers/open"
   582    kind: GAUGE
   583    desc: "Whether the circuit breaker is closed (0) or open (1)"
   584  
   585    fields: "cluster"
   586    fields: "priority"
   587    fields: "name"
   588  
   589    rules {
   590      pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.cx_open"
   591      fields {
   592        key: "cluster"
   593        value: "${cluster}"
   594      }
   595      fields {
   596        key: "priority"
   597        value: "${priority}"
   598      }
   599      fields {
   600        key: "name"
   601        value: "cx"
   602      }
   603    }
   604    rules {
   605      pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.cx_pool_open"
   606      fields {
   607        key: "cluster"
   608        value: "${cluster}"
   609      }
   610      fields {
   611        key: "priority"
   612        value: "${priority}"
   613      }
   614      fields {
   615        key: "name"
   616        value: "cx_pool"
   617      }
   618    }
   619    rules {
   620      pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.rq_pending_open"
   621      fields {
   622        key: "cluster"
   623        value: "${cluster}"
   624      }
   625      fields {
   626        key: "priority"
   627        value: "${priority}"
   628      }
   629      fields {
   630        key: "name"
   631        value: "rq_pending"
   632      }
   633    }
   634    rules {
   635      pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.rq_open"
   636      fields {
   637        key: "cluster"
   638        value: "${cluster}"
   639      }
   640      fields {
   641        key: "priority"
   642        value: "${priority}"
   643      }
   644      fields {
   645        key: "name"
   646        value: "rq"
   647      }
   648    }
   649    rules {
   650      pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.rq_retry_open"
   651      fields {
   652        key: "cluster"
   653        value: "${cluster}"
   654      }
   655      fields {
   656        key: "priority"
   657        value: "${priority}"
   658      }
   659      fields {
   660        key: "name"
   661        value: "rq_retry"
   662      }
   663    }
   664  }
   665  
   666  
   667  metrics {
          # Gauge with fields "cluster" and "state". Each rule maps one
          # membership_<suffix> statsd name onto this metric; "state" is a fixed
          # literal equal to that suffix and "cluster" comes from the ${cluster}
          # placeholder in the pattern.
   668    metric: "luci/envoy/cluster/membership"
   669    kind: GAUGE
   670    desc: "Number of cluster upstream endpoints per their health status"
   671  
   672    fields: "cluster"
   673    fields: "state"
   674  
   675    rules {
   676      pattern: "*.cluster.${cluster}.membership_healthy"
   677      fields {
   678        key: "cluster"
   679        value: "${cluster}"
   680      }
   681      fields {
   682        key: "state"
   683        value: "healthy"
   684      }
   685    }
   686    rules {
   687      pattern: "*.cluster.${cluster}.membership_degraded"
   688      fields {
   689        key: "cluster"
   690        value: "${cluster}"
   691      }
   692      fields {
   693        key: "state"
   694        value: "degraded"
   695      }
   696    }
   697  }
   696    }
   697  }