# Cluster stats.
# https://www.envoyproxy.io/docs/envoy/latest/configuration/upstream/cluster_manager/cluster_stats
#
# Each `metrics` entry below declares one "luci/envoy/cluster/..." metric: its
# kind, description, optional units and the ordered list of field names. Every
# `rules` entry pairs one stat-name pattern with the values assigned to those
# fields. All patterns in this file start with "*.cluster.${cluster}." and all
# rules set the "cluster" field to "${cluster}".


# ---------------------------------------------------------------------------
# Upstream connections (upstream_cx_*).
# ---------------------------------------------------------------------------

# Gauge of active upstream connections, keyed by cluster only.
metrics {
  metric: "luci/envoy/cluster/cx/active"
  kind: GAUGE
  desc: "Number of currently active upstream connections"
  fields: "cluster"

  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_active"
    fields { key: "cluster" value: "${cluster}" }
  }
}

# Connection counter; the per-protocol stats are folded into one metric via
# the constant "protocol" field.
metrics {
  metric: "luci/envoy/cluster/cx/total"
  kind: COUNTER
  desc: "Total number of upstream connections made"
  fields: "cluster"
  fields: "protocol"

  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_http1_total"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "protocol" value: "http1" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_http2_total"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "protocol" value: "http2" }
  }
}

# Connection-level problems. In every rule the "kind" value is the part of
# the stat name that follows "upstream_cx_".
metrics {
  metric: "luci/envoy/cluster/cx/issues"
  kind: COUNTER
  desc: "Total number of upstream connection failures/timeouts/disconnects"
  fields: "cluster"
  fields: "kind"

  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_connect_fail"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "connect_fail" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_connect_timeout"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "connect_timeout" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_idle_timeout"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "idle_timeout" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_connect_attempts_exceeded"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "connect_attempts_exceeded" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_overflow"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "overflow" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_close_notify"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "close_notify" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_pool_overflow"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "pool_overflow" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_protocol_error"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "protocol_error" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_max_requests"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "max_requests" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_none_healthy"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "none_healthy" }
  }
}

# Buffered bytes gauge: rx stats map to direction "received", tx to "sent".
metrics {
  metric: "luci/envoy/cluster/cx/bytes/buffered"
  kind: GAUGE
  desc: "Currently buffered connection bytes (per direction)"
  units: BYTES
  fields: "cluster"
  fields: "direction"

  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_rx_bytes_buffered"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "direction" value: "received" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_tx_bytes_buffered"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "direction" value: "sent" }
  }
}

# Total bytes counter, using the same rx/tx -> received/sent mapping.
metrics {
  metric: "luci/envoy/cluster/cx/bytes/total"
  kind: COUNTER
  desc: "Total connection bytes (per direction)"
  units: BYTES
  fields: "cluster"
  fields: "direction"

  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_rx_bytes_total"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "direction" value: "received" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_tx_bytes_total"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "direction" value: "sent" }
  }
}

# Distribution of connection establishment time, in milliseconds.
metrics {
  metric: "luci/envoy/cluster/cx/connect"
  kind: CUMULATIVE_DISTRIBUTION
  desc: "How long it takes to connect to an upstream"
  units: MILLISECONDS
  fields: "cluster"

  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_connect_ms"
    fields { key: "cluster" value: "${cluster}" }
  }
}

# Distribution of connection lifetime, in milliseconds.
metrics {
  metric: "luci/envoy/cluster/cx/length"
  kind: CUMULATIVE_DISTRIBUTION
  desc: "Total lifetime of a connection"
  units: MILLISECONDS
  fields: "cluster"

  rules {
    pattern: "*.cluster.${cluster}.upstream_cx_length_ms"
    fields { key: "cluster" value: "${cluster}" }
  }
}


# ---------------------------------------------------------------------------
# Upstream requests (upstream_rq_*).
# ---------------------------------------------------------------------------

# Gauge of active upstream requests.
metrics {
  metric: "luci/envoy/cluster/rq/active"
  kind: GAUGE
  desc: "Number of currently active upstream requests"
  fields: "cluster"

  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_active"
    fields { key: "cluster" value: "${cluster}" }
  }
}

# Gauge of requests currently waiting on the connection pool.
metrics {
  metric: "luci/envoy/cluster/rq/pending/active"
  kind: GAUGE
  desc: "Number of active requests pending a connection pool connection"
  fields: "cluster"

  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_pending_active"
    fields { key: "cluster" value: "${cluster}" }
  }
}

# Counter of requests that had to wait for a connection.
metrics {
  metric: "luci/envoy/cluster/rq/pending/total"
  kind: COUNTER
  desc: "Total number of requests that were delayed waiting for a connection"
  fields: "cluster"

  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_pending_total"
    fields { key: "cluster" value: "${cluster}" }
  }
}

# Completed requests, bucketed by response-code class through the "codes"
# field ("1xx" .. "5xx", plus "unknown").
metrics {
  metric: "luci/envoy/cluster/rq/completed"
  kind: COUNTER
  desc: "Total number of requests completed"
  fields: "cluster"
  fields: "codes"

  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_1xx"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "codes" value: "1xx" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_2xx"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "codes" value: "2xx" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_3xx"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "codes" value: "3xx" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_4xx"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "codes" value: "4xx" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_5xx"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "codes" value: "5xx" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_unknown"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "codes" value: "unknown" }
  }
}

# Distribution of request time, in milliseconds.
metrics {
  metric: "luci/envoy/cluster/rq/duration"
  kind: CUMULATIVE_DISTRIBUTION
  desc: "Request time"
  units: MILLISECONDS
  fields: "cluster"

  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_time"
    fields { key: "cluster" value: "${cluster}" }
  }
}

# Request-level problems. In every rule the "kind" value is the part of the
# stat name that follows "upstream_rq_".
metrics {
  metric: "luci/envoy/cluster/rq/issues"
  kind: COUNTER
  desc: "Total number of requests failed due to cluster issues"
  fields: "cluster"
  fields: "kind"

  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_pending_overflow"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "pending_overflow" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_pending_failure_eject"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "pending_failure_eject" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_cancelled"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "cancelled" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_maintenance_mode"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "maintenance_mode" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_timeout"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "timeout" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_max_duration_reached"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "max_duration_reached" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_per_try_timeout"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "per_try_timeout" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_rx_reset"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "rx_reset" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_tx_reset"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "kind" value: "tx_reset" }
  }
}

# Retry counters; the "outcome" value is the part of the stat name that
# follows "upstream_rq_retry_".
# NOTE(review): desc below probably meant "Total number of request retries
# per outcome"; the string is kept unchanged here -- confirm before editing.
metrics {
  metric: "luci/envoy/cluster/rq/retries"
  kind: COUNTER
  desc: "Total number request retries per outcome"
  fields: "cluster"
  fields: "outcome"

  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_retry_success"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "outcome" value: "success" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_retry_overflow"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "outcome" value: "overflow" }
  }
  rules {
    pattern: "*.cluster.${cluster}.upstream_rq_retry_limit_exceeded"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "outcome" value: "limit_exceeded" }
  }
}


# ---------------------------------------------------------------------------
# Circuit breakers and cluster membership.
# ---------------------------------------------------------------------------

# Circuit breaker state. Patterns carry a second variable, ${priority}, that
# feeds the "priority" field; the "name" value is the stat name with its
# trailing "_open" removed.
metrics {
  metric: "luci/envoy/cluster/circuit_breakers/open"
  kind: GAUGE
  desc: "Whether the circuit breaker is closed (0) or open (1)"
  fields: "cluster"
  fields: "priority"
  fields: "name"

  rules {
    pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.cx_open"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "priority" value: "${priority}" }
    fields { key: "name" value: "cx" }
  }
  rules {
    pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.cx_pool_open"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "priority" value: "${priority}" }
    fields { key: "name" value: "cx_pool" }
  }
  rules {
    pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.rq_pending_open"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "priority" value: "${priority}" }
    fields { key: "name" value: "rq_pending" }
  }
  rules {
    pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.rq_open"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "priority" value: "${priority}" }
    fields { key: "name" value: "rq" }
  }
  rules {
    pattern: "*.cluster.${cluster}.circuit_breakers.${priority}.rq_retry_open"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "priority" value: "${priority}" }
    fields { key: "name" value: "rq_retry" }
  }
}

# Endpoint counts split by the "state" field ("healthy" / "degraded").
metrics {
  metric: "luci/envoy/cluster/membership"
  kind: GAUGE
  desc: "Number of cluster upstream endpoints per their health status"
  fields: "cluster"
  fields: "state"

  rules {
    pattern: "*.cluster.${cluster}.membership_healthy"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "state" value: "healthy" }
  }
  rules {
    pattern: "*.cluster.${cluster}.membership_degraded"
    fields { key: "cluster" value: "${cluster}" }
    fields { key: "state" value: "degraded" }
  }
}