github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/deploy/orioledb/templates/metrics-configmap-14.yaml (about) 1 apiVersion: v1 2 kind: ConfigMap 3 metadata: 4 name: orioledb-custom-metrics 5 labels: 6 {{- include "orioledb.labels" . | nindent 4 }} 7 data: 8 custom-metrics.yaml: |- 9 pg_postmaster: 10 query: "/*kb-monitor*/SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()" 11 master: true 12 metrics: 13 - start_time_seconds: 14 usage: "GAUGE" 15 description: "Time at which postmaster started" 16 17 pg_replication: 18 query: | 19 /*kb-monitor*/SELECT 20 (case when (not pg_is_in_recovery() or pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) then 0 else greatest (0, extract(epoch from (now() - pg_last_xact_replay_timestamp()))) end) as lag, 21 (case when pg_is_in_recovery() then 0 else 1 end) as is_master 22 master: true 23 metrics: 24 - lag: 25 usage: "GAUGE" 26 description: "Replication lag behind master in seconds" 27 - is_master: 28 usage: "GAUGE" 29 description: "Instance is master or slave" 30 31 pg_stat_user_tables: 32 query: | 33 /*kb-monitor*/SELECT 34 current_database() datname, 35 schemaname, 36 relname, 37 seq_scan, 38 seq_tup_read, 39 idx_scan, 40 idx_tup_fetch, 41 n_tup_ins, 42 n_tup_upd, 43 n_tup_del, 44 n_tup_hot_upd, 45 n_live_tup, 46 n_dead_tup, 47 n_mod_since_analyze, 48 n_ins_since_vacuum, 49 COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, 50 COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, 51 COALESCE(last_analyze, '1970-01-01Z') as last_analyze, 52 COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, 53 vacuum_count, 54 autovacuum_count, 55 analyze_count, 56 autoanalyze_count 57 FROM 58 pg_stat_user_tables 59 metrics: 60 - datname: 61 usage: "LABEL" 62 description: "Name of current database" 63 - schemaname: 64 usage: "LABEL" 65 description: "Name of the schema that this table is in" 66 - relname: 67 usage: "LABEL" 68 description: "Name of this table" 69 - seq_scan: 70 usage: "COUNTER" 71 description: "Number of sequential scans initiated on this table" 72 - seq_tup_read: 73 usage: "COUNTER" 74 description: "Number of live rows fetched by sequential scans" 75 - idx_scan: 76 usage: "COUNTER" 77 description: "Number of index scans initiated on this table" 78 - idx_tup_fetch: 79 usage: "COUNTER" 80 description: "Number of live rows fetched by index scans" 81 - n_tup_ins: 82 usage: "COUNTER" 83 description: "Number of rows inserted" 84 - n_tup_upd: 85 usage: "COUNTER" 86 description: "Number of rows updated" 87 - n_tup_del: 88 usage: "COUNTER" 89 description: "Number of rows deleted" 90 - n_tup_hot_upd: 91 usage: "COUNTER" 92 description: "Number of rows HOT updated (i.e., with no separate index update required)" 93 - n_live_tup: 94 usage: "GAUGE" 95 description: "Estimated number of live rows" 96 - n_dead_tup: 97 usage: "GAUGE" 98 description: "Estimated number of dead rows" 99 - n_mod_since_analyze: 100 usage: "GAUGE" 101 description: "Estimated number of rows changed since last analyze" 102 - n_ins_since_vacuum: 103 usage: "GAUGE" 104 description: "Estimated number of rows inserted since this table was last vacuumed" 105 - last_vacuum: 106 usage: "GAUGE" 107 description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)" 108 - last_autovacuum: 109 usage: "GAUGE" 110 description: "Last time at which this table was vacuumed by the autovacuum daemon" 111 - last_analyze: 112 usage: "GAUGE" 113 description: "Last time at which this table was manually analyzed" 114 - last_autoanalyze: 115 usage: "GAUGE" 116 description: "Last time at which this table was analyzed by the autovacuum daemon" 117 - vacuum_count: 118 usage: "COUNTER" 119 description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" 120 - autovacuum_count: 121 usage: "COUNTER" 122 description: "Number of times this table has been vacuumed by the autovacuum daemon" 123 - analyze_count: 124 usage: "COUNTER" 125 description: "Number of times this table has been manually analyzed" 126 - autoanalyze_count: 127 usage: "COUNTER" 128 description: "Number of times this table has been analyzed by the autovacuum daemon" 129 130 pg_statio_user_tables: 131 query: | 132 /*kb-monitor*/SELECT 133 current_database() datname, 134 schemaname, 135 relname, 136 heap_blks_read, 137 heap_blks_hit, 138 idx_blks_read, 139 idx_blks_hit, 140 toast_blks_read, 141 toast_blks_hit, 142 tidx_blks_read, 143 tidx_blks_hit 144 FROM 145 pg_statio_user_tables 146 metrics: 147 - datname: 148 usage: "LABEL" 149 description: "Name of current database" 150 - schemaname: 151 usage: "LABEL" 152 description: "Name of the schema that this table is in" 153 - relname: 154 usage: "LABEL" 155 description: "Name of this table" 156 - heap_blks_read: 157 usage: "COUNTER" 158 description: "Number of disk blocks read from this table" 159 - heap_blks_hit: 160 usage: "COUNTER" 161 description: "Number of buffer hits in this table" 162 - idx_blks_read: 163 usage: "COUNTER" 164 description: "Number of disk blocks read from all indexes on this table" 165 - idx_blks_hit: 166 usage: "COUNTER" 167 description: "Number of buffer hits in all indexes on this table" 168 - toast_blks_read: 169 usage: "COUNTER" 170 description: "Number of disk blocks read from this table's TOAST table (if any)" 171 - toast_blks_hit: 172 usage: "COUNTER" 173 description: "Number of buffer hits in this table's TOAST table (if any)" 174 - tidx_blks_read: 175 usage: "COUNTER" 176 description: "Number of disk blocks read from this table's TOAST table indexes (if any)" 177 - tidx_blks_hit: 178 usage: "COUNTER" 179 description: "Number of buffer hits in this table's TOAST table indexes (if any)" 180 181 # WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series 182 pg_stat_statements_by_mean_exec_time: 183 query: | 184 /*kb-monitor*/SELECT 185 t2.rolname, 186 t3.datname, 187 queryid, 188 left(regexp_replace(trim(both ' ' from lower(query)),'\s+',' ','g'), 256) as query, 189 plans, 190 total_plan_time / 1000 as plan_time_seconds, 191 min_plan_time / 1000 as min_plan_time_seconds, 192 max_plan_time / 1000 as max_plan_time_seconds, 193 mean_plan_time / 1000 as mean_plan_time_seconds, 194 stddev_plan_time / 1000 as stddev_plan_time_seconds, 195 calls, 196 total_exec_time / 1000 as exec_time_seconds, 197 min_exec_time / 1000 as min_exec_time_seconds, 198 max_exec_time / 1000 as max_exec_time_seconds, 199 mean_exec_time / 1000 as mean_exec_time_seconds, 200 stddev_exec_time / 1000 as stddev_exec_time_seconds, 201 rows, 202 shared_blks_hit, 203 shared_blks_read, 204 shared_blks_dirtied, 205 shared_blks_written, 206 local_blks_hit, 207 local_blks_read, 208 local_blks_dirtied, 209 local_blks_written, 210 temp_blks_read, 211 temp_blks_written, 212 blk_read_time / 1000 as blk_read_time_seconds, 213 blk_write_time / 1000 as blk_write_time_seconds, 214 wal_records, 215 wal_fpi, 216 wal_bytes 217 FROM 218 pg_stat_statements t1 219 JOIN 220 pg_roles t2 221 ON (t1.userid=t2.oid) 222 JOIN 223 pg_database t3 224 ON (t1.dbid=t3.oid) 225 WHERE t2.rolname != 'rdsadmin' 226 ORDER BY mean_exec_time DESC 227 LIMIT 50 228 master: true 229 metrics: 230 - rolname: 231 usage: "LABEL" 232 description: "Name of user" 233 - datname: 234 usage: "LABEL" 235 description: "Name of database" 236 - queryid: 237 usage: "LABEL" 238 description: "Query ID" 239 - query: 240 usage: "LABEL" 241 description: "First 64 chars for simple formatted query text" 242 - plans: 243 usage: "COUNTER" 244 description: "Number of times the statement was planned" 245 - plan_time_seconds: 246 usage: "COUNTER" 247 description: "Total time spent planning the statement" 248 - min_plan_time_seconds: 249 usage: "GAUGE" 250 description: "Minimum time spent planning the statement" 251 - max_plan_time_seconds: 252 usage: "GAUGE" 253 description: "Maximum time spent planning the statement" 254 - mean_plan_time_seconds: 255 usage: "GAUGE" 256 description: "Mean time spent planning the statement" 257 - stddev_plan_time_seconds: 258 usage: "GAUGE" 259 description: "Population standard deviation of time spent planning the statement" 260 - calls: 261 usage: "COUNTER" 262 description: "Number of times executed" 263 - exec_time_seconds: 264 usage: "COUNTER" 265 description: "Total time spent in the statement" 266 - min_exec_time_seconds: 267 usage: "GAUGE" 268 description: "Minimum time spent in the statement" 269 - max_exec_time_seconds: 270 usage: "GAUGE" 271 description: "Maximum time spent in the statement" 272 - mean_exec_time_seconds: 273 usage: "GAUGE" 274 description: "Mean time spent in the statement" 275 - stddev_exec_time_seconds: 276 usage: "GAUGE" 277 description: "Population standard deviation of time spent in the statement" 278 - rows: 279 usage: "COUNTER" 280 description: "Total number of rows retrieved or affected by the statement" 281 - shared_blks_hit: 282 usage: "COUNTER" 283 description: "Total number of shared block cache hits by the statement" 284 - shared_blks_read: 285 usage: "COUNTER" 286 description: "Total number of shared blocks read by the statement" 287 - shared_blks_dirtied: 288 usage: "COUNTER" 289 description: "Total number of shared blocks dirtied by the statement" 290 - shared_blks_written: 291 usage: "COUNTER" 292 description: "Total number of shared blocks written by the statement" 293 - local_blks_hit: 294 usage: "COUNTER" 295 description: "Total number of local block cache hits by the statement" 296 - local_blks_read: 297 usage: "COUNTER" 298 description: "Total number of local blocks read by the statement" 299 - local_blks_dirtied: 300 usage: "COUNTER" 301 description: "Total number of local blocks dirtied by the statement" 302 - local_blks_written: 303 usage: "COUNTER" 304 description: "Total number of local blocks written by the statement" 305 - temp_blks_read: 306 usage: "COUNTER" 307 description: "Total number of temp blocks read by the statement" 308 - temp_blks_written: 309 usage: "COUNTER" 310 description: "Total number of temp blocks written by the statement" 311 - blk_read_time_seconds: 312 usage: "COUNTER" 313 description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 314 - blk_write_time_seconds: 315 usage: "COUNTER" 316 description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 317 - wal_records: 318 usage: "COUNTER" 319 description: "Total number of WAL records generated by the statement" 320 - wal_fpi: 321 usage: "COUNTER" 322 description: "Total number of WAL full page images generated by the statement" 323 - wal_bytes: 324 usage: "COUNTER" 325 description: "Total amount of WAL generated by the statement in bytes" 326 327 pg_stat_statements_by_calls: 328 query: | 329 /*kb-monitor*/SELECT 330 t2.rolname, 331 t3.datname, 332 queryid, 333 left(regexp_replace(trim(both ' ' from lower(query)),'\s+',' ','g'), 256) as query, 334 plans, 335 total_plan_time / 1000 as plan_time_seconds, 336 min_plan_time / 1000 as min_plan_time_seconds, 337 max_plan_time / 1000 as max_plan_time_seconds, 338 mean_plan_time / 1000 as mean_plan_time_seconds, 339 stddev_plan_time / 1000 as stddev_plan_time_seconds, 340 calls, 341 total_exec_time / 1000 as exec_time_seconds, 342 min_exec_time / 1000 as min_exec_time_seconds, 343 max_exec_time / 1000 as max_exec_time_seconds, 344 mean_exec_time / 1000 as mean_exec_time_seconds, 345 stddev_exec_time / 1000 as stddev_exec_time_seconds, 346 rows, 347 shared_blks_hit, 348 shared_blks_read, 349 shared_blks_dirtied, 350 shared_blks_written, 351 local_blks_hit, 352 local_blks_read, 353 local_blks_dirtied, 354 local_blks_written, 355 temp_blks_read, 356 temp_blks_written, 357 blk_read_time / 1000 as blk_read_time_seconds, 358 blk_write_time / 1000 as blk_write_time_seconds, 359 wal_records, 360 wal_fpi, 361 wal_bytes 362 FROM 363 pg_stat_statements t1 364 JOIN 365 pg_roles t2 366 ON (t1.userid=t2.oid) 367 JOIN 368 pg_database t3 369 ON (t1.dbid=t3.oid) 370 WHERE t2.rolname != 'rdsadmin' 371 ORDER BY calls DESC 372 LIMIT 50 373 master: true 374 metrics: 375 - rolname: 376 usage: "LABEL" 377 description: "Name of user" 378 - datname: 379 usage: "LABEL" 380 description: "Name of database" 381 - queryid: 382 usage: "LABEL" 383 description: "Query ID" 384 - query: 385 usage: "LABEL" 386 description: "First 64 chars for simple formatted query text" 387 - plans: 388 usage: "COUNTER" 389 description: "Number of times the statement was planned" 390 - plan_time_seconds: 391 usage: "COUNTER" 392 description: "Total time spent planning the statement" 393 - min_plan_time_seconds: 394 usage: "GAUGE" 395 description: "Minimum time spent planning the statement" 396 - max_plan_time_seconds: 397 usage: "GAUGE" 398 description: "Maximum time spent planning the statement" 399 - mean_plan_time_seconds: 400 usage: "GAUGE" 401 description: "Mean time spent planning the statement" 402 - stddev_plan_time_seconds: 403 usage: "GAUGE" 404 description: "Population standard deviation of time spent planning the statement" 405 - calls: 406 usage: "COUNTER" 407 description: "Number of times executed" 408 - exec_time_seconds: 409 usage: "COUNTER" 410 description: "Total time spent in the statement" 411 - min_exec_time_seconds: 412 usage: "GAUGE" 413 description: "Minimum time spent in the statement" 414 - max_exec_time_seconds: 415 usage: "GAUGE" 416 description: "Maximum time spent in the statement" 417 - mean_exec_time_seconds: 418 usage: "GAUGE" 419 description: "Mean time spent in the statement" 420 - stddev_exec_time_seconds: 421 usage: "GAUGE" 422 description: "Population standard deviation of time spent in the statement" 423 - rows: 424 usage: "COUNTER" 425 description: "Total number of rows retrieved or affected by the statement" 426 - shared_blks_hit: 427 usage: "COUNTER" 428 description: "Total number of shared block cache hits by the statement" 429 - shared_blks_read: 430 usage: "COUNTER" 431 description: "Total number of shared blocks read by the statement" 432 - shared_blks_dirtied: 433 usage: "COUNTER" 434 description: "Total number of shared blocks dirtied by the statement" 435 - shared_blks_written: 436 usage: "COUNTER" 437 description: "Total number of shared blocks written by the statement" 438 - local_blks_hit: 439 usage: "COUNTER" 440 description: "Total number of local block cache hits by the statement" 441 - local_blks_read: 442 usage: "COUNTER" 443 description: "Total number of local blocks read by the statement" 444 - local_blks_dirtied: 445 usage: "COUNTER" 446 description: "Total number of local blocks dirtied by the statement" 447 - local_blks_written: 448 usage: "COUNTER" 449 description: "Total number of local blocks written by the statement" 450 - temp_blks_read: 451 usage: "COUNTER" 452 description: "Total number of temp blocks read by the statement" 453 - temp_blks_written: 454 usage: "COUNTER" 455 description: "Total number of temp blocks written by the statement" 456 - blk_read_time_seconds: 457 usage: "COUNTER" 458 description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 459 - blk_write_time_seconds: 460 usage: "COUNTER" 461 description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 462 - wal_records: 463 usage: "COUNTER" 464 description: "Total number of WAL records generated by the statement" 465 - wal_fpi: 466 usage: "COUNTER" 467 description: "Total number of WAL full page images generated by the statement" 468 - wal_bytes: 469 usage: "COUNTER" 470 description: "Total amount of WAL generated by the statement in bytes" 471 472 pg_stat_statements_stats: 473 query: | 474 /*kb-monitor*/SELECT 475 t2.rolname, 476 t3.datname, 477 calls, 478 total_exec_time / 1000 as exec_time_seconds, 479 mean_exec_time / 1000 as mean_exec_time_seconds, 480 max_exec_time / 1000 as max_exec_time_seconds, 481 rows 482 FROM ( 483 SELECT 484 userid, 485 dbid, 486 SUM(calls) AS calls, 487 SUM(total_exec_time) as total_exec_time, 488 AVG(mean_exec_time) as mean_exec_time, 489 MAX(max_exec_time) as max_exec_time, 490 SUM(rows) AS rows 491 FROM 492 pg_stat_statements 493 GROUP BY userid, dbid 494 ) tmp 495 JOIN 496 pg_roles t2 497 ON (tmp.userid=t2.oid) 498 JOIN 499 pg_database t3 500 ON (tmp.dbid=t3.oid) 501 WHERE t2.rolname != 'rdsadmin' 502 master: true 503 metrics: 504 - rolname: 505 usage: "LABEL" 506 description: "Name of user" 507 - datname: 508 usage: "LABEL" 509 description: "Name of database" 510 - calls: 511 usage: "COUNTER" 512 description: "Number of times executed" 513 - exec_time_seconds: 514 usage: "COUNTER" 515 description: "Total time spent in the statement" 516 - mean_exec_time_seconds: 517 usage: "GAUGE" 518 description: "Mean time spent in the statement" 519 - max_exec_time_seconds: 520 usage: "GAUGE" 521 description: "Maximum time spent in the statement" 522 - rows: 523 usage: "COUNTER" 524 description: "Total number of rows retrieved or affected by the statement" 525 526 pg_wal_log_file: 527 query: "/*kb-monitor*/SELECT count(*) AS count FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}'" 528 master: true 529 metrics: 530 - count: 531 usage: "GAUGE" 532 description: "Wal log file count" 533 534 pg_stat_activity_detail: 535 query: | 536 /*kb-monitor*/SELECT 537 datname,wait_event_type,wait_event,state,backend_type,COUNT(1) AS count 538 FROM pg_stat_activity 539 GROUP BY datname,wait_event_type,wait_event,state,backend_type 540 master: true 541 metrics: 542 - datname: 543 usage: "LABEL" 544 description: "Name of current database" 545 - wait_event_type: 546 usage: "LABEL" 547 description: "The type of event for which the backend is waiting, if any; otherwise NULL." 548 - wait_event: 549 usage: "LABEL" 550 description: "Wait event name if backend is currently waiting, otherwise NULL. " 551 - state: 552 usage: "LABEL" 553 description: "Current overall state of this backend." 554 - backend_type: 555 usage: "LABEL" 556 description: "Type of current backend." 557 - count: 558 usage: "GAUGE" 559 description: "Const value of 1" 560 561 pg_locks_detail: 562 query: | 563 /*kb-monitor*/SELECT 564 locktype,datname,relation,mode,granted,fastpath,max_wait_age_seconds,count 565 FROM ( 566 SELECT locktype,database,relation,mode,granted,fastpath,MAX(wait_age_seconds) AS max_wait_age_seconds,COUNT(1) AS count 567 FROM ( 568 SELECT locktype,database,relation::regclass,mode,granted::int,fastpath::int, 569 COALESCE(EXTRACT(EPOCH FROM now() - waitstart),0) AS wait_age_seconds 570 FROM pg_locks 571 ) p1 572 GROUP BY locktype,database,relation,mode,granted,fastpath 573 ) p2 574 LEFT JOIN ( 575 SELECT oid,datname FROM pg_database 576 ) pd 577 ON p2.database=pd.oid 578 master: true 579 metrics: 580 - locktype: 581 usage: "LABEL" 582 description: "Type of the lockable object" 583 - datname: 584 usage: "LABEL" 585 description: "Name of current database" 586 - relation: 587 usage: "LABEL" 588 description: "Relation targeted by the lock, or null if the target is not a relation or part of a relation" 589 - mode: 590 usage: "LABEL" 591 description: "Name of the lock mode held or desired by this process" 592 - granted: 593 usage: "LABEL" 594 description: "True if lock is held, false if lock is awaited" 595 - fastpath: 596 usage: "LABEL" 597 description: "True if lock was taken via fast path, false if taken via main lock table" 598 - max_wait_age_seconds: 599 usage: "GAUGE" 600 description: "Max time in seconds when the server process started waiting for this lock, or null if the lock is held" 601 - count: 602 usage: "GAUGE" 603 description: "Const value of 1" 604 605 pg_stat_wal: 606 query: | 607 /*kb-monitor*/SELECT 608 wal_records, 609 wal_fpi, 610 wal_bytes, 611 wal_buffers_full, 612 wal_write, 613 wal_sync, 614 wal_write_time / 1000 as wal_write_time_seconds, 615 wal_sync_time / 1000 as wal_sync_time_seconds 616 FROM pg_stat_wal 617 master: true 618 metrics: 619 - wal_records: 620 usage: "COUNTER" 621 description: "Total number of WAL records generated" 622 - wal_fpi: 623 usage: "COUNTER" 624 description: "Total number of WAL full page images generated" 625 - wal_bytes: 626 usage: "COUNTER" 627 description: "Total amount of WAL generated in bytes" 628 - wal_buffers_full: 629 usage: "COUNTER" 630 description: "Number of times WAL data was written to disk because WAL buffers became full" 631 - wal_write: 632 usage: "COUNTER" 633 description: "Number of times WAL buffers were written out to disk via XLogWrite request." 634 - wal_sync: 635 usage: "COUNTER" 636 description: "Number of times WAL files were synced to disk via issue_xlog_fsync request" 637 - wal_write_time_seconds: 638 usage: "COUNTER" 639 description: "Total amount of time spent writing WAL buffers to disk via XLogWrite request" 640 - wal_sync_time_seconds: 641 usage: "COUNTER" 642 description: "Total amount of time spent syncing WAL files to disk via issue_xlog_fsync request"