github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/deploy/postgresql/templates/metrics-configmap-12.yaml (about) 1 apiVersion: v1 2 kind: ConfigMap 3 metadata: 4 name: postgresql12-custom-metrics 5 labels: 6 {{- include "postgresql.labels" . | nindent 4 }} 7 data: 8 custom-metrics.yaml: |- 9 pg_postmaster: 10 query: "/*kb-monitor*/SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()" 11 master: true 12 metrics: 13 - start_time_seconds: 14 usage: "GAUGE" 15 description: "Time at which postmaster started" 16 17 pg_replication: 18 query: | 19 /*kb-monitor*/SELECT 20 (case when (not pg_is_in_recovery() or pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) then 0 else greatest (0, extract(epoch from (now() - pg_last_xact_replay_timestamp()))) end) as lag, 21 (case when pg_is_in_recovery() then 0 else 1 end) as is_master 22 master: true 23 metrics: 24 - lag: 25 usage: "GAUGE" 26 description: "Replication lag behind master in seconds" 27 - is_master: 28 usage: "GAUGE" 29 description: "Instance is master or slave" 30 31 pg_stat_user_tables: 32 query: | 33 /*kb-monitor*/SELECT 34 current_database() datname, 35 schemaname, 36 relname, 37 seq_scan, 38 seq_tup_read, 39 idx_scan, 40 idx_tup_fetch, 41 n_tup_ins, 42 n_tup_upd, 43 n_tup_del, 44 n_tup_hot_upd, 45 n_live_tup, 46 n_dead_tup, 47 n_mod_since_analyze, 48 COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, 49 COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, 50 COALESCE(last_analyze, '1970-01-01Z') as last_analyze, 51 COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, 52 vacuum_count, 53 autovacuum_count, 54 analyze_count, 55 autoanalyze_count 56 FROM 57 pg_stat_user_tables 58 metrics: 59 - datname: 60 usage: "LABEL" 61 description: "Name of current database" 62 - schemaname: 63 usage: "LABEL" 64 description: "Name of the schema that this table is in" 65 - relname: 66 usage: "LABEL" 67 description: "Name of this table" 68 - seq_scan: 69 usage: "COUNTER" 70 description: "Number of sequential scans initiated on this table" 71 - seq_tup_read: 72 usage: "COUNTER" 73 description: "Number of live rows fetched by sequential scans" 74 - idx_scan: 75 usage: "COUNTER" 76 description: "Number of index scans initiated on this table" 77 - idx_tup_fetch: 78 usage: "COUNTER" 79 description: "Number of live rows fetched by index scans" 80 - n_tup_ins: 81 usage: "COUNTER" 82 description: "Number of rows inserted" 83 - n_tup_upd: 84 usage: "COUNTER" 85 description: "Number of rows updated" 86 - n_tup_del: 87 usage: "COUNTER" 88 description: "Number of rows deleted" 89 - n_tup_hot_upd: 90 usage: "COUNTER" 91 description: "Number of rows HOT updated (i.e., with no separate index update required)" 92 - n_live_tup: 93 usage: "GAUGE" 94 description: "Estimated number of live rows" 95 - n_dead_tup: 96 usage: "GAUGE" 97 description: "Estimated number of dead rows" 98 - n_mod_since_analyze: 99 usage: "GAUGE" 100 description: "Estimated number of rows changed since last analyze" 101 - last_vacuum: 102 usage: "GAUGE" 103 description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)" 104 - last_autovacuum: 105 usage: "GAUGE" 106 description: "Last time at which this table was vacuumed by the autovacuum daemon" 107 - last_analyze: 108 usage: "GAUGE" 109 description: "Last time at which this table was manually analyzed" 110 - last_autoanalyze: 111 usage: "GAUGE" 112 description: "Last time at which this table was analyzed by the autovacuum daemon" 113 - vacuum_count: 114 usage: "COUNTER" 115 description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)" 116 - autovacuum_count: 117 usage: "COUNTER" 118 description: "Number of times this table has been vacuumed by the autovacuum daemon" 119 - analyze_count: 120 usage: "COUNTER" 121 description: "Number of times this table has been manually analyzed" 122 - autoanalyze_count: 123 usage: "COUNTER" 124 description: "Number of times this table has been analyzed by the autovacuum daemon" 125 126 pg_statio_user_tables: 127 query: | 128 /*kb-monitor*/SELECT 129 current_database() datname, 130 schemaname, 131 relname, 132 heap_blks_read, 133 heap_blks_hit, 134 idx_blks_read, 135 idx_blks_hit, 136 toast_blks_read, 137 toast_blks_hit, 138 tidx_blks_read, 139 tidx_blks_hit 140 FROM 141 pg_statio_user_tables 142 metrics: 143 - datname: 144 usage: "LABEL" 145 description: "Name of current database" 146 - schemaname: 147 usage: "LABEL" 148 description: "Name of the schema that this table is in" 149 - relname: 150 usage: "LABEL" 151 description: "Name of this table" 152 - heap_blks_read: 153 usage: "COUNTER" 154 description: "Number of disk blocks read from this table" 155 - heap_blks_hit: 156 usage: "COUNTER" 157 description: "Number of buffer hits in this table" 158 - idx_blks_read: 159 usage: "COUNTER" 160 description: "Number of disk blocks read from all indexes on this table" 161 - idx_blks_hit: 162 usage: "COUNTER" 163 description: "Number of buffer hits in all indexes on this table" 164 - toast_blks_read: 165 usage: "COUNTER" 166 description: "Number of disk blocks read from this table's TOAST table (if any)" 167 - toast_blks_hit: 168 usage: "COUNTER" 169 description: "Number of buffer hits in this table's TOAST table (if any)" 170 - tidx_blks_read: 171 usage: "COUNTER" 172 description: "Number of disk blocks read from this table's TOAST table indexes (if any)" 173 - tidx_blks_hit: 174 usage: "COUNTER" 175 description: "Number of buffer hits in this table's TOAST table indexes (if any)" 176 177 # WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series 178 pg_stat_statements_by_mean_exec_time: 179 query: | 180 /*kb-monitor*/SELECT 181 t2.rolname, 182 t3.datname, 183 queryid, 184 left(regexp_replace(trim(both ' ' from lower(query)),'\s+',' ','g'), 256) as query, 185 calls, 186 total_time / 1000 as exec_time_seconds, 187 min_time / 1000 as min_exec_time_seconds, 188 max_time / 1000 as max_exec_time_seconds, 189 mean_time / 1000 as mean_exec_time_seconds, 190 stddev_time / 1000 as stddev_exec_time_seconds, 191 rows, 192 shared_blks_hit, 193 shared_blks_read, 194 shared_blks_dirtied, 195 shared_blks_written, 196 local_blks_hit, 197 local_blks_read, 198 local_blks_dirtied, 199 local_blks_written, 200 temp_blks_read, 201 temp_blks_written, 202 blk_read_time / 1000 as blk_read_time_seconds, 203 blk_write_time / 1000 as blk_write_time_seconds 204 FROM 205 pg_stat_statements t1 206 JOIN 207 pg_roles t2 208 ON (t1.userid=t2.oid) 209 JOIN 210 pg_database t3 211 ON (t1.dbid=t3.oid) 212 WHERE t2.rolname != 'rdsadmin' 213 ORDER BY mean_time DESC 214 LIMIT 50 215 master: true 216 metrics: 217 - rolname: 218 usage: "LABEL" 219 description: "Name of user" 220 - datname: 221 usage: "LABEL" 222 description: "Name of database" 223 - queryid: 224 usage: "LABEL" 225 description: "Query ID" 226 - query: 227 usage: "LABEL" 228 description: "First 64 chars for simple formatted query text" 229 - calls: 230 usage: "COUNTER" 231 description: "Number of times executed" 232 - exec_time_seconds: 233 usage: "COUNTER" 234 description: "Total time spent in the statement" 235 - min_exec_time_seconds: 236 usage: "GAUGE" 237 description: "Minimum time spent in the statement" 238 - max_exec_time_seconds: 239 usage: "GAUGE" 240 description: "Maximum time spent in the statement" 241 - mean_exec_time_seconds: 242 usage: "GAUGE" 243 description: "Mean time spent in the statement" 244 - stddev_exec_time_seconds: 245 usage: "GAUGE" 246 description: "Population standard deviation of time spent in the statement" 247 - rows: 248 usage: "COUNTER" 249 description: "Total number of rows retrieved or affected by the statement" 250 - shared_blks_hit: 251 usage: "COUNTER" 252 description: "Total number of shared block cache hits by the statement" 253 - shared_blks_read: 254 usage: "COUNTER" 255 description: "Total number of shared blocks read by the statement" 256 - shared_blks_dirtied: 257 usage: "COUNTER" 258 description: "Total number of shared blocks dirtied by the statement" 259 - shared_blks_written: 260 usage: "COUNTER" 261 description: "Total number of shared blocks written by the statement" 262 - local_blks_hit: 263 usage: "COUNTER" 264 description: "Total number of local block cache hits by the statement" 265 - local_blks_read: 266 usage: "COUNTER" 267 description: "Total number of local blocks read by the statement" 268 - local_blks_dirtied: 269 usage: "COUNTER" 270 description: "Total number of local blocks dirtied by the statement" 271 - local_blks_written: 272 usage: "COUNTER" 273 description: "Total number of local blocks written by the statement" 274 - temp_blks_read: 275 usage: "COUNTER" 276 description: "Total number of temp blocks read by the statement" 277 - temp_blks_written: 278 usage: "COUNTER" 279 description: "Total number of temp blocks written by the statement" 280 - blk_read_time_seconds: 281 usage: "COUNTER" 282 description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 283 - blk_write_time_seconds: 284 usage: "COUNTER" 285 description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 286 287 pg_stat_statements_by_calls: 288 query: | 289 /*kb-monitor*/SELECT 290 t2.rolname, 291 t3.datname, 292 queryid, 293 left(regexp_replace(trim(both ' ' from lower(query)),'\s+',' ','g'), 256) as query, 294 calls, 295 total_time / 1000 as exec_time_seconds, 296 min_time / 1000 as min_exec_time_seconds, 297 max_time / 1000 as max_exec_time_seconds, 298 mean_time / 1000 as mean_exec_time_seconds, 299 stddev_time / 1000 as stddev_exec_time_seconds, 300 rows, 301 shared_blks_hit, 302 shared_blks_read, 303 shared_blks_dirtied, 304 shared_blks_written, 305 local_blks_hit, 306 local_blks_read, 307 local_blks_dirtied, 308 local_blks_written, 309 temp_blks_read, 310 temp_blks_written, 311 blk_read_time / 1000 as blk_read_time_seconds, 312 blk_write_time / 1000 as blk_write_time_seconds 313 FROM 314 pg_stat_statements t1 315 JOIN 316 pg_roles t2 317 ON (t1.userid=t2.oid) 318 JOIN 319 pg_database t3 320 ON (t1.dbid=t3.oid) 321 WHERE t2.rolname != 'rdsadmin' 322 ORDER BY calls DESC 323 LIMIT 50 324 master: true 325 metrics: 326 - rolname: 327 usage: "LABEL" 328 description: "Name of user" 329 - datname: 330 usage: "LABEL" 331 description: "Name of database" 332 - queryid: 333 usage: "LABEL" 334 description: "Query ID" 335 - query: 336 usage: "LABEL" 337 description: "First 64 chars for simple formatted query text" 338 - calls: 339 usage: "COUNTER" 340 description: "Number of times executed" 341 - exec_time_seconds: 342 usage: "COUNTER" 343 description: "Total time spent in the statement" 344 - min_exec_time_seconds: 345 usage: "GAUGE" 346 description: "Minimum time spent in the statement" 347 - max_exec_time_seconds: 348 usage: "GAUGE" 349 description: "Maximum time spent in the statement" 350 - mean_exec_time_seconds: 351 usage: "GAUGE" 352 description: "Mean time spent in the statement" 353 - stddev_exec_time_seconds: 354 usage: "GAUGE" 355 description: "Population standard deviation of time spent in the statement" 356 - rows: 357 usage: "COUNTER" 358 description: "Total number of rows retrieved or affected by the statement" 359 - shared_blks_hit: 360 usage: "COUNTER" 361 description: "Total number of shared block cache hits by the statement" 362 - shared_blks_read: 363 usage: "COUNTER" 364 description: "Total number of shared blocks read by the statement" 365 - shared_blks_dirtied: 366 usage: "COUNTER" 367 description: "Total number of shared blocks dirtied by the statement" 368 - shared_blks_written: 369 usage: "COUNTER" 370 description: "Total number of shared blocks written by the statement" 371 - local_blks_hit: 372 usage: "COUNTER" 373 description: "Total number of local block cache hits by the statement" 374 - local_blks_read: 375 usage: "COUNTER" 376 description: "Total number of local blocks read by the statement" 377 - local_blks_dirtied: 378 usage: "COUNTER" 379 description: "Total number of local blocks dirtied by the statement" 380 - local_blks_written: 381 usage: "COUNTER" 382 description: "Total number of local blocks written by the statement" 383 - temp_blks_read: 384 usage: "COUNTER" 385 description: "Total number of temp blocks read by the statement" 386 - temp_blks_written: 387 usage: "COUNTER" 388 description: "Total number of temp blocks written by the statement" 389 - blk_read_time_seconds: 390 usage: "COUNTER" 391 description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 392 - blk_write_time_seconds: 393 usage: "COUNTER" 394 description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)" 395 396 pg_stat_statements_stats: 397 query: | 398 /*kb-monitor*/SELECT 399 t2.rolname, 400 t3.datname, 401 calls, 402 total_exec_time / 1000 as exec_time_seconds, 403 mean_exec_time / 1000 as mean_exec_time_seconds, 404 max_exec_time / 1000 as max_exec_time_seconds, 405 rows 406 FROM ( 407 SELECT 408 userid, 409 dbid, 410 SUM(calls) AS calls, 411 SUM(total_time) as total_exec_time, 412 AVG(mean_time) as mean_exec_time, 413 MAX(max_time) as max_exec_time, 414 SUM(rows) AS rows 415 FROM 416 pg_stat_statements 417 GROUP BY userid, dbid 418 ) tmp 419 JOIN 420 pg_roles t2 421 ON (tmp.userid=t2.oid) 422 JOIN 423 pg_database t3 424 ON (tmp.dbid=t3.oid) 425 WHERE t2.rolname != 'rdsadmin' 426 master: true 427 metrics: 428 - rolname: 429 usage: "LABEL" 430 description: "Name of user" 431 - datname: 432 usage: "LABEL" 433 description: "Name of database" 434 - calls: 435 usage: "COUNTER" 436 description: "Number of times executed" 437 - exec_time_seconds: 438 usage: "COUNTER" 439 description: "Total time spent in the statement" 440 - mean_exec_time_seconds: 441 usage: "GAUGE" 442 description: "Mean time spent in the statement" 443 - max_exec_time_seconds: 444 usage: "GAUGE" 445 description: "Maximum time spent in the statement" 446 - rows: 447 usage: "COUNTER" 448 description: "Total number of rows retrieved or affected by the statement" 449 450 pg_wal_log_file: 451 query: "/*kb-monitor*/SELECT count(*) AS count FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}'" 452 master: true 453 metrics: 454 - count: 455 usage: "GAUGE" 456 description: "Wal log file count" 457 458 pg_stat_activity_detail: 459 query: | 460 /*kb-monitor*/SELECT 461 datname,wait_event_type,wait_event,state,backend_type,COUNT(1) AS count 462 FROM pg_stat_activity 463 GROUP BY datname,wait_event_type,wait_event,state,backend_type 464 master: true 465 metrics: 466 - datname: 467 usage: "LABEL" 468 description: "Name of current database" 469 - wait_event_type: 470 usage: "LABEL" 471 description: "The type of event for which the backend is waiting, if any; otherwise NULL." 472 - wait_event: 473 usage: "LABEL" 474 description: "Wait event name if backend is currently waiting, otherwise NULL. " 475 - state: 476 usage: "LABEL" 477 description: "Current overall state of this backend." 478 - backend_type: 479 usage: "LABEL" 480 description: "Type of current backend." 481 - count: 482 usage: "GAUGE" 483 description: "Const value of 1" 484 485 pg_locks_detail: 486 query: | 487 /*kb-monitor*/SELECT 488 locktype,datname,relation,mode,granted,fastpath,count 489 FROM ( 490 SELECT locktype,database,relation,mode,granted,fastpath,COUNT(1) AS count 491 FROM ( 492 SELECT locktype,database,relation::regclass,mode,granted::int,fastpath::int 493 FROM pg_locks 494 ) p1 495 GROUP BY locktype,database,relation,mode,granted,fastpath 496 ) p2 497 LEFT JOIN ( 498 SELECT oid,datname FROM pg_database 499 ) pd 500 ON p2.database=pd.oid 501 master: true 502 metrics: 503 - locktype: 504 usage: "LABEL" 505 description: "Type of the lockable object" 506 - datname: 507 usage: "LABEL" 508 description: "Name of current database" 509 - relation: 510 usage: "LABEL" 511 description: "Relation targeted by the lock, or null if the target is not a relation or part of a relation" 512 - mode: 513 usage: "LABEL" 514 description: "Name of the lock mode held or desired by this process" 515 - granted: 516 usage: "LABEL" 517 description: "True if lock is held, false if lock is awaited" 518 - fastpath: 519 usage: "LABEL" 520 description: "True if lock was taken via fast path, false if taken via main lock table" 521 - count: 522 usage: "GAUGE" 523 description: "Const value of 1"