github.com/1aal/kubeblocks@v0.0.0-20231107070852-e1c03e598921/deploy/postgresql/templates/metrics-configmap-12.yaml (about)

     1  apiVersion: v1
     2  kind: ConfigMap
     3  metadata:
     4    name: postgresql12-custom-metrics
     5    labels:
     6        {{- include "postgresql.labels" . | nindent 4 }}
     7  data:
     8    custom-metrics.yaml: |-
     9      pg_postmaster:
    10        query: "/*kb-monitor*/SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
    11        master: true
    12        metrics:
    13          - start_time_seconds:
    14              usage: "GAUGE"
    15              description: "Time at which postmaster started"
    16  
    17      pg_replication:
    18        query: |
    19          /*kb-monitor*/SELECT
    20          (case when (not pg_is_in_recovery() or pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()) then 0 else greatest (0, extract(epoch from (now() - pg_last_xact_replay_timestamp()))) end) as lag,
    21          (case when pg_is_in_recovery() then 0 else 1 end) as is_master
    22        master: true
    23        metrics:
    24          - lag:
    25              usage: "GAUGE"
    26              description: "Replication lag behind master in seconds"
    27          - is_master:
    28              usage: "GAUGE"
    29              description: "Instance is master or slave"
    30  
    31      pg_stat_user_tables:
    32        query: |
    33          /*kb-monitor*/SELECT
    34          current_database() datname,
    35          schemaname,
    36          relname,
    37          seq_scan,
    38          seq_tup_read,
    39          idx_scan,
    40          idx_tup_fetch,
    41          n_tup_ins,
    42          n_tup_upd,
    43          n_tup_del,
    44          n_tup_hot_upd,
    45          n_live_tup,
    46          n_dead_tup,
    47          n_mod_since_analyze,
    48          COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum,
    49          COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum,
    50          COALESCE(last_analyze, '1970-01-01Z') as last_analyze,
    51          COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze,
    52          vacuum_count,
    53          autovacuum_count,
    54          analyze_count,
    55          autoanalyze_count
    56          FROM
    57          pg_stat_user_tables
    58        metrics:
    59          - datname:
    60              usage: "LABEL"
    61              description: "Name of current database"
    62          - schemaname:
    63              usage: "LABEL"
    64              description: "Name of the schema that this table is in"
    65          - relname:
    66              usage: "LABEL"
    67              description: "Name of this table"
    68          - seq_scan:
    69              usage: "COUNTER"
    70              description: "Number of sequential scans initiated on this table"
    71          - seq_tup_read:
    72              usage: "COUNTER"
    73              description: "Number of live rows fetched by sequential scans"
    74          - idx_scan:
    75              usage: "COUNTER"
    76              description: "Number of index scans initiated on this table"
    77          - idx_tup_fetch:
    78              usage: "COUNTER"
    79              description: "Number of live rows fetched by index scans"
    80          - n_tup_ins:
    81              usage: "COUNTER"
    82              description: "Number of rows inserted"
    83          - n_tup_upd:
    84              usage: "COUNTER"
    85              description: "Number of rows updated"
    86          - n_tup_del:
    87              usage: "COUNTER"
    88              description: "Number of rows deleted"
    89          - n_tup_hot_upd:
    90              usage: "COUNTER"
    91              description: "Number of rows HOT updated (i.e., with no separate index update required)"
    92          - n_live_tup:
    93              usage: "GAUGE"
    94              description: "Estimated number of live rows"
    95          - n_dead_tup:
    96              usage: "GAUGE"
    97              description: "Estimated number of dead rows"
    98          - n_mod_since_analyze:
    99              usage: "GAUGE"
   100              description: "Estimated number of rows changed since last analyze"
   101          - last_vacuum:
   102              usage: "GAUGE"
   103              description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
   104          - last_autovacuum:
   105              usage: "GAUGE"
   106              description: "Last time at which this table was vacuumed by the autovacuum daemon"
   107          - last_analyze:
   108              usage: "GAUGE"
   109              description: "Last time at which this table was manually analyzed"
   110          - last_autoanalyze:
   111              usage: "GAUGE"
   112              description: "Last time at which this table was analyzed by the autovacuum daemon"
   113          - vacuum_count:
   114              usage: "COUNTER"
   115              description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
   116          - autovacuum_count:
   117              usage: "COUNTER"
   118              description: "Number of times this table has been vacuumed by the autovacuum daemon"
   119          - analyze_count:
   120              usage: "COUNTER"
   121              description: "Number of times this table has been manually analyzed"
   122          - autoanalyze_count:
   123              usage: "COUNTER"
   124              description: "Number of times this table has been analyzed by the autovacuum daemon"
   125  
   126      pg_statio_user_tables:
   127        query: |
   128          /*kb-monitor*/SELECT
   129          current_database() datname,
   130          schemaname,
   131          relname,
   132          heap_blks_read,
   133          heap_blks_hit,
   134          idx_blks_read,
   135          idx_blks_hit,
   136          toast_blks_read,
   137          toast_blks_hit,
   138          tidx_blks_read,
   139          tidx_blks_hit
   140          FROM
   141          pg_statio_user_tables
   142        metrics:
   143          - datname:
   144              usage: "LABEL"
   145              description: "Name of current database"
   146          - schemaname:
   147              usage: "LABEL"
   148              description: "Name of the schema that this table is in"
   149          - relname:
   150              usage: "LABEL"
   151              description: "Name of this table"
   152          - heap_blks_read:
   153              usage: "COUNTER"
   154              description: "Number of disk blocks read from this table"
   155          - heap_blks_hit:
   156              usage: "COUNTER"
   157              description: "Number of buffer hits in this table"
   158          - idx_blks_read:
   159              usage: "COUNTER"
   160              description: "Number of disk blocks read from all indexes on this table"
   161          - idx_blks_hit:
   162              usage: "COUNTER"
   163              description: "Number of buffer hits in all indexes on this table"
   164          - toast_blks_read:
   165              usage: "COUNTER"
   166              description: "Number of disk blocks read from this table's TOAST table (if any)"
   167          - toast_blks_hit:
   168              usage: "COUNTER"
   169              description: "Number of buffer hits in this table's TOAST table (if any)"
   170          - tidx_blks_read:
   171              usage: "COUNTER"
   172              description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
   173          - tidx_blks_hit:
   174              usage: "COUNTER"
   175              description: "Number of buffer hits in this table's TOAST table indexes (if any)"
   176  
   177      # WARNING: This set of metrics can be very expensive on a busy server as every unique query executed will create an additional time series
   178      pg_stat_statements_by_mean_exec_time:
   179        query: |
   180          /*kb-monitor*/SELECT
   181          t2.rolname,
   182          t3.datname,
   183          queryid,
   184          left(regexp_replace(trim(both ' ' from lower(query)),'\s+',' ','g'), 256) as query,
   185          calls,
   186          total_time / 1000 as exec_time_seconds,
   187          min_time / 1000 as min_exec_time_seconds,
   188          max_time / 1000 as max_exec_time_seconds,
   189          mean_time / 1000 as mean_exec_time_seconds,
   190          stddev_time / 1000 as stddev_exec_time_seconds,
   191          rows,
   192          shared_blks_hit,
   193          shared_blks_read,
   194          shared_blks_dirtied,
   195          shared_blks_written,
   196          local_blks_hit,
   197          local_blks_read,
   198          local_blks_dirtied,
   199          local_blks_written,
   200          temp_blks_read,
   201          temp_blks_written,
   202          blk_read_time / 1000 as blk_read_time_seconds,
   203          blk_write_time / 1000 as blk_write_time_seconds
   204          FROM
   205          pg_stat_statements t1
   206          JOIN
   207          pg_roles t2
   208          ON (t1.userid=t2.oid)
   209          JOIN
   210          pg_database t3
   211          ON (t1.dbid=t3.oid)
   212          WHERE t2.rolname != 'rdsadmin'
   213          ORDER BY mean_time DESC
   214          LIMIT 50
   215        master: true
   216        metrics:
   217          - rolname:
   218              usage: "LABEL"
   219              description: "Name of user"
   220          - datname:
   221              usage: "LABEL"
   222              description: "Name of database"
   223          - queryid:
   224              usage: "LABEL"
   225              description: "Query ID"
   226          - query:
   227              usage: "LABEL"
   228              description: "First 64 chars for simple formatted query text"
   229          - calls:
   230              usage: "COUNTER"
   231              description: "Number of times executed"
   232          - exec_time_seconds:
   233              usage: "COUNTER"
   234              description: "Total time spent in the statement"
   235          - min_exec_time_seconds:
   236              usage: "GAUGE"
   237              description: "Minimum time spent in the statement"
   238          - max_exec_time_seconds:
   239              usage: "GAUGE"
   240              description: "Maximum time spent in the statement"
   241          - mean_exec_time_seconds:
   242              usage: "GAUGE"
   243              description: "Mean time spent in the statement"
   244          - stddev_exec_time_seconds:
   245              usage: "GAUGE"
   246              description: "Population standard deviation of time spent in the statement"
   247          - rows:
   248              usage: "COUNTER"
   249              description: "Total number of rows retrieved or affected by the statement"
   250          - shared_blks_hit:
   251              usage: "COUNTER"
   252              description: "Total number of shared block cache hits by the statement"
   253          - shared_blks_read:
   254              usage: "COUNTER"
   255              description: "Total number of shared blocks read by the statement"
   256          - shared_blks_dirtied:
   257              usage: "COUNTER"
   258              description: "Total number of shared blocks dirtied by the statement"
   259          - shared_blks_written:
   260              usage: "COUNTER"
   261              description: "Total number of shared blocks written by the statement"
   262          - local_blks_hit:
   263              usage: "COUNTER"
   264              description: "Total number of local block cache hits by the statement"
   265          - local_blks_read:
   266              usage: "COUNTER"
   267              description: "Total number of local blocks read by the statement"
   268          - local_blks_dirtied:
   269              usage: "COUNTER"
   270              description: "Total number of local blocks dirtied by the statement"
   271          - local_blks_written:
   272              usage: "COUNTER"
   273              description: "Total number of local blocks written by the statement"
   274          - temp_blks_read:
   275              usage: "COUNTER"
   276              description: "Total number of temp blocks read by the statement"
   277          - temp_blks_written:
   278              usage: "COUNTER"
   279              description: "Total number of temp blocks written by the statement"
   280          - blk_read_time_seconds:
   281              usage: "COUNTER"
   282              description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
   283          - blk_write_time_seconds:
   284              usage: "COUNTER"
   285              description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
   286  
   287      pg_stat_statements_by_calls:
   288        query: |
   289          /*kb-monitor*/SELECT
   290          t2.rolname,
   291          t3.datname,
   292          queryid,
   293          left(regexp_replace(trim(both ' ' from lower(query)),'\s+',' ','g'), 256) as query,
   294          calls,
   295          total_time / 1000 as exec_time_seconds,
   296          min_time / 1000 as min_exec_time_seconds,
   297          max_time / 1000 as max_exec_time_seconds,
   298          mean_time / 1000 as mean_exec_time_seconds,
   299          stddev_time / 1000 as stddev_exec_time_seconds,
   300          rows,
   301          shared_blks_hit,
   302          shared_blks_read,
   303          shared_blks_dirtied,
   304          shared_blks_written,
   305          local_blks_hit,
   306          local_blks_read,
   307          local_blks_dirtied,
   308          local_blks_written,
   309          temp_blks_read,
   310          temp_blks_written,
   311          blk_read_time / 1000 as blk_read_time_seconds,
   312          blk_write_time / 1000 as blk_write_time_seconds
   313          FROM
   314          pg_stat_statements t1
   315          JOIN
   316          pg_roles t2
   317          ON (t1.userid=t2.oid)
   318          JOIN
   319          pg_database t3
   320          ON (t1.dbid=t3.oid)
   321          WHERE t2.rolname != 'rdsadmin'
   322          ORDER BY calls DESC
   323          LIMIT 50
   324        master: true
   325        metrics:
   326          - rolname:
   327              usage: "LABEL"
   328              description: "Name of user"
   329          - datname:
   330              usage: "LABEL"
   331              description: "Name of database"
   332          - queryid:
   333              usage: "LABEL"
   334              description: "Query ID"
   335          - query:
   336              usage: "LABEL"
   337              description: "First 64 chars for simple formatted query text"
   338          - calls:
   339              usage: "COUNTER"
   340              description: "Number of times executed"
   341          - exec_time_seconds:
   342              usage: "COUNTER"
   343              description: "Total time spent in the statement"
   344          - min_exec_time_seconds:
   345              usage: "GAUGE"
   346              description: "Minimum time spent in the statement"
   347          - max_exec_time_seconds:
   348              usage: "GAUGE"
   349              description: "Maximum time spent in the statement"
   350          - mean_exec_time_seconds:
   351              usage: "GAUGE"
   352              description: "Mean time spent in the statement"
   353          - stddev_exec_time_seconds:
   354              usage: "GAUGE"
   355              description: "Population standard deviation of time spent in the statement"
   356          - rows:
   357              usage: "COUNTER"
   358              description: "Total number of rows retrieved or affected by the statement"
   359          - shared_blks_hit:
   360              usage: "COUNTER"
   361              description: "Total number of shared block cache hits by the statement"
   362          - shared_blks_read:
   363              usage: "COUNTER"
   364              description: "Total number of shared blocks read by the statement"
   365          - shared_blks_dirtied:
   366              usage: "COUNTER"
   367              description: "Total number of shared blocks dirtied by the statement"
   368          - shared_blks_written:
   369              usage: "COUNTER"
   370              description: "Total number of shared blocks written by the statement"
   371          - local_blks_hit:
   372              usage: "COUNTER"
   373              description: "Total number of local block cache hits by the statement"
   374          - local_blks_read:
   375              usage: "COUNTER"
   376              description: "Total number of local blocks read by the statement"
   377          - local_blks_dirtied:
   378              usage: "COUNTER"
   379              description: "Total number of local blocks dirtied by the statement"
   380          - local_blks_written:
   381              usage: "COUNTER"
   382              description: "Total number of local blocks written by the statement"
   383          - temp_blks_read:
   384              usage: "COUNTER"
   385              description: "Total number of temp blocks read by the statement"
   386          - temp_blks_written:
   387              usage: "COUNTER"
   388              description: "Total number of temp blocks written by the statement"
   389          - blk_read_time_seconds:
   390              usage: "COUNTER"
   391              description: "Total time the statement spent reading blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
   392          - blk_write_time_seconds:
   393              usage: "COUNTER"
   394              description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
   395  
   396      pg_stat_statements_stats:
   397        query: |
   398          /*kb-monitor*/SELECT
   399            t2.rolname,
   400            t3.datname,
   401            calls,
   402            total_exec_time / 1000 as exec_time_seconds,
   403            mean_exec_time / 1000 as mean_exec_time_seconds,
   404            max_exec_time / 1000 as max_exec_time_seconds,
   405            rows
   406          FROM (
   407            SELECT
   408              userid,
   409              dbid,
   410              SUM(calls) AS calls,
   411              SUM(total_time) as total_exec_time,
   412              AVG(mean_time) as mean_exec_time,
   413              MAX(max_time) as max_exec_time,
   414              SUM(rows) AS rows
   415            FROM
   416              pg_stat_statements
   417            GROUP BY userid, dbid
   418          ) tmp
   419          JOIN
   420            pg_roles t2
   421          ON (tmp.userid=t2.oid)
   422          JOIN
   423            pg_database t3
   424          ON (tmp.dbid=t3.oid)
   425          WHERE t2.rolname != 'rdsadmin'
   426        master: true
   427        metrics:
   428          - rolname:
   429              usage: "LABEL"
   430              description: "Name of user"
   431          - datname:
   432              usage: "LABEL"
   433              description: "Name of database"
   434          - calls:
   435              usage: "COUNTER"
   436              description: "Number of times executed"
   437          - exec_time_seconds:
   438              usage: "COUNTER"
   439              description: "Total time spent in the statement"
   440          - mean_exec_time_seconds:
   441              usage: "GAUGE"
   442              description: "Mean time spent in the statement"
   443          - max_exec_time_seconds:
   444              usage: "GAUGE"
   445              description: "Maximum time spent in the statement"
   446          - rows:
   447              usage: "COUNTER"
   448              description: "Total number of rows retrieved or affected by the statement"
   449  
   450      pg_wal_log_file:
   451        query: "/*kb-monitor*/SELECT count(*) AS count FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}'"
   452        master: true
   453        metrics:
   454          - count:
   455              usage: "GAUGE"
   456              description: "Wal log file count"
   457  
   458      pg_stat_activity_detail:
   459        query: |
   460          /*kb-monitor*/SELECT
   461            datname,wait_event_type,wait_event,state,backend_type,COUNT(1) AS count
   462          FROM pg_stat_activity
   463          GROUP BY datname,wait_event_type,wait_event,state,backend_type
   464        master: true
   465        metrics:
   466          - datname:
   467              usage: "LABEL"
   468              description: "Name of current database"
   469          - wait_event_type:
   470              usage: "LABEL"
   471              description: "The type of event for which the backend is waiting, if any; otherwise NULL."
   472          - wait_event:
   473              usage: "LABEL"
   474              description: "Wait event name if backend is currently waiting, otherwise NULL. "
   475          - state:
   476              usage: "LABEL"
   477              description: "Current overall state of this backend."
   478          - backend_type:
   479              usage: "LABEL"
   480              description: "Type of current backend."
   481          - count:
   482              usage: "GAUGE"
   483              description: "Const value of 1"
   484  
   485      pg_locks_detail:
   486        query: |
   487          /*kb-monitor*/SELECT
   488            locktype,datname,relation,mode,granted,fastpath,count
   489          FROM (
   490            SELECT locktype,database,relation,mode,granted,fastpath,COUNT(1) AS count
   491            FROM (
   492              SELECT locktype,database,relation::regclass,mode,granted::int,fastpath::int
   493              FROM pg_locks
   494            ) p1
   495            GROUP BY locktype,database,relation,mode,granted,fastpath
   496          ) p2
   497          LEFT JOIN (
   498            SELECT oid,datname FROM pg_database
   499          ) pd
   500          ON p2.database=pd.oid
   501        master: true
   502        metrics:
   503          - locktype:
   504              usage: "LABEL"
   505              description: "Type of the lockable object"
   506          - datname:
   507              usage: "LABEL"
   508              description: "Name of current database"
   509          - relation:
   510              usage: "LABEL"
   511              description: "Relation targeted by the lock, or null if the target is not a relation or part of a relation"
   512          - mode:
   513              usage: "LABEL"
   514              description: "Name of the lock mode held or desired by this process"
   515          - granted:
   516              usage: "LABEL"
   517              description: "True if lock is held, false if lock is awaited"
   518          - fastpath:
   519              usage: "LABEL"
   520              description: "True if lock was taken via fast path, false if taken via main lock table"
   521          - count:
   522              usage: "GAUGE"
   523              description: "Const value of 1"