From bbe4c8d284855f86e06964096d8b931860ffef76 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Tue, 28 Oct 2025 19:53:08 +0200 Subject: [PATCH 1/9] Switch to Victoria Metrics Added auto-restart to docker-compose.yml --- README.md | 10 +- config/pgwatch-prometheus/metrics.yml | 142 +++++++++++++++++--------- docker-compose.yml | 20 ++-- 3 files changed, 112 insertions(+), 60 deletions(-) diff --git a/README.md b/README.md index 2e52413..82f66b8 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Built for senior DBAs, SREs, and AI systems who need rapid root cause analysis a - **Dual-purpose architecture**: Built for both human experts and AI systems requiring structured performance data - **Comprehensive query analysis**: Complete `pg_stat_statements` metrics with historical trends and plan variations - **Active Session History**: Postgres's answer to Oracle ASH and AWS RDS Performance Insights -- **Hybrid storage**: Prometheus for metrics, Postgres for query texts — best of both worlds +- **Hybrid storage**: Victoria Metrics (Prometheus-compatible) for metrics, Postgres for query texts — best of both worlds > 📖 **Read more**: [postgres_ai monitoring v0.7 announcement](https://postgres.ai/blog/20250722-postgres-ai-v0-7-expert-level-postgresql-monitoring) - detailed technical overview and architecture decisions. 
@@ -48,7 +48,7 @@ Experience the full monitoring solution: **https://demo.postgres.ai** (login: `d ## 🏗️ Architecture - **Collection**: pgwatch v3 (by Cybertec) for metrics gathering -- **Storage**: Prometheus for time-series data + Postgres for query texts +- **Storage**: Victoria Metrics for time-series data + Postgres for query texts - **Visualization**: Grafana with expert-designed dashboards - **Analysis**: Structured data output for AI system integration @@ -71,7 +71,7 @@ This monitoring solution exposes several ports that **MUST** be properly firewal - **Port 3000** (Grafana) - Contains sensitive database metrics and dashboards - **Port 58080** (PGWatch Postgres) - Database monitoring interface - **Port 58089** (PGWatch Prometheus) - Database monitoring interface -- **Port 59090** (Prometheus) - Metrics storage and queries +- **Port 59090** (Victoria Metrics) - Metrics storage and queries - **Port 59091** (PGWatch Prometheus endpoint) - Metrics collection - **Port 55000** (Flask API) - Backend API service - **Port 55432** (Demo DB) - When using `--demo` option @@ -98,6 +98,8 @@ grant pg_monitor to postgres_ai_mon; grant select on pg_stat_statements to postgres_ai_mon; grant select on pg_stat_database to postgres_ai_mon; grant select on pg_stat_user_tables to postgres_ai_mon; +grant select on pg_stat_user_indexes to postgres_ai_mon; +grant select on pg_index to postgres_ai_mon; -- Create a public view for pg_statistic access (required for bloat metrics on user schemas) create view public.pg_statistic as @@ -195,7 +197,7 @@ After running quickstart: Technical URLs (for advanced users): - **Demo DB**: postgresql://postgres:postgres@localhost:55432/target_database - **Monitoring**: http://localhost:58080 (PGWatch) -- **Metrics**: http://localhost:59090 (Prometheus) +- **Metrics**: http://localhost:59090 (Victoria Metrics) ## 📖 Help diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index 52654f4..37b7e60 100644 --- 
a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -854,34 +854,32 @@ metrics: pg_stat_statements: sqls: 11: | - WITH ranked_statements as ( + WITH aggregated_statements as ( select - pg_get_userbyid(userid) as user, pg_database.datname, - pg_stat_statements.queryid , - pg_stat_statements.plans as plans_total, - pg_stat_statements.calls, - pg_stat_statements.total_exec_time as exec_time_total, - pg_stat_statements.total_plan_time as plan_time_total, - pg_stat_statements.rows, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_hit) as shared_bytes_hit_total, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_read) as shared_bytes_read_total, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_dirtied) as shared_bytes_dirtied_total, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_written) as shared_bytes_written_total, - pg_stat_statements.blk_read_time as block_read_total, - pg_stat_statements.blk_write_time as block_write_total, - pg_stat_statements.wal_records, - pg_stat_statements.wal_fpi, - pg_stat_statements.wal_bytes, - (current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, - (current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written, - row_number() over (order by total_exec_time desc) as rn + pg_stat_statements.queryid, + sum(pg_stat_statements.plans) as plans_total, + sum(pg_stat_statements.calls) as calls, + sum(pg_stat_statements.total_exec_time) as exec_time_total, + sum(pg_stat_statements.total_plan_time) as plan_time_total, + sum(pg_stat_statements.rows) as rows, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_hit) as shared_bytes_hit_total, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_read) as shared_bytes_read_total, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_dirtied) as 
shared_bytes_dirtied_total, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_written) as shared_bytes_written_total, + sum(pg_stat_statements.blk_read_time) as block_read_total, + sum(pg_stat_statements.blk_write_time) as block_write_total, + sum(pg_stat_statements.wal_records) as wal_records, + sum(pg_stat_statements.wal_fpi) as wal_fpi, + sum(pg_stat_statements.wal_bytes) as wal_bytes, + sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, + sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written from pg_stat_statements join pg_database on pg_database.oid = pg_stat_statements.dbid + group by pg_database.datname, pg_stat_statements.queryid ) select - ranked_statements.user as tag_user, datname as tag_datname, queryid as tag_queryid, calls::int8 as calls, @@ -900,36 +898,34 @@ metrics: wal_bytes::int8 as wal_bytes, temp_bytes_read::int8 as temp_bytes_read, temp_bytes_written::int8 as temp_bytes_written - from ranked_statements + from aggregated_statements 17: | - WITH ranked_statements as ( + WITH aggregated_statements as ( select - pg_get_userbyid(userid) as user, pg_database.datname, - pg_stat_statements.queryid , - pg_stat_statements.plans as plans_total, - pg_stat_statements.calls, - pg_stat_statements.total_exec_time as exec_time_total, - pg_stat_statements.total_plan_time as plan_time_total, - pg_stat_statements.rows, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_hit) as shared_bytes_hit_total, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_read) as shared_bytes_read_total, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_dirtied) as shared_bytes_dirtied_total, - (current_setting('block_size')::int * pg_stat_statements.shared_blks_written) as shared_bytes_written_total, - pg_stat_statements.shared_blk_read_time as block_read_total, - pg_stat_statements.shared_blk_write_time as 
block_write_total, - pg_stat_statements.wal_records, - pg_stat_statements.wal_fpi, - pg_stat_statements.wal_bytes, - (current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, - (current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written, - row_number() over (order by total_exec_time desc) as rn + pg_stat_statements.queryid, + sum(pg_stat_statements.plans) as plans_total, + sum(pg_stat_statements.calls) as calls, + sum(pg_stat_statements.total_exec_time) as exec_time_total, + sum(pg_stat_statements.total_plan_time) as plan_time_total, + sum(pg_stat_statements.rows) as rows, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_hit) as shared_bytes_hit_total, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_read) as shared_bytes_read_total, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_dirtied) as shared_bytes_dirtied_total, + sum(current_setting('block_size')::int * pg_stat_statements.shared_blks_written) as shared_bytes_written_total, + sum(pg_stat_statements.shared_blk_read_time) as block_read_total, + sum(pg_stat_statements.shared_blk_write_time) as block_write_total, + sum(pg_stat_statements.wal_records) as wal_records, + sum(pg_stat_statements.wal_fpi) as wal_fpi, + sum(pg_stat_statements.wal_bytes) as wal_bytes, + sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_read) as temp_bytes_read, + sum(current_setting('block_size')::int * pg_stat_statements.temp_blks_written) as temp_bytes_written from pg_stat_statements join pg_database on pg_database.oid = pg_stat_statements.dbid + group by pg_database.datname, pg_stat_statements.queryid ) select - ranked_statements.user as tag_user, datname as tag_datname, queryid as tag_queryid, calls::int8 as calls, @@ -948,7 +944,7 @@ metrics: wal_bytes::int8 as wal_bytes, temp_bytes_read::int8 as temp_bytes_read, temp_bytes_written::int8 as temp_bytes_written - 
from ranked_statements + from aggregated_statements gauges: - calls - plans_total @@ -1650,7 +1646,8 @@ metrics: pg_relation_size(i.indexrelid) as index_bytes, ci.relpages, (case when a.amname = 'btree' then true else false end) as idx_is_btree, - array_to_string(i.indclass, ', ') as opclasses + array_to_string(i.indclass, ', ') as opclasses, + si.stats_reset from pg_index i join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i' join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' @@ -1679,6 +1676,7 @@ metrics: i.relpages, idx_is_btree, i.opclasses, + i.stats_reset, ( select count(1) from fk_indexes fi @@ -1705,7 +1703,8 @@ metrics: relpages, idx_is_btree, opclasses AS tag_opclasses, - supports_fk + supports_fk, + extract(epoch from stats_reset) as stats_reset_epoch from index_ratios where idx_scan = 0 @@ -1858,6 +1857,54 @@ metrics: gauges: - '*' + stats_reset: + description: > + This metric tracks when statistics were last reset for indexes and tables. + It provides visibility into the freshness of statistics data, which is essential for understanding + the reliability of index and table usage metrics. A recent reset time indicates that usage statistics + may not reflect long-term patterns. 
+ sqls: + 11: | + with index_stats as ( + select + schemaname as schema_name, + indexrelname as index_name, + relname as table_name, + 'index' as stat_type, + stats_reset, + extract(epoch from stats_reset) as stats_reset_epoch, + extract(epoch from now() - stats_reset) as seconds_since_reset + from pg_stat_user_indexes + where stats_reset is not null + ), table_stats as ( + select + schemaname as schema_name, + null::text as index_name, + relname as table_name, + 'table' as stat_type, + stats_reset, + extract(epoch from stats_reset) as stats_reset_epoch, + extract(epoch from now() - stats_reset) as seconds_since_reset + from pg_stat_user_tables + where stats_reset is not null + ), combined_stats as ( + select * from index_stats + union all + select * from table_stats + ) + select + schema_name as tag_schema_name, + table_name as tag_table_name, + coalesce(index_name, 'table') as tag_index_name, + stat_type as tag_stat_type, + stats_reset_epoch, + seconds_since_reset + from combined_stats + order by stats_reset desc; + gauges: + - 'stats_reset_epoch' + - 'seconds_since_reset' + archive_lag: description: > This metric measures the lag in WAL archive processing. @@ -2091,6 +2138,8 @@ metrics: tidx_blks_hit from pg_statio_all_tables + order by pg_total_relation_size((schemaname || '.' 
|| relname)::regclass) desc + limit 5000; gauges: - '*' @@ -2133,6 +2182,7 @@ presets: redundant_indexes: 10800 unused_indexes: 7200 rarely_used_indexes: 10800 + stats_reset: 3600 archive_lag: 15 pg_vacuum_progress: 30 pg_index_pilot: diff --git a/docker-compose.yml b/docker-compose.yml index 5e712d6..f3c98f6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -54,22 +54,22 @@ services: - sink_postgres_data:/var/lib/postgresql/data - ./config/sink-postgres/init.sql:/docker-entrypoint-initdb.d/init.sql - # Prometheus Sink - Storage for metrics in Prometheus format + # VictoriaMetrics Sink - Storage for metrics in Prometheus format sink-prometheus: - image: prom/prometheus:v3.4.2 + image: victoriametrics/victoria-metrics:v1.105.0 container_name: sink-prometheus ports: - "${BIND_HOST:-}59090:9090" volumes: + - victoria_metrics_data:/victoria-metrics-data - ./config/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml - - prometheus_data:/prometheus command: - - "--config.file=/etc/prometheus/prometheus.yml" - - "--storage.tsdb.path=/prometheus" - - "--web.console.libraries=/etc/prometheus/console_libraries" - - "--web.console.templates=/etc/prometheus/consoles" - - "--storage.tsdb.retention.time=200h" - - "--web.enable-lifecycle" + - "-storageDataPath=/victoria-metrics-data" + - "-retentionPeriod=200h" + - "-httpListenAddr=:9090" + - "-promscrape.config=/etc/prometheus/prometheus.yml" + - "-promscrape.config.strictParse=false" + - "-promscrape.maxScrapeSize=128000000" # PGWatch Instance 1 - Monitoring service (Postgres sink) pgwatch-postgres: @@ -236,5 +236,5 @@ services: volumes: target_db_data: sink_postgres_data: - prometheus_data: + victoria_metrics_data: grafana_data: From 30e42e4171eea93610a4026eb93676d77c8967f9 Mon Sep 17 00:00:00 2001 From: Dementii Priadko Date: Tue, 28 Oct 2025 19:44:27 +0000 Subject: [PATCH 2/9] Apply 1 suggestion(s) to 1 file(s) Co-authored-by: Nikolay Samokhvalov --- config/pgwatch-prometheus/metrics.yml | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index 37b7e60..cf2a4c3 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -900,7 +900,7 @@ metrics: temp_bytes_written::int8 as temp_bytes_written from aggregated_statements 17: | - WITH aggregated_statements as ( + with aggregated_statements as ( select pg_database.datname, pg_stat_statements.queryid, From 444343e6aa094bd465195700418373028e8d5372 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Tue, 28 Oct 2025 21:52:41 +0200 Subject: [PATCH 3/9] Updated blk_read_time vs blk_write_time in 1st dashboard --- .../dashboards/Dashboard_1_Node_performance_overview.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json index 9037e5f..c8e9ab7 100644 --- a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json +++ b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json @@ -1751,7 +1751,7 @@ "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", "hide": false, "interval": "20", - "legendFormat": "blk_read_time", + "legendFormat": "blk_write_time", "range": true, "refId": "B" } From e59fbbd3dbfd801bac36e553b92b64a9cf78d698 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Tue, 28 Oct 2025 22:35:22 +0200 Subject: [PATCH 4/9] Added explicit definition of si alias in metrics related to indexes --- config/pgwatch-prometheus/metrics.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index cf2a4c3..e65af7f 100644 --- 
a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -1465,7 +1465,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_constraint cn on cn.conrelid = cr.oid - left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid + left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid where contype = 'f' and i.indisunique is false @@ -1620,7 +1620,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_constraint cn on cn.conrelid = cr.oid - left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid + left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid where contype = 'f' and i.indisunique is false @@ -1653,7 +1653,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_am a ON ci.relam = a.oid - left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid + left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid where i.indisunique = false and i.indisvalid = true @@ -1732,7 +1732,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_constraint cn on cn.conrelid = cr.oid - left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid + left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid where contype = 'f' and i.indisunique is false @@ -1767,7 +1767,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_am a ON ci.relam = a.oid - left join pg_stat_user_indexes si on si.indexrelid = i.indexrelid + left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid where i.indisunique = false and i.indisvalid = true From a149d5e69fd845e82e5a00fe6bec873317edfb8d Mon Sep 17 00:00:00 2001 From: 
"dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Wed, 29 Oct 2025 00:01:43 +0200 Subject: [PATCH 5/9] Fixed self-monitoring dashboard --- .../dashboards/Self_Monitoring_Dashboard.json | 1435 +++++++++-------- config/prometheus/prometheus.yml | 4 +- 2 files changed, 774 insertions(+), 665 deletions(-) diff --git a/config/grafana/dashboards/Self_Monitoring_Dashboard.json b/config/grafana/dashboards/Self_Monitoring_Dashboard.json index 024a95a..39dbdf6 100644 --- a/config/grafana/dashboards/Self_Monitoring_Dashboard.json +++ b/config/grafana/dashboards/Self_Monitoring_Dashboard.json @@ -18,9 +18,8 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, + "id": 23, "links": [], - "liveNow": false, "panels": [ { "collapsed": false, @@ -50,8 +49,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -75,14 +73,17 @@ "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { - "values": false, "calcs": [ "lastNotNull" ], - "fields": "" + "fields": "", + "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, "pluginVersion": "12.0.2", "targets": [ @@ -91,7 +92,7 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "count(up{job=~\"cadvisor|node-exporter|postgres-sink|prometheus|pgwatch-prometheus\"})", + "expr": "count(up{job=~\"cadvisor|node-exporter|postgres-sink|victoriametrics|pgwatch-prometheus\"})", "refId": "A" } ], @@ -113,8 +114,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "yellow", @@ -142,14 +142,17 @@ "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { - "values": false, "calcs": [ "lastNotNull" ], - "fields": "" + "fields": "", + "values": false }, - "textMode": "auto" + 
"showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, "pluginVersion": "12.0.2", "targets": [ @@ -180,8 +183,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "yellow", @@ -209,14 +211,17 @@ "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { - "values": false, "calcs": [ "lastNotNull" ], - "fields": "" + "fields": "", + "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, "pluginVersion": "12.0.2", "targets": [ @@ -247,8 +252,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "yellow", @@ -276,14 +280,17 @@ "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { - "values": false, "calcs": [ "lastNotNull" ], - "fields": "" + "fields": "", + "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, "pluginVersion": "12.0.2", "targets": [ @@ -314,8 +321,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -339,14 +345,17 @@ "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { - "values": false, "calcs": [ "lastNotNull" ], - "fields": "" + "fields": "", + "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, "pluginVersion": "12.0.2", "targets": [ @@ -355,11 +364,11 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "prometheus_tsdb_storage_blocks_bytes", + "expr": "sum(vm_data_size_bytes{job=\"victoriametrics\"})", "refId": "A" } ], - "title": "Prometheus storage size", + "title": "Victoria Metrics storage size", "type": "stat" }, { @@ -377,8 +386,7 @@ "mode": 
"absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -398,14 +406,17 @@ "graphMode": "area", "justifyMode": "auto", "orientation": "auto", + "percentChangeColorMode": "standard", "reduceOptions": { - "values": false, "calcs": [ "lastNotNull" ], - "fields": "" + "fields": "", + "values": false }, - "textMode": "auto" + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, "pluginVersion": "12.0.2", "targets": [ @@ -414,11 +425,11 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "prometheus_tsdb_head_series", + "expr": "sum(vm_rows{job=\"victoriametrics\"})", "refId": "A" } ], - "title": "Prometheus time series", + "title": "Victoria Metrics time series", "type": "stat" }, { @@ -427,11 +438,11 @@ "h": 1, "w": 24, "x": 0, - "y": 39 + "y": 5 }, - "id": 101, + "id": 104, "panels": [], - "title": "Container resource usage", + "title": "Host stats", "type": "row" }, { @@ -445,19 +456,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -468,7 +482,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -479,30 +493,138 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "percent" + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "user" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": 
"system" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "iowait" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "nice" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "irq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "softirq" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "steal" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-red", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 40 + "y": 6 }, - "id": 10, + "id": 40, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "last" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -514,12 +636,66 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100", - "legendFormat": "{{name}}", + "expr": "avg(irate(node_cpu_seconds_total{mode=\"user\"}[5m]))", + "legendFormat": "user", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "avg(irate(node_cpu_seconds_total{mode=\"system\"}[5m]))", + "legendFormat": "system", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "avg(irate(node_cpu_seconds_total{mode=\"iowait\"}[5m]))", + "legendFormat": 
"iowait", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "avg(irate(node_cpu_seconds_total{mode=\"nice\"}[5m]))", + "legendFormat": "nice", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "avg(irate(node_cpu_seconds_total{mode=\"irq\"}[5m]))", + "legendFormat": "irq", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "avg(irate(node_cpu_seconds_total{mode=\"softirq\"}[5m]))", + "legendFormat": "softirq", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "avg(irate(node_cpu_seconds_total{mode=\"steal\"}[5m]))", + "legendFormat": "steal", + "refId": "G" } ], - "title": "Container CPU usage", + "title": "System CPU usage breakdown", "type": "timeseries" }, { @@ -533,19 +709,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -556,7 +735,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -567,30 +746,93 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, "unit": "bytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Application" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Buffers" + }, + "properties": [ + { + "id": "color", + "value": { 
+ "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cache" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Free" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 40 + "y": 6 }, - "id": 11, + "id": 41, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "last" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -602,12 +844,39 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "container_memory_usage_bytes{name=~\".+\"}", - "legendFormat": "{{name}}", + "expr": "node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes - node_memory_SReclaimable_bytes", + "legendFormat": "Application", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "node_memory_Buffers_bytes", + "legendFormat": "Buffers", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "node_memory_Cached_bytes + node_memory_SReclaimable_bytes", + "legendFormat": "Cache", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "node_memory_MemFree_bytes", + "legendFormat": "Free", + "refId": "D" } ], - "title": "Container memory usage", + "title": "System memory usage breakdown", "type": "timeseries" }, { @@ -621,19 +890,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", 
"fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -655,8 +927,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, @@ -666,7 +937,7 @@ { "matcher": { "id": "byRegexp", - "options": "/.*rx.*/" + "options": "/.*receive.*/" }, "properties": [ { @@ -681,9 +952,9 @@ "h": 8, "w": 12, "x": 0, - "y": 48 + "y": 14 }, - "id": 12, + "id": 42, "options": { "legend": { "calcs": [], @@ -692,6 +963,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -703,8 +975,8 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])", - "legendFormat": "{{name}} rx", + "expr": "rate(node_network_receive_bytes_total{device!~\"lo|veth.*\"}[5m])", + "legendFormat": "{{device}} receive", "refId": "A" }, { @@ -712,12 +984,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])", - "legendFormat": "{{name}} tx", + "expr": "rate(node_network_transmit_bytes_total{device!~\"lo|veth.*\"}[5m])", + "legendFormat": "{{device}} transmit", "refId": "B" } ], - "title": "Container network I/O", + "title": "System network I/O", "type": "timeseries" }, { @@ -731,19 +1003,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -765,35 +1040,21 @@ "mode": "absolute", "steps": [ { - "color": 
"green", - "value": null + "color": "green" } ] }, - "unit": "Bps" + "unit": "percent" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 48 + "y": 14 }, - "id": 13, + "id": 43, "options": { "legend": { "calcs": [], @@ -802,6 +1063,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -813,21 +1075,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(container_fs_reads_bytes_total{name=~\".+\"}[5m])", - "legendFormat": "{{name}} read", + "expr": "(node_filesystem_size_bytes{fstype!~\"tmpfs|fuse.lxcfs|squashfs|vfat\"} - node_filesystem_avail_bytes{fstype!~\"tmpfs|fuse.lxcfs|squashfs|vfat\"}) / node_filesystem_size_bytes{fstype!~\"tmpfs|fuse.lxcfs|squashfs|vfat\"} * 100", + "legendFormat": "{{mountpoint}}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "rate(container_fs_writes_bytes_total{name=~\".+\"}[5m])", - "legendFormat": "{{name}} write", - "refId": "B" } ], - "title": "Container disk I/O", + "title": "System disk usage", "type": "timeseries" }, { @@ -836,11 +1089,11 @@ "h": 1, "w": 24, "x": 0, - "y": 56 + "y": 22 }, - "id": 102, + "id": 105, "panels": [], - "title": "Prometheus metrics", + "title": "Disk I/O metrics", "type": "row" }, { @@ -854,19 +1107,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -888,30 +1144,46 @@ "mode": 
"absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "samplesps" + "unit": "iops" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 57 + "y": 23 }, - "id": 20, + "id": 50, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -923,12 +1195,21 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(prometheus_tsdb_head_samples_appended_total[5m])", - "legendFormat": "Samples ingested", + "expr": "rate(node_disk_reads_completed_total{device!~\"dm-.*\"}[5m])", + "legendFormat": "{{device}} read", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "rate(node_disk_writes_completed_total{device!~\"dm-.*\"}[5m])", + "legendFormat": "{{device}} write", + "refId": "B" } ], - "title": "Prometheus sample ingestion rate", + "title": "Disk I/O operations (IOPS)", "type": "timeseries" }, { @@ -942,19 +1223,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -976,30 +1260,46 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "s" + "unit": "Bps" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + 
"options": "/.*read.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 57 + "y": 23 }, - "id": 21, + "id": 51, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1011,12 +1311,21 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "prometheus_target_interval_length_seconds{quantile=\"0.99\"}", - "legendFormat": "{{job}}", + "expr": "rate(node_disk_read_bytes_total{device!~\"dm-.*\"}[5m])", + "legendFormat": "{{device}} read", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "rate(node_disk_written_bytes_total{device!~\"dm-.*\"}[5m])", + "legendFormat": "{{device}} write", + "refId": "B" } ], - "title": "Prometheus scrape duration (p99)", + "title": "Disk I/O throughput", "type": "timeseries" }, { @@ -1030,19 +1339,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1060,16 +1372,25 @@ } }, "mappings": [], + "max": 1, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" + }, + { + "color": "yellow", + "value": 0.7 + }, + { + "color": "red", + "value": 0.9 } ] }, - "unit": "bytes" + "unit": "percentunit" }, "overrides": [] }, @@ -1077,17 +1398,21 @@ "h": 8, "w": 12, "x": 0, - "y": 65 + "y": 31 }, - "id": 22, + "id": 52, "options": { 
"legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1099,12 +1424,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "prometheus_tsdb_storage_blocks_bytes", - "legendFormat": "Storage size", + "expr": "rate(node_disk_io_time_seconds_total{device!~\"dm-.*\"}[5m])", + "legendFormat": "{{device}}", "refId": "A" } ], - "title": "Prometheus storage size", + "title": "Disk I/O utilization", "type": "timeseries" }, { @@ -1118,19 +1443,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1152,12 +1480,11 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "short" + "unit": "s" }, "overrides": [] }, @@ -1165,17 +1492,21 @@ "h": 8, "w": 12, "x": 12, - "y": 65 + "y": 31 }, - "id": 23, + "id": 53, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "mean", + "max" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1187,12 +1518,21 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "prometheus_tsdb_head_series", - "legendFormat": "Series count", + "expr": "rate(node_disk_read_time_seconds_total{device!~\"dm-.*\"}[5m]) / rate(node_disk_reads_completed_total{device!~\"dm-.*\"}[5m])", + "legendFormat": "{{device}} read", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + 
"uid": "P7A0D6631BB10B34F" + }, + "expr": "rate(node_disk_write_time_seconds_total{device!~\"dm-.*\"}[5m]) / rate(node_disk_writes_completed_total{device!~\"dm-.*\"}[5m])", + "legendFormat": "{{device}} write", + "refId": "B" } ], - "title": "Prometheus time series count", + "title": "Disk I/O average latency", "type": "timeseries" }, { @@ -1201,11 +1541,11 @@ "h": 1, "w": 24, "x": 0, - "y": 73 + "y": 39 }, - "id": 103, + "id": 101, "panels": [], - "title": "Sink Postgres database", + "title": "Container resource usage", "type": "row" }, { @@ -1219,19 +1559,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1253,12 +1596,11 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "short" + "unit": "percent" }, "overrides": [] }, @@ -1266,9 +1608,9 @@ "h": 8, "w": 12, "x": 0, - "y": 74 + "y": 40 }, - "id": 30, + "id": 10, "options": { "legend": { "calcs": [], @@ -1277,6 +1619,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1288,21 +1631,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "pg_stat_database_numbackends{datname=\"measurements\"}", - "legendFormat": "Active connections", + "expr": "rate(container_cpu_usage_seconds_total{name=~\".+\"}[5m]) * 100", + "legendFormat": "{{name}}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "pg_settings_max_connections", - "legendFormat": "Max connections", - "refId": "B" } ], - "title": "Sink Postgres connections", + "title": 
"Container CPU usage", "type": "timeseries" }, { @@ -1316,19 +1650,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1350,12 +1687,11 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "qps" + "unit": "bytes" }, "overrides": [] }, @@ -1363,9 +1699,9 @@ "h": 8, "w": 12, "x": 12, - "y": 74 + "y": 40 }, - "id": 31, + "id": 11, "options": { "legend": { "calcs": [], @@ -1374,6 +1710,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1385,21 +1722,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(pg_stat_database_xact_commit{datname=\"measurements\"}[5m])", - "legendFormat": "Commits", + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "legendFormat": "{{name}}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "rate(pg_stat_database_xact_rollback{datname=\"measurements\"}[5m])", - "legendFormat": "Rollbacks", - "refId": "B" } ], - "title": "Sink Postgres transactions", + "title": "Container memory usage", "type": "timeseries" }, { @@ -1413,19 +1741,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": 
"linear", "lineWidth": 1, "pointSize": 5, @@ -1447,22 +1778,34 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "bytes" + "unit": "Bps" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*rx.*/" + }, + "properties": [ + { + "id": "custom.transform", + "value": "negative-Y" + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 82 + "y": 48 }, - "id": 32, + "id": 12, "options": { "legend": { "calcs": [], @@ -1471,6 +1814,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1482,12 +1826,21 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "pg_database_size_bytes{datname=\"measurements\"}", - "legendFormat": "Database size", + "expr": "rate(container_network_receive_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} rx", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "rate(container_network_transmit_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} tx", + "refId": "B" } ], - "title": "Sink Postgres database size", + "title": "Container network I/O", "type": "timeseries" }, { @@ -1501,19 +1854,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1535,22 +1891,34 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, "unit": "Bps" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*read.*/" + }, + "properties": [ + { + "id": 
"custom.transform", + "value": "negative-Y" + } + ] + } + ] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 82 + "y": 48 }, - "id": 33, + "id": 13, "options": { "legend": { "calcs": [], @@ -1559,6 +1927,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1570,8 +1939,8 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(pg_stat_database_blks_read{datname=\"measurements\"}[5m]) * 8192", - "legendFormat": "Disk reads", + "expr": "rate(container_fs_reads_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} read", "refId": "A" }, { @@ -1579,12 +1948,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(pg_stat_database_blks_hit{datname=\"measurements\"}[5m]) * 8192", - "legendFormat": "Cache hits", + "expr": "rate(container_fs_writes_bytes_total{name=~\".+\"}[5m])", + "legendFormat": "{{name}} write", "refId": "B" } ], - "title": "Sink Postgres block I/O", + "title": "Container disk I/O", "type": "timeseries" }, { @@ -1593,11 +1962,11 @@ "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 56 }, - "id": 104, + "id": 102, "panels": [], - "title": "Host stats", + "title": "Victoria Metrics metrics", "type": "row" }, { @@ -1611,19 +1980,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1634,7 +2006,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" @@ -1645,136 +2017,30 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "percentunit" + "unit": 
"samplesps" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "user" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "blue", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "system" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "iowait" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "nice" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "purple", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "irq" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "softirq" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "light-yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "steal" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "semi-dark-red", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 6 + "y": 57 }, - "id": 40, + "id": 20, "options": { "legend": { - "calcs": ["last"], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1786,66 +2052,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "avg(irate(node_cpu_seconds_total{mode=\"user\"}[5m]))", - "legendFormat": "user", + "expr": "sum(rate(vm_rows_inserted_total{job=\"victoriametrics\"}[5m]))", + "legendFormat": "Rows inserted", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": 
"avg(irate(node_cpu_seconds_total{mode=\"system\"}[5m]))", - "legendFormat": "system", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "avg(irate(node_cpu_seconds_total{mode=\"iowait\"}[5m]))", - "legendFormat": "iowait", - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "avg(irate(node_cpu_seconds_total{mode=\"nice\"}[5m]))", - "legendFormat": "nice", - "refId": "D" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "avg(irate(node_cpu_seconds_total{mode=\"irq\"}[5m]))", - "legendFormat": "irq", - "refId": "E" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "avg(irate(node_cpu_seconds_total{mode=\"softirq\"}[5m]))", - "legendFormat": "softirq", - "refId": "F" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "avg(irate(node_cpu_seconds_total{mode=\"steal\"}[5m]))", - "legendFormat": "steal", - "refId": "G" } ], - "title": "System CPU usage breakdown", + "title": "Victoria Metrics ingestion rate", "type": "timeseries" }, { @@ -1859,125 +2071,67 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "bytes" - }, - 
"overrides": [ - { - "matcher": { - "id": "byName", - "options": "Application" + "viz": false }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Buffers" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cache" + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] + "thresholdsStyle": { + "mode": "off" + } }, - { - "matcher": { - "id": "byName", - "options": "Free" - }, - "properties": [ + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } + "color": "green" } ] - } - ] + }, + "unit": "s" + }, + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 6 + "y": 57 }, - "id": 41, + "id": 21, "options": { "legend": { - "calcs": ["last"], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -1989,39 +2143,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes - node_memory_SReclaimable_bytes", - "legendFormat": "Application", + "expr": "avg(scrape_duration_seconds{job=~\"cadvisor|node-exporter|postgres-sink|pgwatch-prometheus\"}) by (job)", + "legendFormat": "{{job}}", "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "node_memory_Buffers_bytes", - 
"legendFormat": "Buffers", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "node_memory_Cached_bytes + node_memory_SReclaimable_bytes", - "legendFormat": "Cache", - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, - "expr": "node_memory_MemFree_bytes", - "legendFormat": "Free", - "refId": "D" } ], - "title": "System memory usage breakdown", + "title": "Scrape duration by target", "type": "timeseries" }, { @@ -2035,19 +2162,23 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2069,35 +2200,21 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "Bps" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*receive.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 14 + "y": 65 }, - "id": 42, + "id": 22, "options": { "legend": { "calcs": [], @@ -2106,6 +2223,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -2117,8 +2235,8 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_network_receive_bytes_total{device!~\"lo|veth.*\"}[5m])", - "legendFormat": "{{device}} receive", + "expr": "sum(vm_data_size_bytes{job=\"victoriametrics\"})", + "legendFormat": "Data size", "refId": "A" }, { @@ -2126,12 +2244,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - 
"expr": "rate(node_network_transmit_bytes_total{device!~\"lo|veth.*\"}[5m])", - "legendFormat": "{{device}} transmit", + "expr": "sum(vm_cache_size_bytes{job=\"victoriametrics\"})", + "legendFormat": "Cache size", "refId": "B" } ], - "title": "System network I/O", + "title": "Victoria Metrics storage size", "type": "timeseries" }, { @@ -2145,19 +2263,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2179,12 +2300,11 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "percent" + "unit": "short" }, "overrides": [] }, @@ -2192,9 +2312,9 @@ "h": 8, "w": 12, "x": 12, - "y": 14 + "y": 65 }, - "id": 43, + "id": 23, "options": { "legend": { "calcs": [], @@ -2203,6 +2323,7 @@ "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -2214,12 +2335,21 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "(node_filesystem_size_bytes{fstype!~\"tmpfs|fuse.lxcfs|squashfs|vfat\"} - node_filesystem_avail_bytes{fstype!~\"tmpfs|fuse.lxcfs|squashfs|vfat\"}) / node_filesystem_size_bytes{fstype!~\"tmpfs|fuse.lxcfs|squashfs|vfat\"} * 100", - "legendFormat": "{{mountpoint}}", + "expr": "sum(vm_rows{job=\"victoriametrics\"})", + "legendFormat": "Total rows", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, + "expr": "sum(rate(vm_rows_inserted_total{job=\"victoriametrics\"}[5m]))", + "legendFormat": "Rows inserted rate", + "refId": "B" } ], - "title": "System disk usage", + "title": "Victoria Metrics rows count", "type": 
"timeseries" }, { @@ -2228,11 +2358,11 @@ "h": 1, "w": 24, "x": 0, - "y": 22 + "y": 73 }, - "id": 105, + "id": 103, "panels": [], - "title": "Disk I/O metrics", + "title": "Sink Postgres database", "type": "row" }, { @@ -2246,19 +2376,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2280,43 +2413,30 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "iops" + "unit": "short" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 23 + "y": 74 }, - "id": 50, + "id": 30, "options": { "legend": { - "calcs": ["mean", "max"], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -2328,8 +2448,8 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_disk_reads_completed_total{device!~\"dm-.*\"}[5m])", - "legendFormat": "{{device}} read", + "expr": "pg_stat_database_numbackends{datname=\"measurements\"}", + "legendFormat": "Active connections", "refId": "A" }, { @@ -2337,12 +2457,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_disk_writes_completed_total{device!~\"dm-.*\"}[5m])", - "legendFormat": "{{device}} write", + "expr": "pg_settings_max_connections", + "legendFormat": "Max connections", "refId": "B" } ], - "title": "Disk I/O 
operations (IOPS)", + "title": "Sink Postgres connections", "type": "timeseries" }, { @@ -2356,19 +2476,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2390,43 +2513,30 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "Bps" + "unit": "qps" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/.*read.*/" - }, - "properties": [ - { - "id": "custom.transform", - "value": "negative-Y" - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 23 + "y": 74 }, - "id": 51, + "id": 31, "options": { "legend": { - "calcs": ["mean", "max"], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -2438,8 +2548,8 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_disk_read_bytes_total{device!~\"dm-.*\"}[5m])", - "legendFormat": "{{device}} read", + "expr": "rate(pg_stat_database_xact_commit{datname=\"measurements\"}[5m])", + "legendFormat": "Commits", "refId": "A" }, { @@ -2447,12 +2557,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_disk_written_bytes_total{device!~\"dm-.*\"}[5m])", - "legendFormat": "{{device}} write", + "expr": "rate(pg_stat_database_xact_rollback{datname=\"measurements\"}[5m])", + "legendFormat": "Rollbacks", "refId": "B" } ], - "title": "Disk I/O throughput", + "title": "Sink Postgres transactions", "type": "timeseries" }, { @@ -2466,19 +2576,22 @@ "mode": 
"palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -2496,26 +2609,15 @@ } }, "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "yellow", - "value": 0.7 - }, - { - "color": "red", - "value": 0.9 + "color": "green" } ] }, - "unit": "percentunit" + "unit": "bytes" }, "overrides": [] }, @@ -2523,17 +2625,18 @@ "h": 8, "w": 12, "x": 0, - "y": 31 + "y": 82 }, - "id": 52, + "id": 32, "options": { "legend": { - "calcs": ["mean", "max"], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -2545,12 +2648,12 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_disk_io_time_seconds_total{device!~\"dm-.*\"}[5m])", - "legendFormat": "{{device}}", + "expr": "pg_database_size_bytes{datname=\"measurements\"}", + "legendFormat": "Database size", "refId": "A" } ], - "title": "Disk I/O utilization", + "title": "Sink Postgres database size", "type": "timeseries" }, { @@ -2564,19 +2667,22 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, + "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { + "legend": false, "tooltip": false, - "viz": false, - "legend": false + "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ 
-2598,12 +2704,11 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] }, - "unit": "s" + "unit": "Bps" }, "overrides": [] }, @@ -2611,17 +2716,18 @@ "h": 8, "w": 12, "x": 12, - "y": 31 + "y": 82 }, - "id": 53, + "id": 33, "options": { "legend": { - "calcs": ["mean", "max"], - "displayMode": "table", + "calcs": [], + "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { + "hideZeros": false, "mode": "multi", "sort": "none" } @@ -2633,8 +2739,8 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_disk_read_time_seconds_total{device!~\"dm-.*\"}[5m]) / rate(node_disk_reads_completed_total{device!~\"dm-.*\"}[5m])", - "legendFormat": "{{device}} read", + "expr": "rate(pg_stat_database_blks_read{datname=\"measurements\"}[5m]) * 8192", + "legendFormat": "Disk reads", "refId": "A" }, { @@ -2642,17 +2748,22 @@ "type": "prometheus", "uid": "P7A0D6631BB10B34F" }, - "expr": "rate(node_disk_write_time_seconds_total{device!~\"dm-.*\"}[5m]) / rate(node_disk_writes_completed_total{device!~\"dm-.*\"}[5m])", - "legendFormat": "{{device}} write", + "expr": "rate(pg_stat_database_blks_hit{datname=\"measurements\"}[5m]) * 8192", + "legendFormat": "Cache hits", "refId": "B" } ], - "title": "Disk I/O average latency", + "title": "Sink Postgres block I/O", "type": "timeseries" } ], - "schemaVersion": 39, - "tags": ["self-monitoring", "monitoring-stack"], + "preload": false, + "refresh": "", + "schemaVersion": 41, + "tags": [ + "self-monitoring", + "monitoring-stack" + ], "templating": { "list": [] }, @@ -2664,7 +2775,5 @@ "timezone": "", "title": "Self-monitoring dashboard", "uid": "self-monitoring", - "version": 0, - "weekStart": "" -} - + "version": 7 +} \ No newline at end of file diff --git a/config/prometheus/prometheus.yml b/config/prometheus/prometheus.yml index bc678c2..3802387 100644 --- a/config/prometheus/prometheus.yml +++ b/config/prometheus/prometheus.yml @@ -16,8 +16,8 @@ 
scrape_configs: scrape_timeout: 25s # Timeout for each scrape (must be < scrape_interval) metrics_path: /pgwatch - # Self-monitoring: Prometheus internal metrics - - job_name: 'prometheus' + # Self-monitoring: Victoria Metrics internal metrics + - job_name: 'victoriametrics' static_configs: - targets: ['localhost:9090'] scrape_interval: 15s From 72c65cd577a937786852a59ce17f6e363b6b4ae6 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Wed, 29 Oct 2025 01:32:50 +0200 Subject: [PATCH 6/9] Made retention period 2 weeks instead of 8 days --- docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index f3c98f6..d60be9e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -65,7 +65,7 @@ services: - ./config/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml command: - "-storageDataPath=/victoria-metrics-data" - - "-retentionPeriod=200h" + - "-retentionPeriod=336h" - "-httpListenAddr=:9090" - "-promscrape.config=/etc/prometheus/prometheus.yml" - "-promscrape.config.strictParse=false" From b958fa0036c0152c66a44671936881050baf9960 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Wed, 29 Oct 2025 16:13:29 +0200 Subject: [PATCH 7/9] Changed blk_write_time in (blk_read_time vs blk_write_time per call) --- .../dashboards/Dashboard_1_Node_performance_overview.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json index c8e9ab7..3857460 100644 --- a/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json +++ b/config/grafana/dashboards/Dashboard_1_Node_performance_overview.json @@ -1863,7 +1863,7 @@ "expr": "sum(irate(pgwatch_pg_stat_statements_block_write_total{cluster='$cluster_name', node_name='$node_name', 
datname=\"$db_name\"}[$__rate_interval]))/sum(irate(pgwatch_pg_stat_statements_calls{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"}[$__rate_interval]))", "hide": false, "interval": "20", - "legendFormat": "blk_read_time", + "legendFormat": "blk_write_time", "range": true, "refId": "B" } From 6a76444ade063cbc9d31c53f458cf12c5fae8768 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:08:38 +0200 Subject: [PATCH 8/9] 1. StatIO for indexes 2. Switch to pg_class from pgwatch_pg_btree_bloat_real_size_mib 3. Change to sentence case in dash2 table 4. all_ instead of user_ for tables/indexes 5. fixed a bug of not filtering top_k for aggregated table stats 6. Added statement timeouts to all metrics --- .../dashboards/Dashboard_10_Index health.json | 4 +- ...Dashboard_2_Aggregated_query_analysis.json | 96 ++-- .../dashboards/Dashboard_8_Table_Stats.json | 422 +++++++++--------- .../Dashboard_9_Single_table_analysis.json | 4 +- config/pgwatch-prometheus/metrics.yml | 190 ++++---- 5 files changed, 372 insertions(+), 344 deletions(-) diff --git a/config/grafana/dashboards/Dashboard_10_Index health.json b/config/grafana/dashboards/Dashboard_10_Index health.json index 27a0035..78a45e2 100644 --- a/config/grafana/dashboards/Dashboard_10_Index health.json +++ b/config/grafana/dashboards/Dashboard_10_Index health.json @@ -593,9 +593,9 @@ "targets": [ { "editorMode": "code", - "expr": "topk($top_n, pgwatch_pg_btree_bloat_real_size_mib{cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"})*1024*1024", + "expr": "topk($top_n, pgwatch_pg_class_relation_size_bytes{relkind=\"105\", cluster='$cluster_name', node_name='$node_name', datname=\"$db_name\"})", "interval": "20", - "legendFormat": "{{schemaname}}.{{tblname}}.{{idxname}}", + "legendFormat": "{{schemaname}}.{{relname}}", "range": true, "refId": "A" } diff --git 
a/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json b/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json index 6f44cd5..812f189 100644 --- a/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json +++ b/config/grafana/dashboards/Dashboard_2_Aggregated_query_analysis.json @@ -217,32 +217,32 @@ }, { "selector": "exec_time", - "text": "Exec Time (ms)", + "text": "Exec time (ms)", "type": "number" }, { "selector": "exec_time_per_sec", - "text": "Exec Time/sec (ms/s)", + "text": "Exec time/sec (ms/s)", "type": "number" }, { "selector": "exec_time_per_call", - "text": "Exec Time/call (ms)", + "text": "Exec time/call (ms)", "type": "number" }, { "selector": "plan_time", - "text": "Planning Time (ms)", + "text": "Planning time (ms)", "type": "number" }, { "selector": "plan_time_per_sec", - "text": "Planning Time/sec (ms/s)", + "text": "Planning time/sec (ms/s)", "type": "number" }, { "selector": "plan_time_per_call", - "text": "Planning Time/call (ms)", + "text": "Planning time/call (ms)", "type": "number" }, { @@ -262,92 +262,92 @@ }, { "selector": "shared_blks_hit", - "text": "Shared Blocks Hit", + "text": "Shared blocks hit", "type": "number" }, { "selector": "shared_blks_hit_per_sec", - "text": "Shared Blocks Hit/sec", + "text": "Shared blocks hit/sec", "type": "number" }, { "selector": "shared_blks_hit_per_call", - "text": "Shared Blocks Hit/call", + "text": "Shared blocks hit/call", "type": "number" }, { "selector": "shared_blks_read", - "text": "Shared Blocks Read", + "text": "Shared blocks read", "type": "number" }, { "selector": "shared_blks_read_per_sec", - "text": "Shared Blocks Read/sec", + "text": "Shared blocks read/sec", "type": "number" }, { "selector": "shared_blks_read_per_call", - "text": "Shared Blocks Read/call", + "text": "Shared blocks read/call", "type": "number" }, { "selector": "shared_blks_dirtied", - "text": "Shared Blocks Dirtied", + "text": "Shared blocks dirtied", "type": "number" }, { 
"selector": "shared_blks_dirtied_per_sec", - "text": "Shared Blocks Dirtied/sec", + "text": "Shared blocks dirtied/sec", "type": "number" }, { "selector": "shared_blks_dirtied_per_call", - "text": "Shared Blocks Dirtied/call", + "text": "Shared blocks dirtied/call", "type": "number" }, { "selector": "shared_blks_written", - "text": "Shared Blocks Written", + "text": "Shared blocks written", "type": "number" }, { "selector": "shared_blks_written_per_sec", - "text": "Shared Blocks Written/sec", + "text": "Shared blocks written/sec", "type": "number" }, { "selector": "shared_blks_written_per_call", - "text": "Shared Blocks Written/call", + "text": "Shared blocks written/call", "type": "number" }, { "selector": "blk_read_time", - "text": "Block Read Time (ms)", + "text": "Block read time (ms)", "type": "number" }, { "selector": "blk_read_time_per_sec", - "text": "Block Read Time/sec (ms)", + "text": "Block read time/sec (ms)", "type": "number" }, { "selector": "blk_read_time_per_call", - "text": "Block Read Time/call (ms)", + "text": "Block read time/call (ms)", "type": "number" }, { "selector": "blk_write_time", - "text": "Block Write Time (ms)", + "text": "Block write time (ms)", "type": "number" }, { "selector": "blk_write_time_per_sec", - "text": "Block Write Time/sec (ms)", + "text": "Block write time/sec (ms)", "type": "number" }, { "selector": "blk_write_time_per_call", - "text": "Block Write Time/call (ms)", + "text": "Block write time/call (ms)", "type": "number" } ], @@ -395,37 +395,37 @@ }, "includeByName": {}, "indexByName": { - "Block Read Time (ms)": 27, - "Block Read Time/call (ms)": 29, - "Block Read Time/sec (ms)": 28, - "Block Write Time (ms)": 24, - "Block Write Time/call (ms)": 26, - "Block Write Time/sec (ms)": 25, + "Block read time (ms)": 27, + "Block read time/call (ms)": 29, + "Block read time/sec (ms)": 28, + "Block write time (ms)": 24, + "Block write time/call (ms)": 26, + "Block write time/sec (ms)": 25, "Calls": 1, "Calls/sec": 2, 
"Duration (seconds)": 30, - "Exec Time (ms)": 3, - "Exec Time/call (ms)": 5, - "Exec Time/sec (ms/s)": 4, - "Planning Time (ms)": 6, - "Planning Time/call (ms)": 8, - "Planning Time/sec (ms/s)": 7, + "Exec time (ms)": 3, + "Exec time/call (ms)": 5, + "Exec time/sec (ms/s)": 4, + "Planning time (ms)": 6, + "Planning time/call (ms)": 8, + "Planning time/sec (ms/s)": 7, "Query ID": 0, "Rows": 9, "Rows/call": 11, "Rows/sec": 10, - "Shared Blocks Dirtied": 21, - "Shared Blocks Dirtied/call": 23, - "Shared Blocks Dirtied/sec": 22, - "Shared Blocks Hit": 12, - "Shared Blocks Hit/call": 14, - "Shared Blocks Hit/sec": 13, - "Shared Blocks Read": 15, - "Shared Blocks Read/call": 17, - "Shared Blocks Read/sec": 16, - "Shared Blocks Written": 18, - "Shared Blocks Written/call": 20, - "Shared Blocks Written/sec": 19 + "Shared blocks dirtied": 21, + "Shared blocks dirtied/call": 23, + "Shared blocks dirtied/sec": 22, + "Shared blocks hit": 12, + "Shared blocks hit/call": 14, + "Shared blocks hit/sec": 13, + "Shared blocks read": 15, + "Shared blocks read/call": 17, + "Shared blocks read/sec": 16, + "Shared blocks written": 18, + "Shared blocks written/call": 20, + "Shared blocks written/sec": 19 }, "renameByName": {} } diff --git a/config/grafana/dashboards/Dashboard_8_Table_Stats.json b/config/grafana/dashboards/Dashboard_8_Table_Stats.json index 29eba3a..118d312 100644 --- a/config/grafana/dashboards/Dashboard_8_Table_Stats.json +++ b/config/grafana/dashboards/Dashboard_8_Table_Stats.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, - "id": 15, + "id": 10, "links": [], "panels": [ { @@ -326,7 +326,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "pgwatch_table_size_detailed_table_main_size_b{schema=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}", + "expr": "topk($top_n, pgwatch_table_size_detailed_table_main_size_b{schema=~\"$schema_name\", datname=\"$db_name\", 
cluster=\"$cluster_name\", node_name=\"$node_name\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "{{schema}}.{{table_name}}", @@ -530,7 +530,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "pgwatch_table_size_detailed_toast_indexes_size_b{schema=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}", + "expr": "topk($top_n, pgwatch_table_size_detailed_toast_indexes_size_b{schema=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "{{schema}}.{{table_name}}", @@ -856,9 +856,9 @@ "x": 0, "y": 53 }, - "id": 22, + "id": 11, "panels": [], - "title": "Estimated bloat stats", + "title": "Tuple stats", "type": "row" }, { @@ -877,12 +877,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMax": 100, - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -897,10 +895,10 @@ "type": "linear" }, "showPoints": "auto", - "spanNulls": true, + "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -918,18 +916,17 @@ "value": 80 } ] - }, - "unit": "percent" + } }, "overrides": [] }, "gridPos": { - "h": 13, + "h": 10, "w": 12, "x": 0, "y": 54 }, - "id": 8, + "id": 3, "options": { "legend": { "calcs": [ @@ -952,18 +949,23 @@ "pluginVersion": "12.0.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, pgwatch_pg_table_bloat_bloat_pct{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"})", + "expr": "topk($top_n, sum by(schema, table_name, table_full_name) 
(irate(pgwatch_table_stats_n_tup_ins{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": true, - "legendFormat": "{{schemaname}}.{{tblname}}", + "interval": "60", + "legendFormat": "{{table_full_name}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Top $top_n tables by estimated heap bloat %", + "title": "Top $top_n tables by tuple inserts per second", "type": "timeseries" }, { @@ -982,11 +984,10 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1001,15 +1002,21 @@ "type": "linear" }, "showPoints": "auto", - "spanNulls": true, + "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, + "links": [ + { + "title": "Table details", + "url": "/d/9-single-table-analysis/9-single-table-analysis?orgId=1&var-cluster_name=${cluster_name}&var-node_name=${node_name}&var-db_name=${db_name}&var-schema_name=${__field.labels.schema}&var-table_name=${__field.labels.table_name}" + } + ], "mappings": [], "thresholds": { "mode": "absolute", @@ -1023,17 +1030,17 @@ } ] }, - "unit": "bytes" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 13, + "h": 10, "w": 12, "x": 12, "y": 54 }, - "id": 2, + "id": 5, "options": { "legend": { "calcs": [ @@ -1044,7 +1051,7 @@ "displayMode": "table", "placement": "bottom", "showLegend": true, - "sortBy": "Last", + "sortBy": "Mean", "sortDesc": true }, "tooltip": { @@ -1056,33 +1063,25 @@ "pluginVersion": "12.0.2", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P7A0D6631BB10B34F" + }, "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, pgwatch_pg_table_bloat_bloat_size{schemaname=~\"$schema_name\", 
datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"})", + "expr": "topk($top_n, sum by(schema, table_name, table_full_name) (irate(pgwatch_table_stats_n_tup_del{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": true, - "legendFormat": "{{schemaname}}.{{tblname}}", + "interval": "60", + "legendFormat": "{{table_full_name}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Top $top_n tables by estimated heap bloat size", + "title": "Top $top_n tables by tuple deletes per second", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 67 - }, - "id": 11, - "panels": [], - "title": "Tuple stats", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -1099,6 +1098,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1126,6 +1126,12 @@ "mode": "off" } }, + "links": [ + { + "title": "Table details", + "url": "/d/9-single-table-analysis/9-single-table-analysis?orgId=1&var-cluster_name=${cluster_name}&var-node_name=${node_name}&var-db_name=${db_name}&var-schema_name=${__field.labels.schema}&var-table_name=${__field.labels.table_name}" + } + ], "mappings": [], "thresholds": { "mode": "absolute", @@ -1138,7 +1144,8 @@ "value": 80 } ] - } + }, + "unit": "ops" }, "overrides": [] }, @@ -1146,9 +1153,9 @@ "h": 10, "w": 12, "x": 0, - "y": 68 + "y": 64 }, - "id": 3, + "id": 6, "options": { "legend": { "calcs": [ @@ -1177,7 +1184,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, sum by(schema, table_name, table_full_name) (irate(pgwatch_table_stats_n_tup_ins{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", + "expr": "topk($top_n, sum by(schema, table_name, 
table_full_name) (irate(pgwatch_table_stats_n_tup_hot_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "60", @@ -1187,7 +1194,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by tuple inserts per second", + "title": "Top $top_n tables by tuple HOT updates per second", "type": "timeseries" }, { @@ -1209,7 +1216,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1227,7 +1234,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -1260,9 +1267,9 @@ "h": 10, "w": 12, "x": 12, - "y": 68 + "y": 64 }, - "id": 5, + "id": 10, "options": { "legend": { "calcs": [ @@ -1291,7 +1298,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, sum by(schema, table_name, table_full_name) (irate(pgwatch_table_stats_n_tup_del{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", + "expr": "topk($top_n, sum by(schema, table_name, table_full_name) (irate(pgwatch_table_stats_n_tup_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval]) - irate(pgwatch_table_stats_n_tup_hot_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "60", @@ -1301,7 +1308,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by tuple deletes per second", + "title": "Top $top_n tables by tuple non-HOT updates per second", "type": "timeseries" }, { @@ -1348,12 +1355,6 @@ "mode": "off" } }, - "links": [ - { - "title": "Table details", - "url": 
"/d/9-single-table-analysis/9-single-table-analysis?orgId=1&var-cluster_name=${cluster_name}&var-node_name=${node_name}&var-db_name=${db_name}&var-schema_name=${__field.labels.schema}&var-table_name=${__field.labels.table_name}" - } - ], "mappings": [], "thresholds": { "mode": "absolute", @@ -1372,12 +1373,12 @@ "overrides": [] }, "gridPos": { - "h": 10, - "w": 12, + "h": 13, + "w": 24, "x": 0, - "y": 78 + "y": 74 }, - "id": 6, + "id": 40, "options": { "legend": { "calcs": [ @@ -1400,23 +1401,19 @@ "pluginVersion": "12.0.2", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, sum by(schema, table_name, table_full_name) (irate(pgwatch_table_stats_n_tup_hot_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_all_tables_seq_tup_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, - "interval": "60", - "legendFormat": "{{table_full_name}}", + "interval": "20", + "legendFormat": "{{schemaname}}.{{relname}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Top $top_n tables by tuple HOT updates per second", + "title": "Top $top_n tables by sequential reads of live tuples", "type": "timeseries" }, { @@ -1435,6 +1432,7 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", @@ -1462,12 +1460,6 @@ "mode": "off" } }, - "links": [ - { - "title": "Table details", - "url": "/d/9-single-table-analysis/9-single-table-analysis?orgId=1&var-cluster_name=${cluster_name}&var-node_name=${node_name}&var-db_name=${db_name}&var-schema_name=${__field.labels.schema}&var-table_name=${__field.labels.table_name}" - } - ], 
"mappings": [], "thresholds": { "mode": "absolute", @@ -1486,12 +1478,12 @@ "overrides": [] }, "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 78 + "h": 13, + "w": 24, + "x": 0, + "y": 87 }, - "id": 10, + "id": 41, "options": { "legend": { "calcs": [ @@ -1514,25 +1506,34 @@ "pluginVersion": "12.0.2", "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "P7A0D6631BB10B34F" - }, "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, sum by(schema, table_name, table_full_name) (irate(pgwatch_table_stats_n_tup_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval]) - irate(pgwatch_table_stats_n_tup_hot_upd{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", schema=~\"$schema_name\"}[$__rate_interval])))", + "expr": "topk($top_n, irate(pgwatch_pg_stat_all_tables_idx_tup_fetch{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, - "interval": "60", - "legendFormat": "{{table_full_name}}", + "interval": "20", + "legendFormat": "{{schemaname}}.{{relname}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Top $top_n tables by tuple non-HOT updates per second", + "title": "Top $top_n tables by index fetches of live tuples", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 100 + }, + "id": 30, + "panels": [], + "title": "IO stats", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -1590,17 +1591,17 @@ } ] }, - "unit": "ops" + "unit": "binBps" }, "overrides": [] }, "gridPos": { "h": 13, - "w": 24, + "w": 12, "x": 0, - "y": 88 + "y": 101 }, - "id": 40, + "id": 31, "options": { "legend": { "calcs": [ @@ -1610,9 +1611,7 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true + 
"showLegend": true }, "tooltip": { "hideZeros": false, @@ -1625,7 +1624,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_user_tables_seq_tup_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", + "expr": "topk($top_n, \n irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval])\n) * on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1635,7 +1634,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by sequential reads of live tuples", + "title": "Top $top_n tables by total shared block hits", "type": "timeseries" }, { @@ -1658,7 +1657,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1676,13 +1675,14 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], + "noValue": "-", "thresholds": { "mode": "absolute", "steps": [ @@ -1695,17 +1695,17 @@ } ] }, - "unit": "ops" + "unit": "percentunit" }, 
"overrides": [] }, "gridPos": { "h": 13, - "w": 24, - "x": 0, + "w": 12, + "x": 12, "y": 101 }, - "id": 41, + "id": 42, "options": { "legend": { "calcs": [ @@ -1715,9 +1715,7 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true, - "sortBy": "Mean", - "sortDesc": true + "showLegend": true }, "tooltip": { "hideZeros": false, @@ -1730,7 +1728,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_stat_user_tables_idx_tup_fetch{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", + "expr": "topk($top_n, \n (irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))/(irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", 
cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + (irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))) \n) ", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1740,22 +1738,9 @@ "useBackend": false } ], - "title": "Top $top_n tables by index fetches of live tuples", + "title": "Top $top_n tables by total shared block hit ratio", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 114 - }, - "id": 30, - "panels": [], - "title": "IO stats", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -1794,7 +1779,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" @@ -1821,9 +1806,9 @@ "h": 13, "w": 12, "x": 0, - "y": 115 + "y": 114 }, - "id": 31, + "id": 37, "options": { "legend": { "calcs": [ @@ -1846,7 +1831,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, \n irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n 
irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval])\n) * on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", + "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \nirate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \nirate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \nirate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval])) * on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1856,7 +1841,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by total shared block hits", + "title": "Top $top_n tables by total shared block reads", "type": "timeseries" }, { @@ -1925,9 +1910,9 @@ "h": 13, "w": 12, "x": 12, - "y": 115 + "y": 114 }, - "id": 42, + "id": 43, "options": { "legend": { "calcs": [ @@ -1950,7 +1935,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, \n 
(irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))/(irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + (irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n 
irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))) \n) ", + "expr": "topk($top_n, \n (irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))/(irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + (irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n 
irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))) \n) ", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1960,7 +1945,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by total shared block hit ratio", + "title": "Top $top_n tables by total shared block read ratio", "type": "timeseries" }, { @@ -2001,7 +1986,7 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -2026,11 +2011,11 @@ }, "gridPos": { "h": 13, - "w": 12, + "w": 24, "x": 0, - "y": 128 + "y": 127 }, - "id": 37, + "id": 36, "options": { "legend": { "calcs": [ @@ -2053,7 +2038,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \nirate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \nirate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \nirate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval])) * on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", + "expr": "topk($top_n, 
irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))* on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2063,7 +2048,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by total shared block reads", + "title": "Top $top_n tables by table (without TOAST) shared block hits", "type": "timeseries" }, { @@ -2086,7 +2071,7 @@ "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 0, + "fillOpacity": 100, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2104,14 +2089,13 @@ "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "noValue": "-", "thresholds": { "mode": "absolute", "steps": [ @@ -2124,17 +2108,17 @@ } ] }, - "unit": "percentunit" + "unit": "binBps" }, "overrides": [] }, "gridPos": { "h": 13, - "w": 12, - "x": 12, - "y": 128 + "w": 24, + "x": 0, + "y": 140 }, - "id": 43, + "id": 32, "options": { "legend": { "calcs": [ @@ -2157,7 +2141,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, \n (irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n 
irate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))/(irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + (irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]) + \n irate(pgwatch_pg_statio_all_tables_tidx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))) \n) ", + "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2167,7 +2151,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by total shared block read ratio", + 
"title": "Top $top_n tables by table (without TOAST) shared block reads", "type": "timeseries" }, { @@ -2235,9 +2219,9 @@ "h": 13, "w": 24, "x": 0, - "y": 141 + "y": 153 }, - "id": 36, + "id": 38, "options": { "legend": { "calcs": [ @@ -2260,7 +2244,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_heap_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))* on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", + "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))* on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2270,7 +2254,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by table (without TOAST) shared block hits", + "title": "Top $top_n tables by TOAST shared block hits", "type": "timeseries" }, { @@ -2338,9 +2322,9 @@ "h": 13, "w": 24, "x": 0, - "y": 154 + "y": 166 }, - "id": 32, + "id": 39, "options": { "legend": { "calcs": [ @@ -2363,7 +2347,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_heap_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, 
"interval": "20", @@ -2373,7 +2357,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by table (without TOAST) shared block reads", + "title": "Top $top_n tables by TOAST shared block reads", "type": "timeseries" }, { @@ -2441,9 +2425,9 @@ "h": 13, "w": 24, "x": 0, - "y": 167 + "y": 179 }, - "id": 38, + "id": 33, "options": { "legend": { "calcs": [ @@ -2466,7 +2450,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_toast_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))* on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", + "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval])) * on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2476,7 +2460,7 @@ "useBackend": false } ], - "title": "Top $top_n tables by TOAST shared block hits", + "title": "Top $top_n tables by index shared block hits", "type": "timeseries" }, { @@ -2544,9 +2528,9 @@ "h": 13, "w": 24, "x": 0, - "y": 180 + "y": 192 }, - "id": 39, + "id": 34, "options": { "legend": { "calcs": [ @@ -2569,7 +2553,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_toast_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))", + "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", 
cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))* on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -2579,9 +2563,47 @@ "useBackend": false } ], - "title": "Top $top_n tables by TOAST shared block reads", + "title": "Top $top_n tables by index shared block reads", "type": "timeseries" }, + { + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 219 + }, + "id": 20, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

\n Brought to you by PostgresAI\n \"Postgres.AI\"\n

\n", + "mode": "html" + }, + "pluginVersion": "12.0.2", + "title": "", + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 205 + }, + "id": 22, + "panels": [], + "title": "Estimated bloat stats", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -2598,11 +2620,12 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", + "axisSoftMax": 100, "axisSoftMin": 0, "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", - "fillOpacity": 100, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2617,10 +2640,10 @@ "type": "linear" }, "showPoints": "auto", - "spanNulls": false, + "spanNulls": true, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" }, "thresholdsStyle": { "mode": "off" @@ -2639,17 +2662,17 @@ } ] }, - "unit": "binBps" + "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 13, - "w": 24, + "w": 12, "x": 0, - "y": 193 + "y": 206 }, - "id": 33, + "id": 8, "options": { "legend": { "calcs": [ @@ -2659,7 +2682,9 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true }, "tooltip": { "hideZeros": false, @@ -2672,17 +2697,16 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_idx_blks_hit{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval])) * on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", + "expr": "topk($top_n, pgwatch_pg_table_bloat_bloat_pct{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"})", "fullMetaSearch": false, "includeNullMetadata": true, - "interval": "20", - "legendFormat": "{{schemaname}}.{{relname}}", + "legendFormat": 
"{{schemaname}}.{{tblname}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Top $top_n tables by index shared block hits", + "title": "Top $top_n tables by estimated heap bloat %", "type": "timeseries" }, { @@ -2720,7 +2744,7 @@ "type": "linear" }, "showPoints": "auto", - "spanNulls": false, + "spanNulls": true, "stacking": { "group": "A", "mode": "normal" @@ -2742,17 +2766,17 @@ } ] }, - "unit": "binBps" + "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 13, - "w": 24, - "x": 0, + "w": 12, + "x": 12, "y": 206 }, - "id": 34, + "id": 2, "options": { "legend": { "calcs": [ @@ -2762,7 +2786,9 @@ ], "displayMode": "table", "placement": "bottom", - "showLegend": true + "showLegend": true, + "sortBy": "Last", + "sortDesc": true }, "tooltip": { "hideZeros": false, @@ -2775,43 +2801,17 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "topk($top_n, irate(pgwatch_pg_statio_all_tables_idx_blks_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"}[$__rate_interval]))* on() group_left() last_over_time(pgwatch_settings_numeric_value{datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", setting_name=\"block_size\"}[1d])", + "expr": "topk($top_n, pgwatch_pg_table_bloat_bloat_size{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\"})", "fullMetaSearch": false, "includeNullMetadata": true, - "interval": "20", - "legendFormat": "{{schemaname}}.{{relname}}", + "legendFormat": "{{schemaname}}.{{tblname}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Top $top_n tables by index shared block reads", + "title": "Top $top_n tables by estimated heap bloat size", "type": "timeseries" - }, - { - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 219 - }, - "id": 20, - "options": { - "code": { - "language": "plaintext", - "showLineNumbers": false, 
- "showMiniMap": false - }, - "content": "

\n Brought to you by PostgresAI\n \"Postgres.AI\"\n

\n", - "mode": "html" - }, - "pluginVersion": "12.0.2", - "title": "", - "type": "text" } ], "preload": false, @@ -2934,5 +2934,5 @@ "timezone": "utc", "title": "08. Aggregated table analysis", "uid": "92657f2a-985b-4d1a-99ed-2fac6e0c53e2", - "version": 97 + "version": 2 } \ No newline at end of file diff --git a/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json b/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json index 5600409..a7364ec 100644 --- a/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json +++ b/config/grafana/dashboards/Dashboard_9_Single_table_analysis.json @@ -1399,7 +1399,7 @@ { "disableTextWrap": false, "editorMode": "code", - "expr": "irate(pgwatch_pg_stat_user_tables_seq_tup_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", relname=\"$table_name\"}[$__rate_interval])", + "expr": "irate(pgwatch_pg_stat_all_tables_seq_tup_read{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", relname=\"$table_name\"}[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "20", @@ -1415,7 +1415,7 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "irate(pgwatch_pg_stat_user_tables_idx_tup_fetch{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", relname=\"$table_name\"}[$__rate_interval])", + "expr": "irate(pgwatch_pg_stat_all_tables_idx_tup_fetch{schemaname=~\"$schema_name\", datname=\"$db_name\", cluster=\"$cluster_name\", node_name=\"$node_name\", relname=\"$table_name\"}[$__rate_interval])", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, diff --git a/config/pgwatch-prometheus/metrics.yml b/config/pgwatch-prometheus/metrics.yml index e65af7f..a87b649 100644 --- a/config/pgwatch-prometheus/metrics.yml +++ b/config/pgwatch-prometheus/metrics.yml @@ -57,6 +57,7 @@ metrics: pg_stat_bgwriter 
node_status: primary is_instance_level: true + statement_timeout_seconds: 15 checkpointer: description: > Retrieves key statistics from the PostgreSQL `pg_stat_checkpointer` view, providing insights into the checkpointer's performance. @@ -79,6 +80,7 @@ metrics: (extract(epoch from now() - stats_reset))::int as last_reset_s from pg_stat_checkpointer + statement_timeout_seconds: 15 db_size: description: > Retrieves the size of the current database and the size of the `pg_catalog` schema, providing insights into the storage usage of the database. @@ -273,6 +275,7 @@ metrics: datname = current_database() gauges: - '*' + statement_timeout_seconds: 15 locks_mode: description: > Retrieves lock mode statistics from the PostgreSQL `pg_locks` view, providing insights into the different lock modes currently held in the database. @@ -299,6 +302,7 @@ metrics: unnest('{AccessShareLock, ExclusiveLock, RowShareLock, RowExclusiveLock, ShareLock, ShareRowExclusiveLock, AccessExclusiveLock, ShareUpdateExclusiveLock}'::text[]) lockmodes gauges: - '*' + statement_timeout_seconds: 15 replication: description: > This metric collects replication statistics from the `pg_stat_replication` view. @@ -330,6 +334,7 @@ metrics: gauges: - '*' is_instance_level: true + statement_timeout_seconds: 15 replication_slots: description: > This metric collects information about replication slots from the `pg_replication_slots` view. @@ -352,6 +357,7 @@ metrics: gauges: - '*' is_instance_level: true + statement_timeout_seconds: 15 settings: description: > This metric collects various PostgreSQL server settings and configurations. 
@@ -374,6 +380,7 @@ metrics: gauges: - '*' is_instance_level: true + statement_timeout_seconds: 15 table_stats: description: > @@ -433,7 +440,7 @@ metrics: analyze_count, autoanalyze_count, case when c.relkind <> 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age - from pg_stat_user_tables ut + from pg_stat_all_tables ut join pg_class c on c.oid = ut.relid where @@ -570,7 +577,7 @@ metrics: autoanalyze_count, case when c.relkind <> 'p' then age(c.relfrozenxid) else 0 end as tx_freeze_age, extract(epoch from now() - last_seq_scan)::int8 as last_seq_scan_s - from pg_stat_user_tables ut + from pg_stat_all_tables ut join pg_class c on c.oid = ut.relid left join pg_class t on t.oid = c.reltoastrelid left join pg_index ti on ti.indrelid = t.oid @@ -697,6 +704,7 @@ metrics: gauges: - '*' is_instance_level: true + statement_timeout_seconds: 15 wait_events: description: > This metric retrieves information about wait events for active sessions in the PostgreSQL database. @@ -726,6 +734,7 @@ metrics: gauges: - total is_instance_level: true + statement_timeout_seconds: 15 pg_stat_activity: sqls: 11: |- @@ -761,6 +770,7 @@ metrics: gauges: - count - max_tx_duration + statement_timeout_seconds: 15 pg_archiver: sqls: 11: | @@ -788,6 +798,7 @@ metrics: from current_wal cw, archive_wal aw gauges: - pending_wal_count + statement_timeout_seconds: 15 pg_blocked: sqls: 11: |- @@ -805,6 +816,7 @@ metrics: limit 5000 gauges: - queries + statement_timeout_seconds: 15 pg_database_wraparound: sqls: 11: | @@ -820,6 +832,7 @@ metrics: gauges: - age_datfrozenxid - age_datminmxid + statement_timeout_seconds: 15 pg_long_running_transactions: sqls: 11: | @@ -831,6 +844,7 @@ metrics: where state is distinct from 'idle' and (now() - xact_start) > '1 minutes'::interval and query not like 'autovacuum:%' gauges: - '*' + statement_timeout_seconds: 15 pg_stat_replication: sqls: 11: | @@ -851,6 +865,7 @@ metrics: - flush_lsn - replay_lsn - reply_time_lag + statement_timeout_seconds: 15 
pg_stat_statements: sqls: 11: | @@ -962,6 +977,7 @@ metrics: - wal_bytes - temp_bytes_read - temp_bytes_written + statement_timeout_seconds: 15 pg_class: description: > Direct access to pg_class system catalog data for all relations (tables, indexes, etc). @@ -989,7 +1005,8 @@ metrics: - relpages - relation_size_bytes - total_relation_size_bytes - pg_stat_user_indexes: + statement_timeout_seconds: 15 + pg_stat_all_indexes: sqls: 11: | select schemaname as tag_schemaname, @@ -998,14 +1015,15 @@ metrics: idx_scan, idx_tup_read, idx_tup_fetch - from pg_stat_user_indexes + from pg_stat_all_indexes order by idx_scan desc limit 5000 gauges: - idx_scan - idx_tup_read - idx_tup_fetch - pg_stat_user_tables: + statement_timeout_seconds: 15 + pg_stat_all_tables: sqls: 11: | select @@ -1027,7 +1045,7 @@ metrics: (vacuum_count + autovacuum_count) as vacuum_count, (analyze_count + autoanalyze_count) as analyze_count from - pg_stat_user_tables + pg_stat_all_tables order by n_live_tup + n_dead_tup desc limit 5000 gauges: @@ -1045,6 +1063,7 @@ metrics: - last_analyze - vacuum_count - analyze_count + statement_timeout_seconds: 15 pg_stat_wal_receiver: sqls: 11: | @@ -1073,6 +1092,7 @@ metrics: - latest_end_lsn - latest_end_time - upstream_node + statement_timeout_seconds: 15 pg_stuck_idle_in_transaction: sqls: 11: | @@ -1081,6 +1101,7 @@ metrics: where state = 'idle in transaction' and (now() - query_start) > '10 minutes'::interval gauges: - queries + statement_timeout_seconds: 15 pg_total_relation_size: sqls: 11: | @@ -1094,6 +1115,7 @@ metrics: limit 5000 gauges: - bytes + statement_timeout_seconds: 15 pg_txid: sqls: 11: | @@ -1106,6 +1128,7 @@ metrics: - current - xmin - xmin_age + statement_timeout_seconds: 15 pg_xlog_position: sqls: 11: | @@ -1117,7 +1140,7 @@ metrics: end as bytes gauges: - bytes - + statement_timeout_seconds: 15 pg_btree_bloat: description: > This metric analyzes index bloat by calculating the estimated vs actual index pages and sizes. 
@@ -1232,7 +1255,7 @@ metrics: - bloat_pct - is_na - reltuples - + statement_timeout_seconds: 15 pg_table_bloat: description: > This metric analyzes table bloat by calculating the estimated vs actual table pages and sizes. @@ -1396,7 +1419,7 @@ metrics: (confrelid::regclass)::text as tag_fk_table_ref, array_to_string(indclass, ', ') as tag_opclasses from - pg_stat_user_indexes + pg_stat_all_indexes join pg_index using (indexrelid) left join pg_constraint on array_to_string(indkey, ',') = array_to_string(conkey, ',') @@ -1445,7 +1468,7 @@ metrics: from num_data; gauges: - '*' - + statement_timeout_seconds: 15 redundant_indexes: description: > This metric identifies redundant indexes that can potentially be dropped to save storage space and improve write performance. @@ -1465,7 +1488,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_constraint cn on cn.conrelid = cr.oid - left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid + left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid where contype = 'f' and i.indisunique is false @@ -1515,7 +1538,7 @@ metrics: inner join pg_opclass op2 on i2.indclass[0] = op2.oid inner join pg_am am1 on op1.opcmethod = am1.oid inner join pg_am am2 on op2.opcmethod = am2.oid - join pg_stat_user_indexes as s on s.indexrelid = i2.indexrelid + join pg_stat_all_indexes as s on s.indexrelid = i2.indexrelid join pg_class as trel on trel.oid = i2.indrelid join pg_namespace as tnsp on trel.relnamespace = tnsp.oid join pg_class as irel on irel.oid = i2.indexrelid @@ -1601,6 +1624,7 @@ metrics: select * from redundant_indexes_grouped; gauges: - '*' + statement_timeout_seconds: 15 unused_indexes: description: > This metric identifies unused indexes in the database. 
@@ -1620,7 +1644,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_constraint cn on cn.conrelid = cr.oid - left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid + left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid where contype = 'f' and i.indisunique is false @@ -1632,7 +1656,7 @@ metrics: tables.idx_scan + tables.seq_scan as all_scans, ( tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del ) as writes, pg_relation_size(relid) as table_size - from pg_stat_user_tables as tables + from pg_stat_all_tables as tables join pg_class c on c.oid = relid where c.relpages > 5 ), indexes as ( @@ -1646,14 +1670,13 @@ metrics: pg_relation_size(i.indexrelid) as index_bytes, ci.relpages, (case when a.amname = 'btree' then true else false end) as idx_is_btree, - array_to_string(i.indclass, ', ') as opclasses, - si.stats_reset + array_to_string(i.indclass, ', ') as opclasses from pg_index i join pg_class ci on ci.oid = i.indexrelid and ci.relkind = 'i' join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_am a ON ci.relam = a.oid - left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid + left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid where i.indisunique = false and i.indisvalid = true @@ -1676,7 +1699,6 @@ metrics: i.relpages, idx_is_btree, i.opclasses, - i.stats_reset, ( select count(1) from fk_indexes fi @@ -1703,8 +1725,7 @@ metrics: relpages, idx_is_btree, opclasses AS tag_opclasses, - supports_fk, - extract(epoch from stats_reset) as stats_reset_epoch + supports_fk from index_ratios where idx_scan = 0 @@ -1712,7 +1733,7 @@ metrics: order by index_size_bytes desc; gauges: - '*' - + statement_timeout_seconds: 15 rarely_used_indexes: description: > This metric identifies rarely used indexes in the database. 
@@ -1732,7 +1753,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_constraint cn on cn.conrelid = cr.oid - left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid + left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid where contype = 'f' and i.indisunique is false @@ -1744,7 +1765,7 @@ metrics: tables.idx_scan + tables.seq_scan as all_scans, ( tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del ) as writes, pg_relation_size(relid) as table_size - from pg_stat_user_tables as tables + from pg_stat_all_tables as tables join pg_class c on c.oid = relid where c.relpages > 5 ), all_writes as ( @@ -1767,7 +1788,7 @@ metrics: join pg_class cr on cr.oid = i.indrelid and cr.relkind = 'r' join pg_namespace n on n.oid = ci.relnamespace join pg_am a ON ci.relam = a.oid - left join pg_stat_user_indexes as si on si.indexrelid = i.indexrelid + left join pg_stat_all_indexes as si on si.indexrelid = i.indexrelid where i.indisunique = false and i.indisvalid = true @@ -1856,55 +1877,28 @@ metrics: order by grp, index_size_bytes desc; gauges: - '*' - + statement_timeout_seconds: 15 stats_reset: description: > - This metric tracks when statistics were last reset for indexes and tables. + This metric tracks when statistics were last reset at the database level. It provides visibility into the freshness of statistics data, which is essential for understanding - the reliability of index and table usage metrics. A recent reset time indicates that usage statistics - may not reflect long-term patterns. + the reliability of usage metrics. A recent reset time indicates that usage statistics + may not reflect long-term patterns. Note that Postgres tracks stats resets at the database level, + not per-index or per-table. 
sqls: 11: | - with index_stats as ( - select - schemaname as schema_name, - indexrelname as index_name, - relname as table_name, - 'index' as stat_type, - stats_reset, - extract(epoch from stats_reset) as stats_reset_epoch, - extract(epoch from now() - stats_reset) as seconds_since_reset - from pg_stat_user_indexes - where stats_reset is not null - ), table_stats as ( - select - schemaname as schema_name, - null::text as index_name, - relname as table_name, - 'table' as stat_type, - stats_reset, - extract(epoch from stats_reset) as stats_reset_epoch, - extract(epoch from now() - stats_reset) as seconds_since_reset - from pg_stat_user_tables - where stats_reset is not null - ), combined_stats as ( - select * from index_stats - union all - select * from table_stats - ) select - schema_name as tag_schema_name, - table_name as tag_table_name, - coalesce(index_name, 'table') as tag_index_name, - stat_type as tag_stat_type, - stats_reset_epoch, - seconds_since_reset - from combined_stats - order by stats_reset desc; + datname as tag_database_name, + stats_reset, + extract(epoch from stats_reset) as stats_reset_epoch, + extract(epoch from now() - stats_reset) as seconds_since_reset + from pg_stat_database + where datname = current_database() + and stats_reset is not null; gauges: - 'stats_reset_epoch' - 'seconds_since_reset' - + statement_timeout_seconds: 15 archive_lag: description: > This metric measures the lag in WAL archive processing. 
@@ -1966,6 +1960,7 @@ metrics: - 'wal_files_behind' - 'current_lsn_numeric' - 'archived_wal_start_lsn_numeric' + statement_timeout_seconds: 15 pg_vacuum_progress: sqls: 11: | @@ -2058,6 +2053,7 @@ metrics: join pg_namespace N on (N.oid = C.relnamespace) gauges: - '*' + statement_timeout_seconds: 15 pg_index_pilot: sqls: 11: | @@ -2079,7 +2075,7 @@ metrics: - is_valid - estimated_tuples - best_ratio - + statement_timeout_seconds: 15 pg_index_pilot_config: sqls: 12: | @@ -2114,11 +2110,12 @@ metrics: order by id; gauges: - config_value_normalized - + statement_timeout_seconds: 15 pg_statio_all_tables: description: > Retrieves table-level I/O statistics from the PostgreSQL `pg_statio_all_tables` view, providing insights into I/O operations for all tables. It returns block-level read and hit statistics for heap, index, TOAST, and TOAST index operations broken down by schema and table. + Joined with pg_class for efficient ordering by table size. This metric helps administrators monitor table-level I/O performance and identify which tables are generating the most I/O activity. Compatible with all PostgreSQL versions. sqls: @@ -2126,23 +2123,53 @@ metrics: select /* pgwatch_generated */ (extract(epoch from now()) * 1e9)::int8 as epoch_ns, current_database() as tag_datname, - schemaname as tag_schemaname, - relname as tag_relname, - heap_blks_read, - heap_blks_hit, - idx_blks_read, - idx_blks_hit, - toast_blks_read, - toast_blks_hit, - tidx_blks_read, - tidx_blks_hit + s.schemaname as tag_schemaname, + s.relname as tag_relname, + s.heap_blks_read, + s.heap_blks_hit, + s.idx_blks_read, + s.idx_blks_hit, + s.toast_blks_read, + s.toast_blks_hit, + s.tidx_blks_read, + s.tidx_blks_hit from - pg_statio_all_tables - order by pg_total_relation_size((schemaname || '.' 
|| relname)::regclass) desc + pg_statio_all_tables as s + join pg_class as c on + /* join on oid: regnamespace text is quoted for non-simple names, so name matching drops rows */ + c.oid = s.relid + order by c.relpages desc limit 5000; gauges: - '*' - + statement_timeout_seconds: 15 + pg_statio_all_indexes: + description: > + Retrieves index-level I/O statistics from the PostgreSQL `pg_statio_all_indexes` view, providing insights into I/O operations for all indexes. + It returns block-level read and hit statistics for index operations broken down by schema, table, and index name. + Joined with pg_class for efficient ordering by index size. + This metric helps administrators monitor index-level I/O performance and identify which indexes are generating the most I/O activity. + Compatible with all PostgreSQL versions. + sqls: + 11: |- + select /* pgwatch_generated */ + (extract(epoch from now()) * 1e9)::int8 as epoch_ns, + current_database() as tag_datname, + s.schemaname as tag_schemaname, + s.relname as tag_relname, + s.indexrelname as tag_indexrelname, + s.idx_blks_read, + s.idx_blks_hit + from + pg_statio_all_indexes as s + join pg_class as c on + /* join on oid: regnamespace text is quoted for non-simple names, so name matching drops rows */ + c.oid = s.indexrelid + order by c.relpages desc + limit 5000; + gauges: + - '*' + statement_timeout_seconds: 15 presets: full: @@ -2162,12 +2189,13 @@ presets: pg_stat_activity: 15 pg_stat_wal_receiver: 30 pg_archiver: 30 - pg_stat_user_tables: 30 + pg_stat_all_tables: 30 pg_class: 30 - pg_stat_user_indexes: 30 + pg_stat_all_indexes: 30 pg_stat_statements: 30 pg_stat_replication: 30 pg_statio_all_tables: 30 + pg_statio_all_indexes: 30 pg_total_relation_size: 30 pg_blocked: 30 pg_long_running_transactions: 30 From 6c97c8e31fe4a6eb0f5061f2e5fe0194b5aeed71 Mon Sep 17 00:00:00 2001 From: "dementii.priadko" <45518657+DEMNERD@users.noreply.github.com> Date: Wed, 29 Oct 2025 22:47:02 +0200 Subject: [PATCH 9/9] Fixed index table --- .../dashboards/Dashboard_10_Index health.json | 412 ++++++++++-------- 1
file changed, 219 insertions(+), 193 deletions(-) diff --git a/config/grafana/dashboards/Dashboard_10_Index health.json b/config/grafana/dashboards/Dashboard_10_Index health.json index 78a45e2..b5bf58f 100644 --- a/config/grafana/dashboards/Dashboard_10_Index health.json +++ b/config/grafana/dashboards/Dashboard_10_Index health.json @@ -18,11 +18,11 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 6, + "id": 1, "links": [], "panels": [ { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -30,202 +30,228 @@ "y": 0 }, "id": 19, - "panels": [], - "title": "Detailed index view", - "type": "row" - }, - { - "datasource": { - "type": "yesoreyeram-infinity-datasource", - "uid": "aerffb0z8rjlsc" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 15, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 24, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true - }, - "pluginVersion": "12.0.2", - "targets": [ + "panels": [ { - "columns": [ - { - "selector": "bloat_pct", - "text": "", - "type": "number" - }, - { - "selector": "bloat_size", - "text": "", - "type": "number" - }, - { - "selector": "fillfactor", - "text": "", - "type": "number" - }, - { - "selector": "idxname", - "text": "", - "type": "string" - }, - { - "selector": "schemaname", - "text": "", - "type": "string" - }, - { - "selector": "tblname", - "text": "", - "type": "string" - }, - { - "selector": "real_size_mib", - "text": "", - "type": "number" - }, - { - "selector": "is_na", - "text": "", - "type": "number" - } - ], "datasource": { "type": 
"yesoreyeram-infinity-datasource", "uid": "aerffb0z8rjlsc" }, - "filterExpression": "is_na<1 && bloat_pct>0", - "filters": [], - "format": "table", - "global_query_id": "", - "parser": "backend", - "refId": "A", - "root_selector": "", - "source": "url", - "type": "csv", - "url": "http://flask-pgss-api:5000/btree_bloat/csv", - "url_options": { - "data": "", - "method": "GET", - "params": [ - { - "key": "cluster_name", - "value": "$cluster_name" + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ { - "key": "node_name", - "value": "$node_name" + "matcher": { + "id": "byName", + "options": "Bloat %" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + } + ] }, { - "key": "db_name", - "value": "$db_name" + "matcher": { + "id": "byName", + "options": "Fillfactor" + }, + "properties": [ + { + "id": "unit" + } + ] } ] - } - } - ], - "title": "", - "transformations": [ - { - "id": "calculateField", + }, + "gridPos": { + "h": 15, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 24, "options": { - "alias": "Index Size", - "binary": { - "left": { - "matcher": { - "id": "byName", - "options": "real_size_mib" + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true + }, + "pluginVersion": "12.0.2", + "targets": [ + { + "columns": [ + { + "selector": "bloat_pct", + "text": "", + "type": "number" + }, + { + "selector": "bloat_size", + "text": "", + "type": "number" + }, + { + "selector": "fillfactor", + "text": "", + "type": "number" + }, + { + "selector": "idxname", + "text": "", + "type": "string" + }, + { + "selector": "schemaname", + "text": "", + "type": "string" + }, + { + 
"selector": "tblname", + "text": "", + "type": "string" + }, + { + "selector": "real_size_mib", + "text": "", + "type": "number" + }, + { + "selector": "is_na", + "text": "", + "type": "number" } + ], + "datasource": { + "type": "yesoreyeram-infinity-datasource", + "uid": "aerffb0z8rjlsc" }, - "operator": "*", - "right": { - "fixed": "1048576" + "filterExpression": "is_na<1 && bloat_pct>0", + "filters": [], + "format": "table", + "global_query_id": "", + "parser": "backend", + "refId": "A", + "root_selector": "", + "source": "url", + "type": "csv", + "url": "http://flask-pgss-api:5000/btree_bloat/csv", + "url_options": { + "data": "", + "method": "GET", + "params": [ + { + "key": "cluster_name", + "value": "$cluster_name" + }, + { + "key": "node_name", + "value": "$node_name" + }, + { + "key": "db_name", + "value": "$db_name" + } + ] } - }, - "mode": "binary", - "reduce": { - "include": [ - "Index Size (mb)" - ], - "reducer": "sum" } - } - }, - { - "id": "organize", - "options": { - "excludeByName": { - "Index Size (mb)": true, - "is_na": true, - "real_size_mib": true - }, - "includeByName": {}, - "indexByName": { - "Index Size": 3, - "bloat_pct": 5, - "bloat_size": 4, - "fillfactor": 6, - "idxname": 2, - "is_na": 7, - "real_size_mib": 8, - "schemaname": 0, - "tblname": 1 + ], + "title": "", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Index Size", + "binary": { + "left": { + "matcher": { + "id": "byName", + "options": "real_size_mib" + } + }, + "operator": "*", + "right": { + "fixed": "1048576" + } + }, + "mode": "binary", + "reduce": { + "include": [ + "Index Size (mb)" + ], + "reducer": "sum" + } + } }, - "renameByName": { - "Schema": "", - "bloat_pct": "Bloat %", - "bloat_size": "Bloat size", - "fillfactor": "Fillfactor", - "idxname": "Index", - "real_size_mib": "", - "schemaname": "Schema", - "tblname": "Table" + { + "id": "organize", + "options": { + "excludeByName": { + "Index Size (mb)": true, + "is_na": true, + 
"real_size_mib": true + }, + "includeByName": {}, + "indexByName": { + "Index Size": 3, + "bloat_pct": 5, + "bloat_size": 4, + "fillfactor": 6, + "idxname": 2, + "is_na": 7, + "real_size_mib": 8, + "schemaname": 0, + "tblname": 1 + }, + "renameByName": { + "Schema": "", + "bloat_pct": "Bloat %", + "bloat_size": "Bloat size", + "fillfactor": "Fillfactor", + "idxname": "Index", + "real_size_mib": "", + "schemaname": "Schema", + "tblname": "Table" + } + } } - } + ], + "type": "table" } ], - "type": "table" + "title": "Detailed index view", + "type": "row" }, { "collapsed": false, @@ -233,7 +259,7 @@ "h": 1, "w": 24, "x": 0, - "y": 16 + "y": 1 }, "id": 18, "panels": [], @@ -256,8 +282,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "barAlignment": 0, "axisSoftMin": 0, + "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, @@ -325,7 +351,7 @@ "h": 12, "w": 24, "x": 0, - "y": 17 + "y": 2 }, "id": 2, "options": { @@ -377,8 +403,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "barAlignment": 0, "axisSoftMin": 0, + "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, @@ -446,7 +472,7 @@ "h": 12, "w": 24, "x": 0, - "y": 29 + "y": 14 }, "id": 22, "options": { @@ -498,8 +524,8 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "barAlignment": 0, "axisSoftMin": 0, + "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, @@ -567,7 +593,7 @@ "h": 12, "w": 24, "x": 0, - "y": 41 + "y": 26 }, "id": 23, "options": { @@ -612,7 +638,7 @@ "h": 3, "w": 24, "x": 0, - "y": 53 + "y": 38 }, "id": 21, "options": { @@ -662,8 +688,8 @@ }, { "current": { - "text": "local", - "value": "local" + "text": "default", + "value": "default" }, "definition": "label_values(pgwatch_settings_configured,cluster)", "label": "Cluster name", @@ -680,8 +706,8 @@ }, { "current": { - "text": "node-01", - "value": "node-01" + "text": "postgres_ai", + "value": 
"postgres_ai" }, "definition": "label_values(pgwatch_settings_configured{cluster=\"$cluster_name\"},node_name)", "label": "Node name", @@ -698,8 +724,8 @@ }, { "current": { - "text": "target_database", - "value": "target_database" + "text": "workloaddb", + "value": "workloaddb" }, "definition": "label_values(pgwatch_pg_database_wraparound_age_datfrozenxid{cluster=\"$cluster_name\", node_name=\"$node_name\", datname!=\"template1\"},datname)", "label": "DB name", @@ -724,5 +750,5 @@ "timezone": "utc", "title": "10. Aggregated index analysis", "uid": "db3b37d1-1540-4f7e-95c9-4082f2ca349e", - "version": 11 + "version": 8 } \ No newline at end of file