Every now and then I’ll run the queries below to help me figure out ways to optimize the queries my application is using, and to figure out if certain tables and views (relations) need to be optimized.
Updated January 18, 2020, to add a query to alter the autovacuum and autoanalyze factors on a per table basis because I have a “cache” table that is updated quite frequently and generates many dead tuples every minute. The table would balloon from 250 MB to 15 GB in a few hours. I hope that reducing these factors to just a threshold of 100 dead tuples will prevent the inflation. I also need to monitor this that it doesn’t make my RDS database work too hard.
/* Show the last time at which every table and view was vacuumed and analyzed */
SELECT relname, last_vacuum, last_autovacuum, last_analyze, last_autoanalyze
FROM pg_stat_all_tables d_d
WHERE schemaname = 'public';
/* See which queries are most popular */
SELECT * FROM pg_stat_statements where calls >=1000; /* replace 1000 with any integer, depending on how active your database is queried */
select pg_stat_statements_reset();
/* Kill a specific query */
select pg_terminate_backend(INTEGER); /* replace INTEGER with the process ID of a specific query */
/* Get the size of the database */
select pg_size_pretty(pg_database_size('DATABASE_NAME')); /* replace DATABASE_NAME with the name of your database */
/* Kill queries that are taking too long; replace DATABASE_NAME with the name of your database */
SELECT query, pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'DATABASE_NAME'
AND pid <> pg_backend_pid()
AND (state = 'active'
OR state = 'idle')
AND state_change < current_timestamp - INTERVAL '15' MINUTE; /* change 15 to any integer that you think means a query is taking too long */
/* See which queries are running */
select * from pg_stat_activity;
SELECT * FROM pg_stat_activity WHERE wait_event IS NOT NULL AND backend_type = 'client backend';
/* Find very large indexes and indexes that aren't used much */
WITH table_scans as (
SELECT relid,
tables.idx_scan + tables.seq_scan as all_scans,
( tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del ) as writes,
pg_relation_size(relid) as table_size
FROM pg_stat_user_tables as tables
),
all_writes as (
SELECT sum(writes) as total_writes
FROM table_scans
),
indexes as (
SELECT idx_stat.relid, idx_stat.indexrelid,
idx_stat.schemaname, idx_stat.relname as tablename,
idx_stat.indexrelname as indexname,
idx_stat.idx_scan,
pg_relation_size(idx_stat.indexrelid) as index_bytes,
indexdef ~* 'USING btree' AS idx_is_btree
FROM pg_stat_user_indexes as idx_stat
JOIN pg_index
USING (indexrelid)
JOIN pg_indexes as indexes
ON idx_stat.schemaname = indexes.schemaname
AND idx_stat.relname = indexes.tablename
AND idx_stat.indexrelname = indexes.indexname
WHERE pg_index.indisunique = FALSE
),
index_ratios AS (
SELECT schemaname, tablename, indexname,
idx_scan, all_scans,
round(( CASE WHEN all_scans = 0 THEN 0.0::NUMERIC
ELSE idx_scan::NUMERIC/all_scans * 100 END),2) as index_scan_pct,
writes,
round((CASE WHEN writes = 0 THEN idx_scan::NUMERIC ELSE idx_scan::NUMERIC/writes END),2)
as scans_per_write,
pg_size_pretty(index_bytes) as index_size,
pg_size_pretty(table_size) as table_size,
idx_is_btree, index_bytes
FROM indexes
JOIN table_scans
USING (relid)
),
index_groups AS (
SELECT 'Never Used Indexes' as reason, *, 1 as grp
FROM index_ratios
WHERE
idx_scan = 0
and idx_is_btree
UNION ALL
SELECT 'Low Scans, High Writes' as reason, *, 2 as grp
FROM index_ratios
WHERE
scans_per_write <= 1
and index_scan_pct < 10
and idx_scan > 0
and writes > 100
and idx_is_btree
UNION ALL
SELECT 'Seldom Used Large Indexes' as reason, *, 3 as grp
FROM index_ratios
WHERE
index_scan_pct < 5
and scans_per_write > 1
and idx_scan > 0
and idx_is_btree
and index_bytes > 100000000
UNION ALL
SELECT 'High-Write Large Non-Btree' as reason, index_ratios.*, 4 as grp
FROM index_ratios, all_writes
WHERE
( writes::NUMERIC / ( total_writes + 1 ) ) > 0.02
AND NOT idx_is_btree
AND index_bytes > 100000000
ORDER BY grp, index_bytes DESC )
SELECT reason, schemaname, tablename, indexname,
index_scan_pct, scans_per_write, index_size, table_size
FROM index_groups;
/* Modify the autovacuum and autoanalyze factors for a single table
https://klotzandrew.com/blog/posgres-per-table-autovacuum-management */
ALTER TABLE cache SET (autovacuum_analyze_scale_factor = 0, autovacuum_analyze_threshold = 100);
WITH raw_data AS (
SELECT
pg_namespace.nspname,
pg_class.relname,
pg_class.oid AS relid,
pg_class.reltuples,
pg_stat_all_tables.n_dead_tup,
pg_stat_all_tables.n_mod_since_analyze,
(SELECT split_part(x, '=', 2) FROM unnest(pg_class.reloptions) q (x) WHERE x ~ '^autovacuum_analyze_scale_factor=' ) as c_analyze_factor,
(SELECT split_part(x, '=', 2) FROM unnest(pg_class.reloptions) q (x) WHERE x ~ '^autovacuum_analyze_threshold=' ) as c_analyze_threshold,
(SELECT split_part(x, '=', 2) FROM unnest(pg_class.reloptions) q (x) WHERE x ~ '^autovacuum_vacuum_scale_factor=' ) as c_vacuum_factor,
(SELECT split_part(x, '=', 2) FROM unnest(pg_class.reloptions) q (x) WHERE x ~ '^autovacuum_vacuum_threshold=' ) as c_vacuum_threshold,
to_char(pg_stat_all_tables.last_vacuum, 'YYYY-MM-DD HH24:MI:SS') as last_vacuum,
to_char(pg_stat_all_tables.last_autovacuum, 'YYYY-MM-DD HH24:MI:SS') as last_autovacuum
FROM
pg_class
JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
LEFT OUTER JOIN pg_stat_all_tables ON pg_class.oid = pg_stat_all_tables.relid
WHERE
n_dead_tup IS NOT NULL
AND nspname NOT IN ('information_schema', 'pg_catalog')
AND nspname NOT LIKE 'pg_toast%'
AND pg_class.relkind = 'r'
), data AS (
SELECT
*,
COALESCE(raw_data.c_analyze_factor, current_setting('autovacuum_analyze_scale_factor'))::float8 AS analyze_factor,
COALESCE(raw_data.c_analyze_threshold, current_setting('autovacuum_analyze_threshold'))::float8 AS analyze_threshold,
COALESCE(raw_data.c_vacuum_factor, current_setting('autovacuum_vacuum_scale_factor'))::float8 AS vacuum_factor,
COALESCE(raw_data.c_vacuum_threshold, current_setting('autovacuum_vacuum_threshold'))::float8 AS vacuum_threshold
FROM raw_data
)
SELECT
relid,
nspname,
relname,
reltuples,
n_dead_tup,
ROUND(reltuples * vacuum_factor + vacuum_threshold) AS v_threshold,
n_mod_since_analyze,
ROUND(reltuples * analyze_factor + analyze_threshold) AS a_threshold,
c_analyze_factor as caf,
c_analyze_threshold as cat,
c_vacuum_factor as cvf,
c_vacuum_threshold as cvt,
analyze_factor as af,
analyze_threshold as at,
vacuum_factor as vf,
vacuum_threshold as vt,
last_vacuum,
last_autovacuum
FROM
data
ORDER BY n_dead_tup DESC;