From dca452fbe1abf085eca47db8c62ec5b25172fbb8 Mon Sep 17 00:00:00 2001 From: Anthony Landreth Date: Thu, 19 Sep 2024 16:54:14 -0400 Subject: [PATCH 1/3] Adds pg17 files to postgres_exporter --- .../common/pg17/queries_general.yml | 44 ++ .../pg17/queries_pg_stat_statements.yml | 169 +++++++ .../queries_pg_stat_statements_reset_info.yml | 19 + postgres_exporter/common/pg17/setup.sql | 463 ++++++++++++++++++ .../pg17/sysconfig.postgres_exporter_pg17 | 23 + .../sysconfig.postgres_exporter_pg17_per_db | 21 + 6 files changed, 739 insertions(+) create mode 100644 postgres_exporter/common/pg17/queries_general.yml create mode 100644 postgres_exporter/common/pg17/queries_pg_stat_statements.yml create mode 100644 postgres_exporter/common/pg17/queries_pg_stat_statements_reset_info.yml create mode 100644 postgres_exporter/common/pg17/setup.sql create mode 100644 postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17 create mode 100644 postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17_per_db diff --git a/postgres_exporter/common/pg17/queries_general.yml b/postgres_exporter/common/pg17/queries_general.yml new file mode 100644 index 0000000..0565eb1 --- /dev/null +++ b/postgres_exporter/common/pg17/queries_general.yml @@ -0,0 +1,44 @@ +### +# +# Begin File: PG17 queries_general.yml +# +# Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. +# +### + +ccp_stat_bgwriter: + query: "SELECT buffers_clean, maxwritten_clean, buffers_alloc FROM pg_catalog.pg_stat_bgwriter" + metrics: + - buffers_clean: + usage: "GAUGE" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "GAUGE" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_alloc: + usage: "GAUGE" + description: "Number of buffers allocated" + + +ccp_data_checksum_failure: + query: "SELECT datname AS dbname + , checksum_failures AS count + , coalesce(extract(epoch from (clock_timestamp() - checksum_last_failure)), 0) AS time_since_last_failure_seconds + FROM pg_catalog.pg_stat_database;" + metrics: + - dbname: + usage: "LABEL" + description: "Database name" + - count: + usage: "GAUGE" + description: "Total number of checksum failures on this database" + - time_since_last_failure_seconds: + usage: "GAUGE" + description: "Time interval in seconds since the last checksum failure was encountered" + + +### +# +# End File: PG17 queries_general.yml +# +### diff --git a/postgres_exporter/common/pg17/queries_pg_stat_statements.yml b/postgres_exporter/common/pg17/queries_pg_stat_statements.yml new file mode 100644 index 0000000..9f1a827 --- /dev/null +++ b/postgres_exporter/common/pg17/queries_pg_stat_statements.yml @@ -0,0 +1,169 @@ +### +# +# Begin File: PG17 queries_pg_stat_statements.yml +# +# Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. +# +### + +ccp_pg_stat_statements_total: + query: "SELECT pg_get_userbyid(s.userid) as role, + d.datname AS dbname, + sum(s.calls) AS calls_count, + sum(s.total_exec_time) AS exec_time_ms, + avg(s.mean_exec_time) AS mean_exec_time_ms, + sum(s.rows) AS row_count + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d + ON d.oid = s.dbid + GROUP BY 1,2" + metrics: + - role: + usage: "LABEL" + description: "Role that executed the statement" + - dbname: + usage: "LABEL" + description: "Database in which the statement was executed" + - calls_count: + usage: "GAUGE" + description: "Total number of queries run per user/database" + - exec_time_ms: + usage: "GAUGE" + description: "Total runtime of all queries per user/database" + - mean_exec_time_ms: + usage: "GAUGE" + description: "Mean runtime of all queries per user/database" + - row_count: + usage: "GAUGE" + description: "Total rows returned from all queries per user/database" + + +ccp_pg_stat_statements_top_mean: + query: "SELECT pg_get_userbyid(s.userid) as role, + d.datname AS dbname, + s.queryid, + btrim(replace(left(s.query, 40), '\n', '')) AS query, + max(s.mean_exec_time) exec_time_ms + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d + ON d.oid = s.dbid + GROUP BY 1,2,3,4 + ORDER BY 5 DESC + LIMIT #PG_STAT_STATEMENTS_LIMIT#" + metrics: + - role: + usage: "LABEL" + description: "Role that executed the statement" + - dbname: + usage: "LABEL" + description: "Database in which the statement was executed" + - queryid: + usage: "LABEL" + description: "Internal hash code, computed from the statement's parse tree" + - query: + usage: "LABEL" + description: "First 40 characters of query text" + - exec_time_ms: + usage: "GAUGE" + description: "Average query runtime in milliseconds" + + +ccp_pg_stat_statements_top_total: + query: "SELECT pg_get_userbyid(s.userid) as role, + d.datname AS dbname, + s.queryid, + btrim(replace(left(s.query, 40), '\n', '')) AS query, + s.total_exec_time exec_time_ms + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d + ON d.oid = s.dbid + ORDER BY 5 DESC + LIMIT #PG_STAT_STATEMENTS_LIMIT#" + metrics: + - role: + usage: "LABEL" + description: "Role that executed the statement" + - dbname: + usage: "LABEL" + description: "Database in which the statement was executed" + - queryid: + usage: "LABEL" + description: "Internal hash code, computed from the statement's parse tree" + - query: + usage: "LABEL" + description: "First 40 characters of query text" + - exec_time_ms: + usage: "GAUGE" + description: "Total time spent in the statement in milliseconds" + + +ccp_pg_stat_statements_top_max: + query: "SELECT pg_get_userbyid(s.userid) as role, + d.datname AS dbname, + s.queryid, + btrim(replace(left(s.query, 40), '\n', '')) AS query, + s.max_exec_time AS exec_time_ms + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d + ON d.oid = s.dbid + ORDER BY 5 DESC + LIMIT #PG_STAT_STATEMENTS_LIMIT#" + metrics: + - role: + usage: "LABEL" + description: "Role that executed the statement" + - dbname: + usage: "LABEL" + description: "Database in which the statement was executed" + - queryid: + usage: "LABEL" + description: "Internal hash code, computed from the statement's parse tree" + - query: + usage: "LABEL" + description: "First 40 characters of query text" + - exec_time_ms: + usage: "GAUGE" + description: "Maximum time spent in the statement in milliseconds" + + +ccp_pg_stat_statements_top_wal: + query: "SELECT pg_get_userbyid(s.userid) as role, + d.datname AS dbname, + s.queryid, + btrim(replace(left(s.query, 40), '\n', '')) AS query, + s.wal_records AS records, + s.wal_fpi AS fpi, + s.wal_bytes AS bytes + FROM public.pg_stat_statements s + JOIN pg_catalog.pg_database d + ON d.oid = s.dbid + ORDER BY s.wal_bytes DESC + LIMIT #PG_STAT_STATEMENTS_LIMIT#" + metrics: + - role: + usage: "LABEL" + description: "Role that executed the statement" + - dbname: + usage: "LABEL" + description: "Database in which the statement was executed" + - queryid: + usage: "LABEL" + description: "Internal hash code, computed from the statement's parse tree" + - query: + usage: "LABEL" + description: "First 40 characters of query text" + - records: + usage: "GAUGE" + description: "Total number of WAL records generated by the statement" + - fpi: + usage: "GAUGE" + description: "Total number of WAL full page images generated by the statement" + - bytes: + usage: "GAUGE" + description: "Total amount of WAL generated by the statement in bytes" + +### +# +# End File: PG17 queries_pg_stat_statements.yml +# +### diff --git a/postgres_exporter/common/pg17/queries_pg_stat_statements_reset_info.yml b/postgres_exporter/common/pg17/queries_pg_stat_statements_reset_info.yml new file mode 100644 index 0000000..ba78f5e --- /dev/null +++ b/postgres_exporter/common/pg17/queries_pg_stat_statements_reset_info.yml @@ -0,0 +1,19 @@ +### +# +# Begin File: pg_stat_statements_reset_info.yml +# +# Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. +# +### +ccp_pg_stat_statements_reset: + query: "select monitor.pg_stat_statements_reset_info(#PG_STAT_STATEMENTS_THROTTLE_MINUTES#) as time" + metrics: + - time: + usage: "GAUGE" + description: "Epoch time when stats were reset" + +### +# +# End File: pg_stat_statements_reset_info.yml +# +### diff --git a/postgres_exporter/common/pg17/setup.sql b/postgres_exporter/common/pg17/setup.sql new file mode 100644 index 0000000..2527539 --- /dev/null +++ b/postgres_exporter/common/pg17/setup.sql @@ -0,0 +1,463 @@ +-- PG17 pgMonitor Setup +-- +-- Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. +-- + +DO $$ +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'ccp_monitoring') THEN + CREATE ROLE ccp_monitoring WITH LOGIN; + END IF; + + -- The pgmonitor role is required by the pgnodemx extension in PostgreSQL versions 9.5 and 9.6 + -- and should be removed when upgrading to PostgreSQL 10 and above. + IF EXISTS (SELECT 1 FROM pg_roles WHERE rolname = 'pgmonitor') THEN + DROP ROLE pgmonitor; + END IF; +END +$$; + +GRANT pg_monitor to ccp_monitoring; +GRANT pg_execute_server_program TO ccp_monitoring; + +ALTER ROLE ccp_monitoring SET lock_timeout TO '2min'; +ALTER ROLE ccp_monitoring SET jit TO 'off'; + +CREATE SCHEMA IF NOT EXISTS monitor AUTHORIZATION ccp_monitoring; + +DROP TABLE IF EXISTS monitor.pgbackrest_info CASCADE; +CREATE TABLE IF NOT EXISTS monitor.pgbackrest_info (config_file text NOT NULL, data jsonb NOT NULL, gather_timestamp timestamptz DEFAULT now() NOT NULL); +-- Force more aggressive autovacuum to avoid table bloat over time +ALTER TABLE monitor.pgbackrest_info SET (autovacuum_analyze_scale_factor = 0, autovacuum_vacuum_scale_factor = 0, autovacuum_vacuum_threshold = 10, autovacuum_analyze_threshold = 10); + +DROP FUNCTION IF EXISTS monitor.pgbackrest_info(); -- old version from 2.3 +DROP FUNCTION IF EXISTS monitor.pgbackrest_info(int); +CREATE OR REPLACE FUNCTION monitor.pgbackrest_info(p_throttle_minutes int DEFAULT 10) RETURNS SETOF monitor.pgbackrest_info + LANGUAGE plpgsql + SET search_path TO pg_catalog, pg_temp +AS $function$ +DECLARE + +v_gather_timestamp timestamptz; +v_throttle interval; +v_system_identifier bigint; + +BEGIN +-- Get pgBackRest info in JSON format + +v_throttle := make_interval(mins := p_throttle_minutes); + +SELECT COALESCE(max(gather_timestamp), '1970-01-01'::timestamptz) INTO v_gather_timestamp FROM monitor.pgbackrest_info; + +IF pg_catalog.pg_is_in_recovery() = 'f' THEN + IF ((CURRENT_TIMESTAMP - v_gather_timestamp) > v_throttle) THEN + + -- Ensure table is empty + DELETE FROM monitor.pgbackrest_info; + + SELECT system_identifier into v_system_identifier FROM pg_control_system(); + + -- Copy data into the table directory from the pgBackRest into command + EXECUTE format( $cmd$ COPY monitor.pgbackrest_info (config_file, data) FROM program '/usr/bin/pgbackrest-info.sh %s' WITH (format text,DELIMITER '|') $cmd$, v_system_identifier::text ); + + END IF; +END IF; + +RETURN QUERY SELECT * FROM monitor.pgbackrest_info; + +IF NOT FOUND THEN + RAISE EXCEPTION 'No backups being returned from pgbackrest info command'; +END IF; + +END +$function$; + + +DROP FUNCTION IF EXISTS monitor.sequence_status(); +CREATE FUNCTION monitor.sequence_status() RETURNS TABLE (sequence_name text, last_value bigint, slots numeric, used numeric, percent int, cycle boolean, numleft numeric, table_usage text) + LANGUAGE sql SECURITY DEFINER STABLE + SET search_path TO pg_catalog, pg_temp +AS $function$ + +/* + * Provide detailed status information of sequences in the current database + */ + +WITH default_value_sequences AS ( + -- Get sequences defined as default values with related table + -- Note this subquery can be locked/hung by DDL that affects tables with sequences. + -- Use monitor.sequence_exhaustion() to actually monitor for sequences running out + SELECT s.seqrelid, c.oid + FROM pg_catalog.pg_attribute a + JOIN pg_catalog.pg_attrdef ad on (ad.adrelid,ad.adnum) = (a.attrelid,a.attnum) + JOIN pg_catalog.pg_class c on a.attrelid = c.oid + JOIN pg_catalog.pg_sequence s ON s.seqrelid = regexp_replace(pg_get_expr(ad.adbin,ad.adrelid), $re$^nextval\('(.+?)'::regclass\)$$re$, $re$\1$re$)::regclass + WHERE (pg_get_expr(ad.adbin,ad.adrelid)) ~ '^nextval\(' +), dep_sequences AS ( + -- Get sequences set as dependencies with related tables (identities) + SELECT s.seqrelid, c.oid + FROM pg_catalog.pg_sequence s + JOIN pg_catalog.pg_depend d ON s.seqrelid = d.objid + JOIN pg_catalog.pg_class c ON d.refobjid = c.oid + UNION + SELECT seqrelid, oid FROM default_value_sequences +), all_sequences AS ( + -- Get any remaining sequences + SELECT s.seqrelid AS sequence_oid, ds.oid AS table_oid + FROM pg_catalog.pg_sequence s + LEFT JOIN dep_sequences ds ON s.seqrelid = ds.seqrelid +) +SELECT sequence_name + , last_value + , slots + , used + , ROUND(used/slots*100)::int AS percent + , cycle + , CASE WHEN slots < used THEN 0 ELSE slots - used END AS numleft + , table_usage +FROM ( + SELECT format('%I.%I',s.schemaname, s.sequencename)::text AS sequence_name + , COALESCE(s.last_value,s.min_value) AS last_value + , s.cycle + , CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots + , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used + , string_agg(a.table_oid::regclass::text, ', ') AS table_usage + FROM pg_catalog.pg_sequences s + JOIN all_sequences a ON (format('%I.%I', s.schemaname, s.sequencename))::regclass = a.sequence_oid + GROUP BY 1,2,3,4,5 +) x +ORDER BY ROUND(used/slots*100) DESC + +$function$; + + +DROP FUNCTION IF EXISTS monitor.sequence_exhaustion(int); +CREATE FUNCTION monitor.sequence_exhaustion(p_percent integer DEFAULT 75, OUT count bigint) + LANGUAGE sql SECURITY DEFINER STABLE + SET search_path TO pg_catalog, pg_temp +AS $function$ + +/* + * Returns count of sequences that have used up the % value given via the p_percent parameter (default 75%) + */ + +SELECT count(*) AS count +FROM ( + SELECT CEIL((s.max_value-min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS slots + , CEIL((COALESCE(s.last_value,s.min_value)-s.min_value::NUMERIC+1)/s.increment_by::NUMERIC) AS used + FROM pg_catalog.pg_sequences s +) x +WHERE (ROUND(used/slots*100)::int) > p_percent; + +$function$; + +/* + * Tables and functions for monitoring changes to pg_settings and pg_hba_file_rules system catalogs. + * Can't just do a raw check for the hash value since Prometheus only records numeric values for alerts + * Tables allow recording of existing settings so they can be referred back to to see what changed + * If either checksum function returns 0, then NO settings have changed + * If either checksum function returns 1, then something has changed since last known valid state + * For replicas, logging past settings is not possible to compare what may have changed + * For replicas, by default, it is expected that its settings will match the primary + * For replicas, if the pg_settings or pg_hba.conf are necessarily different from the primary, a known good hash of that replica's + settings can be sent as an argument to the relevant checksum function. Views are provided to easily obtain the hash values used by this monitoring tool. + * If any known hash parameters are passed to the checksum functions, note that it will override any past hash values stored in the log table when doing comparisons and completely re-evaluate the entire state. This is true even if done on a primary where the current state will then also be logged for comparison if it differs from the given hash. + */ + +DROP TABLE IF EXISTS monitor.pg_settings_checksum; +DROP TABLE IF EXISTS monitor.pg_hba_checksum; + +CREATE TABLE monitor.pg_settings_checksum ( + settings_hash_generated text NOT NULL + , settings_hash_known_provided text + , settings_string text NOT NULL + , created_at timestamptz DEFAULT now() NOT NULL + , valid smallint NOT NULL ); + +COMMENT ON COLUMN monitor.pg_settings_checksum.valid IS 'Set this column to zero if this group of settings is a valid change'; +CREATE INDEX ON monitor.pg_settings_checksum (created_at); + +CREATE TABLE monitor.pg_hba_checksum ( + hba_hash_generated text NOT NULL + , hba_hash_known_provided text + , hba_string text NOT NULL + , created_at timestamptz DEFAULT now() NOT NULL + , valid smallint NOT NULL ); + +COMMENT ON COLUMN monitor.pg_hba_checksum.valid IS 'Set this column to zero if this group of settings is a valid change'; +CREATE INDEX ON monitor.pg_hba_checksum (created_at); + + +DROP FUNCTION IF EXISTS monitor.pg_settings_checksum(text); +CREATE FUNCTION monitor.pg_settings_checksum(p_known_settings_hash text DEFAULT NULL) + RETURNS smallint + LANGUAGE plpgsql SECURITY DEFINER + SET search_path TO pg_catalog, pg_temp +AS $function$ +DECLARE + +v_is_in_recovery boolean; +v_settings_hash text; +v_settings_hash_old text; +v_settings_match smallint := 0; +v_settings_string text; +v_settings_string_old text; +v_valid smallint; + +BEGIN + +SELECT pg_is_in_recovery() INTO v_is_in_recovery; + +SELECT md5_hash + , settings_string +INTO v_settings_hash + , v_settings_string +FROM monitor.pg_settings_hash; + +SELECT settings_hash_generated, valid +INTO v_settings_hash_old, v_valid +FROM monitor.pg_settings_checksum +ORDER BY created_at DESC LIMIT 1; + +IF p_known_settings_hash IS NOT NULL THEN + v_settings_hash_old := p_known_settings_hash; + -- Do not base validity on the stored value if manual hash is given. + v_valid := 0; +END IF; + +IF (v_settings_hash_old IS NOT NULL) THEN + + IF (v_settings_hash != v_settings_hash_old) THEN + + v_valid := 1; + + IF v_is_in_recovery = false THEN + INSERT INTO monitor.pg_settings_checksum ( + settings_hash_generated + , settings_hash_known_provided + , settings_string + , valid) + VALUES ( + v_settings_hash + , p_known_settings_hash + , v_settings_string + , v_valid); + END IF; + END IF; + +ELSE + + v_valid := 0; + IF v_is_in_recovery = false THEN + INSERT INTO monitor.pg_settings_checksum ( + settings_hash_generated + , settings_hash_known_provided + , settings_string + , valid) + VALUES (v_settings_hash + , p_known_settings_hash + , v_settings_string + , v_valid); + END IF; + +END IF; + +RETURN v_valid; + +END +$function$; + + +DROP FUNCTION IF EXISTS monitor.pg_hba_checksum(text); +CREATE FUNCTION monitor.pg_hba_checksum(p_known_hba_hash text DEFAULT NULL) + RETURNS smallint + LANGUAGE plpgsql SECURITY DEFINER + SET search_path TO pg_catalog, pg_temp +AS $function$ +DECLARE + +v_hba_hash text; +v_hba_hash_old text; +v_hba_match smallint := 0; +v_hba_string text; +v_hba_string_old text; +v_is_in_recovery boolean; +v_valid smallint; + +BEGIN + +SELECT pg_is_in_recovery() INTO v_is_in_recovery; + +IF current_setting('server_version_num')::int >= 100000 THEN + + SELECT md5_hash + , hba_string + INTO v_hba_hash + , v_hba_string + FROM monitor.pg_hba_hash; + +ELSE + RAISE EXCEPTION 'pg_hba change monitoring unsupported in versions older than PostgreSQL 10'; +END IF; + +SELECT hba_hash_generated, valid +INTO v_hba_hash_old, v_valid +FROM monitor.pg_hba_checksum +ORDER BY created_at DESC LIMIT 1; + +IF p_known_hba_hash IS NOT NULL THEN + v_hba_hash_old := p_known_hba_hash; + -- Do not base validity on the stored value if manual hash is given. + v_valid := 0; +END IF; + +IF (v_hba_hash_old IS NOT NULL) THEN + + IF (v_hba_hash != v_hba_hash_old) THEN + + v_valid := 1; + + IF v_is_in_recovery = false THEN + INSERT INTO monitor.pg_hba_checksum ( + hba_hash_generated + , hba_hash_known_provided + , hba_string + , valid) + VALUES ( + v_hba_hash + , p_known_hba_hash + , v_hba_string + , v_valid); + END IF; + END IF; + +ELSE + + v_valid := 0; + IF v_is_in_recovery = false THEN + INSERT INTO monitor.pg_hba_checksum ( + hba_hash_generated + , hba_hash_known_provided + , hba_string + , valid) + VALUES (v_hba_hash + , p_known_hba_hash + , v_hba_string + , v_valid); + END IF; + +END IF; + +RETURN v_valid; + +END +$function$; + + +DROP FUNCTION IF EXISTS monitor.pg_settings_checksum_set_valid(); +/* + * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. + */ +CREATE FUNCTION monitor.pg_settings_checksum_set_valid() RETURNS smallint + LANGUAGE sql +AS $function$ + +TRUNCATE monitor.pg_settings_checksum; + +SELECT monitor.pg_settings_checksum(); + +$function$; + + +DROP FUNCTION IF EXISTS monitor.pg_hba_checksum_set_valid(); +/* + * This function provides quick, clear interface for resetting the checksum monitor to treat the currently detected configuration as valid after alerting on a change. Note that configuration history will be cleared. + */ +CREATE FUNCTION monitor.pg_hba_checksum_set_valid() RETURNS smallint + LANGUAGE sql +AS $function$ + +TRUNCATE monitor.pg_hba_checksum; + +SELECT monitor.pg_hba_checksum(); + +$function$; + + +DROP VIEW IF EXISTS monitor.pg_settings_hash; +CREATE VIEW monitor.pg_settings_hash AS + WITH settings_ordered_list AS ( + SELECT name + , COALESCE(setting, '<>') AS setting + FROM pg_catalog.pg_settings + ORDER BY name, setting) + SELECT md5(string_agg(name||setting, ',')) AS md5_hash + , string_agg(name||setting, ',') AS settings_string + FROM settings_ordered_list; + + +DROP VIEW IF EXISTS monitor.pg_hba_hash; +CREATE VIEW monitor.pg_hba_hash AS + -- Order by line number so it's caught if no content is changed but the order of entries is changed + WITH hba_ordered_list AS ( + SELECT COALESCE(type, '<>') AS type + , array_to_string(COALESCE(database, ARRAY['<>']), ',') AS database + , array_to_string(COALESCE(user_name, ARRAY['<>']), ',') AS user_name + , COALESCE(address, '<>') AS address + , COALESCE(netmask, '<>') AS netmask + , COALESCE(auth_method, '<>') AS auth_method + , array_to_string(COALESCE(options, ARRAY['<>']), ',') AS options + FROM pg_catalog.pg_hba_file_rules + ORDER BY line_number) + SELECT md5(string_agg(type||database||user_name||address||netmask||auth_method||options, ',')) AS md5_hash + , string_agg(type||database||user_name||address||netmask||auth_method||options, ',') AS hba_string + FROM hba_ordered_list; + + + +DROP TABLE IF EXISTS monitor.pg_stat_statements_reset_info; +-- Table to store last reset time for pg_stat_statements +CREATE TABLE monitor.pg_stat_statements_reset_info( + reset_time timestamptz +); + +DROP FUNCTION IF EXISTS monitor.pg_stat_statements_reset_info(int); +-- Function to reset pg_stat_statements periodically +CREATE FUNCTION monitor.pg_stat_statements_reset_info(p_throttle_minutes integer DEFAULT 1440) + RETURNS bigint + LANGUAGE plpgsql + SECURITY DEFINER + SET search_path TO pg_catalog, pg_temp +AS $function$ +DECLARE + + v_reset_timestamp timestamptz; + v_throttle interval; + +BEGIN + + IF p_throttle_minutes < 0 THEN + RETURN 0; + END IF; + + v_throttle := make_interval(mins := p_throttle_minutes); + + SELECT COALESCE(max(reset_time), '1970-01-01'::timestamptz) INTO v_reset_timestamp FROM monitor.pg_stat_statements_reset_info; + + IF ((CURRENT_TIMESTAMP - v_reset_timestamp) > v_throttle) THEN + -- Ensure table is empty + DELETE FROM monitor.pg_stat_statements_reset_info; + PERFORM pg_stat_statements_reset(); + INSERT INTO monitor.pg_stat_statements_reset_info(reset_time) values (now()); + END IF; + + RETURN (SELECT extract(epoch from reset_time) FROM monitor.pg_stat_statements_reset_info); + +EXCEPTION + WHEN others then + RETURN 0; +END +$function$; + +GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA monitor TO ccp_monitoring; +GRANT ALL ON ALL TABLES IN SCHEMA monitor TO ccp_monitoring; diff --git a/postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17 b/postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17 new file mode 100644 index 0000000..95c8157 --- /dev/null +++ b/postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17 @@ -0,0 +1,23 @@ +### +# +# Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. +# +### + +# This file must be in a pathname that matches the EnvironmentFile entry in the service file (Default: /etc/sysconfig/postgres_exporter) + +# --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. +# --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. +# DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. + +PGBACKREST_INFO_THROTTLE_MINUTES=10 +PG_STAT_STATEMENTS_LIMIT=20 +PG_STAT_STATEMENTS_THROTTLE_MINUTES=-1 +OPT="--web.listen-address=0.0.0.0:9187 --extend.query-path=/etc/postgres_exporter/17/queries.yml --disable-default-metrics --disable-settings-metrics" +DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_monitoring&sslmode=disable" + +# For global / cluster metric +QUERY_FILE_LIST="/etc/postgres_exporter/17/queries_global.yml /etc/postgres_exporter/17/queries_general.yml /etc/postgres_exporter/17/queries_global_dbsize.yml" + +# Recommend running separate exporter service to collect per-db metrics, even if there is only one database in the instance. Allows easier expansion to support multiple databases at a later time. See postgres_exporter_pg##_per_db file. diff --git a/postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17_per_db b/postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17_per_db new file mode 100644 index 0000000..1af2864 --- /dev/null +++ b/postgres_exporter/linux/pg17/sysconfig.postgres_exporter_pg17_per_db @@ -0,0 +1,21 @@ +### +# +# Copyright © 2017-2024 Crunchy Data Solutions, Inc. All Rights Reserved. +# +### + +# This file must be in a pathname that matches the EnvironmentFile entry in the service file (Default: /etc/sysconfig/postgres_exporter) + +# --web.listen-address: change '0.0.0.0' to the network IP assigned to this system if necessary, otherwise it will listen on any IP. Change port as necessary if running multiple instances. +# --extend.query-path: location of file containing custom queries to run. Location below is one recommended by crunchy setup steps. +# DATA_SOURCE_NAME: psql connection string. set the database that the exporter will connect to. Default is `postgres`. +# QUERY_FILE_LIST: space delimited yml files that will be concatenated to a single queries.yml file. This should only be set once. If set multiple times, last one wins. + +# Ensure that listening port is different than global postgres_exporter (9188 below). Also ensure that query-path file is different from global one (queries_all_db_stats.yml below). +OPT="--web.listen-address=0.0.0.0:9188 --extend.query-path=/etc/postgres_exporter/17/queries_all_db_stats.yml --disable-default-metrics --disable-settings-metrics" +DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_monitoring&sslmode=disable" +# Multiple DSN's can be given to connect to multiple databases with a single exporter. Separate DSNs with a comma and no spaces. Database name in example below is first the value after the "///" before the "?". Additional connection options can be given after "?", separated by "&". +#DATA_SOURCE_NAME="postgresql:///postgres?host=/var/run/postgresql/&user=ccp_monitoring&sslmode=disable,postgresql:///mydb1?host=/var/run/postgresql/&user=ccp_monitoring&sslmode=disable,postgresql:///mydb2?host=/var/run/postgresql/&user=ccp_monitoring&sslmode=disable" + +# Ex: For per db metric. DO NOT add queries that return the same labels/values on all databases otherwise the exporter will throw errors. The queries that pgmonitor provides for per-db metrics all include a "dbname" label to distinguish them. +QUERY_FILE_LIST="/etc/postgres_exporter/17/queries_per_db.yml" From 970d6f0a6a083157f89420dc9f1896169c5eec98 Mon Sep 17 00:00:00 2001 From: Anthony Landreth Date: Mon, 23 Sep 2024 13:00:12 -0400 Subject: [PATCH 2/3] Adds a changelog fragment. --- changelogs/fragments/435.yml | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelogs/fragments/435.yml diff --git a/changelogs/fragments/435.yml b/changelogs/fragments/435.yml new file mode 100644 index 0000000..b25b557 --- /dev/null +++ b/changelogs/fragments/435.yml @@ -0,0 +1,3 @@ +--- +minor_changes: + - postgres_exporter - Files to help transition off of postgres_exporter. From eea204413fcd3b4b727516cc2aacd2ddf845e867 Mon Sep 17 00:00:00 2001 From: Keith Fiske Date: Wed, 2 Oct 2024 16:44:05 -0400 Subject: [PATCH 3/3] fix: move bgwriter query from global to versioned general for postgres_exporter --- .../common/pg11/queries_general.yml | 38 ++++++++++++++++++- .../common/pg12/queries_general.yml | 36 ++++++++++++++++++ .../common/pg13/queries_general.yml | 36 ++++++++++++++++++ .../common/pg14/queries_general.yml | 36 ++++++++++++++++++ .../common/pg15/queries_general.yml | 37 ++++++++++++++++++ .../common/pg16/queries_general.yml | 37 ++++++++++++++++++ postgres_exporter/common/queries_global.yml | 38 ------------------- 7 files changed, 218 insertions(+), 40 deletions(-) diff --git a/postgres_exporter/common/pg11/queries_general.yml b/postgres_exporter/common/pg11/queries_general.yml index fc6e380..ec2458c 100644 --- a/postgres_exporter/common/pg11/queries_general.yml +++ b/postgres_exporter/common/pg11/queries_general.yml @@ -6,8 +6,42 @@ # ### -# There are currently no PG11 only queries that are not already in queries_global.yml -# This file is kept to allow consistent setting of QUERY_FILE_LIST in sysconfig with future PG versions +ccp_stat_bgwriter: + query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset FROM pg_catalog.pg_stat_bgwriter" + metrics: + - checkpoints_timed: + usage: "GAUGE" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "GAUGE" + description: "Number of requested checkpoints that have been performed" + - checkpoint_write_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" + - checkpoint_sync_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" + - buffers_checkpoint: + usage: "GAUGE" + description: "Number of buffers written during checkpoints" + - buffers_clean: + usage: "GAUGE" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "GAUGE" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_backend: + usage: "GAUGE" + description: "Number of buffers written directly by a backend" + - buffers_backend_fsync: + usage: "GAUGE" + description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" + - buffers_alloc: + usage: "GAUGE" + description: "Number of buffers allocated" + - stats_reset: + usage: "GAUGE" + description: "Time at which these statistics were last reset" ### # diff --git a/postgres_exporter/common/pg12/queries_general.yml b/postgres_exporter/common/pg12/queries_general.yml index fbebd80..fb2a9d4 100644 --- a/postgres_exporter/common/pg12/queries_general.yml +++ b/postgres_exporter/common/pg12/queries_general.yml @@ -6,6 +6,42 @@ # ### +ccp_stat_bgwriter: + query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset FROM pg_catalog.pg_stat_bgwriter" + metrics: + - checkpoints_timed: + usage: "GAUGE" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "GAUGE" + description: "Number of requested checkpoints that have been performed" + - checkpoint_write_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" + - checkpoint_sync_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" + - buffers_checkpoint: + usage: "GAUGE" + description: "Number of buffers written during checkpoints" + - buffers_clean: + usage: "GAUGE" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "GAUGE" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_backend: + usage: "GAUGE" + description: "Number of buffers written directly by a backend" + - buffers_backend_fsync: + usage: "GAUGE" + description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" + - buffers_alloc: + usage: "GAUGE" + description: "Number of buffers allocated" + - stats_reset: + usage: "GAUGE" + description: "Time at which these statistics were last reset" ccp_data_checksum_failure: query: "SELECT datname AS dbname diff --git a/postgres_exporter/common/pg13/queries_general.yml b/postgres_exporter/common/pg13/queries_general.yml index f5d67f8..5874da6 100644 --- a/postgres_exporter/common/pg13/queries_general.yml +++ b/postgres_exporter/common/pg13/queries_general.yml @@ -6,6 +6,42 @@ # ### +ccp_stat_bgwriter: + query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset FROM pg_catalog.pg_stat_bgwriter" + metrics: + - checkpoints_timed: + usage: "GAUGE" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "GAUGE" + description: "Number of requested checkpoints that have been performed" + - checkpoint_write_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" + - checkpoint_sync_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" + - buffers_checkpoint: + usage: "GAUGE" + description: "Number of buffers written during checkpoints" + - buffers_clean: + usage: "GAUGE" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "GAUGE" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_backend: + usage: "GAUGE" + description: "Number of buffers written directly by a backend" + - buffers_backend_fsync: + usage: "GAUGE" + description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" + - buffers_alloc: + usage: "GAUGE" + description: "Number of buffers allocated" + - stats_reset: + usage: "GAUGE" + description: "Time at which these statistics were last reset" ccp_data_checksum_failure: query: "SELECT datname AS dbname diff --git a/postgres_exporter/common/pg14/queries_general.yml b/postgres_exporter/common/pg14/queries_general.yml index 1c20f66..dae4c86 100644 --- a/postgres_exporter/common/pg14/queries_general.yml +++ b/postgres_exporter/common/pg14/queries_general.yml @@ -6,6 +6,42 @@ # ### +ccp_stat_bgwriter: + query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset FROM pg_catalog.pg_stat_bgwriter" + metrics: + - checkpoints_timed: + usage: "GAUGE" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "GAUGE" + description: "Number of requested checkpoints that have been performed" + - checkpoint_write_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" + - checkpoint_sync_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" + - buffers_checkpoint: + usage: "GAUGE" + description: "Number of buffers written during checkpoints" + - buffers_clean: + usage: "GAUGE" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "GAUGE" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_backend: + usage: "GAUGE" + description: "Number of buffers written directly by a backend" + - buffers_backend_fsync: + usage: "GAUGE" + description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" + - buffers_alloc: + usage: "GAUGE" + description: "Number of buffers allocated" + - stats_reset: + usage: "GAUGE" + description: "Time at which these statistics were last reset" ccp_data_checksum_failure: query: "SELECT datname AS dbname diff --git a/postgres_exporter/common/pg15/queries_general.yml b/postgres_exporter/common/pg15/queries_general.yml index c39f5ec..353f88c 100644 --- a/postgres_exporter/common/pg15/queries_general.yml +++ b/postgres_exporter/common/pg15/queries_general.yml @@ -6,6 +6,43 @@ # ### +ccp_stat_bgwriter: + query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset FROM pg_catalog.pg_stat_bgwriter" + metrics: + - checkpoints_timed: + usage: "GAUGE" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "GAUGE" + description: "Number of requested checkpoints that have been performed" + - checkpoint_write_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" + - checkpoint_sync_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" + - buffers_checkpoint: + usage: "GAUGE" + description: "Number of buffers written during checkpoints" + - buffers_clean: + usage: "GAUGE" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "GAUGE" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_backend: + usage: "GAUGE" + description: "Number of buffers written directly by a backend" + - buffers_backend_fsync: + usage: "GAUGE" + description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" + - buffers_alloc: + usage: "GAUGE" + description: "Number of buffers allocated" + - stats_reset: + usage: "GAUGE" + description: "Time at which these statistics were last reset" + ccp_data_checksum_failure: query: "SELECT datname AS dbname diff --git a/postgres_exporter/common/pg16/queries_general.yml b/postgres_exporter/common/pg16/queries_general.yml index f0c98b6..5dbe64b 100644 --- a/postgres_exporter/common/pg16/queries_general.yml +++ b/postgres_exporter/common/pg16/queries_general.yml @@ -6,6 +6,43 @@ # ### +ccp_stat_bgwriter: + query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset FROM pg_catalog.pg_stat_bgwriter" + metrics: + - checkpoints_timed: + usage: "GAUGE" + description: "Number of scheduled checkpoints that have been performed" + - checkpoints_req: + usage: "GAUGE" + description: "Number of requested checkpoints that have been performed" + - checkpoint_write_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" + - checkpoint_sync_time: + usage: "GAUGE" + description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" + - buffers_checkpoint: + usage: "GAUGE" + description: "Number of buffers written during checkpoints" + - buffers_clean: + usage: "GAUGE" + description: "Number of buffers written by the background writer" + - maxwritten_clean: + usage: "GAUGE" + description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" + - buffers_backend: + usage: "GAUGE" + description: "Number of buffers written directly by a backend" + - buffers_backend_fsync: + usage: "GAUGE" + description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" + - buffers_alloc: + usage: "GAUGE" + description: "Number of buffers allocated" + - stats_reset: + usage: "GAUGE" + description: "Time at which these statistics were last reset" + ccp_data_checksum_failure: query: "SELECT datname AS dbname diff --git a/postgres_exporter/common/queries_global.yml b/postgres_exporter/common/queries_global.yml index 2e1ca55..11c084c 100644 --- a/postgres_exporter/common/queries_global.yml +++ b/postgres_exporter/common/queries_global.yml @@ -75,44 +75,6 @@ ccp_settings_gauge: description: "Size of shared_buffers in bytes" -ccp_stat_bgwriter: - query: "SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc, stats_reset FROM pg_catalog.pg_stat_bgwriter" - metrics: - - checkpoints_timed: - usage: "GAUGE" - description: "Number of scheduled checkpoints that have been performed" - - checkpoints_req: - usage: "GAUGE" - description: "Number of requested checkpoints that have been performed" - - checkpoint_write_time: - usage: "GAUGE" - description: "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds" - - checkpoint_sync_time: - usage: "GAUGE" - description: "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds" - - buffers_checkpoint: - usage: "GAUGE" - description: "Number of buffers written during checkpoints" - - buffers_clean: - usage: "GAUGE" - description: "Number of buffers written by the background writer" - - maxwritten_clean: - usage: "GAUGE" - description: "Number of times the background writer stopped a cleaning scan because it had written too many buffers" - - buffers_backend: - usage: "GAUGE" - description: "Number of buffers written directly by a backend" - - buffers_backend_fsync: - usage: "GAUGE" - description: "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)" - - buffers_alloc: - usage: "GAUGE" - description: "Number of buffers allocated" - - stats_reset: - usage: "GAUGE" - description: "Time at which these statistics were last reset" - - ccp_stat_database: query: "SELECT s.datname as dbname, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks FROM pg_catalog.pg_stat_database s JOIN pg_catalog.pg_database d on d.datname = s.datname WHERE d.datistemplate = false" metrics: