From 81897ddcdb180a7b21751ff930ef9a3b84029069 Mon Sep 17 00:00:00 2001 From: "Amias.Q.Li" Date: Wed, 8 Jan 2025 08:37:33 +0800 Subject: [PATCH 1/4] replication lag for the MGR --- ...erf_schema_replication_group_member_lag.go | 93 +++++++++++++++++++ ...chema_replication_group_member_lag_test.go | 61 ++++++++++++ 2 files changed, 154 insertions(+) create mode 100644 collector/perf_schema_replication_group_member_lag.go create mode 100644 collector/perf_schema_replication_group_member_lag_test.go diff --git a/collector/perf_schema_replication_group_member_lag.go b/collector/perf_schema_replication_group_member_lag.go new file mode 100644 index 00000000..88d03a9e --- /dev/null +++ b/collector/perf_schema_replication_group_member_lag.go @@ -0,0 +1,93 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package collector + +import ( + "context" + "database/sql" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" +) + +const perfReplicationGroupMemberLagQuery = ` + SELECT IF( + applier_coordinator_status.SERVICE_STATE = 'OFF' + OR conn_status.SERVICE_STATE = 'OFF', + 9999, + IF( + GTID_SUBTRACT(conn_status.LAST_QUEUED_TRANSACTION, + applier_status.LAST_APPLIED_TRANSACTION) = '' + OR UNIX_TIMESTAMP(applier_status.APPLYING_TRANSACTION_IMMEDIATE_COMMIT_TIMESTAMP) = + 0, + 0, + TIME_TO_SEC(TIMEDIFF( + NOW(6), + applier_status.APPLYING_TRANSACTION_IMMEDIATE_COMMIT_TIMESTAMP + )) + ) + ) AS replication_group_member_lag + FROM performance_schema.replication_connection_status AS conn_status + JOIN performance_schema.replication_applier_status_by_worker AS applier_status + ON applier_status.channel_name = conn_status.channel_name + JOIN performance_schema.replication_applier_status_by_coordinator AS applier_coordinator_status + ON applier_coordinator_status.channel_name = conn_status.channel_name + WHERE conn_status.channel_name = 'group_replication_applier' + ORDER BY IF(GTID_SUBTRACT(conn_status.LAST_QUEUED_TRANSACTION, + applier_status.LAST_APPLIED_TRANSACTION) = '' + OR UNIX_TIMESTAMP(applier_status.APPLYING_TRANSACTION_IMMEDIATE_COMMIT_TIMESTAMP) = 0, + '1-IDLE', '0-EXECUTING') ASC, + applier_status.APPLYING_TRANSACTION_IMMEDIATE_COMMIT_TIMESTAMP ASC + LIMIT 1; + ` + +// ScrapePerfReplicationGroupMemberLag collects the `group_replication_applier` channel lag from the `performance_schema` replication status tables. +type ScrapePerfReplicationGroupMemberLag struct{} + +// Name of the Scraper. Should be unique. +func (ScrapePerfReplicationGroupMemberLag) Name() string { + return performanceSchema + ".replication_group_member_lag" +} + +// Help describes the role of the Scraper. +func (ScrapePerfReplicationGroupMemberLag) Help() string { + return "Collect the replication lag according to applier queue from performance_schema group replication tables" +} + +// Version of MySQL from which scraper is available. 
+func (ScrapePerfReplicationGroupMemberLag) Version() float64 { + return 8.0 // the lag query reads columns (e.g. LAST_QUEUED_TRANSACTION) that exist only from MySQL 8.0 +} + +// Scrape runs the lag query and sends the result over the channel as a gauge; when the query returns no rows nothing is sent. +func (ScrapePerfReplicationGroupMemberLag) Scrape(ctx context.Context, instance *instance, ch chan<- prometheus.Metric, logger *slog.Logger) error { + db := instance.getDB() + var lag float64 // signed on purpose: TIMEDIFF goes negative when the commit timestamp is ahead of NOW(6) + err := db.QueryRowContext(ctx, perfReplicationGroupMemberLagQuery).Scan(&lag) + if err != nil { + if err == sql.ErrNoRows { + return nil + } + return err + } + ch <- prometheus.MustNewConstMetric( + prometheus.NewDesc(prometheus.BuildFQName(namespace, performanceSchema, "replication_group_member_lag"), + "Group replication lag in seconds", nil, nil), + prometheus.GaugeValue, lag, + ) + return nil +} + +// check interface +var _ Scraper = ScrapePerfReplicationGroupMemberLag{} diff --git a/collector/perf_schema_replication_group_member_lag_test.go b/collector/perf_schema_replication_group_member_lag_test.go new file mode 100644 index 00000000..aa37b4e1 --- /dev/null +++ b/collector/perf_schema_replication_group_member_lag_test.go @@ -0,0 +1,61 @@ +// Copyright 2020 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package collector + +import ( + "context" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/promslog" + "github.com/smartystreets/goconvey/convey" +) + +func TestScrapePerfReplicationGroupMemberLag(t *testing.T) { + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("error opening a stub database connection: %s", err) + } + defer db.Close() + inst := &instance{db: db} + + columns := []string{"replication_group_member_lag"} + rows := sqlmock.NewRows(columns).AddRow(1) + mock.ExpectQuery(sanitizeQuery(perfReplicationGroupMemberLagQuery)).WillReturnRows(rows) + + ch := make(chan prometheus.Metric) + go func() { // Scrape runs concurrently: ch is unbuffered, so its send blocks until the test receives + if err := (ScrapePerfReplicationGroupMemberLag{}).Scrape(context.Background(), inst, ch, promslog.NewNopLogger()); err != nil { + t.Errorf("error calling function on test: %s", err) + } + close(ch) + }() + + metricExpected := []MetricResult{ + {labels: labelMap{}, value: 1, metricType: dto.MetricType_GAUGE}, + } + + convey.Convey("Metrics comparison", t, func() { + for _, expect := range metricExpected { + got := readMetric(<-ch) + convey.So(got, convey.ShouldResemble, expect) + } + }) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unfulfilled expectations: %s", err) + } +} From 9f24b8645102a0978bf768932700ea67b4c2af13 Mon Sep 17 00:00:00 2001 From: "Amias.Q.Li" Date: Wed, 8 Jan 2025 10:44:51 +0800 Subject: [PATCH 2/4] replication lag will be 99999999 when SERVICE_STATE is off --- collector/perf_schema_replication_group_member_lag.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/collector/perf_schema_replication_group_member_lag.go b/collector/perf_schema_replication_group_member_lag.go index 88d03a9e..a9d6e6d7 100644 --- a/collector/perf_schema_replication_group_member_lag.go +++ b/collector/perf_schema_replication_group_member_lag.go @@ -25,7 +25,7 @@ const 
perfReplicationGroupMemberLagQuery = ` SELECT IF( applier_coordinator_status.SERVICE_STATE = 'OFF' OR conn_status.SERVICE_STATE = 'OFF', - 9999, + 99999999, IF( GTID_SUBTRACT(conn_status.LAST_QUEUED_TRANSACTION, applier_status.LAST_APPLIED_TRANSACTION) = '' From 1d61b84097ec6dc5029c0d7dccca9a55a117b5bc Mon Sep 17 00:00:00 2001 From: "Amias.Q.Li" Date: Wed, 8 Jan 2025 10:46:25 +0800 Subject: [PATCH 3/4] ScrapePerfReplicationGroupMemberLag default false --- mysqld_exporter.go | 1 + 1 file changed, 1 insertion(+) diff --git a/mysqld_exporter.go b/mysqld_exporter.go index d50bb60e..ce771c75 100644 --- a/mysqld_exporter.go +++ b/mysqld_exporter.go @@ -91,6 +91,7 @@ var scrapers = map[collector.Scraper]bool{ collector.ScrapePerfReplicationGroupMembers{}: false, collector.ScrapePerfReplicationGroupMemberStats{}: false, collector.ScrapePerfReplicationApplierStatsByWorker{}: false, + collector.ScrapePerfReplicationGroupMemberLag{}: false, collector.ScrapeSysUserSummary{}: false, collector.ScrapeUserStat{}: false, collector.ScrapeClientStat{}: false, From b5970098e44a06474c0b1d8d3a76f13334857624 Mon Sep 17 00:00:00 2001 From: "Amias.Q.Li" Date: Wed, 8 Jan 2025 10:54:51 +0800 Subject: [PATCH 4/4] update readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 3b962092..926266ec 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,7 @@ collect.perf_schema.tablelocks | 5.6 | C collect.perf_schema.replication_group_members | 5.7 | Collect metrics from performance_schema.replication_group_members. collect.perf_schema.replication_group_member_stats | 5.7 | Collect metrics from performance_schema.replication_group_member_stats. collect.perf_schema.replication_applier_status_by_worker | 5.7 | Collect metrics from performance_schema.replication_applier_status_by_worker. +collect.perf_schema.replication_group_member_lag | 8.0 | Collect group replication member lag from the performance_schema replication status tables. 
collect.slave_status | 5.1 | Collect from SHOW SLAVE STATUS (Enabled by default) collect.slave_hosts | 5.1 | Collect from SHOW SLAVE HOSTS collect.sys.user_summary | 5.7 | Collect metrics from sys.x$user_summary (disabled by default).