diff --git a/docs/generated/metrics/metrics.html b/docs/generated/metrics/metrics.html
index a344c7ceac0..a918b13c95e 100644
--- a/docs/generated/metrics/metrics.html
+++ b/docs/generated/metrics/metrics.html
@@ -1623,6 +1623,7 @@
APPLICATION | rpc.connection.unhealthy_nanos | Gauge of nanoseconds of unhealthy connection time.
On the prometheus endpoint scraped with the cluster setting 'server.child_metrics.enabled' set, the constituent parts of this metric are available on a per-peer basis and one can read off for how long a given peer has been unreachable | Nanoseconds | GAUGE | NANOSECONDS | AVG | NONE |
APPLICATION | schedules.BACKUP.failed | Number of BACKUP jobs failed | Jobs | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
APPLICATION | schedules.BACKUP.last-completed-time | The unix timestamp of the most recently completed backup by a schedule specified as maintaining this metric | Jobs | GAUGE | TIMESTAMP_SEC | AVG | NONE |
+APPLICATION | schedules.BACKUP.last-completed-time-by-virtual_cluster | The unix timestamp of the most recently completed backup of each virtual cluster, by a host-cluster schedule specified as maintaining this metric | Jobs | GAUGE | TIMESTAMP_SEC | AVG | NONE |
APPLICATION | schedules.BACKUP.protected_age_sec | The age of the oldest PTS record protected by BACKUP schedules | Seconds | GAUGE | SECONDS | AVG | NONE |
APPLICATION | schedules.BACKUP.protected_record_count | Number of PTS records held by BACKUP schedules | Records | GAUGE | COUNT | AVG | NONE |
APPLICATION | schedules.BACKUP.started | Number of BACKUP jobs started | Jobs | COUNTER | COUNT | AVG | NON_NEGATIVE_DERIVATIVE |
diff --git a/pkg/backup/BUILD.bazel b/pkg/backup/BUILD.bazel
index 74ecc017e42..ebd243e860a 100644
--- a/pkg/backup/BUILD.bazel
+++ b/pkg/backup/BUILD.bazel
@@ -195,6 +195,7 @@ go_test(
"restore_span_covering_test.go",
"restore_test.go",
"revision_reader_test.go",
+ "schedule_exec_test.go",
"schedule_pts_chaining_test.go",
"show_test.go",
"system_schema_test.go",
@@ -314,6 +315,7 @@ go_test(
"//pkg/util/log",
"//pkg/util/log/eventpb",
"//pkg/util/log/logpb",
+ "//pkg/util/metric",
"//pkg/util/mon",
"//pkg/util/protoutil",
"//pkg/util/randutil",
diff --git a/pkg/backup/schedule_exec.go b/pkg/backup/schedule_exec.go
index 72ca4995983..6a15412ae29 100644
--- a/pkg/backup/schedule_exec.go
+++ b/pkg/backup/schedule_exec.go
@@ -37,7 +37,8 @@ type backupMetrics struct {
*jobs.ExecutorPTSMetrics
// TODO(rui): move this to the backup job so it can be controlled by the
// updates_cluster_monitoring_metrics option.
- RpoMetric *metric.Gauge
+ RpoMetric *metric.Gauge
+ RpoTenantMetric *metric.GaugeVec
}
var _ metric.Struct = &backupMetrics{}
@@ -360,6 +361,12 @@ func (e *scheduledBackupExecutor) backupSucceeded(
// for monitoring an RPO SLA, update that metric.
if args.UpdatesLastBackupMetric {
e.metrics.RpoMetric.Update(details.(jobspb.BackupDetails).EndTime.GoTime().Unix())
+ // Also update the per-virtual-cluster RPO metric for backups that cover
+ // specific virtual clusters; ranging over a nil slice is a no-op.
+ for _, tenantID := range details.(jobspb.BackupDetails).SpecificTenantIds {
+ e.metrics.RpoTenantMetric.Update(map[string]string{"tenant_id": tenantID.String()},
+ details.(jobspb.BackupDetails).EndTime.GoTime().Unix())
+ }
}
if args.UnpauseOnSuccess == jobspb.InvalidScheduleID {
@@ -578,6 +585,12 @@ func init() {
Measurement: "Jobs",
Unit: metric.Unit_TIMESTAMP_SEC,
}),
+ RpoTenantMetric: metric.NewExportedGaugeVec(metric.Metadata{
+ Name: "schedules.BACKUP.last-completed-time-by-virtual_cluster",
+ Help: "The unix timestamp of the most recently completed host scheduled backup by virtual cluster specified as maintaining this metric",
+ Measurement: "Jobs",
+ Unit: metric.Unit_TIMESTAMP_SEC,
+ }, []string{"tenant_id"}),
},
}, nil
})
diff --git a/pkg/backup/schedule_exec_test.go b/pkg/backup/schedule_exec_test.go
new file mode 100644
index 00000000000..91ec69188b6
--- /dev/null
+++ b/pkg/backup/schedule_exec_test.go
@@ -0,0 +1,105 @@
+// Copyright 2025 The Cockroach Authors.
+//
+// Use of this software is governed by the CockroachDB Software License
+// included in the /LICENSE file.
+
+package backup
+
+import (
+ "context"
+ "testing"
+
+ "github.com/cockroachdb/cockroach/pkg/backup/backuppb"
+ "github.com/cockroachdb/cockroach/pkg/jobs"
+ "github.com/cockroachdb/cockroach/pkg/jobs/jobspb"
+ "github.com/cockroachdb/cockroach/pkg/roachpb"
+ "github.com/cockroachdb/cockroach/pkg/util/hlc"
+ "github.com/cockroachdb/cockroach/pkg/util/leaktest"
+ "github.com/cockroachdb/cockroach/pkg/util/metric"
+ pbtypes "github.com/gogo/protobuf/types"
+ "github.com/stretchr/testify/require"
+)
+
+func TestBackupSucceededUpdatesMetrics(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ ctx := context.Background()
+ executor := &scheduledBackupExecutor{
+ metrics: backupMetrics{
+ RpoMetric: metric.NewGauge(metric.Metadata{}),
+ RpoTenantMetric: metric.NewExportedGaugeVec(metric.Metadata{}, []string{"tenant_id"}),
+ },
+ }
+
+ t.Run("updates RPO metric", func(t *testing.T) {
+ schedule := createSchedule(t, true)
+ endTime := hlc.Timestamp{WallTime: hlc.UnixNano()}
+ details := jobspb.BackupDetails{EndTime: endTime}
+
+ err := executor.backupSucceeded(ctx, nil, schedule, details, nil)
+ require.NoError(t, err)
+ require.Equal(t, endTime.GoTime().Unix(), executor.metrics.RpoMetric.Value())
+ })
+
+ t.Run("updates RPO tenant metric", func(t *testing.T) {
+ schedule := createSchedule(t, true)
+ tenantIDs := mustMakeTenantIDs(t, 1, 2)
+ endTime := hlc.Timestamp{WallTime: hlc.UnixNano()}
+ details := jobspb.BackupDetails{
+ EndTime: endTime,
+ SpecificTenantIds: tenantIDs,
+ }
+
+ err := executor.backupSucceeded(ctx, nil, schedule, details, nil)
+ require.NoError(t, err)
+
+ expectedTenantIDs := []string{"system", "2"}
+ verifyRPOTenantMetricLabels(t, executor.metrics.RpoTenantMetric, expectedTenantIDs)
+ verifyRPOTenantMetricGaugeValue(t, executor.metrics.RpoTenantMetric, details.EndTime)
+ })
+}
+
+func createSchedule(t *testing.T, updatesLastBackupMetric bool) *jobs.ScheduledJob {
+ schedule := jobs.NewScheduledJob(nil)
+
+ args := &backuppb.ScheduledBackupExecutionArgs{
+ UpdatesLastBackupMetric: updatesLastBackupMetric,
+ }
+ any, err := pbtypes.MarshalAny(args)
+ require.NoError(t, err)
+ schedule.SetExecutionDetails(schedule.ExecutorType(), jobspb.ExecutionArguments{Args: any})
+ return schedule
+}
+
+func mustMakeTenantIDs(t *testing.T, ids ...int) []roachpb.TenantID {
+ var tenantIDs []roachpb.TenantID
+ for _, id := range ids {
+ tid, err := roachpb.MakeTenantID(uint64(id))
+ require.NoError(t, err)
+ tenantIDs = append(tenantIDs, tid)
+ }
+ return tenantIDs
+}
+
+func verifyRPOTenantMetricLabels(
+ t *testing.T, gaugeVec *metric.GaugeVec, expectedTenantIDs []string,
+) {
+ prometheusMetrics := gaugeVec.ToPrometheusMetrics()
+ var actualTenantIDs []string
+ for _, promMetric := range prometheusMetrics {
+ labels := promMetric.GetLabel()
+ for _, label := range labels {
+ if label.GetName() == "tenant_id" {
+ actualTenantIDs = append(actualTenantIDs, label.GetValue())
+ }
+ }
+ }
+ require.ElementsMatch(t, expectedTenantIDs, actualTenantIDs)
+}
+
+func verifyRPOTenantMetricGaugeValue(t *testing.T, gaugeVec *metric.GaugeVec, endTime hlc.Timestamp) {
+ prometheusMetrics := gaugeVec.ToPrometheusMetrics()
+ for _, promMetric := range prometheusMetrics {
+ value := promMetric.Gauge.GetValue()
+ require.Equal(t, float64(endTime.GoTime().Unix()), value)
+ }
+}