diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 85e7622459..449cb757e9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -54,7 +54,7 @@ jobs: - verify - build - test - runs-on: ${{ fromJson('{"amd64":"ubuntu-20.04", "arm64":["self-hosted", "Linux", "ARM64"]}')[matrix.arch] }} + runs-on: ${{ fromJson('{"amd64":"ubuntu-20.04", "arm64":"github-arm64-2c-8gb"}')[matrix.arch] }} steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/upload_image.yml b/.github/workflows/upload_image.yml index 75a8d9824f..5edb78afba 100644 --- a/.github/workflows/upload_image.yml +++ b/.github/workflows/upload_image.yml @@ -22,7 +22,7 @@ jobs: [chaos-daemon, chaos-mesh, chaos-dashboard, chaos-kernel, chaos-dlv] outputs: image_tag: ${{ steps.image_tag.outputs.image_tag }} - runs-on: ${{ fromJson('{"amd64":"ubuntu-20.04", "arm64":["self-hosted", "Linux", "ARM64"]}')[matrix.arch] }} + runs-on: ${{ fromJson('{"amd64":"ubuntu-20.04", "arm64":"github-arm64-2c-8gb"}')[matrix.arch] }} steps: - uses: actions/checkout@v4 with: diff --git a/CHANGELOG.md b/CHANGELOG.md index 679b1915f9..815bb7d035 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ For more information and how-to, see [RFC: Keep A Changelog](https://github.com/ ### Changed - Upgrade pnpm lockfile version to 9.0 [#4522](https://github.com/chaos-mesh/chaos-mesh/pull/4522) +- Use gauge subsystem in controller/daemon metrics [#4554](https://github.com/chaos-mesh/chaos-mesh/pull/4554) - Support for userInfo.Extra in validating webhook [#4559](https://github.com/chaos-mesh/chaos-mesh/pull/4559) ### Deprecated @@ -29,6 +30,7 @@ For more information and how-to, see [RFC: Keep A Changelog](https://github.com/ - Issue with getting a single archive in the Dashboard UI [#4521](https://github.com/chaos-mesh/chaos-mesh/pull/4521) - Wrong references of helm values in the prometheus template [#4543](https://github.com/chaos-mesh/chaos-mesh/pull/4543) +- Return 404 when the archive 
schedule was not found [#4553](https://github.com/chaos-mesh/chaos-mesh/pull/4553) ### Security @@ -74,7 +76,6 @@ For more information and how-to, see [RFC: Keep A Changelog](https://github.com/ - Upgrade byteman-helper to v4.0.22 [#4299](https://github.com/chaos-mesh/chaos-mesh/pull/4299) - GCP auth is changed to object with additional key `existingSecret` in helm chart values [#4303](https://github.com/chaos-mesh/chaos-mesh/pull/4303) - Add context to the http request to download the chart [#4304](https://github.com/chaos-mesh/chaos-mesh/pull/4304) -- dashboard: return 404 when the archive schedule was not found [#4553](https://github.com/chaos-mesh/chaos-mesh/pull/4553) ### Deprecated diff --git a/pkg/metrics/chaos-controller-manager.go b/pkg/metrics/chaos-controller-manager.go index 33d6873487..3258941123 100644 --- a/pkg/metrics/chaos-controller-manager.go +++ b/pkg/metrics/chaos-controller-manager.go @@ -28,6 +28,11 @@ import ( "github.com/chaos-mesh/chaos-mesh/pkg/status" ) +const ( + // chaosControllerManagerMetricsSubsystem is the subsystem name for chaos controller manager metrics + chaosControllerManagerMetricsSubsystem = "chaos_controller_manager" +) + // ChaosControllerManagerMetricsCollector implements prometheus.Collector interface type ChaosControllerManagerMetricsCollector struct { logger logr.Logger @@ -57,58 +62,70 @@ func NewChaosControllerManagerMetricsCollector(manager ctrl.Manager, registerer logger: logger, store: store, chaosExperiments: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_controller_manager_chaos_experiments", - Help: "Total number of chaos experiments and their phases", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_experiments", + Help: "Total number of chaos experiments and their phases", }, []string{"namespace", "kind", "phase"}), SidecarTemplates: prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "chaos_mesh_templates", - Help: "Total number of injection templates", + Subsystem: 
chaosControllerManagerMetricsSubsystem, + Name: "chaos_mesh_templates", + Help: "Total number of injection templates", }), ConfigTemplates: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_mesh_config_templates", - Help: "Total number of config templates", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_mesh_config_templates", + Help: "Total number of config templates", }, []string{"namespace", "template"}), InjectionConfigs: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_mesh_injection_configs", - Help: "Total number of injection configs", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_mesh_injection_configs", + Help: "Total number of injection configs", }, []string{"namespace", "template"}), TemplateNotExist: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "chaos_mesh_template_not_exist_total", - Help: "Total number of template not exist error", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_mesh_template_not_exist_total", + Help: "Total number of template not exist error", }, []string{"namespace", "template"}), ConfigNameDuplicate: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "chaos_mesh_config_name_duplicate_total", - Help: "Total number of config name duplication error", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_mesh_config_name_duplicate_total", + Help: "Total number of config name duplication error", }, []string{"namespace", "config"}), TemplateLoadError: prometheus.NewCounter(prometheus.CounterOpts{ - Name: "chaos_mesh_template_load_failed_total", - Help: "Total number of failures when rendering config args to template", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_mesh_template_load_failed_total", + Help: "Total number of failures when rendering config args to template", }), InjectRequired: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "chaos_mesh_inject_required_total", - Help: "Total number 
of injections required", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_mesh_inject_required_total", + Help: "Total number of injections required", }, []string{"namespace", "config"}), Injections: prometheus.NewCounterVec(prometheus.CounterOpts{ Name: "chaos_mesh_injections_total", Help: "Total number of sidecar injections performed on the webhook", }, []string{"namespace", "config"}), chaosSchedules: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_controller_manager_chaos_schedules", - Help: "Total number of chaos schedules", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_schedules", + Help: "Total number of chaos schedules", }, []string{"namespace"}), chaosWorkflows: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_controller_manager_chaos_workflows", - Help: "Total number of chaos workflows", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "chaos_workflows", + Help: "Total number of chaos workflows", }, []string{"namespace"}), EmittedEvents: prometheus.NewCounterVec(prometheus.CounterOpts{ - Name: "chaos_controller_manager_emitted_event_total", - Help: "Total number of the emitted event by chaos-controller-manager", + Subsystem: chaosControllerManagerMetricsSubsystem, + Name: "emitted_event_total", + Help: "Total number of the emitted event by chaos-controller-manager", }, []string{"type", "reason", "namespace"}), } if registerer != nil { registerer.MustRegister(c) } + return c } diff --git a/pkg/metrics/chaos-daemon.go b/pkg/metrics/chaos-daemon.go index fd08f406e6..e000c692f7 100644 --- a/pkg/metrics/chaos-daemon.go +++ b/pkg/metrics/chaos-daemon.go @@ -42,6 +42,8 @@ const ( kubernetesPodNameLabel = "io.kubernetes.pod.name" kubernetesPodNamespaceLabel = "io.kubernetes.pod.namespace" kubernetesContainerNameLabel = "io.kubernetes.container.name" + // chaosDaemonMetricsSubsystem is the subsystem name for chaos daemon metrics + chaosDaemonMetricsSubsystem = "chaos_daemon" ) func 
WithHistogramName(name string) grpcprometheus.HistogramOption { @@ -64,20 +66,24 @@ func NewChaosDaemonMetricsCollector(logger logr.Logger) *ChaosDaemonMetricsColle return &ChaosDaemonMetricsCollector{ logger: logger, iptablesPackets: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_daemon_iptables_packets", - Help: "Total number of iptables packets", + Subsystem: chaosDaemonMetricsSubsystem, + Name: "iptables_packets", + Help: "Total number of iptables packets", }, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}), iptablesPacketBytes: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_daemon_iptables_packet_bytes", - Help: "Total bytes of iptables packets", + Subsystem: chaosDaemonMetricsSubsystem, + Name: "iptables_packet_bytes", + Help: "Total bytes of iptables packets", }, []string{"namespace", "pod", "container", "table", "chain", "policy", "rule"}), ipsetMembers: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_daemon_ipset_members", - Help: "Total number of ipset members", + Subsystem: chaosDaemonMetricsSubsystem, + Name: "ipset_members", + Help: "Total number of ipset members", }, []string{"namespace", "pod", "container"}), tcRules: prometheus.NewGaugeVec(prometheus.GaugeOpts{ - Name: "chaos_daemon_tcs", - Help: "Total number of tc rules", + Subsystem: chaosDaemonMetricsSubsystem, + Name: "tcs", + Help: "Total number of tc rules", }, []string{"namespace", "pod", "container"}), } }