-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #53 from projectsyn/feature/additional_rules
Add addon to add additional rules
- Loading branch information
Showing
85 changed files
with
26,122 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
local kap = import 'lib/kapitan.libjsonnet'; | ||
local kube = import 'lib/kube.libjsonnet'; | ||
local prom = import 'lib/prom.libsonnet'; | ||
local inv = kap.inventory(); | ||
local params = inv.parameters.prometheus; | ||
// Addon mixin: appends one Prometheus rule group per entry of
// `parameters.prometheus.addon_configs.additional_rules` to the
// `prometheus.prometheusRule` object this mixin is merged into.
{
  // User-provided rule groups, keyed by group name.
  local additional_rules = params.addon_configs.additional_rules,

  // Splits a custom rule key of the form "<alert|record>:<name>" into
  // [ field, name ]. Only the first ':' acts as separator, so names which
  // contain ':' themselves (e.g. "record:sum:some:metric:5m") stay intact.
  //
  // A key without any ':' (e.g. just "alert") splits into a single-element
  // array. The length is checked together with the prefix so that such keys
  // fail with the descriptive message below instead of an array-bounds error
  // when the name part is accessed.
  local parseRuleKey(rname) =
    local k = std.splitLimit(rname, ':', 1);
    assert std.length(k) == 2 && std.member([ 'alert', 'record' ], k[0]) : 'Invalid custom rule key "%s", the component expects that custom rule keys are prefixed with either "alert:" or "record:"' % [ rname ];
    k,

  prometheus+: {
    prometheusRule+: {
      spec+: {
        groups+: [
          {
            name: group_name,
            rules: [
              local rnamekey = parseRuleKey(rname);
              // Merge the `alert: <name>` / `record: <name>` field derived
              // from the key into the user-provided rule definition.
              additional_rules[group_name][rname] {
                [rnamekey[0]]: rnamekey[1],
              }
              for rname in std.objectFields(additional_rules[group_name])
              // Rules whose value is null are skipped.
              if additional_rules[group_name][rname] != null
            ],
          }
          for group_name in std.objectFields(additional_rules)
          // Groups whose value is null are skipped.
          if additional_rules[group_name] != null
        ],
      },
    },
  },
}
29 changes: 29 additions & 0 deletions
29
docs/modules/ROOT/pages/references/addon-additional-rules.adoc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
= Addon: Additional rules | ||
|
||
This addon allows users to configure additional Prometheus rules to deploy on the cluster via the parameter `addon_configs.additional_rules`. | ||
|
||
Each key-value pair in the dictionary is transformed into a Prometheus rule group by the component. | ||
|
||
The component expects each value to be a dict itself, and expects the keys of that dict to be prefixed with `record:` or `alert:` to indicate whether the rule is a recording or an alerting rule. | ||
The component will transform the keys into fields in the resulting rule by taking the prefix as the field name and the rest of the key as the field value. | ||
For example, key `"record:sum:some:metric:5m"` would be transformed into `record: sum:some:metric:5m` which should define a recording rule with name `sum:some:metric:5m`. | ||
This field is then merged into the provided value which should be a valid rule definition. | ||
|
||
See the Prometheus docs for supported configurations for https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/[recording] and https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/[alerting] rules. | ||
|
||
|
||
Example: | ||
|
||
[source,yaml] | ||
---- | ||
addon_configs: | ||
  additional_rules: | ||
    generic-rules: | ||
      "alert:ContainerOOMKilled": | ||
        annotations: | ||
          message: A container ({{$labels.container}}) in pod {{ $labels.namespace }}/{{ $labels.pod }} was OOM killed | ||
        expr: | | ||
          kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1 | ||
        labels: | ||
          source: https://git.vshn.net/swisscompks/syn-tenant-repo/-/blob/master/common.yml | ||
          severity: devnull | ||
---- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
--- | ||
parameters: | ||
prometheus: | ||
addons: | ||
- additional-rules | ||
addon_configs: | ||
additional_rules: | ||
generic-rules: | ||
"alert:ContainerOOMKilled": | ||
annotations: | ||
message: "A container ({{$labels.container}}) in pod {{ $labels.namespace }}/{{ $labels.pod }} was OOM killed" | ||
expr: | | ||
kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1 | ||
labels: | ||
severity: devnull | ||
instances: | ||
default-instance: | ||
prometheus: | ||
enabled: true | ||
alertmanager: | ||
enabled: true | ||
# Disabled for some speedup since it's irrelevant for this test | ||
grafana: | ||
enabled: false | ||
nodeExporter: | ||
enabled: true | ||
blackboxExporter: | ||
enabled: true | ||
kubernetesControlPlane: | ||
enabled: true | ||
prometheusAdapter: | ||
enabled: true | ||
kubeStateMetrics: | ||
enabled: true | ||
kubePrometheus: | ||
enabled: true |
6 changes: 6 additions & 0 deletions
6
tests/golden/additional_rules/prometheus/apps/prometheus.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
spec: | ||
ignoreDifferences: | ||
- group: '' | ||
jsonPointers: | ||
- /imagePullSecrets | ||
kind: ServiceAccount |
8 changes: 8 additions & 0 deletions
8
tests/golden/additional_rules/prometheus/prometheus/00_operator_namespace.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
apiVersion: v1 | ||
kind: Namespace | ||
metadata: | ||
annotations: {} | ||
labels: | ||
SYNMonitoring: main | ||
name: syn-prometheus-operator | ||
name: syn-prometheus-operator |
94 changes: 94 additions & 0 deletions
94
...ional_rules/prometheus/prometheus/100_default-instance_kubePrometheus_prometheusRule.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
apiVersion: monitoring.coreos.com/v1 | ||
kind: PrometheusRule | ||
metadata: | ||
annotations: | ||
source: https://github.com/projectsyn/component-prometheus | ||
labels: | ||
app.kubernetes.io/component: exporter | ||
app.kubernetes.io/managed-by: commodore | ||
app.kubernetes.io/name: kube-prometheus | ||
app.kubernetes.io/part-of: kube-prometheus | ||
monitoring.syn.tools/enabled: 'true' | ||
prometheus: default-instance | ||
role: alert-rules | ||
name: kube-prometheus-rules | ||
namespace: syn-prometheus | ||
spec: | ||
groups: | ||
- name: general.rules | ||
rules: | ||
- alert: TargetDown | ||
annotations: | ||
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ | ||
$labels.service }} targets in {{ $labels.namespace }} namespace are | ||
down.' | ||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown | ||
summary: One or more targets are unreachable. | ||
expr: 100 * (count(up == 0) BY (job, namespace, service) / count(up) BY | ||
(job, namespace, service)) > 10 | ||
for: 10m | ||
labels: | ||
severity: warning | ||
- alert: Watchdog | ||
annotations: | ||
description: 'This is an alert meant to ensure that the entire alerting | ||
pipeline is functional. | ||
This alert is always firing, therefore it should always be firing in | ||
Alertmanager | ||
and always fire against a receiver. There are integrations with various | ||
notification | ||
mechanisms that send a notification when this alert is not firing. For | ||
example the | ||
"DeadMansSnitch" integration in PagerDuty. | ||
' | ||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/watchdog | ||
summary: An alert that should always be firing to certify that Alertmanager | ||
is working properly. | ||
expr: vector(1) | ||
labels: | ||
severity: none | ||
- name: node-network | ||
rules: | ||
- alert: NodeNetworkInterfaceFlapping | ||
annotations: | ||
description: Network interface "{{ $labels.device }}" changing its up | ||
status often on node-exporter {{ $labels.namespace }}/{{ $labels.pod | ||
}} | ||
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/nodenetworkinterfaceflapping | ||
summary: Network interface is often changing its status | ||
expr: 'changes(node_network_up{job="node-exporter",device!~"veth.+"}[2m]) | ||
> 2 | ||
' | ||
for: 2m | ||
labels: | ||
severity: warning | ||
- name: kube-prometheus-node-recording.rules | ||
rules: | ||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[3m])) | ||
BY (instance) | ||
record: instance:node_cpu:rate:sum | ||
- expr: sum(rate(node_network_receive_bytes_total[3m])) BY (instance) | ||
record: instance:node_network_receive_bytes:rate:sum | ||
- expr: sum(rate(node_network_transmit_bytes_total[3m])) BY (instance) | ||
record: instance:node_network_transmit_bytes:rate:sum | ||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) | ||
WITHOUT (cpu, mode) / ON(instance) GROUP_LEFT() count(sum(node_cpu_seconds_total) | ||
BY (instance, cpu)) BY (instance) | ||
record: instance:node_cpu:ratio | ||
- expr: sum(rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal"}[5m])) | ||
record: cluster:node_cpu:sum_rate5m | ||
- expr: cluster:node_cpu_seconds_total:rate5m / count(sum(node_cpu_seconds_total) | ||
BY (instance, cpu)) | ||
record: cluster:node_cpu:ratio | ||
- name: kube-prometheus-general.rules | ||
rules: | ||
- expr: count without(instance, pod, node) (up == 1) | ||
record: count:up1 | ||
- expr: count without(instance, pod, node) (up == 0) | ||
record: count:up0 |
Oops, something went wrong.