Skip to content

Commit

Permalink
[KO-344] Monitoring stack (#300)
Browse files Browse the repository at this point in the history
* Basic monitoring stack deployment
  • Loading branch information
tanmayja authored Aug 28, 2024
1 parent 84d1471 commit 57c8e19
Show file tree
Hide file tree
Showing 25 changed files with 1,901 additions and 1 deletion.
12 changes: 12 additions & 0 deletions config/monitoring/alertmanager/config/alertmanager.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Example Alertmanager configuration that delivers alert notifications to Slack.
# The webhook URL below is a placeholder (all X's) and must be replaced.

global:
  slack_api_url: "https://hooks.slack.com/services/TXXXXXXXX/XXXXXXXXXXX/XXXXXXXXXXXXXXXXXXXXXXXX"

route:
  # Alerts are grouped by their 'cluster' and 'service' labels.
  group_by:
    - cluster
    - service
  # Single receiver for everything; there are no child routes.
  receiver: slack_user

receivers:
  - name: slack_user
    slack_configs:
      # Message body built from the annotations common to the grouped alerts.
      - text: "summary: {{ .CommonAnnotations.summary }}\ndescription: {{ .CommonAnnotations.description }}"
21 changes: 21 additions & 0 deletions config/monitoring/alertmanager/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Kustomization for the Alertmanager component of the monitoring stack.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Component labels; includeSelectors also applies them to selectors.
labels:
  - includeSelectors: true
    pairs:
      app.kubernetes.io/name: aerospike-alertmanager
      app.kubernetes.io/component: alertmanager

resources:
  - statefulset.yaml
  - pvc.yaml
  - service.yaml

# No content-hash suffix on generated ConfigMap names.
generatorOptions:
  disableNameSuffixHash: true

# Wraps config/alertmanager.yml in a ConfigMap named alertmanager-config.
configMapGenerator:
  - name: alertmanager-config
    files:
      - config/alertmanager.yml
10 changes: 10 additions & 0 deletions config/monitoring/alertmanager/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# 1Gi single-node (ReadWriteOnce) claim named alertmanager-data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: alertmanager-data
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
8 changes: 8 additions & 0 deletions config/monitoring/alertmanager/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Service exposing Alertmanager on port 9093.
# NOTE(review): no selector is declared here; presumably it is injected by the
# kustomization's labels (includeSelectors: true) - confirm.
apiVersion: v1
kind: Service
metadata:
  name: alertmanager
spec:
  ports:
    - name: http
      port: 9093
43 changes: 43 additions & 0 deletions config/monitoring/alertmanager/statefulset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# StatefulSet running a single Alertmanager container.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: alertmanager
spec:
  template:
    spec:
      serviceAccountName: aerospike-monitoring-stack-prometheus
      securityContext:
        fsGroup: 26
      containers:
        - name: alertmanager
          # NOTE(review): floating 'latest' tag; consider pinning a release.
          image: prom/alertmanager:latest
          args:
            # Config file comes from the alertmanager-conf volume mounted below.
            - --config.file=/etc/alertmanager/alertmanager.yml
            # State directory lives on the alertmanager-data volume mounted below.
            - --storage.path=/alertmanager
            - --log.level=info
            # NOTE(review): advertises 0.0.0.0 on the web port (9093) - confirm intent.
            - --cluster.advertise-address=0.0.0.0:9093
          ports:
            - containerPort: 9093
          livenessProbe:
            httpGet:
              path: /-/healthy
              port: 9093
            initialDelaySeconds: 25
            periodSeconds: 20
          readinessProbe:
            httpGet:
              path: /-/ready
              port: 9093
          volumeMounts:
            - name: alertmanager-conf
              mountPath: /etc/alertmanager
            - name: alertmanager-data
              mountPath: /alertmanager
      volumes:
        # Names below already carry the 'aerospike-monitoring-stack-' prefix.
        - name: alertmanager-data
          persistentVolumeClaim:
            claimName: aerospike-monitoring-stack-alertmanager-data
        - name: alertmanager-conf
          configMap:
            name: aerospike-monitoring-stack-alertmanager-config
            # 420 decimal == 0644 octal.
            defaultMode: 420
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Grafana dashboard provisioning: one file-based provider that loads dashboards
# from /var/lib/grafana/dashboards into the 'Aerospike' folder.
apiVersion: 1
providers:
  - name: "default"
    type: file
    folder: "Aerospike"
    folderUid: "aerospike1"
    editable: true
    disableDeletion: false
    updateIntervalSeconds: 10
    options:
      path: /var/lib/grafana/dashboards
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Grafana datasource provisioning: a single Prometheus datasource reached
# through the Grafana backend (access: proxy).
apiVersion: 1
datasources:
  - name: 'Aerospike Prometheus'
    type: prometheus
    url: http://aerospike-monitoring-stack-prometheus:9090
    access: proxy
    isDefault: false
    editable: true
40 changes: 40 additions & 0 deletions config/monitoring/grafana/config/download_files.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/bin/sh
#
# Downloads the Grafana dashboards listed in DASHBOARDS from grafana.com into
# TARGET_DIR.
#
# The script is best effort: a failed tool install or download is reported on
# stderr and skipped, and the script carries on with the remaining dashboards.
# Each dashboard is fetched into a temporary file and only moved into place
# when the download (and, where used, the jq extraction) succeeded, so an HTTP
# error page or an empty/null document is never left behind as a dashboard.

# Check if curl and jq are installed; if not, install them (Alpine 'apk').
if ! command -v curl >/dev/null 2>&1 || ! command -v jq >/dev/null 2>&1; then
    echo "curl or jq not found. Installing..."
    apk add --no-cache curl jq ||
        echo "WARNING: failed to install curl and jq" >&2
else
    echo "curl and jq are already installed."
fi

# Dashboards to download, in the format <dashboard_id>:<revision> or <dashboard_id>
DASHBOARDS="16119:10 16115:7 20279"

# Directory where the dashboards will be saved
TARGET_DIR="/mnt/data"
mkdir -p "$TARGET_DIR"

DELIMITER=':'

# Loop through each dashboard identifier in DASHBOARDS
for DASHBOARD in $DASHBOARDS; do
    if echo "$DASHBOARD" | grep -q "$DELIMITER"; then
        # Delimiter present: split into ID and REVISION and fetch that revision.
        ID=$(echo "$DASHBOARD" | cut -d"$DELIMITER" -f1)
        REVISION=$(echo "$DASHBOARD" | cut -d"$DELIMITER" -f2)
        FILENAME="$ID-rev$REVISION.json"
        URL="https://grafana.com/api/dashboards/$ID/revisions/$REVISION/download"
        PARTIAL="$TARGET_DIR/$FILENAME.part"
        # -f: treat HTTP errors as failures instead of saving the error body.
        if curl -fsSL -o "$PARTIAL" "$URL"; then
            mv "$PARTIAL" "$TARGET_DIR/$FILENAME"
        else
            echo "WARNING: failed to download dashboard $DASHBOARD from $URL" >&2
            rm -f "$PARTIAL"
        fi
    else
        # No delimiter: only the ID is provided; the dashboard body is the
        # '.json' field of the API response.
        ID="$DASHBOARD"
        FILENAME="$ID.json"
        URL="https://grafana.com/api/dashboards/$ID"
        PARTIAL="$TARGET_DIR/$FILENAME.part"
        # jq -e exits non-zero when '.json' is null/missing or there is no input
        # (e.g. curl failed), so bad responses are rejected here.
        if curl -fsSL "$URL" | jq -e '.json' > "$PARTIAL"; then
            mv "$PARTIAL" "$TARGET_DIR/$FILENAME"
        else
            echo "WARNING: failed to download dashboard $DASHBOARD from $URL" >&2
            rm -f "$PARTIAL"
        fi
    fi
done

# List the downloaded files
echo "Downloaded dashboard files:"
ls -l "$TARGET_DIR"
13 changes: 13 additions & 0 deletions config/monitoring/grafana/config/grafana.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Grafana server configuration (INI format).
[analytics]
check_for_updates = true
[grafana_net]
url = https://grafana.net
[log]
# Logs go to the console only.
mode = console
# NOTE(review): debug-level logging is verbose; confirm this is intended outside of troubleshooting.
level = debug
[paths]
# NOTE(review): the Grafana StatefulSet in this stack sets GF_PATHS_DATA=/data/grafana/data;
# presumably that environment variable takes precedence over this value - confirm.
data = /var/lib/grafana/data
logs = /var/log/grafana
plugins = /var/lib/grafana/plugins
[server]
# HTTP listen port.
http_port = 3000
30 changes: 30 additions & 0 deletions config/monitoring/grafana/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Kustomization for the Grafana component of the monitoring stack.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Component labels; includeSelectors also applies them to selectors.
labels:
  - includeSelectors: true
    pairs:
      app.kubernetes.io/name: aerospike-grafana
      app.kubernetes.io/component: grafana

resources:
  - statefulset.yaml
  - pvc.yaml
  - service.yaml

# No content-hash suffix on generated ConfigMap names.
generatorOptions:
  disableNameSuffixHash: true

# One ConfigMap per configuration file under config/.
configMapGenerator:
  # Prometheus datasource definition.
  - name: grafana-provisioning-datasources
    files:
      - config/aerospike_grafana_datasource.yaml
  # Main grafana.ini.
  - name: grafana-config
    files:
      - config/grafana.ini
  # Dashboard provider definition.
  - name: grafana-dashboard-config
    files:
      - config/aerospike_grafana_dashboards_config.yaml
  # Dashboard download script.
  - name: download-script
    files:
      - config/download_files.sh
10 changes: 10 additions & 0 deletions config/monitoring/grafana/pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# 1Gi single-node (ReadWriteOnce) claim named grafana-data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: grafana-data
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
10 changes: 10 additions & 0 deletions config/monitoring/grafana/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Service exposing Grafana: service port 80 forwards to container port 3000.
# NOTE(review): no selector is declared here; presumably it is injected by the
# kustomization's labels (includeSelectors: true) - confirm.
apiVersion: v1
kind: Service
metadata:
  name: grafana
spec:
  ports:
    - name: http
      protocol: TCP
      port: 80
      targetPort: 3000
88 changes: 88 additions & 0 deletions config/monitoring/grafana/statefulset.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# StatefulSet running Grafana. An init container first runs download_files.sh,
# which writes dashboard JSON into the shared 'dashboards' emptyDir; the Grafana
# container mounts that volume at /var/lib/grafana/dashboards.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: grafana
spec:
  replicas: 1
  template:
    spec:
      serviceAccountName: aerospike-monitoring-stack-prometheus
      terminationGracePeriodSeconds: 120
      securityContext:
        fsGroup: 472
      initContainers:
        - name: download-dashboards
          image: alpine:latest
          command:
            - /bin/sh
          args:
            - -c
            - /bin/sh -x /mnt/scripts/download_files.sh
          volumeMounts:
            # Output directory written by the script.
            - name: dashboards
              mountPath: /mnt/data
            # ConfigMap carrying download_files.sh.
            - name: script-volume
              mountPath: /mnt/scripts
      containers:
        - name: grafana
          image: "grafana/grafana:latest"
          imagePullPolicy: "IfNotPresent"
          env:
            # NOTE(review): default admin/admin credentials are hard-coded in the
            # manifest; these should come from a Secret before any real use.
            - name: GF_SECURITY_ADMIN_USER
              value: "admin"
            - name: GF_SECURITY_ADMIN_PASSWORD
              value: "admin"
            # Data directory on the grafana-data volume mounted at /data.
            - name: GF_PATHS_DATA
              value: /data/grafana/data
          ports:
            # Grafana listens on 3000 only (grafana.ini http_port, probes and the
            # Service targetPort all use it). The former 'service' entry for
            # containerPort 80 named a port nothing listens on, so it is removed.
            - name: grafana
              containerPort: 3000
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 10
          readinessProbe:
            httpGet:
              path: /api/health
              port: 3000
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 10
            successThreshold: 1
            failureThreshold: 10
          volumeMounts:
            - name: grafana-config
              mountPath: "/etc/grafana/"
            - name: grafana-provisioning-datasources
              mountPath: "/etc/grafana/provisioning/datasources"
            - name: grafana-dashboard-config
              mountPath: "/etc/grafana/provisioning/dashboards"
            - name: grafana-data
              mountPath: "/data"
            - name: dashboards
              mountPath: "/var/lib/grafana/dashboards"
      volumes:
        # ConfigMap and claim names already carry the
        # 'aerospike-monitoring-stack-' prefix.
        - name: grafana-config
          configMap:
            name: aerospike-monitoring-stack-grafana-config
        - name: grafana-provisioning-datasources
          configMap:
            name: aerospike-monitoring-stack-grafana-provisioning-datasources
        - name: grafana-dashboard-config
          configMap:
            # 420 decimal == 0644 octal.
            defaultMode: 420
            name: aerospike-monitoring-stack-grafana-dashboard-config
        - name: script-volume
          configMap:
            name: aerospike-monitoring-stack-download-script
        - name: grafana-data
          persistentVolumeClaim:
            claimName: aerospike-monitoring-stack-grafana-data
        # Scratch volume shared between the init container and Grafana.
        - name: dashboards
          emptyDir: {}
16 changes: 16 additions & 0 deletions config/monitoring/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Root kustomization of the monitoring stack: composes the grafana, prometheus
# and alertmanager sub-kustomizations.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Target namespace and common name prefix for the composed resources.
namespace: monitoring
namePrefix: aerospike-monitoring-stack-

# Stack-wide labels; includeSelectors is false, so selectors are left untouched.
labels:
  - includeSelectors: false
    pairs:
      app.kubernetes.io/managed-by: kustomize
      app.kubernetes.io/part-of: aerospike-monitoring-stack

resources:
  - grafana
  - prometheus
  - alertmanager
23 changes: 23 additions & 0 deletions config/monitoring/prometheus/clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Cluster-wide read-only (get/list/watch) access used by the monitoring stack's
# service account, plus GET on the /metrics non-resource URL.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Resources in the core ("") API group.
  - apiGroups:
      - ""
    resources:
      - namespaces
      - pods
      - configmaps
      # NOTE(review): cluster-wide read access to Secrets is broad; confirm it
      # is actually required.
      - secrets
      - services
      - nodes
      - nodes/metrics
      - endpoints
    verbs:
      - list
      - watch
      - get
  # StatefulSets belong to the 'apps' API group; when listed under the core
  # group the grant matched nothing, so they get their own rule here.
  - apiGroups:
      - apps
    resources:
      - statefulsets
    verbs:
      - list
      - watch
      - get
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
12 changes: 12 additions & 0 deletions config/monitoring/prometheus/clusterrolebinding.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Binds the monitoring ClusterRole to the stack's service account in the
# 'monitoring' namespace.
# NOTE(review): roleRef and subject names are written with the
# 'aerospike-monitoring-stack-' prefix already applied; presumably this matches
# the namePrefix of the root kustomization - confirm.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: aerospike-monitoring-stack-prometheus
    namespace: monitoring
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: aerospike-monitoring-stack-prometheus
Loading

0 comments on commit 57c8e19

Please sign in to comment.