Skip to content

Commit

Permalink
Merge pull request #49 from solarwinds/feature/NH-9834
Browse files Browse the repository at this point in the history
NH-9834:  Logs monitoring
  • Loading branch information
gantrior authored Jul 1, 2022
2 parents 97cf09c + 590a0c0 commit cfa1f57
Show file tree
Hide file tree
Showing 8 changed files with 420 additions and 3 deletions.
20 changes: 19 additions & 1 deletion .github/workflows/buildAndDeploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,17 @@ jobs:
kubetail timeseries-mock-service --namespace monitoring > OtelLogs/events.log &
sleep 60
# Capture the output of the logs mock service so a later step can inspect it:
# kubetail runs in the background writing to OtelLogs/logs.log while this step
# waits 60 seconds for log records to arrive.
- name: Tail Otel mock logs for 60 seconds
run: |
mkdir -p OtelLogs
kubetail timeseries-logs-mock-service --namespace monitoring > OtelLogs/logs.log &
sleep 60
# Delete the deployed resources, then wait 20 seconds to give them time to
# terminate before the following steps run.
- name: Destroy environment
  run: |
    skaffold delete
    sleep 20
- name: Evaluate metrics collection functionality
run: |
Expand All @@ -76,6 +85,15 @@ jobs:
exit 1
fi
# Fail the job unless the captured mock-service output (OtelLogs/logs.log)
# contains at least one "#logs" marker.
- name: Evaluate logs collection functionality
  run: |
    if ! grep -q "#logs" "OtelLogs/logs.log"; then
      echo "Logs are missing"
      exit 1
    fi
    echo "Some logs found"
- name: Capture Otel logs
uses: actions/upload-artifact@v2
if: failure() && hashFiles('OtelLogs') != '' # test existence of some report in `OtelLogs` folder
Expand Down
1 change: 1 addition & 0 deletions build/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ ENV SCRAPE_INTERVAL="60s"
# Default for the OTLP exporter's `tls.insecure` setting; logs-otel-collector-config.yaml
# reads it as ${OTEL_ENVOY_ADDRESS_TLS_INSECURE}.
ENV OTEL_ENVOY_ADDRESS_TLS_INSECURE="false"

# Default collector configuration; the image's CMD below points at it.
COPY /build/otel-collector-config.yaml /opt/default-config.yaml
# Logs pipeline configuration. It is not used by default and has to be selected
# explicitly by starting the collector with --config=/opt/log-config.yaml.
COPY /build/logs-otel-collector-config.yaml /opt/log-config.yaml

ENTRYPOINT ["/swi-otelcol"]
CMD ["--config=/opt/default-config.yaml"]
Expand Down
180 changes: 180 additions & 0 deletions build/logs-otel-collector-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Exporters: where the collected log records are sent.
exporters:
  # OTLP exporter. Endpoint, TLS mode and the bearer token are substituted
  # from environment variables of the collector container.
  otlp:
    endpoint: ${OTEL_ENVOY_ADDRESS}
    tls:
      insecure: ${OTEL_ENVOY_ADDRESS_TLS_INSECURE}
    headers:
      "Authorization": "Bearer ${SOLARWINDS_API_TOKEN}"
extensions:
  # Health-check extension with its default settings; it is enabled under
  # `service.extensions` in this file.
  health_check: {}

# Processors, applied in the order listed under `service.pipelines.logs.processors`.
processors:
  # For all the filtering options see https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/filterprocessor
  filter:
    logs:
      include:
        match_type: regexp
        record_attributes:
          # Keep only records from namespaces whose name starts with "kube-"
          # (e.g. kube-system, kube-public). Note that the pattern matches ANY
          # namespace with that prefix, not just those two.
          - key: k8s.namespace.name
            value: '^kube-.*$'

  # Group log records by the Kubernetes object they originate from.
  groupbyattrs/all:
    keys:
      - k8s.container.name
      - k8s.namespace.name
      - k8s.pod.name
      - k8s.pod.uid

  # Attach collector, cluster and node identity as resource attributes.
  # Values come from environment variables of the collector container.
  resource:
    attributes:

      # Collector and Manifest version
      - key: sw.k8s.agent.manifest.version
        value: "1.0"
        action: insert

      # Cluster
      - key: sw.k8s.cluster.uid
        value: ${CLUSTER_UID}
        action: insert

      - key: k8s.cluster.name
        value: ${CLUSTER_NAME}
        action: insert

      # Node
      - key: k8s.node.name
        value: ${NODE_NAME}
        action: insert

  # Send records in batches of at most 8192, flushing at least once per second.
  batch:
    send_batch_size: 8192
    send_batch_max_size: 8192
    timeout: 1s
# Receivers: tail container log files from the node and turn them into log records.
receivers:
  filelog:
    include: [ /var/log/pods/*/*/*.log ]
    # Exclude collector container's logs. The file format is /var/log/pods/<namespace_name>_<pod_name>_<pod_uid>/<container_name>/<run_id>.log
    exclude: [ "/var/log/pods/${POD_NAMESPACE}_${POD_NAME}*_*/swi-opentelemetry-collector/*.log" ]
    start_at: beginning
    # The file path is kept as attribute `log.file.path`; the operators below
    # use it to tell sources apart and to extract Kubernetes metadata, and
    # remove it at the end.
    include_file_path: true
    include_file_name: false
    operators:
      # Find out which format is used by kubernetes
      - type: router
        id: get-format
        routes:
          # A line starting with "{" is treated as a Docker JSON log line.
          - output: parser-docker
            expr: 'body matches "^\\{"'
          # A leading token without "Z" followed by a space goes to the CRI-O parser.
          - output: parser-crio
            expr: 'body matches "^[^ Z]+ "'
          # A leading token ending in "Z" goes to the containerd parser.
          - output: parser-containerd
            expr: 'body matches "^[^ Z]+Z"'
      # Parse CRI-O format
      - type: regex_parser
        id: parser-crio
        regex: '^(?P<time>[^ Z]+) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*)( |)(?P<log>.*)$'
        output: merge-cri-lines
        parse_to: body
        timestamp:
          parse_from: body.time
          layout_type: gotime
          layout: '2006-01-02T15:04:05.000000000-07:00'
      # Parse CRI-Containerd format
      - type: regex_parser
        id: parser-containerd
        regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*)( |)(?P<log>.*)$'
        output: merge-cri-lines
        parse_to: body
        timestamp:
          parse_from: body.time
          layout: '%Y-%m-%dT%H:%M:%S.%LZ'
      # Parse Docker format
      - type: json_parser
        id: parser-docker
        parse_to: body
        output: merge-docker-lines
        timestamp:
          parse_from: body.time
          layout: '%Y-%m-%dT%H:%M:%S.%LZ'

      # Merge log lines split by Docker logging driver.
      # A chunk ending with a newline closes the entry.
      - type: recombine
        id: merge-docker-lines
        source_identifier: attributes["log.file.path"]
        output: merge-multiline-logs
        combine_field: body.log
        combine_with: ""
        is_last_entry: body.log matches "\n$"

      # Merge log lines split by CRI logging drivers.
      # The log tag "F" closes the entry.
      - type: recombine
        id: merge-cri-lines
        source_identifier: attributes["log.file.path"]
        output: merge-multiline-logs
        combine_field: body.log
        combine_with: ""
        is_last_entry: body.logtag == "F"
        overwrite_with: newest

      # Merges incoming log records into multiline logs.
      # A new entry starts at a line beginning with a date-time
      # (optionally preceded by "[").
      - type: recombine
        id: merge-multiline-logs
        output: extract-metadata-from-filepath
        source_identifier: attributes["log.file.path"]
        combine_field: body.log
        combine_with: ""
        is_first_entry: body.log matches "^\\[?\\d{4}-\\d{1,2}-\\d{1,2}.\\d{2}:\\d{2}:\\d{2}.*"

      # Extract metadata from file path
      # (namespace, pod name, pod uid, container name and run id).
      - type: regex_parser
        id: extract-metadata-from-filepath
        regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9\-]+)\/(?P<container_name>[^\._]+)\/(?P<run_id>\d+)\.log$'
        parse_from: attributes["log.file.path"]

      # Rename attributes
      - type: move
        id: move-attributes
        from: body.stream
        to: attributes["stream"]
      - type: move
        from: attributes.container_name
        to: attributes["k8s.container.name"]
      - type: move
        from: attributes.namespace
        to: attributes["k8s.namespace.name"]
      - type: move
        from: attributes.pod_name
        to: attributes["k8s.pod.name"]
      - type: move
        from: attributes.run_id
        to: attributes["run_id"]
      - type: move
        from: attributes.uid
        to: attributes["k8s.pod.uid"]
      # The file path is no longer needed once the metadata has been extracted.
      - type: remove
        field: attributes["log.file.path"]
      # Replace the parsed body map with the plain log message.
      - type: move
        from: body.log
        to: body

# Service: wires the components defined in this file into a single logs pipeline.
service:
  extensions:
    - health_check
  pipelines:
    logs:
      exporters:
        - otlp
      # Order matters: `filter` runs first, while k8s.namespace.name is still a
      # record attribute (which is what its `record_attributes` match inspects).
      processors:
        - filter
        - groupbyattrs/all
        - resource
        - batch
      receivers:
        - filelog
  # The collector's own logging level and metrics endpoint.
  telemetry:
    logs:
      level: "info"
    metrics:
      address: 0.0.0.0:8888
4 changes: 3 additions & 1 deletion build/nighthawk-swi-opentelemetry-collector.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ exporters:
receivers:
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver v0.51.0
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8seventsreceiver v0.51.0

# File log receiver; build/logs-otel-collector-config.yaml configures it as `filelog`.
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/filelogreceiver v0.51.0

processors:
- import: go.opentelemetry.io/collector/processor/batchprocessor
gomod: go.opentelemetry.io/collector v0.51.0
Expand All @@ -22,6 +23,7 @@ processors:
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/deltatorateprocessor v0.51.0
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/cumulativetodeltaprocessor v0.51.0
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/metricsgenerationprocessor v0.51.0
# Filter processor; build/logs-otel-collector-config.yaml configures it as `filter`.
- gomod: github.com/open-telemetry/opentelemetry-collector-contrib/processor/filterprocessor v0.51.0
- gomod: github.com/solarwinds/nighthawk-im-k8s-monitor/processor/prometheustypeconverterprocessor v0.0.1
path: "./src/processor/prometheustypeconverterprocessor"
- gomod: github.com/solarwinds/nighthawk-im-k8s-monitor/processor/swmetricstransformprocessor v0.0.1
Expand Down
89 changes: 88 additions & 1 deletion deploy/k8s/manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -145,4 +145,91 @@ spec:
# Upper bounds for this container: 256 millicores of CPU and 512 MiB of memory.
resources:
  limits:
    cpu: 256m
    memory: 512Mi
---
# DaemonSet running the collector with the logs configuration
# (/opt/log-config.yaml) and read-only access to the node's log directories.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: swi-opentelemetry-collector-logs
spec:
  selector:
    matchLabels:
      k8s-app: swi-opentelemetry-collector-logs
  template:
    metadata:
      labels:
        k8s-app: swi-opentelemetry-collector-logs
    spec:
      terminationGracePeriodSeconds: 30
      securityContext:
        ## In order to reliably read logs from mounted node logging paths, we need to run as root
        fsGroup: 0
        runAsUser: 0
        runAsGroup: 0
      containers:
        # The container name must stay in sync with the directory name used in
        # the filelog receiver's `exclude` pattern in the logs collector config,
        # which is how the collector skips its own log files.
        - name: swi-opentelemetry-collector
          image: "solarwinds/swi-opentelemetry-collector:latest"
          imagePullPolicy: Always
          # Override the image's default arguments to select the logs configuration.
          command:
            - /swi-otelcol
            - --config=/opt/log-config.yaml
          env:
            # POD_NAME and POD_NAMESPACE feed the `exclude` pattern of the
            # filelog receiver; NODE_NAME becomes the k8s.node.name attribute.
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            # Token sent as the exporter's "Authorization: Bearer ..." header.
            # Marked optional, so the pod still starts when the secret is absent.
            - name: SOLARWINDS_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: solarwinds-api-token
                  key: SOLARWINDS_API_TOKEN
                  optional: true
            # The <...> values below are placeholders; replace them before deploying.
            - name: OTEL_ENVOY_ADDRESS
              value: "<OTEL_ENVOY_ADDRESS>"
            - name: CLUSTER_NAME
              value: "<CLUSTER_NAME>"
            - name: CLUSTER_UID
              value: "<CLUSTER_UID>"
          # NOTE(review): 13133 is expected to be the port served by the
          # collector's `health_check` extension (its default) — confirm if the
          # extension's endpoint is ever customised.
          livenessProbe:
            httpGet:
              path: /
              port: 13133
          readinessProbe:
            httpGet:
              path: /
              port: 13133
          resources:
            requests:
              cpu: 100m
              memory: 32Mi
            limits:
              cpu: 1000m
              memory: 1Gi
          # Node log directories, mounted read-only. The collector config only
          # globs /var/log/pods.
          # NOTE(review): the other two mounts are presumably needed so that
          # symlinked log files resolve inside the container — confirm.
          volumeMounts:
            - mountPath: /var/log/pods
              name: varlogpods
              readOnly: true
            - mountPath: /var/log/containers
              name: varlogcontainers
              readOnly: true
            - mountPath: /var/lib/docker/containers
              name: varlibdockercontainers
              readOnly: true
      volumes:
        - name: varlogpods
          hostPath:
            path: /var/log/pods
        - name: varlogcontainers
          hostPath:
            path: /var/log/containers
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
1 change: 1 addition & 0 deletions tests/deploy/base/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ resources:

patchesStrategicMerge:
- patch-apply-env-variables.yaml
# NOTE(review): presumably fills in the environment placeholders of the logs
# DaemonSet for the test deployment — confirm against the patch file.
- patch-logs-apply-env-variables.yaml
- patch-cluster-role-binding.yaml
Loading

0 comments on commit cfa1f57

Please sign in to comment.