diff --git a/.chloggen/k8slog_receiver_setup.yaml b/.chloggen/k8slog_receiver_setup.yaml new file mode 100644 index 000000000000..53e63beaaadb --- /dev/null +++ b/.chloggen/k8slog_receiver_setup.yaml @@ -0,0 +1,20 @@ +# Use this changelog template to create an entry for release notes. +# If your change doesn't affect end users, such as a test fix or a tooling change, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: new_component + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: k8slogreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Add the skeleton for the new k8slogreceiver in development." + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [23339] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: \ No newline at end of file diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 48fa4d869fe8..3d42e002c27c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -250,6 +250,7 @@ receiver/jmxreceiver/ @open-telemetry receiver/journaldreceiver/ @open-telemetry/collector-contrib-approvers @sumo-drosiek @djaglowski receiver/k8sclusterreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax @TylerHelmuth @povilasv @ChrsMark receiver/k8seventsreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax @TylerHelmuth @ChrsMark +receiver/k8slogreceiver/ @open-telemetry/collector-contrib-approvers @h0cheung @TylerHelmuth receiver/k8sobjectsreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax @hvaghani221 @TylerHelmuth @ChrsMark receiver/kafkametricsreceiver/ @open-telemetry/collector-contrib-approvers @dmitryax receiver/kafkareceiver/ @open-telemetry/collector-contrib-approvers @pavolloffay @MovieStoreGuy diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index 95b022b70897..bd04a7e3b99e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -252,6 +252,7 @@ body: - receiver/journald - receiver/k8scluster - receiver/k8sevents + - receiver/k8slog - receiver/k8sobjects - receiver/kafka - receiver/kafkametrics diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml index acf39e99429e..363306042e23 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -246,6 +246,7 @@ body: - receiver/journald - receiver/k8scluster - receiver/k8sevents + - receiver/k8slog - receiver/k8sobjects - receiver/kafka - receiver/kafkametrics diff --git a/.github/ISSUE_TEMPLATE/other.yaml b/.github/ISSUE_TEMPLATE/other.yaml index a99efb30d46b..0548e4f160c9 100644 --- a/.github/ISSUE_TEMPLATE/other.yaml +++ b/.github/ISSUE_TEMPLATE/other.yaml @@ -246,6 +246,7 @@ body: - 
receiver/journald - receiver/k8scluster - receiver/k8sevents + - receiver/k8slog - receiver/k8sobjects - receiver/kafka - receiver/kafkametrics diff --git a/.github/ISSUE_TEMPLATE/unmaintained.yaml b/.github/ISSUE_TEMPLATE/unmaintained.yaml index c7148dc20c9a..7a23f46b453b 100644 --- a/.github/ISSUE_TEMPLATE/unmaintained.yaml +++ b/.github/ISSUE_TEMPLATE/unmaintained.yaml @@ -251,6 +251,7 @@ body: - receiver/journald - receiver/k8scluster - receiver/k8sevents + - receiver/k8slog - receiver/k8sobjects - receiver/kafka - receiver/kafkametrics diff --git a/cmd/githubgen/allowlist.txt b/cmd/githubgen/allowlist.txt index 838360612c7b..ba5608fc601f 100644 --- a/cmd/githubgen/allowlist.txt +++ b/cmd/githubgen/allowlist.txt @@ -18,3 +18,4 @@ shazlehu swar8080 thmshmm zpzhuSplunk +h0cheung diff --git a/receiver/k8slogreceiver/Makefile b/receiver/k8slogreceiver/Makefile new file mode 100644 index 000000000000..ded7a36092dc --- /dev/null +++ b/receiver/k8slogreceiver/Makefile @@ -0,0 +1 @@ +include ../../Makefile.Common diff --git a/receiver/k8slogreceiver/README.md b/receiver/k8slogreceiver/README.md new file mode 100644 index 000000000000..ad5f2f4559ec --- /dev/null +++ b/receiver/k8slogreceiver/README.md @@ -0,0 +1,144 @@ +# K8slog Receiver + + +| Status | | +| ------------- |-----------| +| Stability | [development]: logs | +| Distributions | [] | +| Issues | [![Open issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aopen%20label%3Areceiver%2Fk8slog%20&label=open&color=orange&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Areceiver%2Fk8slog) [![Closed 
issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aclosed%20label%3Areceiver%2Fk8slog%20&label=closed&color=blue&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aclosed+is%3Aissue+label%3Areceiver%2Fk8slog) | +| [Code Owners](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/CONTRIBUTING.md#becoming-a-code-owner) | [@h0cheung](https://www.github.com/h0cheung), [@TylerHelmuth](https://www.github.com/TylerHelmuth) | + +[development]: https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-stability.md#development + + +Tails and parses logs in a k8s environment. + +There is only one mode of discovery for now; it is specified by the `discovery.mode` configuration option: +- `daemonset-stdout`: (default) Deployed as a DaemonSet, the receiver will read logs from the stdout of pods in the same node. + +Two modes of discovery are planned to be supported in the future: + +- `daemonset-file`: Deployed as a DaemonSet, the receiver will read logs from files inside pods in the same node. +- `sidecar`: Deployed as a sidecar container, the receiver will read logs from files. + +## Configuration + +The following settings are common to all discovery modes: + +| Field | Default | Description | +|------------------|--------------------|------------------------------------------------------------------------------------------------------------------| +| `discovery.mode` | `daemonset-stdout` | The mode of discovery. Only `daemonset-stdout` is supported now. `daemonset-file` and `sidecar` are coming soon. | +| `extract` | | The rules to extract metadata from pods and containers. TODO default values. 
| +| TODO: add fields for reading files similar to filelogreceiver | + +When `discovery.mode` is not `sidecar`, there are additional configuration options: + +| Field | Default | Description | +|-------------------------------|------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `discovery.k8s_api.auth_type` | `serviceAccount` | The authentication type of k8s api. Options are `serviceAccount` or `kubeConfig`. | +| `discovery.host_root` | `/host_root` | The directory on which the root of the host is mounted. | +| `discovery.runtime_apis` | | The runtime apis used to get log file paths. docker and cri-containerd are supported now. By default, it will try to automatically detect the cri-containerd. | +| `discovery.node_from_env` | `KUBE_NODE_NAME` | The name of the environment variable that holds the node name. | +| `discovery.filter` | [] | The filter used to filter pods and containers. By default, all pods and containers will be collected. | + +### Operators + +Each operator performs a simple responsibility, such as parsing a timestamp or JSON. Chain together operators to process logs into a desired format. + +- Every operator has a `type`. +- Every operator can be given a unique `id`. If you use the same type of operator more than once in a pipeline, you must specify an `id`. Otherwise, the `id` defaults to the value of `type`. +- Operators will output to the next operator in the pipeline. The last operator in the pipeline will emit from the receiver. Optionally, the `output` parameter can be used to specify the `id` of another operator to which logs will be passed directly. +- Only parsers and general purpose operators should be used. + +### Filters + +When `discovery.mode` is not `sidecar`, the `discovery.filter` field can be used to filter pods and containers. The filter is a list of rules. 
Each rule is a map with the following fields: + +| Field | Description | +|---------------|--------------------------------------------------------------| +| `annotations` | MapFilters that filters pods by annotations. | +| `labels` | MapFilters that filters pods by labels. | +| `env` | MapFilters that filters containers by environment variables. | +| `containers` | ValueFilters that filters containers by name. | +| `namespaces` | ValueFilters that filters pods by namespace. | +| `pods` | ValueFilters that filters pods by name. | + +#### MapFilter + +A MapFilter can be used to filter pods by maps, such as annotations or labels. It has the following fields: + +| Field | Description | +|---------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `op` | The operation to perform. Options are:
- "equals": (default) the value must be equal to the specified value.
- "not-equals": the value must not be equal to the specified value.
- "exists": the value must exist.
- "not-exists": the value must not exist.
- "matches": the value must match the specified regular expression.
- "not-matches": the value must not match the specified regular expression. | +| `key` | The key of the map. | +| `value` | The value to match. Only used for "equals", "not-equals", "matches", and "not-matches" operations. | + +#### ValueFilter + +A ValueFilter can be used to filter pods by string values, such as container names or namespaces. It has the following fields: + +| Field | Description | +|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `op` | The operation to perform. Options are:
- "equals": (default) the value must be equal to the specified value.
- "not-equals": the value must not be equal to the specified value.
- "matches": the value must match the specified regular expression.
- "not-matches": the value must not match the specified regular expression. | +| `value` | The value to match. | + +### Extract + +The `extract` field can be used to extract metadata from pods and containers. It has the following fields: + +| Field | Description | +|---------------|--------------------------------------------------------------------------------------| +| `metadata` | A string slice of metadata to extract from the pods and containers. | +| `env` | A FieldExtractConfig that extracts fields from environment variables of containers. | +| `annotations` | A FieldExtractConfig that extracts fields from annotations of pods. | +| `labels` | A FieldExtractConfig that extracts fields from labels of pods. | + +#### FieldExtractConfig + +A FieldExtractConfig can be used to extract fields from maps, such as annotations or labels. It has the following fields: + +| Field | Description | +|-------------|------------------------------------------------------------------------------------------------------| +| `tag_name` | Required. The name of the extracted attributes. | +| `key` | The key of the map (annotation, label, etc.). Exactly one of `key` or `key_regex` must be specified. | +| `key_regex` | The regular expression of the key. Exactly one of `key` or `key_regex` must be specified. | +| `regex` | Optional. The regular expression to extract a submatch from the value. | + +### Supported encodings + +| Key | Description | +|------------|------------------------------------------------------------------| +| `nop` | No encoding validation. Treats the file as a stream of raw bytes | +| `utf-8` | UTF-8 encoding | +| `utf-16le` | UTF-16 encoding with little-endian byte order | +| `utf-16be` | UTF-16 encoding with big-endian byte order | +| `ascii` | ASCII encoding | +| `big5` | The Big5 Chinese character encoding | + +Other less common encodings are supported on a best-effort basis. 
See [https://www.iana.org/assignments/character-sets/character-sets.xhtml](https://www.iana.org/assignments/character-sets/character-sets.xhtml) for other encodings available. + +## Additional Terminology and Features + +- An [entry](../../pkg/stanza/docs/types/entry.md) is the base representation of log data as it moves through a pipeline. All operators either create, modify, or consume entries. +- A [field](../../pkg/stanza/docs/types/field.md) is used to reference values in an entry. +- A common [expression](../../pkg/stanza/docs/types/expression.md) syntax is used in several operators. For example, expressions can be used to [filter](../../pkg/stanza/docs/operators/filter.md) or [route](../../pkg/stanza/docs/operators/router.md) entries. + +### Parsers with Embedded Operations + +Many parser operators can be configured to embed certain follow-up operations such as timestamp and severity parsing. For more information, see [complex parsers](../../pkg/stanza/docs/types/parsers.md#complex-parsers). 
+ +## Example - Collect logs from stdout of all containers + +Receiver Configuration +```yaml +receivers: + k8slog: + discovery: + mode: daemonset-stdout + operators: + - type: recombine + combine_field: body + is_first_entry: body matches "^\\d{4}-\\d{2}-\\d{2}" + max_log_size: 128kb + source_identifier: attributes["k8s.pod.uid"] +``` diff --git a/receiver/k8slogreceiver/config.go b/receiver/k8slogreceiver/config.go new file mode 100644 index 000000000000..3a2c0242e1d1 --- /dev/null +++ b/receiver/k8slogreceiver/config.go @@ -0,0 +1,217 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package k8slogreceiver // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/k8slogreceiver" + +import ( + "fmt" + + "go.uber.org/multierr" + + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/k8sconfig" +) + +const ( + ModeDaemonSetStdout = "daemonset-stdout" +) + +const ( + DefaultMode = ModeDaemonSetStdout + DefaultHostRoot = "/host_root" + DefaultNodeFromEnv = "KUBE_NODE_NAME" +) + +// Config is the configuration of a k8slog receiver +type Config struct { + Discovery SourceConfig `mapstructure:"discovery"` + Extract ExtractConfig `mapstructure:"extract"` + + // TODO: refactor fileconsumer and add the config of its k8s implementation here. +} + +// ExtractConfig allows specifying how to extract resource attributes from pod. +type ExtractConfig struct { + // Metadata represents the list of metadata fields to extract from pod. + // TODO: supported metadata fields and default values. + Metadata []string `mapstructure:"metadata"` + + // Annotations represents the rules to extract from pod annotations. + Annotations []FieldExtractConfig `mapstructure:"annotations"` + + // Labels represents the rules to extract from pod labels. + Labels []FieldExtractConfig `mapstructure:"labels"` + + // Env represents the rules to extract from container environment variables. 
+ Env []FieldExtractConfig `mapstructure:"env"` +} + +// FieldExtractConfig allows specifying an extraction rule to extract a resource attribute from pod (or namespace) +// annotations (or labels). +// This is a copy of the config from the k8sattributes processor. +type FieldExtractConfig struct { + // TagName represents the name of the resource attribute that will be added to logs, metrics or spans. + // When not specified, a default tag name will be used of the format: + // - k8s.pod.annotations. + // - k8s.pod.labels.