From 50a5e711be75b156af98f1b7831781cb58057bf5 Mon Sep 17 00:00:00 2001 From: kbauer Date: Mon, 10 Feb 2025 10:31:58 -0800 Subject: [PATCH 1/4] feat: add k8s node and cluster config --- .github/workflows/ci-config-drift.yaml | 21 +- .github/workflows/ci-nightly.yaml | 1 + .github/workflows/ci.yaml | 1 + Makefile | 2 +- cmd/goreleaser/internal/configure.go | 35 +- configs/README.md | 24 - configs/nr-otel-collector-gateway.yaml | 46 -- distributions/README.md | 23 +- distributions/nrdot-collector-host/README.md | 2 + .../.goreleaser-nightly.yaml | 23 +- .../nrdot-collector-k8s/.goreleaser.yaml | 23 +- distributions/nrdot-collector-k8s/Dockerfile | 5 + distributions/nrdot-collector-k8s/Makefile | 16 + distributions/nrdot-collector-k8s/README.md | 19 + .../nrdot-collector-k8s/config-cluster.yaml | 524 ++++++++++++++++++ .../nrdot-collector-k8s/config-node.yaml | 522 +++++++++++++++++ .../nrdot-collector-k8s/sync-configs.sh | 51 ++ .../mocked_backend/templates/daemonset.yaml | 9 + .../templates/k8s-api-access.yaml | 88 +++ test/charts/mocked_backend/values.yaml | 2 + .../nr_backend/templates/daemonset.yaml | 5 +- .../nr_backend/templates/k8s-api-access.yaml | 88 +++ test/charts/nr_backend/values.yaml | 4 +- test/e2e/Makefile | 2 +- test/e2e/hostmetrics/hostmetrics_test.go | 35 +- .../hostmetrics_slow/hostmetrics_slow_test.go | 24 +- test/e2e/util/chart/mocked_backend.go | 8 +- test/e2e/util/helm/helm.go | 6 +- test/e2e/util/k8s/k8s.go | 40 +- 29 files changed, 1448 insertions(+), 201 deletions(-) delete mode 100644 configs/README.md delete mode 100644 configs/nr-otel-collector-gateway.yaml create mode 100644 distributions/nrdot-collector-k8s/Makefile create mode 100644 distributions/nrdot-collector-k8s/README.md create mode 100644 distributions/nrdot-collector-k8s/config-cluster.yaml create mode 100644 distributions/nrdot-collector-k8s/config-node.yaml create mode 100755 distributions/nrdot-collector-k8s/sync-configs.sh create mode 100644 
test/charts/mocked_backend/templates/k8s-api-access.yaml create mode 100644 test/charts/nr_backend/templates/k8s-api-access.yaml diff --git a/.github/workflows/ci-config-drift.yaml b/.github/workflows/ci-config-drift.yaml index bb9d6ff1..89eafd97 100644 --- a/.github/workflows/ci-config-drift.yaml +++ b/.github/workflows/ci-config-drift.yaml @@ -16,23 +16,6 @@ jobs: - name: Set up Helm uses: azure/setup-helm@v4.2.0 - - name: Setup local kind cluster - uses: helm/kind-action@v1 - with: - version: v0.21.0 - cluster_name: "config-drift" - wait: 60s - - - name: Install nr-k8s-otel-collector chart + - name: Sync k8s distro configs from helm chart run: | - helm repo add newrelic https://newrelic.github.io/helm-charts - - helm install test newrelic/nr-k8s-otel-collector -n default \ - --set cluster=config-drift --set licenseKey=PLACEHOLDER \ - --create-namespace --dependency-update - - kubectl get configmaps -n default test-nr-k8s-otel-collector-daemonset-config \ - -o "jsonpath={.data['daemonset-config\.yaml']}" | yq . - - kubectl get configmaps -n default test-nr-k8s-otel-collector-deployment-config \ - -o "jsonpath={.data['deployment-config\.yaml']}" | yq . 
+ make -f ./distributions/nrdot-collector-k8s/Makefile sync-configs diff --git a/.github/workflows/ci-nightly.yaml b/.github/workflows/ci-nightly.yaml index 3c76dffe..624f7250 100644 --- a/.github/workflows/ci-nightly.yaml +++ b/.github/workflows/ci-nightly.yaml @@ -48,6 +48,7 @@ jobs: distribution: - nr-otel-collector - nrdot-collector-host + - nrdot-collector-k8s steps: - name: Checkout uses: actions/checkout@v4 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0a20c8d5..f413cee8 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -18,6 +18,7 @@ jobs: distribution: - nr-otel-collector - nrdot-collector-host + - nrdot-collector-k8s uses: ./.github/workflows/ci-base.yaml with: distribution: ${{ matrix.distribution }} diff --git a/Makefile b/Makefile index 939afb04..4b45a661 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ TOOLS_BIN_NAMES := $(addprefix $(TOOLS_BIN_DIR)/, $(notdir $(shell echo $(TOOLS_ GO_LICENCE_DETECTOR := $(TOOLS_BIN_DIR)/go-licence-detector GO_LICENCE_DETECTOR_CONFIG := $(SRC_ROOT)/internal/assets/license/rules.json -DISTRIBUTIONS ?= "nr-otel-collector,nrdot-collector-host" +DISTRIBUTIONS ?= "nr-otel-collector,nrdot-collector-host,nrdot-collector-k8s" ci: check build licenses-check check: ensure-goreleaser-up-to-date diff --git a/cmd/goreleaser/internal/configure.go b/cmd/goreleaser/internal/configure.go index 2f05a2e7..a52b50aa 100644 --- a/cmd/goreleaser/internal/configure.go +++ b/cmd/goreleaser/internal/configure.go @@ -42,9 +42,20 @@ const ( var ( ImagePrefixes = []string{DockerHub} NightlyImagePrefixes = []string{EnvRegistry} - - Architectures = []string{"amd64", "arm64"} - DefaultConfigDists = map[string]bool{LegacyDistro: true, HostDistro: true} + Architectures = []string{"amd64", "arm64"} + SkipBinaries = map[string]bool{ + K8sDistro: true, + } + NfpmDefaultConfig = map[string]string{ + LegacyDistro: "config.yaml", + HostDistro: "config.yaml", + // k8s missing due to not packaged via 
nfpm + } + DockerIncludedConfigs = map[string][]string{ + LegacyDistro: {"config.yaml"}, + HostDistro: {"config.yaml"}, + K8sDistro: {"config-node.yaml", "config-cluster.yaml"}, + } K8sDockerSkipArchs = map[string]bool{"arm": true, "386": true} K8sGoos = []string{"linux"} K8sArchs = []string{"amd64", "arm64"} @@ -85,10 +96,9 @@ func Generate(dist string, nightly bool) config.Project { } func Blobs(dist string, nightly bool) []config.Blob { - if dist == K8sDistro { + if skip, ok := SkipBinaries[dist]; ok && skip { return nil } - version := "{{ .Version }}" if nightly { @@ -154,6 +164,9 @@ func ArmVersions(dist string) []string { } func Archives(dist string) []config.Archive { + if skip, ok := SkipBinaries[dist]; ok && skip { + return nil + } return []config.Archive{ Archive(dist), } @@ -175,7 +188,7 @@ func Archive(dist string) config.Archive { } func Packages(dist string) []config.NFPM { - if dist == K8sDistro { + if skip, ok := SkipBinaries[dist]; ok && skip { return nil } return []config.NFPM{ @@ -197,9 +210,9 @@ func Package(dist string) config.NFPM { Type: "config|noreplace", }, } - if _, ok := DefaultConfigDists[dist]; ok { + if defaultConfig, ok := NfpmDefaultConfig[dist]; ok { nfpmContents = append(nfpmContents, config.NFPMContent{ - Source: "config.yaml", + Source: defaultConfig, Destination: path.Join("/etc", dist, "config.yaml"), Type: "config", }) @@ -294,8 +307,10 @@ func DockerImage(dist string, nightly bool, arch string, armVersion string) conf return fmt.Sprintf("--label=org.opencontainers.image.%s={{%s}}", name, template) } files := make([]string, 0) - if _, ok := DefaultConfigDists[dist]; ok { - files = append(files, "config.yaml") + if configFiles, ok := DockerIncludedConfigs[dist]; ok { + for _, configFile := range configFiles { + files = append(files, configFile) + } } return config.Docker{ ImageTemplates: imageTemplates, diff --git a/configs/README.md b/configs/README.md deleted file mode 100644 index c0f8e010..00000000 --- a/configs/README.md 
+++ /dev/null @@ -1,24 +0,0 @@ -# WIP: Configurations for the New Relic Otel collectors - -## New Relic Otel collector (nr-otel-collector) - -- [nr-otel-collector-agent-linux.yaml](./nr-otel-collector-agent-linux.yaml): Feature parity with the New Relic Infrastructure Agent. - -### Usage - -#### Package manager - -If the collector was installed using a Linux package manager (APT, RPM, etc) some environment variables are predefined in the Systemd service [environment file](../distributions/nr-otel-collector/nr-otel-collector.conf): - -``` -OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:443 -NEW_RELIC_MEMORY_LIMIT_MIB=100 -``` - -The `NEW_RELIC_LICENSE_KEY` environment variable must be set manually, it can be appended to the Systemd service environment file (`/etc/nr-otel-collector/nr-otel-collector.conf`) or directly to the collector's configuration (`/etc/nr-otel-collector/config.yaml`). - -#### Command line - -``` -OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net:4317 NEW_RELIC_MEMORY_LIMIT_MIB=100 NEW_RELIC_LICENSE_KEY=your_license_key nr-otel-collector --config nr-otel-collector-agent-linux.yaml -``` diff --git a/configs/nr-otel-collector-gateway.yaml b/configs/nr-otel-collector-gateway.yaml deleted file mode 100644 index 17e55b80..00000000 --- a/configs/nr-otel-collector-gateway.yaml +++ /dev/null @@ -1,46 +0,0 @@ -# The following environment variables require manual modification: -# - NEW_RELIC_LICENSE_KEY: New Relic license key. - -# If the collector is not installed through a package manager, the following -# environment variables need to be set: -# - NEW_RELIC_MEMORY_LIMIT_MIB: Maximum amount of memory to be used. 
(default: 100) -# - OTEL_EXPORTER_OTLP_ENDPOINT: New Relic Otlp endpoint to export metrics to (see: https://docs.newrelic.com/docs/more-integrations/open-source-telemetry-integrations/opentelemetry/get-started/opentelemetry-set-up-your-app/#review-settings) - -extensions: - health_check: - -receivers: - otlp: - protocols: - grpc: - http: - -processors: - batch: - memory_limiter: - check_interval: 1s - limit_mib: ${NEW_RELIC_MEMORY_LIMIT_MIB} - -exporters: - debug: - verbosity: detailed - otlp: - endpoint: ${OTEL_EXPORTER_OTLP_ENDPOINT} - headers: - api-key: ${NEW_RELIC_LICENSE_KEY} - -service: - - pipelines: - - traces: - receivers: [otlp] - processors: [memory_limiter, batch] - exporters: [debug, otlp] - - metrics: - receivers: [otlp] - processors: [memory_limiter, batch] - exporters: [debug, otlp] - - extensions: [health_check] diff --git a/distributions/README.md b/distributions/README.md index f906a7b2..60ff21d3 100644 --- a/distributions/README.md +++ b/distributions/README.md @@ -2,17 +2,26 @@ ## Installation -### General +### Docker -#### Environment variables -- `NEW_RELIC_LICENSE_KEY`: New Relic ingest key. -- `NEW_RELIC_MEMORY_LIMIT_MIB`: Maximum amount of memory to be used. -- `OTEL_EXPORTER_OTLP_ENDPOINT`: New Relic OTLP endpoint to export metrics to, see [official docs](https://docs.newrelic.com/docs/opentelemetry/best-practices/opentelemetry-otlp/) +Each distribution is available as a Docker image under the [newrelic](https://hub.docker.com/u/newrelic?page=1&search=nrdot-collector) organization on Docker Hub. + +### OS-specific packages +For certain distributions, signed OS-specific packages are also available under [Releases](https://github.com/newrelic/opentelemetry-collector-releases/releases) on GitHub. 
+ +## Configuration ### Components The full list of components is available in the respective `manifest.yaml` -### Configuration +### Customize Default Configuration + +The default configuration exposes some options via environment variables: + +| Environment Variable | Description | Default | +|---|---|---| +| `NEW_RELIC_LICENSE_KEY` | New Relic ingest key | N/A - Required | +| `NEW_RELIC_MEMORY_LIMIT_MIB` | Maximum amount of memory to be used | 100 | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | New Relic OTLP endpoint to export metrics to, see [official docs](https://docs.newrelic.com/docs/opentelemetry/best-practices/opentelemetry-otlp/) | `https://otlp.nr-data.net:4317` | -The default configuration is `config.yaml` which is embedded in the `Dockerfile` and any OS-specific packaging (if available). diff --git a/distributions/nrdot-collector-host/README.md b/distributions/nrdot-collector-host/README.md index c8b9191e..6b4b7625 100644 --- a/distributions/nrdot-collector-host/README.md +++ b/distributions/nrdot-collector-host/README.md @@ -5,3 +5,5 @@ Note: See [general README](../README.md) for information that applies to all dis A distribution of the NRDOT collector focused on - monitoring the host the collector is deployed on via `hostmetricsreceiver` and `filelogreceiver` - support piping other telemetry through it via the `otlpreceiver` + +This distribution is available as a Docker image and as an OS-specific package. 
\ No newline at end of file diff --git a/distributions/nrdot-collector-k8s/.goreleaser-nightly.yaml b/distributions/nrdot-collector-k8s/.goreleaser-nightly.yaml index ab1e4c64..0a68cf90 100644 --- a/distributions/nrdot-collector-k8s/.goreleaser-nightly.yaml +++ b/distributions/nrdot-collector-k8s/.goreleaser-nightly.yaml @@ -1,5 +1,7 @@ version: 2 project_name: nrdot-collector-releases-nightly +release: + disable: "true" builds: - id: nrdot-collector-k8s goos: @@ -16,15 +18,6 @@ builds: - -trimpath env: - CGO_ENABLED=0 -archives: - - id: nrdot-collector-k8s - builds: - - nrdot-collector-k8s - name_template: '{{ .Binary }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}{{ if .Mips }}_{{ .Mips }}{{ end }}' - format_overrides: - - goos: windows - formats: - - zip snapshot: version_template: '{{ incpatch .Version }}-SNAPSHOT-{{.ShortCommit}}' checksum: @@ -38,6 +31,9 @@ dockers: image_templates: - '{{ .Env.REGISTRY }}/nrdot-collector-k8s:{{ .Version }}-nightly-amd64' - '{{ .Env.REGISTRY }}/nrdot-collector-k8s:nightly-amd64' + extra_files: + - config-node.yaml + - config-cluster.yaml build_flag_templates: - --pull - --platform=linux/amd64 @@ -54,6 +50,9 @@ dockers: image_templates: - '{{ .Env.REGISTRY }}/nrdot-collector-k8s:{{ .Version }}-nightly-arm64' - '{{ .Env.REGISTRY }}/nrdot-collector-k8s:nightly-arm64' + extra_files: + - config-node.yaml + - config-cluster.yaml build_flag_templates: - --pull - --platform=linux/arm64 @@ -82,9 +81,3 @@ signs: - ${artifact} signature: ${artifact}.sig artifacts: all - certificate: ${artifact}.pem -docker_signs: - - args: - - sign - - ${artifact} - artifacts: all diff --git a/distributions/nrdot-collector-k8s/.goreleaser.yaml b/distributions/nrdot-collector-k8s/.goreleaser.yaml index f04af0ff..22aefe1e 100644 --- a/distributions/nrdot-collector-k8s/.goreleaser.yaml +++ b/distributions/nrdot-collector-k8s/.goreleaser.yaml @@ -1,5 +1,7 @@ version: 2 project_name: nrdot-collector-releases +release: + disable: 
"true" builds: - id: nrdot-collector-k8s goos: @@ -16,15 +18,6 @@ builds: - -trimpath env: - CGO_ENABLED=0 -archives: - - id: nrdot-collector-k8s - builds: - - nrdot-collector-k8s - name_template: '{{ .Binary }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}{{ if .Mips }}_{{ .Mips }}{{ end }}' - format_overrides: - - goos: windows - formats: - - zip snapshot: version_template: '{{ incpatch .Version }}-SNAPSHOT-{{.ShortCommit}}' checksum: @@ -38,6 +31,9 @@ dockers: image_templates: - newrelic/nrdot-collector-k8s:{{ .Version }}-amd64 - newrelic/nrdot-collector-k8s:latest-amd64 + extra_files: + - config-node.yaml + - config-cluster.yaml build_flag_templates: - --pull - --platform=linux/amd64 @@ -54,6 +50,9 @@ dockers: image_templates: - newrelic/nrdot-collector-k8s:{{ .Version }}-arm64 - newrelic/nrdot-collector-k8s:latest-arm64 + extra_files: + - config-node.yaml + - config-cluster.yaml build_flag_templates: - --pull - --platform=linux/arm64 @@ -86,9 +85,3 @@ signs: - ${artifact} signature: ${artifact}.sig artifacts: all - certificate: ${artifact}.pem -docker_signs: - - args: - - sign - - ${artifact} - artifacts: all diff --git a/distributions/nrdot-collector-k8s/Dockerfile b/distributions/nrdot-collector-k8s/Dockerfile index 3ba03798..6b1f40e7 100644 --- a/distributions/nrdot-collector-k8s/Dockerfile +++ b/distributions/nrdot-collector-k8s/Dockerfile @@ -8,6 +8,11 @@ USER ${USER_UID} COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt COPY --chmod=755 nrdot-collector-k8s /nrdot-collector-k8s +COPY config-node.yaml /etc/nrdot-collector-k8s/config-node.yaml +COPY config-cluster.yaml /etc/nrdot-collector-k8s/config-cluster.yaml + ENTRYPOINT ["/nrdot-collector-k8s"] +CMD ["--config", "/etc/nrdot-collector-k8s/config-node.yaml"] + # `4137` and `4318`: OTLP EXPOSE 4317 4318 \ No newline at end of file diff --git a/distributions/nrdot-collector-k8s/Makefile b/distributions/nrdot-collector-k8s/Makefile new file mode 
100644 index 00000000..1fa15a5a --- /dev/null +++ b/distributions/nrdot-collector-k8s/Makefile @@ -0,0 +1,16 @@ +THIS_MAKEFILE_DIR := $(realpath $(dir $(realpath $(lastword $(MAKEFILE_LIST))))) + +.PHONY: sync-configs +sync-configs: + @${THIS_MAKEFILE_DIR}/sync-configs.sh + +.PHONY: check-k8s-configs +check-k8s-configs: sync-configs + @git diff --name-only | grep -E 'distributions\/nrdot-collector-k8s\/config-\w*.yaml' \ + && { \ + echo "Collector configs in helm chart have changed. Adjust sync script to take changes into account.";\ + echo "Diff of $(NOTICE_OUTPUT):";\ + git --no-pager diff HEAD -- 'distributions/nrdot-collector-k8s/config-*.yaml';\ + exit 1;\ + } \ + || exit 0 diff --git a/distributions/nrdot-collector-k8s/README.md b/distributions/nrdot-collector-k8s/README.md new file mode 100644 index 00000000..ade12ac0 --- /dev/null +++ b/distributions/nrdot-collector-k8s/README.md @@ -0,0 +1,19 @@ +# nrdot-collector-k8s + +Note: See [general README](../README.md) for information that applies to all distributions. + +A distribution of the NRDOT collector focused on gathering metrics in a kubernetes environment with two different configs: +- [config-node.yaml](./config-node.yaml) (default): Collects node-level metrics via `hostmetricsreceiver`, `filelogreceiver`, `kubeletstatsreceiver` and `prometheusreceiver` (`cAdvisor`, `kubelet`) +- [config-cluster.yaml](./config-cluster.yaml): Collects cluster-level metrics via `k8seventsreceiver`, `prometheusreceiver` (`kube-state-metrics`, `apiserver`, `controller-manager`, `scheduler`). Can be enabled by overriding the default docker `CMD`, i.e. `--config /etc/nrdot-collector-k8s/config-cluster.yaml` + +Distribution is available as docker image and runs in `node` mode by default. + +## Additional Configuration + +See [general README](../README.md) for information that applies to all distributions. 
+ +### nrdot-collector-k8s +| Environment Variable | Description | Default | +|---|---|---| +| `K8S_CLUSTER_NAME` | Kubernetes Cluster Name used to populate attributes like `k8s.cluster.name` | `cluster-name-placeholder` | +| `MY_POD_IP` | Pod IP to configure `otlpreceiver` | N/A - Required | \ No newline at end of file diff --git a/distributions/nrdot-collector-k8s/config-cluster.yaml b/distributions/nrdot-collector-k8s/config-cluster.yaml new file mode 100644 index 00000000..4ac0d127 --- /dev/null +++ b/distributions/nrdot-collector-k8s/config-cluster.yaml @@ -0,0 +1,524 @@ +receivers: + otlp: + protocols: + http: + endpoint: ${env:MY_POD_IP}:4318 + k8s_events: + prometheus/ksm: + config: + scrape_configs: + - job_name: kube-state-metrics + scrape_interval: 1m + kubernetes_sd_configs: + - role: pod + relabel_configs: + - action: keep + regex: kube-state-metrics + source_labels: + - __meta_kubernetes_pod_label_app_kubernetes_io_name + - action: replace + target_label: job_label + replacement: kube-state-metrics + prometheus: + config: + scrape_configs: + - job_name: apiserver + scrape_interval: 1m + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - default + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: false + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - action: keep + regex: default;kubernetes;https + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + - __meta_kubernetes_endpoint_port_name + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: service + - action: replace + target_label: job_label + replacement: apiserver + - job_name: controller-manager + scrape_interval: 1m + metrics_path: /metrics + kubernetes_sd_configs: + - role: endpoints + scheme: 
https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: false + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - action: keep + regex: default;kubernetes;https + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + - __meta_kubernetes_endpoint_port_name + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_pod_name + target_label: pod + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: service + - action: replace + target_label: job_label + replacement: controller-manager + - job_name: scheduler + scrape_interval: 1m + kubernetes_sd_configs: + - role: endpoints + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + relabel_configs: + - action: keep + regex: default;kubernetes;https + source_labels: + - __meta_kubernetes_namespace + - __meta_kubernetes_service_name + - __meta_kubernetes_endpoint_port_name + - action: replace + source_labels: + - __meta_kubernetes_namespace + target_label: namespace + - action: replace + source_labels: + - __meta_kubernetes_service_name + target_label: service + - action: replace + target_label: job_label + replacement: scheduler +processors: + groupbyattrs: + keys: + - pod + - container + - daemonset + - replicaset + - statefulset + - deployment + - cronjob + - configmap + - job + - job_name + - horizontalpodautoscaler + - persistentvolume + - persistentvolumeclaim + - endpoint + - mutatingwebhookconfiguration + - validatingwebhookconfiguration + - lease + - storageclass + - secret + - service + - resourcequota + - node + - namespace + transform/ksm: + metric_statements: + - context: resource + statements: + - delete_key(attributes, 
"k8s.pod.name") + - delete_key(attributes, "k8s.pod.uid") + - delete_key(attributes, "k8s.container.name") + - delete_key(attributes, "k8s.namespace.name") + - delete_key(attributes, "k8s.node.name") + - delete_key(attributes, "k8s.replicaset.name") + - set(attributes["k8s.pod.name"], attributes["pod"]) + - set(attributes["k8s.daemonset.name"], attributes["daemonset"]) + - set(attributes["k8s.replicaset.name"], attributes["replicaset"]) + - set(attributes["k8s.statefulset.name"], attributes["statefulset"]) + - set(attributes["k8s.deployment.name"], attributes["deployment"]) + - set(attributes["k8s.node.name"], attributes["node"]) + - set(attributes["k8s.namespace.name"], attributes["namespace"]) + - set(attributes["k8s.container.name"], attributes["container"]) + - set(attributes["k8s.uid.uid"], attributes["uid"]) + metricstransform/k8s_cluster_info: + transforms: + - include: kubernetes_build_info + action: update + new_name: k8s.cluster.info + metricstransform/kube_pod_status_phase: + transforms: + - include: 'kube_pod_container_status_waiting' + match_type: strict + action: update + new_name: 'kube_pod_container_status_phase' + operations: + - action: add_label + new_label: container_phase + new_value: waiting + - include: 'kube_pod_container_status_running' + match_type: strict + action: update + new_name: 'kube_pod_container_status_phase' + operations: + - action: add_label + new_label: container_phase + new_value: running + - include: 'kube_pod_container_status_terminated' + match_type: strict + action: update + new_name: 'kube_pod_container_status_phase' + operations: + - action: add_label + new_label: container_phase + new_value: terminated + metricstransform/ldm: + transforms: + - include: .* + match_type: regexp + action: update + operations: + - action: add_label + new_label: low.data.mode + new_value: 'false' + metricstransform/k8s_cluster_info_ldm: + transforms: + - include: k8s.cluster.info + action: update + operations: + - action: update_label + 
label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + metricstransform/ksm: + transforms: + - include: kube_cronjob_(created|spec_suspend|status_(active|last_schedule_time)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_daemonset_(created|status_(current_number_scheduled|desired_number_scheduled|updated_number_scheduled)|status_number_(available|misscheduled|ready|unavailable)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_deployment_(created|metadata_generation|spec_(replicas|strategy_rollingupdate_max_surge)|status_(condition|observed_generation|replicas)|status_replicas_(available|ready|unavailable|updated)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_horizontalpodautoscaler_(spec_(max_replicas|min_replicas)|status_(condition|current_replicas|desired_replicas)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_job_(complete|created|failed|spec_(active_deadline_seconds|completions|parallelism)|status_(active|completion_time|failed|start_time|succeeded)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_node_status_(allocatable|condition) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_persistentvolume_(capacity_bytes|created|info|status_phase) + action: update + match_type: regexp + 
operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_persistentvolumeclaim_(created|info|resource_requests_storage_bytes|status_phase) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_pod_container_(resource_(limits|requests)|status_(phase|ready|restarts_total|waiting_reason)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_pod_(created|info|status_(phase|ready|ready_time|scheduled|scheduled_time)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_service_(annotations|created|info|labels|spec_type|status_load_balancer_ingress) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: kube_statefulset_(created|persistentvolumeclaim_retention_policy|replicas|status_(current_revision|replicas)|status_replicas_(available|current|ready|updated)) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + metricstransform/apiserver: + transforms: + - include: apiserver_storage_objects + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: go_(goroutines|threads) + action: update + match_type: regexp + operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + - include: process_resident_memory_bytes + action: update + match_type: regexp + 
operations: + - action: update_label + label: low.data.mode + value_actions: + - value: 'false' + new_value: 'true' + filter/exclude_metrics_low_data_mode: + metrics: + metric: + - 'HasAttrOnDatapoint("low.data.mode", "false")' + filter/exclude_zero_value_kube_node_status_condition: + metrics: + datapoint: + - metric.name == "kube_node_status_condition" and value_double == 0.0 + filter/exclude_zero_value_kube_persistentvolumeclaim_status_phase: + metrics: + datapoint: + - metric.name == "kube_persistentvolumeclaim_status_phase" and value_double == 0.0 + filter/exclude_zero_value_kube_pod_status_phase: + metrics: + datapoint: + - metric.name == "kube_pod_status_phase" and value_double == 0.0 + filter/exclude_zero_value_kube_pod_container_status: + metrics: + datapoint: + - metric.name == "kube_pod_container_status" and value_double == 0.0 + resourcedetection/env: + detectors: ["env", "system"] + override: false + system: + hostname_sources: ["os"] + resource_attributes: + host.id: + enabled: true + resourcedetection/cloudproviders: + detectors: [gcp, eks, azure, aks, ec2, ecs] + timeout: 2s + override: false + ec2: + resource_attributes: + host.name: + enabled: false + resource/metrics: + attributes: + - key: host.id + from_attribute: host.name + action: upsert + - key: k8s.cluster.name + action: upsert + value: ${env:K8S_CLUSTER_NAME:-cluster-name-placeholder} + - key: newrelicOnly + action: upsert + value: 'true' + - key: service.name + action: delete + - key: service_name + action: delete + resource/events: + attributes: + - key: "event.name" + action: upsert + value: "InfrastructureEvent" + - key: "event.domain" + action: upsert + value: "newrelic-otel-event" + - key: "category" + action: upsert + value: "kubernetes" + - key: k8s.cluster.name + action: upsert + value: ${env:K8S_CLUSTER_NAME:-cluster-name-placeholder} + - key: newrelicOnly + action: upsert + value: 'true' + transform/events: + log_statements: + - context: log + statements: + - 
set(attributes["event.source.host"], resource.attributes["k8s.node.name"]) + transform/low_data_mode_inator: + metric_statements: + - context: metric + statements: + - set(description, "") + - set(unit, "") + resource/low_data_mode_inator: + attributes: + - key: http.scheme + action: delete + - key: net.host.name + action: delete + - key: net.host.port + action: delete + cumulativetodelta: + k8sattributes: + auth_type: "serviceAccount" + passthrough: false + filter: + node_from_env_var: KUBE_NODE_NAME + extract: + metadata: + - k8s.pod.name + - k8s.pod.uid + - k8s.deployment.name + - k8s.namespace.name + - k8s.node.name + - k8s.pod.start_time + pod_association: + - sources: + - from: resource_attribute + name: k8s.pod.uid + attributes/self: + actions: + - key: k8s.pod.name + action: upsert + from_attribute: pod + - key: k8s.deployment.name + action: upsert + from_attribute: deployment + - key: k8s.node.name + action: upsert + from_attribute: node + - key: k8s.namespace.name + action: upsert + from_attribute: namespace + memory_limiter: + check_interval: 1s + limit_percentage: 80 + spike_limit_percentage: 25 + batch: + send_batch_max_size: 1000 + timeout: 30s + send_batch_size: 800 +exporters: + otlphttp/newrelic: + endpoint: ${env:OTEL_EXPORTER_OTLP_ENDPOINT:-https://otlp.nr-data.net} + headers: + api-key: ${env:NEW_RELIC_LICENSE_KEY} +service: + pipelines: + metrics/ksm: + receivers: + - prometheus/ksm + processors: + - metricstransform/kube_pod_status_phase + - filter/exclude_zero_value_kube_node_status_condition + - filter/exclude_zero_value_kube_persistentvolumeclaim_status_phase + - filter/exclude_zero_value_kube_pod_status_phase + - filter/exclude_zero_value_kube_pod_container_status + - resource/metrics + - resourcedetection/env + - resourcedetection/cloudproviders + - batch + - groupbyattrs + - transform/ksm + exporters: + - otlphttp/newrelic + metrics: + receivers: + - prometheus + processors: + - 
metricstransform/k8s_cluster_info + - resource/metrics + - k8sattributes + - attributes/self + - memory_limiter + - cumulativetodelta + - batch + exporters: + - otlphttp/newrelic + logs/events: + receivers: + - k8s_events + processors: + - transform/events + - resource/events + - batch + exporters: + - otlphttp/newrelic + extensions: + - health_check +extensions: + health_check: {} diff --git a/distributions/nrdot-collector-k8s/config-node.yaml b/distributions/nrdot-collector-k8s/config-node.yaml new file mode 100644 index 00000000..9524a992 --- /dev/null +++ b/distributions/nrdot-collector-k8s/config-node.yaml @@ -0,0 +1,522 @@ +receivers: + hostmetrics: + collection_interval: 1m + scrapers: + cpu: + metrics: + system.cpu.time: + enabled: false + system.cpu.utilization: + enabled: true + load: + memory: + metrics: + system.memory.utilization: + enabled: true + paging: + metrics: + system.paging.utilization: + enabled: false + system.paging.faults: + enabled: false + filesystem: + metrics: + system.filesystem.utilization: + enabled: true + disk: + metrics: + system.disk.merged: + enabled: false + system.disk.pending_operations: + enabled: false + system.disk.weighted_io_time: + enabled: false + network: + metrics: + system.network.connections: + enabled: false + kubeletstats: + collection_interval: 1m + endpoint: "${KUBE_NODE_NAME}:10250" + auth_type: "serviceAccount" + insecure_skip_verify: true + metrics: + k8s.container.cpu_limit_utilization: + enabled: true + prometheus: + config: + scrape_configs: + - job_name: cadvisor + scrape_interval: 1m + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - replacement: kubernetes.default.svc.cluster.local:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$${1}/proxy/metrics/cadvisor + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + - action: replace + target_label: job_label + 
replacement: cadvisor + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: false + server_name: kubernetes + - job_name: kubelet + scrape_interval: 1m + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - replacement: kubernetes.default.svc.cluster.local:443 + target_label: __address__ + - regex: (.+) + replacement: /api/v1/nodes/$${1}/proxy/metrics + source_labels: + - __meta_kubernetes_node_name + target_label: __metrics_path__ + - action: replace + target_label: job_label + replacement: kubelet + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: false + server_name: kubernetes + filelog: + include: + - /var/log/pods/*/*/*.log + exclude: + - /var/log/pods/*/otel-collector-daemonset/*.log + - /var/log/pods/*/otel-collector-deployment/*.log + - /var/log/pods/*/containers/*-exec.log + - /var/log/pods/*/konnectivity-agent/*.log + - /var/log/container/otel-collector-daemonset/*.log + - /var/log/container/otel-collector-deployment/*.log + - /var/log/containers/*-exec.log + start_at: beginning + include_file_path: true + include_file_name: true + operators: + - type: router + id: get-format + routes: + - output: parser-docker + expr: 'body matches "^\\{"' + - output: parser-crio + expr: 'body matches "^[^ Z]+ "' + - output: parser-containerd + expr: 'body matches "^[^ Z]+Z"' + - type: regex_parser + id: parser-crio + regex: '^(?P