From ca8f70b64b8bf8edb27b091cc2ee4044aa32e30b Mon Sep 17 00:00:00 2001 From: Tim Ebert Date: Tue, 13 Aug 2024 21:30:52 +0200 Subject: [PATCH] Remove janitor component (#312) * Remove janitor component With the new version of parca, database files are structured differently and we can't apply the janitor's naive logic for implementing retention for parca data anymore. The component is dropped accordingly. * Don't deploy profiling setup by default --- .github/workflows/images.yaml | 1 - Makefile | 5 +- docs/development.md | 20 ++++- hack/cmd/janitor/main.go | 98 ---------------------- hack/config/profiling/janitor_cronjob.yaml | 51 ----------- hack/config/profiling/kustomization.yaml | 6 -- hack/config/skaffold.yaml | 59 ++++++------- 7 files changed, 44 insertions(+), 196 deletions(-) delete mode 100644 hack/cmd/janitor/main.go delete mode 100644 hack/config/profiling/janitor_cronjob.yaml diff --git a/.github/workflows/images.yaml b/.github/workflows/images.yaml index edb69487..dd9bd4df 100644 --- a/.github/workflows/images.yaml +++ b/.github/workflows/images.yaml @@ -36,7 +36,6 @@ jobs: entrypoints=( ./cmd/sharder ./cmd/shard - ./hack/cmd/janitor ./webhosting-operator/cmd/experiment ./webhosting-operator/cmd/webhosting-operator ) diff --git a/Makefile b/Makefile index fffec218..36ea50b2 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,6 @@ TAG ?= latest GHCR_REPO ?= ghcr.io/timebertt/kubernetes-controller-sharding SHARDER_IMG ?= $(GHCR_REPO)/sharder:$(TAG) SHARD_IMG ?= $(GHCR_REPO)/shard:$(TAG) -JANITOR_IMG ?= $(GHCR_REPO)/janitor:$(TAG) WEBHOSTING_OPERATOR_IMG ?= $(GHCR_REPO)/webhosting-operator:$(TAG) EXPERIMENT_IMG ?= $(GHCR_REPO)/experiment:$(TAG) @@ -151,7 +150,7 @@ images: export KO_DOCKER_REPO = $(GHCR_REPO) .PHONY: images images: $(KO) ## Build and push container images using ko. 
$(KO) build --push=$(PUSH) --sbom none --base-import-paths -t $(TAG) --platform linux/amd64,linux/arm64 \ - ./cmd/sharder ./cmd/shard ./hack/cmd/janitor ./webhosting-operator/cmd/webhosting-operator + ./cmd/sharder ./cmd/shard ./webhosting-operator/cmd/webhosting-operator ##@ Deployment @@ -178,7 +177,7 @@ up dev: export SKAFFOLD_TAIL ?= true .PHONY: deploy deploy: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Build all images and deploy everything to K8s cluster specified in $KUBECONFIG. - $(SKAFFOLD) deploy -i $(SHARDER_IMG) -i $(SHARD_IMG) -i $(JANITOR_IMG) -i $(WEBHOSTING_OPERATOR_IMG) -i $(EXPERIMENT_IMG) + $(SKAFFOLD) deploy -i $(SHARDER_IMG) -i $(SHARD_IMG) -i $(WEBHOSTING_OPERATOR_IMG) -i $(EXPERIMENT_IMG) .PHONY: up up: $(SKAFFOLD) $(KUBECTL) $(YQ) ## Build all images, deploy everything to K8s cluster specified in $KUBECONFIG, start port-forward and tail logs. diff --git a/docs/development.md b/docs/development.md index 4eaa4777..6de2e41f 100644 --- a/docs/development.md +++ b/docs/development.md @@ -128,9 +128,9 @@ NAME TYPE DATA AGE CLUSTERRING-50D858E0-EXAMPLE secret/dummy-foo Opaque 0 3s shard-5fc87c9fb7-kfb2z ``` -## Monitoring and Continuous Profiling +## Monitoring -When using the skaffold-based setup, you also get a full monitoring and continuous profiling setup for observing and analyzing the components' resource usage. +When using the skaffold-based setup, you also get a full monitoring setup for observing and analyzing the components' resource usage. 
To access the monitoring dashboards and metrics in Grafana, simply forward its port and open http://localhost:3000/ in your browser: @@ -142,6 +142,14 @@ The password for Grafana's `admin` user is written to `hack/config/monitoring/de Be sure to check out the controller-runtime dashboard: http://localhost:3000/d/PuCBL3zVz/controller-runtime-controllers +## Continuous Profiling + +To dig deeper into the components' resource usage, you can deploy the continuous profiling setup based on [Parca](https://parca.dev/): + +```bash +make up SKAFFOLD_MODULE=profiling SKAFFOLD_PROFILE=profiling +``` + To access the profiling data in Parca, simply forward its port and open http://localhost:7070/ in your browser: ```bash @@ -149,3 +157,11 @@ kubectl -n parca port-forward svc/parca 7070 & ``` For accessing Parca through its `Ingress`, use the basic auth password for the `parca` user from `hack/config/profiling/parca_password.secret.txt`. + +Note that the Parca deployment doesn't implement retention for profiling data. +I.e., the Parca data volume will grow indefinitely as long as Parca is running. +To shut down Parca and destroy its persistent volume after analyzing the collected profiles, use the following command: + +```bash +make down SKAFFOLD_MODULE=profiling SKAFFOLD_PROFILE=profiling +``` diff --git a/hack/cmd/janitor/main.go b/hack/cmd/janitor/main.go deleted file mode 100644 index 6c05d8e4..00000000 --- a/hack/cmd/janitor/main.go +++ /dev/null @@ -1,98 +0,0 @@ -/* -Copyright 2023 Tim Ebert. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -*/ - -package main - -import ( - "fmt" - "log" - "os" - "path/filepath" - "time" - - "github.com/spf13/cobra" - "sigs.k8s.io/controller-runtime/pkg/manager/signals" -) - -func main() { - var ( - maxAge time.Duration - ) - - cmd := &cobra.Command{ - Use: "janitor path", - Short: "janitor is a stupidly simple storage retention tool", - Long: `janitor deletes everything in the given directory that has a modification timestamp older than a given retention time.`, - - SilenceErrors: true, - Args: cobra.ExactArgs(1), - RunE: func(cmd *cobra.Command, args []string) error { - path := args[0] - if path == "" { - return fmt.Errorf("path must not be empty") - } - if maxAge <= 0 { - return fmt.Errorf("max-age must be greater than 0") - } - - cmd.SilenceUsage = true - - return run(path, time.Now().Add(-maxAge)) - }, - } - - cmd.Flags().DurationVar(&maxAge, "max-age", 0, "Maximum age of files and directories to keep. 
Elements with an older modification timestamp are deleted.") - - if err := cmd.ExecuteContext(signals.SetupSignalHandler()); err != nil { - log.Fatalln(err) - } -} - -func run(dir string, cleanBefore time.Time) error { - performedCleanup := false - - elements, err := os.ReadDir(dir) - if err != nil { - return err - } - - for _, element := range elements { - path := filepath.Join(dir, element.Name()) - - info, err := element.Info() - if err != nil { - return fmt.Errorf("failed to get info for %q: %w", path, err) - } - - modTime := info.ModTime() - if !modTime.Before(cleanBefore) { - continue - } - - log.Printf("Cleaning %s (last modified: %s)", path, modTime.UTC().Format(time.RFC3339)) - - if err := os.RemoveAll(path); err != nil { - return fmt.Errorf("failed to remove %q: %w", path, err) - } - performedCleanup = true - } - - if !performedCleanup { - log.Println("Nothing to clean") - } - - return nil -} diff --git a/hack/config/profiling/janitor_cronjob.yaml b/hack/config/profiling/janitor_cronjob.yaml deleted file mode 100644 index f44a2802..00000000 --- a/hack/config/profiling/janitor_cronjob.yaml +++ /dev/null @@ -1,51 +0,0 @@ -apiVersion: batch/v1 -kind: CronJob -metadata: - name: janitor - namespace: parca -spec: - concurrencyPolicy: Forbid - schedule: "*/10 * * * *" - jobTemplate: - spec: - template: - metadata: - labels: - app.kubernetes.io/component: janitor - app.kubernetes.io/instance: parca - app.kubernetes.io/name: parca - spec: - restartPolicy: Never - containers: - - name: janitor - image: janitor - args: - - /var/lib/parca/blocks/parca/stacktraces - - --max-age=48h - volumeMounts: - - mountPath: /var/lib/parca - name: parca - securityContext: - fsGroupChangePolicy: OnRootMismatch - fsGroup: 65534 - runAsNonRoot: true - runAsUser: 65534 - seccompProfile: - type: RuntimeDefault - supplementalGroups: - - 65534 - volumes: - - name: parca - persistentVolumeClaim: - claimName: parca - # Require janitor to be scheduled to the same node as parca itself 
because the volume is RWO and cannot be - # attached to multiple nodes simultaneously. - affinity: - podAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - topologyKey: kubernetes.io/hostname - labelSelector: - matchLabels: - app.kubernetes.io/component: observability - app.kubernetes.io/instance: parca - app.kubernetes.io/name: parca diff --git a/hack/config/profiling/kustomization.yaml b/hack/config/profiling/kustomization.yaml index 112c0bcc..41a3381c 100644 --- a/hack/config/profiling/kustomization.yaml +++ b/hack/config/profiling/kustomization.yaml @@ -8,12 +8,6 @@ resources: # grant parca running in namespace "parca" permissions required for service discovery in namespace # "sharding-system" and scrape the pprof endpoints of sharder - rbac_sharder.yaml -- janitor_cronjob.yaml - -images: -- name: janitor - newName: ghcr.io/timebertt/kubernetes-controller-sharding/janitor - newTag: latest generatorOptions: disableNameSuffixHash: true diff --git a/hack/config/skaffold.yaml b/hack/config/skaffold.yaml index d106ca84..987ed145 100644 --- a/hack/config/skaffold.yaml +++ b/hack/config/skaffold.yaml @@ -282,41 +282,30 @@ apiVersion: skaffold/v4beta10 kind: Config metadata: name: profiling -build: - artifacts: - - image: ghcr.io/timebertt/kubernetes-controller-sharding/janitor - ko: - dependencies: - paths: - - go.mod - - ./hack/cmd/janitor/**/*.go - main: ./hack/cmd/janitor - tagPolicy: - inputDigest: {} - local: - concurrency: 0 -manifests: - kustomize: - paths: - - hack/config/profiling - hooks: - before: - - host: - command: - - hack/config/profiling/ensure-admin-password.sh -deploy: - kubectl: - flags: - apply: - - --server-side - - --force-conflicts - defaultNamespace: "" -portForward: - - resourceType: service - resourceName: parca - namespace: parca - port: http - localPort: 7071 +profiles: +- name: profiling + manifests: + kustomize: + paths: + - hack/config/profiling + hooks: + before: + - host: + command: + - 
hack/config/profiling/ensure-admin-password.sh + deploy: + kubectl: + flags: + apply: + - --server-side + - --force-conflicts + defaultNamespace: "" + portForward: + - resourceType: service + resourceName: parca + namespace: parca + port: http + localPort: 7071 --- apiVersion: skaffold/v4beta10 kind: Config