diff --git a/webhosting-operator/.run/webhosting-operator process (kind).run.xml b/webhosting-operator/.run/webhosting-operator process (kind).run.xml index 7632fb8f..2deca6af 100644 --- a/webhosting-operator/.run/webhosting-operator process (kind).run.xml +++ b/webhosting-operator/.run/webhosting-operator process (kind).run.xml @@ -3,10 +3,11 @@ [XML hunk body lost in rendering: two removed and three added lines survive only as bare -/+ markers] diff --git a/webhosting-operator/Makefile b/webhosting-operator/Makefile index 8dd78cea..d2ee606a 100644 --- a/webhosting-operator/Makefile +++ b/webhosting-operator/Makefile @@ -59,20 +59,27 @@ generate: $(CONTROLLER_GEN) ## Generate code containing DeepCopy, DeepCopyInto, fmt: ## Run go fmt against code. go fmt ./... -.PHONY: vet -vet: ## Run go vet against code. - go vet ./... - .PHONY: modules modules: ## Runs go mod to ensure modules are up to date. go mod tidy .PHONY: test -test: $(SETUP_ENVTEST) manifests generate fmt vet ## Run tests. +test: $(SETUP_ENVTEST) ## Run tests. KUBEBUILDER_ASSETS="$(shell $(SETUP_ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test -race ./... +.PHONY: test-kyverno +test-kyverno: $(KYVERNO) ## Run kyverno policy tests. + $(KYVERNO) test --remove-color -v 4 . + ##@ Verification +.PHONY: vet +vet: ## Run go vet against code. + go vet ./... + +.PHONY: check +check: vet test test-kyverno ## Check everything (vet + test + test-kyverno). + .PHONY: verify-fmt verify-fmt: fmt ## Verify go code is formatted. @if !(git diff --quiet HEAD); then \ @@ -92,7 +99,7 @@ verify-modules: modules ## Verify go module files are up to date. fi .PHONY: verify -verify: verify-fmt verify-generate verify-modules test ## Verify everything (all verify-* rules + test). +verify: verify-fmt verify-generate verify-modules check ## Verify everything (all verify-* rules + check). ##@ Build @@ -127,10 +134,10 @@ kind-down: $(KIND) ## Tear down the kind testing cluster. $(KIND) delete cluster --name webhosting .PHONY: deploy-ingress-nginx -deploy-ingress-nginx: $(KUSTOMIZE) $(KUBECTL) ## Deploy ingress-nginx to K8s cluster specified in $KUBECONFIG. +deploy-ingress-nginx: $(KUBECTL) ## Deploy ingress-nginx to K8s cluster specified in $KUBECONFIG. @# job template is immutable, delete old jobs to prepare for upgrade $(KUBECTL) -n ingress-nginx delete job --ignore-not-found ingress-nginx-admission-create ingress-nginx-admission-patch - $(KUSTOMIZE) build config/ingress-nginx/$(OVERLAY) | $(KUBECTL) apply --server-side -f - + $(KUBECTL) apply --server-side -k config/ingress-nginx/$(OVERLAY) $(KUBECTL) -n ingress-nginx wait deploy ingress-nginx-controller --for=condition=Available --timeout=2m # use static label for skaffold to prevent rolling all components on every skaffold invocation diff --git a/webhosting-operator/README.md b/webhosting-operator/README.md index f9f40d51..1319bddd 100644 --- a/webhosting-operator/README.md +++ b/webhosting-operator/README.md @@ -154,7 +154,7 @@ You've already deployed a customized installation of [kube-prometheus](https://g ```bash # get the grafana admin password -cat config/monitoring/default/grafana_admin_pass.secret.txt +cat config/monitoring/default/grafana_admin_password.secret.txt ``` Now, visit your [local webhosting dashboard](http://127.0.0.1:3000/d/NbmNpqEnk/webhosting?orgId=1) at http://127.0.0.1:3000. 
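The relocated `test` target above relies on `setup-envtest` to provision control plane binaries and hands their location to the tests via `KUBEBUILDER_ASSETS`. For reference, a minimal envtest suite sketch, assuming the standard controller-runtime envtest API (package name and test are illustrative, not taken from this repository):

```go
package webhosting_test

import (
	"testing"

	"sigs.k8s.io/controller-runtime/pkg/envtest"
)

// TestAPIs starts a local control plane. envtest locates the kube-apiserver and
// etcd binaries via KUBEBUILDER_ASSETS, which the Makefile exports from
// `setup-envtest use $(ENVTEST_K8S_VERSION) -p path`.
func TestAPIs(t *testing.T) {
	testEnv := &envtest.Environment{}

	cfg, err := testEnv.Start()
	if err != nil {
		t.Fatalf("starting test environment: %v", err)
	}
	t.Cleanup(func() {
		if err := testEnv.Stop(); err != nil {
			t.Errorf("stopping test environment: %v", err)
		}
	})

	_ = cfg // *rest.Config for building clients against the test API server
}
```

With `KUBEBUILDER_ASSETS` exported, `go test -race ./...` runs against a real API server without needing a cluster.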
diff --git a/webhosting-operator/config/external-dns/kustomization.yaml b/webhosting-operator/config/external-dns/kustomization.yaml index c346f391..6a71b244 100644 --- a/webhosting-operator/config/external-dns/kustomization.yaml +++ b/webhosting-operator/config/external-dns/kustomization.yaml @@ -5,11 +5,11 @@ namespace: external-dns images: - name: registry.k8s.io/external-dns/external-dns - newTag: v0.13.5 + newTag: v0.13.6 resources: - namespace.yaml -- https://github.com/kubernetes-sigs/external-dns//kustomize?ref=v0.13.5 +- https://github.com/kubernetes-sigs/external-dns//kustomize?ref=v0.13.6 patches: - path: patch-deployment.yaml diff --git a/webhosting-operator/config/external-dns/patch-deployment.yaml b/webhosting-operator/config/external-dns/patch-deployment.yaml index 39b05b5b..5d2aa40d 100644 --- a/webhosting-operator/config/external-dns/patch-deployment.yaml +++ b/webhosting-operator/config/external-dns/patch-deployment.yaml @@ -18,8 +18,10 @@ spec: - --google-zone-visibility=public - --policy=sync - --registry=txt - - --txt-owner-id=timebertt-fb28d21f90-sharding + - --txt-owner-id=shoot--timebertt--sharding-0e61b9e9-b7ce-4a71-a502-89f366015617-ond-460a37 - --interval=1m + # ensure the records are not owned by short-lived acme solvers managed by cert-manager or website ingresses + - --label-filter=acme.cert-manager.io/http01-solver!=true,app!=website env: - name: GOOGLE_APPLICATION_CREDENTIALS value: /etc/secrets/service-account/service-account.json diff --git a/webhosting-operator/config/manager/default/manager.yaml b/webhosting-operator/config/manager/default/manager.yaml index e7f2958d..e355f4a1 100644 --- a/webhosting-operator/config/manager/default/manager.yaml +++ b/webhosting-operator/config/manager/default/manager.yaml @@ -16,6 +16,8 @@ spec: containers: - name: manager image: controller:latest + args: + - --zap-log-level=info ports: - name: metrics containerPort: 8080 diff --git a/webhosting-operator/config/manager/with-dns/manager_patch.yaml b/webhosting-operator/config/manager/with-dns/manager_patch.yaml index db87db89..8f72b0ae 100644 --- a/webhosting-operator/config/manager/with-dns/manager_patch.yaml +++ b/webhosting-operator/config/manager/with-dns/manager_patch.yaml @@ -9,6 +9,7 @@ spec: containers: - name: manager args: + - --zap-log-level=info - --config=/config.yaml volumeMounts: - name: config diff --git a/webhosting-operator/config/monitoring/default/ensure-admin-password.sh b/webhosting-operator/config/monitoring/default/ensure-admin-password.sh new file mode 100755 index 00000000..33f95a33 --- /dev/null +++ b/webhosting-operator/config/monitoring/default/ensure-admin-password.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +dir="$(dirname "$0")" +file="$dir/grafana_admin_password.secret.txt" + +[ -f "$file" ] && exit 0 +cat /dev/urandom | tr -dc "a-zA-Z0-9" | head -c 32 > "$file" diff --git a/webhosting-operator/config/monitoring/default/kustomization.yaml b/webhosting-operator/config/monitoring/default/kustomization.yaml index bf3078b4..760652bb 100644 --- a/webhosting-operator/config/monitoring/default/kustomization.yaml +++ b/webhosting-operator/config/monitoring/default/kustomization.yaml @@ -31,8 +31,10 @@ configMapGenerator: secretGenerator: - name: grafana-admin namespace: monitoring + literals: + - username=admin files: - - password=grafana_admin_pass.secret.txt + - password=grafana_admin_password.secret.txt patches: - path: patch_grafana_admin.yaml @@ -58,10 +60,3 @@ patches: kind: Deployment name: kube-state-metrics namespace: monitoring -- path: 
patch_filter_nodeexporter_metrics.yaml - target: - group: monitoring.coreos.com - version: v1 - kind: ServiceMonitor - name: node-exporter - namespace: monitoring diff --git a/webhosting-operator/config/monitoring/default/patch_filter_kubelet_metrics.yaml b/webhosting-operator/config/monitoring/default/patch_filter_kubelet_metrics.yaml index 1d396efb..4271a423 100644 --- a/webhosting-operator/config/monitoring/default/patch_filter_kubelet_metrics.yaml +++ b/webhosting-operator/config/monitoring/default/patch_filter_kubelet_metrics.yaml @@ -26,11 +26,3 @@ sourceLabels: [namespace] regex: project-.+ action: drop -# drop cadvisor network metrics for pod network interfaces on websites workers -# (not all metrics have a namespace label, so we can't drop them granularly) -- op: add - path: /spec/endpoints/1/metricRelabelings/- - value: - sourceLabels: [__name__, node, interface] - regex: "container_network_.+;.*fb28d21f90--sharding-websites-.+;cali.+" - action: drop diff --git a/webhosting-operator/config/monitoring/default/patch_filter_nodeexporter_metrics.yaml b/webhosting-operator/config/monitoring/default/patch_filter_nodeexporter_metrics.yaml deleted file mode 100644 index 63166388..00000000 --- a/webhosting-operator/config/monitoring/default/patch_filter_nodeexporter_metrics.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# drop filesystem metrics for websites workers (high cardinality and memory usage of mountpoint and device label) -- op: add - path: /spec/endpoints/0/metricRelabelings - value: [] -- op: add - path: /spec/endpoints/0/metricRelabelings/- - value: - sourceLabels: [__name__, instance] - regex: "node_filesystem_.+;.*fb28d21f90--sharding-websites-.+" - action: drop diff --git a/webhosting-operator/config/policy/controlplane/etcd-main.yaml b/webhosting-operator/config/policy/controlplane/etcd-main.yaml new file mode 100644 index 00000000..aeae194b --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/etcd-main.yaml @@ -0,0 +1,50 @@ +apiVersion: kyverno.io/v1 +kind: Policy +metadata: + name: etcd-main + namespace: shoot--timebertt--sharding +spec: + failurePolicy: Fail + rules: + # set static requests/limits on etcd-main to ensure similar evaluation environment between load test runs + - name: resources + match: + any: + - resources: + kinds: + - Pod + selector: + matchLabels: + instance: etcd-main + name: etcd + mutate: + patchStrategicMerge: + spec: + containers: + - name: etcd + resources: + requests: + cpu: 12000m + memory: 12Gi + limits: + cpu: 12000m + memory: 12Gi + # schedule etcd-main on high-cpu worker pool for stable performance + - name: add-scheduling-constraints + match: + any: + - resources: + kinds: + - Pod + selector: + matchLabels: + instance: etcd-main + name: etcd + mutate: + patchesJson6902: |- + - op: add + path: "/spec/tolerations/-" + value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"} + - op: add + path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms/-" + value: {"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]} diff --git a/webhosting-operator/config/policy/controlplane/kube-apiserver-scale.yaml b/webhosting-operator/config/policy/controlplane/kube-apiserver-scale.yaml new file mode 100644 index 00000000..fe8eda77 --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/kube-apiserver-scale.yaml @@ -0,0 +1,31 @@ +apiVersion: kyverno.io/v1 +kind: Policy +metadata: + name: kube-apiserver-scale + namespace: shoot--timebertt--sharding +spec: + 
failurePolicy: Ignore + # schema validation doesn't seem to work in combination with the /scale subresource, disable it for now + schemaValidation: false + rules: + # set static replicas on kube-apiserver to ensure similar evaluation environment between load test runs + - name: replicas-scale + match: + any: + - resources: + # mutate scale requests by HPA + kinds: + - Deployment/scale + # the Scale subresource doesn't have the original resource's labels -> we have to match by name + names: + - kube-apiserver + preconditions: + all: + # Only patch spec.replicas if the control plane is not hibernated, i.e., if spec.replicas>=1. + - key: "{{ request.object.spec.replicas || `1` }}" + operator: GreaterThan + value: 0 + mutate: + patchStrategicMerge: + spec: + replicas: 1 diff --git a/webhosting-operator/config/policy/controlplane/kube-apiserver.yaml b/webhosting-operator/config/policy/controlplane/kube-apiserver.yaml new file mode 100644 index 00000000..f74ab6a3 --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/kube-apiserver.yaml @@ -0,0 +1,74 @@ +apiVersion: kyverno.io/v1 +kind: Policy +metadata: + name: kube-apiserver + namespace: shoot--timebertt--sharding +spec: + failurePolicy: Fail + rules: + # set static replicas on kube-apiserver to ensure similar evaluation environment between load test runs + # if the cluster is hibernated (spec.replicas=0), this rule is skipped + - name: replicas + match: + any: + - resources: + kinds: + - Deployment + selector: + matchLabels: + app: kubernetes + role: apiserver + preconditions: + all: + # Only patch spec.replicas if the control plane is not hibernated, i.e., if spec.replicas>=1. + # NB: gardenlet deploys kube-apiserver with spec.replicas=null which is defaulted after the policy webhook call + # to spec.replicas=1. Hence, treat spec.replicas=null the same way as spec.replicas=1. 
+ - key: "{{ request.object.spec.replicas || `1` }}" + operator: GreaterThan + value: 0 + mutate: + patchStrategicMerge: + spec: + replicas: 1 + # set static requests/limits on kube-apiserver to ensure similar evaluation environment between load test runs + - name: resources + match: + any: + - resources: + kinds: + - Pod + selector: + matchLabels: + app: kubernetes + role: apiserver + mutate: + patchStrategicMerge: + spec: + containers: + - name: kube-apiserver + resources: + requests: + cpu: 12000m + memory: 12Gi + limits: + cpu: 12000m + memory: 12Gi + # schedule kube-apiserver on high-cpu worker pool for stable performance + - name: add-scheduling-constraints + match: + any: + - resources: + kinds: + - Pod + selector: + matchLabels: + app: kubernetes + role: apiserver + mutate: + patchesJson6902: |- + - op: add + path: "/spec/tolerations/-" + value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"} + - op: add + path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms/-" + value: {"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]} diff --git a/webhosting-operator/config/policy/controlplane/policy-kube-controller-manager.yaml b/webhosting-operator/config/policy/controlplane/kube-controller-manager.yaml similarity index 94% rename from webhosting-operator/config/policy/controlplane/policy-kube-controller-manager.yaml rename to webhosting-operator/config/policy/controlplane/kube-controller-manager.yaml index 2e551951..9a0a0c78 100644 --- a/webhosting-operator/config/policy/controlplane/policy-kube-controller-manager.yaml +++ b/webhosting-operator/config/policy/controlplane/kube-controller-manager.yaml @@ -2,7 +2,7 @@ apiVersion: kyverno.io/v1 kind: Policy metadata: name: kube-controller-manager - namespace: shoot--fb28d21f90--sharding + namespace: shoot--timebertt--sharding spec: failurePolicy: Ignore rules: diff --git a/webhosting-operator/config/policy/controlplane/kustomization.yaml b/webhosting-operator/config/policy/controlplane/kustomization.yaml index eaff9569..f854e40e 100644 --- a/webhosting-operator/config/policy/controlplane/kustomization.yaml +++ b/webhosting-operator/config/policy/controlplane/kustomization.yaml @@ -2,7 +2,10 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization # This kustomization contains policies for manipulating shoot control plane components. -# If the seed doesn't have kyverno installed, you can use the patch file alternatively. +# For this to work, kyverno needs to be installed on the seed cluster. 
resources: -- policy-kube-controller-manager.yaml +- etcd-main.yaml +- kube-apiserver.yaml +- kube-apiserver-scale.yaml +- kube-controller-manager.yaml diff --git a/webhosting-operator/config/policy/controlplane/patch_kube-controller-manager_ratelimiter.yaml b/webhosting-operator/config/policy/controlplane/patch_kube-controller-manager_ratelimiter.yaml deleted file mode 100644 index 957b466e..00000000 --- a/webhosting-operator/config/policy/controlplane/patch_kube-controller-manager_ratelimiter.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# patch rate limiter settings of kube-controller-manager in shoot control plane, use with: -# k -n shoot--fb28d21f90--sharding patch deploy kube-controller-manager --type json --patch-file config/patch_kube-controller-manager_ratelimiter.yaml -- op: add - path: /spec/template/spec/containers/0/command/- - value: "--kube-api-qps=800" -- op: add - path: /spec/template/spec/containers/0/command/- - value: "--kube-api-burst=1000" diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/kyverno-test.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/kyverno-test.yaml new file mode 100644 index 00000000..92042fe1 --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/kyverno-test.yaml @@ -0,0 +1,15 @@ +name: kube-apiserver-scale-awake +policies: +- ../../kube-apiserver-scale.yaml +resources: +# spec.replicas=2 -> expect spec.replicas=1 +- scale.yaml +variables: variables.yaml +results: +- policy: kube-apiserver-scale + rule: replicas-scale + resource: kube-apiserver + namespace: shoot--timebertt--sharding + kind: Scale + result: pass + patchedResource: scale_expected.yaml diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale.yaml new file mode 100644 index 00000000..69ae402c --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale.yaml @@ -0,0 +1,7 @@ +kind: Scale +apiVersion: autoscaling/v1 +metadata: + name: kube-apiserver + namespace: shoot--timebertt--sharding +spec: + replicas: 2 diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale_expected.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale_expected.yaml new file mode 100644 index 00000000..9a8da48c --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/scale_expected.yaml @@ -0,0 +1,7 @@ +kind: Scale +apiVersion: autoscaling/v1 +metadata: + name: kube-apiserver + namespace: shoot--timebertt--sharding +spec: + replicas: 1 diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/variables.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/variables.yaml new file mode 100644 index 00000000..7cb80f5e --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-awake/variables.yaml @@ -0,0 +1,11 @@ +subresources: +- subresource: + name: "deployments/scale" + kind: "Scale" + group: "autoscaling" + version: "v1" + parentResource: + name: "deployments" + kind: "Deployment" + group: "apps" + version: "v1" diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/kyverno-test.yaml 
b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/kyverno-test.yaml new file mode 100644 index 00000000..93415ca0 --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/kyverno-test.yaml @@ -0,0 +1,14 @@ +name: kube-apiserver-scale-hibernated +policies: +- ../../kube-apiserver-scale.yaml +resources: +# spec.replicas=0 -> expect skip +- scale.yaml +variables: variables.yaml +results: +- policy: kube-apiserver-scale + rule: replicas-scale + resource: kube-apiserver + namespace: shoot--timebertt--sharding + kind: Scale + result: skip diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/scale.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/scale.yaml new file mode 100644 index 00000000..3e7b0d7d --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/scale.yaml @@ -0,0 +1,7 @@ +kind: Scale +apiVersion: autoscaling/v1 +metadata: + name: kube-apiserver + namespace: shoot--timebertt--sharding +spec: + replicas: 0 diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/variables.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/variables.yaml new file mode 100644 index 00000000..7cb80f5e --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver-scale-hibernated/variables.yaml @@ -0,0 +1,11 @@ +subresources: +- subresource: + name: "deployments/scale" + kind: "Scale" + group: "autoscaling" + version: "v1" + parentResource: + name: "deployments" + kind: "Deployment" + group: "apps" + version: "v1" diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake.yaml new file mode 100644 index 00000000..4eda5fa0 --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: kubernetes + role: apiserver + name: kube-apiserver-awake + namespace: shoot--timebertt--sharding +spec: + replicas: 2 + template: + spec: + containers: + - name: kube-apiserver + resources: + requests: + cpu: 800m + memory: 800Mi diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake_expected.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake_expected.yaml new file mode 100644 index 00000000..07c91f7b --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-awake_expected.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: kubernetes + role: apiserver + name: kube-apiserver-awake + namespace: shoot--timebertt--sharding +spec: + replicas: 1 + template: + spec: + containers: + - name: kube-apiserver + resources: + requests: + cpu: 800m + memory: 800Mi diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-hibernated.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-hibernated.yaml new file mode 100644 index 00000000..38727f81 --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-hibernated.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 
+kind: Deployment +metadata: + labels: + app: kubernetes + role: apiserver + name: kube-apiserver-hibernated + namespace: shoot--timebertt--sharding +spec: + replicas: 0 + template: + spec: + containers: + - name: kube-apiserver + resources: + requests: + cpu: 800m + memory: 800Mi diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null.yaml new file mode 100644 index 00000000..eaa26cb2 --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null.yaml @@ -0,0 +1,17 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: kubernetes + role: apiserver + name: kube-apiserver-null + namespace: shoot--timebertt--sharding +spec: + template: + spec: + containers: + - name: kube-apiserver + resources: + requests: + cpu: 800m + memory: 800Mi diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null_expected.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null_expected.yaml new file mode 100644 index 00000000..0901cc6a --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kube-apiserver-null_expected.yaml @@ -0,0 +1,18 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: kubernetes + role: apiserver + name: kube-apiserver-null + namespace: shoot--timebertt--sharding +spec: + replicas: 1 + template: + spec: + containers: + - name: kube-apiserver + resources: + requests: + cpu: 800m + memory: 800Mi diff --git a/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kyverno-test.yaml b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kyverno-test.yaml new file mode 100644 index 00000000..0185af6a --- /dev/null +++ b/webhosting-operator/config/policy/controlplane/tests/kube-apiserver/kyverno-test.yaml @@ -0,0 +1,31 @@ +name: kube-apiserver +policies: +- ../../kube-apiserver.yaml +resources: +# spec.replicas=2 -> expect spec.replicas=1 +- kube-apiserver-awake.yaml +# spec.replicas=null -> expect spec.replicas=1 +- kube-apiserver-null.yaml +# spec.replicas=0 -> expect skip +- kube-apiserver-hibernated.yaml +results: +- policy: kube-apiserver + rule: replicas + resource: kube-apiserver-awake + namespace: shoot--timebertt--sharding + kind: Deployment + result: pass + patchedResource: kube-apiserver-awake_expected.yaml +- policy: kube-apiserver + rule: replicas + resource: kube-apiserver-null + namespace: shoot--timebertt--sharding + kind: Deployment + result: pass + patchedResource: kube-apiserver-null_expected.yaml +- policy: kube-apiserver + rule: replicas + resource: kube-apiserver-hibernated + namespace: shoot--timebertt--sharding + kind: Deployment + result: skip diff --git a/webhosting-operator/config/policy/default/kustomization.yaml b/webhosting-operator/config/policy/default/kustomization.yaml index b3a9f1db..e609fb24 100644 --- a/webhosting-operator/config/policy/default/kustomization.yaml +++ b/webhosting-operator/config/policy/default/kustomization.yaml @@ -1,5 +1,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization -resources: -- websites-experiment.yaml +resources: [] diff --git a/webhosting-operator/config/policy/default/websites-experiment.yaml b/webhosting-operator/config/policy/default/websites-experiment.yaml deleted file mode 100644 index ae374736..00000000 --- 
a/webhosting-operator/config/policy/default/websites-experiment.yaml +++ /dev/null @@ -1,42 +0,0 @@ -apiVersion: kyverno.io/v1 -kind: ClusterPolicy -metadata: - name: websites-experiment -spec: - failurePolicy: Fail - rules: - # don't actually run website pods in load tests - # otherwise, we would need an immense amount of compute power for running dummy websites - - name: scale-down-websites - match: - any: - - resources: - kinds: - - Deployment - selector: - matchLabels: - app: website - namespaceSelector: - matchLabels: - generated-by: experiment - mutate: - patchStrategicMerge: - spec: - replicas: 0 - # use fake ingress class to prevent overloading ingress controller (this is not what we want to load test) - - name: disable-ingress - match: - any: - - resources: - kinds: - - Ingress - selector: - matchLabels: - app: website - namespaceSelector: - matchLabels: - generated-by: experiment - mutate: - patchStrategicMerge: - spec: - ingressClassName: fake diff --git a/webhosting-operator/config/policy/shoot/kustomization.yaml b/webhosting-operator/config/policy/shoot/kustomization.yaml index eea6ee90..f94da50c 100644 --- a/webhosting-operator/config/policy/shoot/kustomization.yaml +++ b/webhosting-operator/config/policy/shoot/kustomization.yaml @@ -4,4 +4,10 @@ kind: Kustomization resources: - ../default - experiment-scheduling.yaml +- scale-up-worker-experiment.yaml - webhosting-operator-scheduling.yaml + +images: +- name: pause + newName: registry.k8s.io/pause + newTag: "3.7" diff --git a/webhosting-operator/config/policy/shoot/scale-up-worker-experiment.yaml b/webhosting-operator/config/policy/shoot/scale-up-worker-experiment.yaml new file mode 100644 index 00000000..24175e39 --- /dev/null +++ b/webhosting-operator/config/policy/shoot/scale-up-worker-experiment.yaml @@ -0,0 +1,52 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: scale-up-worker + pool: experiment + name: scale-up-worker-experiment + namespace: default +spec: + revisionHistoryLimit: 2 + selector: + matchLabels: + app: scale-up-worker + pool: experiment + template: + metadata: + labels: + app: scale-up-worker + pool: experiment + spec: + containers: + - name: pause + image: pause + priorityClassName: reserve-excess-capacity + tolerations: + - key: dedicated-for + value: experiment + effect: NoSchedule + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: dedicated-for + operator: In + values: + - experiment + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: scale-up-worker + pool: experiment + topologyKey: kubernetes.io/hostname +--- +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: reserve-excess-capacity +description: PriorityClass for reserving excess capacity (over-provisioning) +value: -5 diff --git a/webhosting-operator/config/profiling/ensure-admin-password.sh b/webhosting-operator/config/profiling/ensure-admin-password.sh new file mode 100755 index 00000000..700af606 --- /dev/null +++ b/webhosting-operator/config/profiling/ensure-admin-password.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +dir="$(dirname "$0")" +password_file="$dir/parca_password.secret.txt" +auth_file="$dir/parca_auth.secret.txt" + +[ -f "$password_file" ] && [ -f "$auth_file" ] && exit 0 +cat /dev/urandom | tr -dc "a-zA-Z0-9" | head -c 32 > "$password_file" +cat "$password_file" | htpasswd -i -c "$auth_file" parca diff --git 
a/webhosting-operator/config/profiling/kustomization.yaml b/webhosting-operator/config/profiling/kustomization.yaml index 6c50e90f..22fe45e4 100644 --- a/webhosting-operator/config/profiling/kustomization.yaml +++ b/webhosting-operator/config/profiling/kustomization.yaml @@ -6,6 +6,7 @@ resources: # provide parca running in namespace "parca" with the permissions required for service discovery in namespace # "webhosting-system" and scrape the pprof endpoints of webhosting-operator - parca_rbac.yaml +- parca_ingress.yaml generatorOptions: disableNameSuffixHash: true @@ -21,6 +22,15 @@ configMapGenerator: files: - parca.yaml=parca_config.yaml +secretGenerator: +- name: parca-basic-auth + namespace: parca + literals: + - username=parca + files: + - password=parca_password.secret.txt + - auth=parca_auth.secret.txt + patches: - patch: | apiVersion: policy/v1beta1 diff --git a/webhosting-operator/config/profiling/parca_ingress.yaml b/webhosting-operator/config/profiling/parca_ingress.yaml new file mode 100644 index 00000000..1cc3aeb7 --- /dev/null +++ b/webhosting-operator/config/profiling/parca_ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + cert-manager.io/cluster-issuer: letsencrypt-http01 + nginx.ingress.kubernetes.io/auth-type: basic + nginx.ingress.kubernetes.io/auth-secret: parca-basic-auth + nginx.ingress.kubernetes.io/auth-realm: 'Authentication Required' + labels: + app.kubernetes.io/component: observability + app.kubernetes.io/instance: parca + app.kubernetes.io/name: parca + name: parca + namespace: parca +spec: + ingressClassName: nginx + rules: + - host: parca.webhosting.timebertt.dev + http: + paths: + - backend: + service: + name: parca + port: + name: http + path: / + pathType: Prefix + tls: + - hosts: + - parca.webhosting.timebertt.dev + secretName: parca-tls diff --git a/webhosting-operator/go.mod b/webhosting-operator/go.mod index c94c31ba..72137273 100644 --- a/webhosting-operator/go.mod +++ b/webhosting-operator/go.mod @@ -25,7 +25,7 @@ require ( ) // https://github.com/timebertt/controller-runtime/tree/sharding-0.15 -replace sigs.k8s.io/controller-runtime => github.com/timebertt/controller-runtime v0.6.1-0.20230802071417-292b9ca7912e +replace sigs.k8s.io/controller-runtime => github.com/timebertt/controller-runtime v0.6.1-0.20230910160759-61c8e6d9a2de require ( github.com/beorn7/perks v1.0.1 // indirect diff --git a/webhosting-operator/go.sum b/webhosting-operator/go.sum index e170e240..2ebceddd 100644 --- a/webhosting-operator/go.sum +++ b/webhosting-operator/go.sum @@ -149,8 +149,8 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= -github.com/timebertt/controller-runtime v0.6.1-0.20230802071417-292b9ca7912e h1:t4Gy5cr8/vPUXwyAJkdrJJtchk9ze0I1L9tUA5F49jA= -github.com/timebertt/controller-runtime v0.6.1-0.20230802071417-292b9ca7912e/go.mod h1:dC7gCD9Y6rWUT9YaaCuR/kHLZqppXydmPLCc/2KffF4= +github.com/timebertt/controller-runtime v0.6.1-0.20230910160759-61c8e6d9a2de h1:LxfB3lU0Su0pjmdqB3NsI5oIF4835lhFqPH+RNbJ1mM= +github.com/timebertt/controller-runtime v0.6.1-0.20230910160759-61c8e6d9a2de/go.mod h1:dC7gCD9Y6rWUT9YaaCuR/kHLZqppXydmPLCc/2KffF4= github.com/yuin/goldmark v1.1.27/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= diff --git a/webhosting-operator/pkg/controllers/webhosting/common.go b/webhosting-operator/pkg/controllers/webhosting/common.go index f86c97cf..f5a60b97 100644 --- a/webhosting-operator/pkg/controllers/webhosting/common.go +++ b/webhosting-operator/pkg/controllers/webhosting/common.go @@ -17,7 +17,26 @@ limitations under the License. package webhosting import ( + "context" + + apierrors "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) const fieldOwner = client.FieldOwner("webhosting-operator") + +// SilenceConflicts wraps a reconciler to not return conflict errors. The request is requeued with exponential backoff +// as if an error was returned, but the error will not be logged. +func SilenceConflicts(r reconcile.Reconciler) reconcile.Reconciler { + return reconcile.Func(func(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { + result, err := r.Reconcile(ctx, request) + if apierrors.IsConflict(err) { + result.Requeue = true + // RequeueAfter takes precedence over Requeue; set it to zero in case it was returned alongside a conflict error + result.RequeueAfter = 0 + err = nil + } + return result, err + }) +} diff --git a/webhosting-operator/pkg/controllers/webhosting/website_controller.go b/webhosting-operator/pkg/controllers/webhosting/website_controller.go index 468126ed..605a34fd 100644 --- a/webhosting-operator/pkg/controllers/webhosting/website_controller.go +++ b/webhosting-operator/pkg/controllers/webhosting/website_controller.go @@ -285,6 +285,13 @@ func (r *WebsiteReconciler) IngressForWebsite(serverName string, website *webhos }} applyIngressConfigToIngress(r.Config.Ingress, ingress) + + if isGeneratedByExperiment(website) { + // don't actually expose website ingresses in load tests + // use fake ingress class to prevent overloading ingress controller (this is not what we want to load test) + ingress.Spec.IngressClassName = pointer.String("fake") + } + return ingress, ctrl.SetControllerReference(website, ingress, r.Scheme) } @@ -399,6 +406,12 @@ func (r *WebsiteReconciler) DeploymentForWebsite(serverName string, website *web }, } + if isGeneratedByExperiment(website) { + // don't actually run website pods in load tests + // otherwise, we would need an immense amount of compute power for running dummy websites + deployment.Spec.Replicas = pointer.Int32(0) + } + return deployment, ctrl.SetControllerReference(website, deployment, r.Scheme) } @@ -473,9 +486,9 @@ func (r *WebsiteReconciler) SetupWithManager(mgr ctrl.Manager) error { builder.WithPredicates(predicate.GenerationChangedPredicate{}), ). WithOptions(controller.Options{ - MaxConcurrentReconciles: 5, + MaxConcurrentReconciles: 15, }). 
- Build(r) + Build(SilenceConflicts(r)) if err != nil { return err } @@ -597,3 +610,7 @@ func GetDeploymentCondition(conditions []appsv1.DeploymentCondition, conditionTy } return nil } + +func isGeneratedByExperiment(obj client.Object) bool { + return obj.GetLabels()["generated-by"] == "experiment" +} diff --git a/webhosting-operator/shoot.yaml b/webhosting-operator/shoot.yaml index 60a9b52c..c9d79fd8 100644 --- a/webhosting-operator/shoot.yaml +++ b/webhosting-operator/shoot.yaml @@ -2,7 +2,7 @@ apiVersion: core.gardener.cloud/v1beta1 kind: Shoot metadata: name: sharding - namespace: garden-fb28d21f90-99b0e + namespace: garden-timebertt spec: addons: kubernetesDashboard: @@ -12,15 +12,8 @@ spec: cloudProfileName: stackit extensions: - type: shoot-yawol - hibernation: - schedules: - - start: '00 19 * * 1,2,3,4,5' - end: '30 07 * * 1,2,3,4,5' - location: Europe/Berlin kubernetes: enableStaticTokenKubeconfig: false - kubeAPIServer: - enableBasicAuthentication: false kubeControllerManager: nodeCIDRMaskSize: 20 kubeProxy: @@ -61,7 +54,7 @@ spec: machine: image: name: coreos - type: g1.4 + type: g1a.8d maxSurge: 1 maxUnavailable: 0 maximum: 2 @@ -80,7 +73,7 @@ spec: machine: image: name: coreos - type: g1.3 + type: g1a.4d maxSurge: 1 maxUnavailable: 0 maximum: 1 @@ -105,7 +98,7 @@ spec: machine: image: name: coreos - type: g1.3 + type: g1a.4d maxSurge: 1 maxUnavailable: 0 maximum: 1 @@ -125,4 +118,4 @@ spec: - eu01-2 purpose: development region: RegionOne - secretBindingName: cq12kuc8ir + secretBindingName: timebertt diff --git a/webhosting-operator/skaffold.yaml b/webhosting-operator/skaffold.yaml index fc7ed193..50ff0ecc 100644 --- a/webhosting-operator/skaffold.yaml +++ b/webhosting-operator/skaffold.yaml @@ -36,7 +36,7 @@ deploy: profiles: - name: shoot activation: - - kubeContext: .*fb28d21f90-99b0e--sharding.* + - kubeContext: .*timebertt--sharding.* manifests: kustomize: paths: @@ -91,7 +91,7 @@ profiles: value: config/manager/shard - name: shoot activation: - - kubeContext: .*fb28d21f90-99b0e--sharding.* + - kubeContext: .*timebertt--sharding.* patches: - op: replace path: /manifests/kustomize/paths/0 @@ -179,9 +179,7 @@ manifests: before: - host: command: - - bash - - -c - - '[ -f config/monitoring/default/grafana_admin_pass.secret.txt ] || cat /dev/urandom | tr -dc "a-zA-Z0-9" | head -c 32 > config/monitoring/default/grafana_admin_pass.secret.txt' + - config/monitoring/default/ensure-admin-password.sh deploy: kubectl: defaultNamespace: "" @@ -192,7 +190,7 @@ deploy: profiles: - name: shoot activation: - - kubeContext: .*fb28d21f90-99b0e--sharding.* + - kubeContext: .*timebertt--sharding.* manifests: kustomize: paths: @@ -213,23 +211,25 @@ apiVersion: skaffold/v4beta6 kind: Config metadata: name: profiling -# profiling config disabled by default, can be enabled with profile "profiling" -profiles: -- name: profiling - manifests: - kustomize: - paths: - - config/profiling - deploy: - kubectl: - defaultNamespace: "" - flags: - apply: - - --server-side - - --force-conflicts - portForward: - - resourceType: service - namespace: parca - resourceName: parca - port: http - localPort: 7070 +manifests: + kustomize: + paths: + - config/profiling + hooks: + before: + - host: + command: + - config/profiling/ensure-admin-password.sh +deploy: + kubectl: + defaultNamespace: "" + flags: + apply: + - --server-side + - --force-conflicts +portForward: +- resourceType: service + namespace: parca + resourceName: parca + port: http + localPort: 7070 diff --git a/webhosting-operator/tools.mk 
b/webhosting-operator/tools.mk index cad5361f..fe034fab 100644 --- a/webhosting-operator/tools.mk +++ b/webhosting-operator/tools.mk @@ -25,7 +25,7 @@ $(CONTROLLER_GEN): $(call tool_version_file,$(CONTROLLER_GEN),$(CONTROLLER_GEN_V KIND := $(TOOLS_BIN_DIR)/kind KIND_VERSION ?= v0.20.0 $(KIND): $(call tool_version_file,$(KIND),$(KIND_VERSION)) - curl -L -o $(KIND) https://kind.sigs.k8s.io/dl/$(KIND_VERSION)/kind-$(shell uname -s | tr '[:upper:]' '[:lower:]')-$(shell uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') + curl -Lo $(KIND) https://kind.sigs.k8s.io/dl/$(KIND_VERSION)/kind-$(shell uname -s | tr '[:upper:]' '[:lower:]')-$(shell uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') chmod +x $(KIND) KO := $(TOOLS_BIN_DIR)/ko @@ -39,10 +39,11 @@ $(KUBECTL): $(call tool_version_file,$(KUBECTL),$(KUBECTL_VERSION)) curl -Lo $(KUBECTL) https://dl.k8s.io/release/$(KUBECTL_VERSION)/bin/$(shell uname -s | tr '[:upper:]' '[:lower:]')/$(shell uname -m | sed 's/x86_64/amd64/')/kubectl chmod +x $(KUBECTL) -KUSTOMIZE := $(TOOLS_BIN_DIR)/kustomize -KUSTOMIZE_VERSION ?= v5.1.0 -$(KUSTOMIZE): $(call tool_version_file,$(KUSTOMIZE),$(KUSTOMIZE_VERSION)) - GOBIN=$(abspath $(TOOLS_BIN_DIR)) go install sigs.k8s.io/kustomize/kustomize/v5@$(KUSTOMIZE_VERSION) +KYVERNO := $(TOOLS_BIN_DIR)/kyverno +KYVERNO_VERSION ?= v1.10.3 +$(KYVERNO): $(call tool_version_file,$(KYVERNO),$(KYVERNO_VERSION)) + curl -Lo - https://github.com/kyverno/kyverno/releases/download/$(KYVERNO_VERSION)/kyverno-cli_$(KYVERNO_VERSION)_$(shell uname -s | tr '[:upper:]' '[:lower:]')_$(shell uname -m | sed 's/aarch64/arm64/').tar.gz | tar -xzmf - -C $(TOOLS_BIN_DIR) kyverno + chmod +x $(KYVERNO) GINKGO := $(TOOLS_BIN_DIR)/ginkgo $(GINKGO): go.mod @@ -61,5 +62,5 @@ $(SKAFFOLD): $(call tool_version_file,$(SKAFFOLD),$(SKAFFOLD_VERSION)) YQ := $(TOOLS_BIN_DIR)/yq YQ_VERSION ?= v4.34.2 $(YQ): $(call tool_version_file,$(YQ),$(YQ_VERSION)) - curl -L -o $(YQ) https://github.com/mikefarah/yq/releases/download/$(YQ_VERSION)/yq_$(shell uname -s | tr '[:upper:]' '[:lower:]')_$(shell uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') + curl -Lo $(YQ) https://github.com/mikefarah/yq/releases/download/$(YQ_VERSION)/yq_$(shell uname -s | tr '[:upper:]' '[:lower:]')_$(shell uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') chmod +x $(YQ)
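To make the behavior of the new `SilenceConflicts` wrapper from common.go concrete, a hedged test sketch follows; the API group `webhosting.timebertt.dev` and placing the test in the `webhosting` package are assumptions for illustration, not part of this diff:

```go
package webhosting

import (
	"context"
	"testing"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// TestSilenceConflicts checks that a conflict error is swallowed and the request
// is requeued with backoff: Requeue is set and RequeueAfter is reset to zero.
func TestSilenceConflicts(t *testing.T) {
	// assumed group/resource, for illustration only
	conflict := apierrors.NewConflict(
		schema.GroupResource{Group: "webhosting.timebertt.dev", Resource: "websites"},
		"homepage", nil)

	// inner reconciler always fails with a conflict and asks for a delayed requeue
	inner := reconcile.Func(func(_ context.Context, _ reconcile.Request) (reconcile.Result, error) {
		return reconcile.Result{RequeueAfter: time.Minute}, conflict
	})

	result, err := SilenceConflicts(inner).Reconcile(context.Background(), reconcile.Request{})
	if err != nil {
		t.Errorf("expected conflict to be silenced, got: %v", err)
	}
	if !result.Requeue || result.RequeueAfter != 0 {
		t.Errorf("expected Requeue=true and RequeueAfter=0, got %+v", result)
	}
}
```

This matches the intent stated in the wrapper's comment: a conflict still triggers an exponential-backoff requeue (Requeue=true with RequeueAfter reset), but no error is returned, so nothing is logged.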