Skip to content

Commit

Permalink
Merge pull request #25 from timebertt/stable-experiments
Browse files Browse the repository at this point in the history
Stable experiments
  • Loading branch information
timebertt authored Sep 15, 2023
2 parents 55ccf6a + 2c71755 commit f07f223
Show file tree
Hide file tree
Showing 44 changed files with 578 additions and 144 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
<module name="kubernetes-controller-sharding" />
<working_directory value="$PROJECT_DIR$/webhosting-operator" />
<envs>
<env name="KUBECONFIG" value="$PROJECT_DIR$/webhosting-operator/dev/kind_kubeconfig.yaml" />
<env name="LEADER_ELECT" value="false" />
<env name="SHARD_ID" value="webhosting-operator-0" />
<env name="SHARD_MODE" value="sharder" />
<env name="SHARD_MODE" value="both" />
<env name="SHARDING_ENABLED" value="true" />
<env name="KUBECONFIG" value="$PROJECT_DIR$/webhosting-operator/dev/kind_kubeconfig.yaml" />
</envs>
<kind value="PACKAGE" />
<package value="github.com/timebertt/kubernetes-controller-sharding/webhosting-operator/cmd/webhosting-operator" />
Expand Down
23 changes: 15 additions & 8 deletions webhosting-operator/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,20 +59,27 @@ generate: $(CONTROLLER_GEN) ## Generate code containing DeepCopy, DeepCopyInto,
fmt: ## Run go fmt against code.
go fmt ./...

.PHONY: vet
vet: ## Run go vet against code.
go vet ./...

.PHONY: modules
modules: ## Runs go mod to ensure modules are up to date.
go mod tidy

.PHONY: test
test: $(SETUP_ENVTEST) manifests generate fmt vet ## Run tests.
test: $(SETUP_ENVTEST) ## Run tests.
KUBEBUILDER_ASSETS="$(shell $(SETUP_ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" go test -race ./...

.PHONY: test-kyverno
test-kyverno: $(KYVERNO) ## Run kyverno policy tests.
$(KYVERNO) test --remove-color -v 4 .

##@ Verification

.PHONY: vet
vet: ## Run go vet against code.
go vet ./...

.PHONY: check
check: vet test test-kyverno ## Check everything (vet + test + test-kyverno).

.PHONY: verify-fmt
verify-fmt: fmt ## Verify go code is formatted.
@if !(git diff --quiet HEAD); then \
Expand All @@ -92,7 +99,7 @@ verify-modules: modules ## Verify go module files are up to date.
fi

.PHONY: verify
verify: verify-fmt verify-generate verify-modules test ## Verify everything (all verify-* rules + test).
verify: verify-fmt verify-generate verify-modules check ## Verify everything (all verify-* rules + check).

##@ Build

Expand Down Expand Up @@ -127,10 +134,10 @@ kind-down: $(KIND) ## Tear down the kind testing cluster.
$(KIND) delete cluster --name webhosting

.PHONY: deploy-ingress-nginx
deploy-ingress-nginx: $(KUSTOMIZE) $(KUBECTL) ## Deploy ingress-nginx to K8s cluster specified in $KUBECONFIG.
deploy-ingress-nginx: $(KUBECTL) ## Deploy ingress-nginx to K8s cluster specified in $KUBECONFIG.
@# job template is immutable, delete old jobs to prepare for upgrade
$(KUBECTL) -n ingress-nginx delete job --ignore-not-found ingress-nginx-admission-create ingress-nginx-admission-patch
$(KUSTOMIZE) build config/ingress-nginx/$(OVERLAY) | $(KUBECTL) apply --server-side -f -
$(KUBECTL) apply --server-side -k config/ingress-nginx/$(OVERLAY)
$(KUBECTL) -n ingress-nginx wait deploy ingress-nginx-controller --for=condition=Available --timeout=2m

# use static label for skaffold to prevent rolling all components on every skaffold invocation
Expand Down
2 changes: 1 addition & 1 deletion webhosting-operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ You've already deployed a customized installation of [kube-prometheus](https://g

```bash
# get the grafana admin password
cat config/monitoring/default/grafana_admin_pass.secret.txt
cat config/monitoring/default/grafana_admin_password.secret.txt
```

Now, visit your [local webhosting dashboard](http://127.0.0.1:3000/d/NbmNpqEnk/webhosting?orgId=1) at http://127.0.0.1:3000.
Expand Down
4 changes: 2 additions & 2 deletions webhosting-operator/config/external-dns/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ namespace: external-dns

images:
- name: registry.k8s.io/external-dns/external-dns
newTag: v0.13.5
newTag: v0.13.6

resources:
- namespace.yaml
- https://github.com/kubernetes-sigs/external-dns//kustomize?ref=v0.13.5
- https://github.com/kubernetes-sigs/external-dns//kustomize?ref=v0.13.6

patches:
- path: patch-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ spec:
- --google-zone-visibility=public
- --policy=sync
- --registry=txt
- --txt-owner-id=timebertt-fb28d21f90-sharding
- --txt-owner-id=shoot--timebertt--sharding-0e61b9e9-b7ce-4a71-a502-89f366015617-ond-460a37
- --interval=1m
# ensure the records are not owned by short-lived acme solvers managed by cert-manager or website ingresses
- --label-filter=acme.cert-manager.io/http01-solver!=true,app!=website
env:
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /etc/secrets/service-account/service-account.json
Expand Down
2 changes: 2 additions & 0 deletions webhosting-operator/config/manager/default/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ spec:
containers:
- name: manager
image: controller:latest
args:
- --zap-log-level=info
ports:
- name: metrics
containerPort: 8080
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ spec:
containers:
- name: manager
args:
- --zap-log-level=info
- --config=/config.yaml
volumeMounts:
- name: config
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash

# Ensures a grafana admin password file exists next to this script.
# The file is consumed by the kustomize secretGenerator for the grafana-admin
# secret (files: password=grafana_admin_password.secret.txt); generating it
# only once keeps the password stable across repeated invocations.

dir="$(dirname "$0")"
file="$dir/grafana_admin_password.secret.txt"

# keep an existing password (idempotent)
[ -f "$file" ] && exit 0

# generate 32 random alphanumeric characters;
# read /dev/urandom directly instead of piping it through cat
tr -dc "a-zA-Z0-9" < /dev/urandom | head -c 32 > "$file"
11 changes: 3 additions & 8 deletions webhosting-operator/config/monitoring/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ configMapGenerator:
secretGenerator:
- name: grafana-admin
namespace: monitoring
literals:
- username=admin
files:
- password=grafana_admin_pass.secret.txt
- password=grafana_admin_password.secret.txt

patches:
- path: patch_grafana_admin.yaml
Expand All @@ -58,10 +60,3 @@ patches:
kind: Deployment
name: kube-state-metrics
namespace: monitoring
- path: patch_filter_nodeexporter_metrics.yaml
target:
group: monitoring.coreos.com
version: v1
kind: ServiceMonitor
name: node-exporter
namespace: monitoring
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,3 @@
sourceLabels: [namespace]
regex: project-.+
action: drop
# drop cadvisor network metrics for pod network interfaces on websites workers
# (not all metrics have a namespace label, so we can't drop them granularly)
- op: add
path: /spec/endpoints/1/metricRelabelings/-
value:
sourceLabels: [__name__, node, interface]
regex: "container_network_.+;.*fb28d21f90--sharding-websites-.+;cali.+"
action: drop

This file was deleted.

50 changes: 50 additions & 0 deletions webhosting-operator/config/policy/controlplane/etcd-main.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Kyverno mutating policy for the etcd-main pods of the shoot control plane
# (namespace shoot--timebertt--sharding on the seed cluster).
# NOTE(review): indentation reconstructed — the captured copy had lost all
# leading whitespace, which makes YAML invalid; content is unchanged.
apiVersion: kyverno.io/v1
kind: Policy
metadata:
  name: etcd-main
  namespace: shoot--timebertt--sharding
spec:
  # fail the admission request if the policy cannot be applied
  failurePolicy: Fail
  rules:
  # set static requests/limits on etcd-main to ensure similar evaluation environment between load test runs
  - name: resources
    match:
      any:
      - resources:
          kinds:
          - Pod
          selector:
            matchLabels:
              instance: etcd-main
              name: etcd
    mutate:
      patchStrategicMerge:
        spec:
          containers:
          - name: etcd
            resources:
              requests:
                cpu: 12000m
                memory: 12Gi
              limits:
                cpu: 12000m
                memory: 12Gi
  # schedule etcd-main on high-cpu worker pool for stable performance
  - name: add-scheduling-constraints
    match:
      any:
      - resources:
          kinds:
          - Pod
          selector:
            matchLabels:
              instance: etcd-main
              name: etcd
    mutate:
      patchesJson6902: |-
        - op: add
          path: "/spec/tolerations/-"
          value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"}
        - op: add
          path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms/-"
          value: {"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Kyverno mutating policy pinning kube-apiserver replicas via the /scale
# subresource (i.e. scale requests issued by the HPA).
# NOTE(review): indentation reconstructed — the captured copy had lost all
# leading whitespace, which makes YAML invalid; content is unchanged.
apiVersion: kyverno.io/v1
kind: Policy
metadata:
  name: kube-apiserver-scale
  namespace: shoot--timebertt--sharding
spec:
  failurePolicy: Ignore
  # schema validation doesn't seem to work in combination with the /scale subresource, disable it for now
  schemaValidation: false
  rules:
  # set static replicas on kube-apiserver to ensure similar evaluation environment between load test runs
  - name: replicas-scale
    match:
      any:
      - resources:
          # mutate scale requests by HPA
          kinds:
          - Deployment/scale
          # the Scale subresource doesn't have the original resource's labels -> we have to match by name
          names:
          - kube-apiserver
    preconditions:
      all:
      # Only patch spec.replicas if the control plane is not hibernated, i.e., if spec.replicas>=1.
      - key: "{{ request.object.spec.replicas || `1` }}"
        operator: GreaterThan
        value: 0
    mutate:
      patchStrategicMerge:
        spec:
          replicas: 1
74 changes: 74 additions & 0 deletions webhosting-operator/config/policy/controlplane/kube-apiserver.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Kyverno mutating policy for the kube-apiserver of the shoot control plane:
# pins replicas and resources and adds scheduling constraints for stable
# load-test conditions.
# NOTE(review): indentation reconstructed — the captured copy had lost all
# leading whitespace, which makes YAML invalid; content is unchanged.
apiVersion: kyverno.io/v1
kind: Policy
metadata:
  name: kube-apiserver
  namespace: shoot--timebertt--sharding
spec:
  failurePolicy: Fail
  rules:
  # set static replicas on kube-apiserver to ensure similar evaluation environment between load test runs
  # if the cluster is hibernated (spec.replicas=0), this rule is skipped
  - name: replicas
    match:
      any:
      - resources:
          kinds:
          - Deployment
          selector:
            matchLabels:
              app: kubernetes
              role: apiserver
    preconditions:
      all:
      # Only patch spec.replicas if the control plane is not hibernated, i.e., if spec.replicas>=1.
      # NB: gardenlet deploys kube-apiserver with spec.replicas=null which is defaulted after the policy webhook call
      # to spec.replicas=1. Hence, treat spec.replicas=null the same way as spec.replicas=1.
      - key: "{{ request.object.spec.replicas || `1` }}"
        operator: GreaterThan
        value: 0
    mutate:
      patchStrategicMerge:
        spec:
          replicas: 1
  # set static requests/limits on kube-apiserver to ensure similar evaluation environment between load test runs
  - name: resources
    match:
      any:
      - resources:
          kinds:
          - Pod
          selector:
            matchLabels:
              app: kubernetes
              role: apiserver
    mutate:
      patchStrategicMerge:
        spec:
          containers:
          - name: kube-apiserver
            resources:
              requests:
                cpu: 12000m
                memory: 12Gi
              limits:
                cpu: 12000m
                memory: 12Gi
  # schedule kube-apiserver on high-cpu worker pool for stable performance
  - name: add-scheduling-constraints
    match:
      any:
      - resources:
          kinds:
          - Pod
          selector:
            matchLabels:
              app: kubernetes
              role: apiserver
    mutate:
      patchesJson6902: |-
        - op: add
          path: "/spec/tolerations/-"
          value: {"key":"high-cpu","operator":"Equal","value":"true","effect":"NoSchedule"}
        - op: add
          path: "/spec/affinity/nodeAffinity/requiredDuringSchedulingIgnoredDuringExecution/nodeSelectorTerms/-"
          value: {"matchExpressions": [{"key":"high-cpu","operator":"In","values":["true"]}]}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ apiVersion: kyverno.io/v1
kind: Policy
metadata:
name: kube-controller-manager
namespace: shoot--fb28d21f90--sharding
namespace: shoot--timebertt--sharding
spec:
failurePolicy: Ignore
rules:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# This kustomization contains policies for manipulating shoot control plane components.
# If the seed doesn't have kyverno installed, you can use the patch file alternatively.
# For this to work, kyverno needs to be installed on the seed cluster.

resources:
- policy-kube-controller-manager.yaml
- etcd-main.yaml
- kube-apiserver.yaml
- kube-apiserver-scale.yaml
- kube-controller-manager.yaml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Kyverno CLI test: an awake control plane (spec.replicas>=1) must be scaled
# down to exactly 1 replica by the kube-apiserver-scale policy.
# NOTE(review): indentation reconstructed — the captured copy had lost all
# leading whitespace, which makes YAML invalid; content is unchanged.
name: kube-apiserver-scale-awake
policies:
- ../../kube-apiserver-scale.yaml
resources:
# spec.replicas=2 -> expect spec.replicas=1
- scale.yaml
variables: variables.yaml
results:
- policy: kube-apiserver-scale
  rule: replicas-scale
  resource: kube-apiserver
  namespace: shoot--timebertt--sharding
  kind: Scale
  result: pass
  patchedResource: scale_expected.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Test input: Scale subresource with replicas=2 (awake control plane),
# fed to the kube-apiserver-scale policy by the kyverno CLI test.
kind: Scale
apiVersion: autoscaling/v1
metadata:
  name: kube-apiserver
  namespace: shoot--timebertt--sharding
spec:
  replicas: 2
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Expected output: the policy pins spec.replicas to 1.
kind: Scale
apiVersion: autoscaling/v1
metadata:
  name: kube-apiserver
  namespace: shoot--timebertt--sharding
spec:
  replicas: 1
Loading

0 comments on commit f07f223

Please sign in to comment.