From b6698d71be9c4ce2bb311d9c8829466ddc7c166b Mon Sep 17 00:00:00 2001
From: Massimo Gengarelli
Date: Tue, 2 Jul 2024 10:29:34 +0200
Subject: [PATCH] test: better automated e2e tests

---
 .github/workflows/ci.yaml |  15 +-
 tests/kubetest.sh         | 284 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 291 insertions(+), 8 deletions(-)
 create mode 100755 tests/kubetest.sh

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 56a7ed3..ced7cc6 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -29,12 +29,11 @@ jobs:
         with:
           skipClusterCreation: true
       - run: terraform init
-      - run: make cluster-test
-      - run: sleep 30
-      - run: kubectl get ns | grep target
-      - run: kubectl get ns | grep chaosmonkey
-      - run: kubectl -n chaosmonkey get pods | grep chaos-monkey
-      - run: kubectl -n target get deployments | grep nginx
-      - run: kubectl -n target get cmc | grep chaosmonkey-nginx
-      - run: kubectl -n target get events | grep ChaosMonkey
+        name: Initialize Terraform
+      - run: |
+          make cluster-test
+          sleep 10
+          ./tests/kubetest.sh
+        name: End to end tests
       - run: make clean
+        name: Cleanup
diff --git a/tests/kubetest.sh b/tests/kubetest.sh
new file mode 100755
index 0000000..189ea92
--- /dev/null
+++ b/tests/kubetest.sh
@@ -0,0 +1,284 @@
+#!/usr/bin/env bash
+#
+# End-to-end checks for ChaosMonkey running in a local kind cluster.
+#
+# The cluster must already exist (see 'make cluster-test').
+#
+# Environment:
+#   TERRAFORM_CLUSTER_NAME  kind cluster name (default: chaosmonkey-cluster)
+#   TEST_DEBUG              set to "true" to print debug messages
+#   TEST_MAX_RETRIES        polls before a wait gives up (default: 10)
+#   TEST_RETRY_DELAY        seconds between two polls (default: 10)
+
+set -euo pipefail
+
+# 'command -v' fails when the tool is missing: fall back to an empty value so
+# the checks below can print an installation hint instead of aborting here.
+KIND=$(command -v kind || true)
+KUBECTL=$(command -v kubectl || true)
+CLUSTER_NAME="${TERRAFORM_CLUSTER_NAME:-chaosmonkey-cluster}"
+MAX_RETRIES="${TEST_MAX_RETRIES:-10}"
+RETRY_DELAY="${TEST_RETRY_DELAY:-10}"
+
+# Print a timestamped, colored log line.
+# Arguments: $1 - level (info, warn, error or debug), $2... - message
+log() {
+  local level="${1}"
+  local coloredLevel=""
+  shift
+  case "${level}" in
+  info)
+    coloredLevel="\033[0;32m${level}\033[0m"
+    ;;
+  warn)
+    coloredLevel="\033[0;33m${level}\033[0m"
+    ;;
+  error)
+    coloredLevel="\033[0;31m${level}\033[0m"
+    ;;
+  debug)
+    coloredLevel="\033[0;34m${level}\033[0m"
+    ;;
+  *)
+    coloredLevel="\033[0;32munknown\033[0m"
+    ;;
+  esac
+
+  # %b expands the color escapes; %s keeps the message itself verbatim.
+  printf '[%s] [%b] - %s\n' "$(date +"%Y-%m-%d %H:%M:%S")" "${coloredLevel}" "$*"
+}
+
+info() {
+  log info "$*"
+}
+
+warn() {
+  log warn "$*"
+}
+
+# Only prints when TEST_DEBUG=true.
+debug() {
+  if [[ "${TEST_DEBUG:-false}" == "true" ]]; then
+    log debug "$*"
+  fi
+}
+
+# Log an error on stderr and abort the whole test run.
+err() {
+  log error "$*" >&2
+  exit 1
+}
+
+# Count the resources listed by 'kubectl get' for the given arguments.
+# 'wc -l' pads its output with blanks on some platforms, hence the 'tr'.
+countResources() {
+  "${KUBECTL}" get "$@" --no-headers | wc -l | tr -d '[:space:]'
+}
+
+# Print the desired replica count of deployment $1 in the target namespace.
+getReplicas() {
+  "${KUBECTL}" get deployment "${1}" --namespace=target -o jsonpath='{.spec.replicas}'
+}
+
+# Print the names of the pods labelled app=$1 in the target namespace.
+getPodNames() {
+  "${KUBECTL}" get pods --namespace=target --selector "app=${1}" -o jsonpath='{.items[*].metadata.name}'
+}
+
+# Apply the JSON patch $2 to the CMC chaosmonkey-$1 in the target namespace.
+patchCmc() {
+  local name="${1}"
+  local patch="${2}"
+  if ! "${KUBECTL}" -n target patch cmc "chaosmonkey-${name}" --type json --patch "${patch}" >/dev/null; then
+    err "Could not patch CMC ${name}"
+  fi
+}
+
+# Poll a command until its output differs from a reference value.
+# Arguments: $1 - what is observed (used in messages), $2 - reference value,
+#            $3... - command printing the current value
+waitForChange() {
+  local what="${1}"
+  local initial="${2}"
+  shift 2
+  local current=""
+  local attempt
+  for ((attempt = 0; attempt < MAX_RETRIES; attempt++)); do
+    info "Current ${what}: ${initial}, waiting for the ${what} to change ($((MAX_RETRIES - attempt)) retries left)"
+    # Sleep first: the reference value has just been read.
+    sleep "${RETRY_DELAY}"
+    current=$("$@")
+    debug "Current ${what}: ${initial}, new ${what}: ${current}"
+    if [[ "${current}" != "${initial}" ]]; then
+      return 0
+    fi
+  done
+  err "The ${what} did not change after ${MAX_RETRIES} retries, please check the chaosmonkey pod logs"
+}
+
+# Wait until exactly $2 pods labelled app=$1 exist in the target namespace.
+waitForPodCount() {
+  local app="${1}"
+  local expected="${2}"
+  local count
+  local attempt
+  count=$(countResources pods --namespace=target --selector "app=${app}")
+  for ((attempt = 0; count != expected && attempt < MAX_RETRIES; attempt++)); do
+    debug "Checking number of pods (${count}), waiting for ${expected} ($((MAX_RETRIES - attempt)) retries left)"
+    sleep 1
+    count=$(countResources pods --namespace=target --selector "app=${app}")
+  done
+  if ((count != expected)); then
+    err "There should always be ${expected} pods active"
+  fi
+}
+
+debug "Checking kind @ ${KIND}"
+if [[ -z "${KIND}" ]]; then
+  err "Please install kind: https://kind.sigs.k8s.io/docs/user/quick-start/"
+fi
+info "Kind found at ${KIND}"
+
+debug "Checking kubectl @ ${KUBECTL}"
+if [[ -z "${KUBECTL}" ]]; then
+  err "Please install kubectl: https://kubernetes.io/docs/tasks/tools/install-kubectl/"
+fi
+info "Kubectl found at ${KUBECTL}"
+
+# Check if the cluster has been started. A cluster that kind does not list is
+# only reported as a warning: switching the kubectl context below is what
+# aborts the run when the cluster is really missing.
+debug "Check that ${CLUSTER_NAME} exists"
+clusters=$("${KIND}" get clusters)
+printf '%s\n' "${clusters}"
+if grep -Fxq -- "${CLUSTER_NAME}" <<<"${clusters}"; then
+  info "Cluster ${CLUSTER_NAME} found"
+else
+  warn "Cluster ${CLUSTER_NAME} is not listed by kind, did you run 'make cluster-test'?"
+fi
+
+# Force switch to the right context
+"${KUBECTL}" config get-contexts
+"${KUBECTL}" config use-context "kind-${CLUSTER_NAME}"
+
+# Start the test
+info "Starting the test"
+
+info "Checking namespaces"
+for ns in target chaosmonkey; do
+  debug "Checking if namespace ${ns} exists"
+  # Ask for the namespace by name: grepping the full list also matches substrings.
+  if ! "${KUBECTL}" get namespace "${ns}" &>/dev/null; then
+    err "Namespace ${ns} does not exist"
+  fi
+done
+
+info "Checking pods"
+for ns in target chaosmonkey; do
+  debug "Checking if pods in namespace ${ns} are ready"
+  # No 'grep -q': it would exit at the first match and could make kubectl fail
+  # with SIGPIPE, which 'pipefail' would report as a failed check.
+  if ! "${KUBECTL}" get pods --namespace="${ns}" | grep Running &>/dev/null; then
+    err "Pods in namespace ${ns} are not ready"
+  fi
+done
+
+info "Checking deployments"
+deploymentCount=$(countResources deployments --namespace=chaosmonkey)
+debug "chaosmonkey namespace contains ${deploymentCount} deployment(s)"
+if ((deploymentCount != 1)); then
+  err "chaosmonkey namespace should contain 1 deployment"
+fi
+
+deploymentCount=$(countResources deployments --namespace=target)
+debug "target namespace contains ${deploymentCount} deployment(s)"
+if ((deploymentCount != 2)); then
+  err "target namespace should contain 2 deployments"
+fi
+
+info "Checking ChaosMonkeyConfigurations"
+cmcCount=$(countResources cmc --namespace=target)
+debug "target namespace contains ${cmcCount} cmc(s)"
+if ((cmcCount != 2)); then
+  err "target namespace should contain 2 cmc"
+fi
+
+disruptScale="nginx-disrupt-scale"
+disruptPods="nginx-disrupt-pods"
+
+info "Resetting CMCs to initial values"
+
+debug "Force enable ${disruptScale}"
+patchCmc "${disruptScale}" '[
+  {"op": "replace", "path": "/spec/enabled", "value": true},
+  {"op": "replace", "path": "/spec/podMode", "value": false},
+  {"op": "replace", "path": "/spec/minReplicas", "value": 2},
+  {"op": "replace", "path": "/spec/maxReplicas", "value": 5}
+]'
+
+debug "Force enable ${disruptPods}"
+patchCmc "${disruptPods}" '[
+  {"op": "replace", "path": "/spec/enabled", "value": true},
+  {"op": "replace", "path": "/spec/podMode", "value": true},
+  {"op": "replace", "path": "/spec/minReplicas", "value": 6},
+  {"op": "replace", "path": "/spec/maxReplicas", "value": 8}
+]'
+
+info "Resetting ${disruptPods} to 2 replicas"
+"${KUBECTL}" -n target scale deployment "${disruptPods}" --replicas=2 >/dev/null
+
+info "Checking events"
+if ! "${KUBECTL}" -n target get events | grep ChaosMonkey &>/dev/null; then
+  warn "no events found in target namespace, please check the chaosmonkey pod logs (not considered as an error)"
+fi
+
+info "Checking CMC with podMode=false (${disruptScale})"
+currentScale=$(getReplicas "${disruptScale}")
+waitForChange "replicas" "${currentScale}" getReplicas "${disruptScale}"
+
+info "Checking CMC with podMode=true (${disruptPods})"
+currentPods=$(getPodNames "${disruptPods}")
+waitForChange "pods" "${currentPods}" getPodNames "${disruptPods}"
+
+debug "There should always be 2 pods active"
+info "Checking number of pods"
+waitForPodCount "${disruptPods}" 2
+
+info "Stopping ${disruptScale} CMC"
+patchCmc "${disruptScale}" '[{"op": "replace", "path": "/spec/enabled", "value": false}]'
+
+info "Checking that CMC ${disruptScale} has been stopped correctly (number of scales should not change over time)"
+currentScale=$(getReplicas "${disruptScale}")
+for ((loop = 0; loop < 5; loop++)); do
+  debug "Loop #${loop}"
+  # Read the value again on every loop, otherwise nothing is actually checked.
+  newScale=$(getReplicas "${disruptScale}")
+  if [[ "${currentScale}" != "${newScale}" ]]; then
+    err "Number of replicas changed (${currentScale} -> ${newScale})"
+  fi
+  info "Still ok, number of replicas: ${currentScale} ($((5 - loop)) loops left)"
+  sleep 5
+done
+
+info "Switching ${disruptPods} from podMode=true to podMode=false"
+patchCmc "${disruptPods}" '[{"op": "replace", "path": "/spec/podMode", "value": false}]'
+
+info "Checking that CMC ${disruptPods} is now correctly modifying the replicas of the deployment"
+currentScale=$(getReplicas "${disruptPods}")
+waitForChange "replicas" "${currentScale}" getReplicas "${disruptPods}"
+
+info "Switching ${disruptScale} from podMode=false to podMode=true and re-enabling it"
+patchCmc "${disruptScale}" '[
+  {"op": "replace", "path": "/spec/enabled", "value": true},
+  {"op": "replace", "path": "/spec/podMode", "value": true}
+]'
+
+info "Making sure there are at least two replicas of ${disruptScale}"
+if ! "${KUBECTL}" scale -n target deployment "${disruptScale}" --replicas=2 >/dev/null; then
+  err "Could not scale ${disruptScale}"
+fi
+
+currentPods=$(getPodNames "${disruptScale}")
+waitForChange "pods" "${currentPods}" getPodNames "${disruptScale}"
+
+info "All tests passed!"