diff --git a/config/jobs/kubernetes/sig-testing/compatibility-versions-e2e.yaml b/config/jobs/kubernetes/sig-testing/compatibility-versions-e2e.yaml new file mode 100644 index 000000000000..b565a8797809 --- /dev/null +++ b/config/jobs/kubernetes/sig-testing/compatibility-versions-e2e.yaml @@ -0,0 +1,54 @@ +periodics: +- interval: 6h + cluster: k8s-infra-prow-build + name: ci-kubernetes-e2e-kind-compatibility-versions + annotations: + testgrid-dashboards: sig-testing-kind + testgrid-tab-name: compatibility-version-test + description: Uses kubetest & kind to run e2e tests from older (n-1..3) kubernetes releases against a latest kubernetes master components w/ --emulated-version= set. + # TODO(aaron-prindle) route the alert email to a rotation vs individual email + testgrid-alert-email: aprindle@google.com + testgrid-num-columns-recent: '6' + labels: + preset-dind-enabled: "true" + preset-kind-volume-mounts: "true" + decorate: true + decoration_config: + timeout: 60m + extra_refs: + - org: kubernetes + repo: test-infra + base_ref: master + path_alias: k8s.io/test-infra + spec: + containers: + - image: gcr.io/k8s-staging-test-infra/krte:v20240903-6a352c5344-master + imagePullPolicy: Always # pull latest image for canary testing + command: + - wrapper.sh + - bash + - -c + - curl -sSL https://kind.sigs.k8s.io/dl/latest/linux-amd64.tgz | tar xvfz - -C "${PATH%%:*}/" && "$(go env GOPATH)"/src/k8s.io/test-infra/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh + env: + - name: EMULATED_VERSION + value: "1.31" # TODO(aaron-prindle) FIXME - hardcoded for now + - name: SKIP + value: Alpha|Disruptive|Slow|Flaky|IPv6|LoadBalancer|PodSecurityPolicy|nfs + - name: PARALLEL + value: "true" + - name: FEATURE_GATES + value: '{"AllBeta":true}' + - name: RUNTIME_CONFIG + value: '{"api/beta":"true", "api/ga":"true"}' + # we need privileged mode in order to do docker in docker + securityContext: + privileged: true + resources: + limits: + memory: 9Gi + cpu: 7 + requests: + # these are both a bit below peak usage during build + # this is mostly for building kubernetes + memory: 9Gi + cpu: 7 diff --git a/experiment/compatibility-versions/OWNERS b/experiment/compatibility-versions/OWNERS new file mode 100644 index 000000000000..7d8e3b5ab5dd --- /dev/null +++ b/experiment/compatibility-versions/OWNERS @@ -0,0 +1,6 @@ +# See the OWNERS docs at https://go.k8s.io/owners + +reviewers: +- aaron-prindle +approvers: +- aaron-prindle diff --git a/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh b/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh new file mode 100755 index 000000000000..8d3426ff0050 --- /dev/null +++ b/experiment/compatibility-versions/e2e-k8s-compatibility-versions.sh @@ -0,0 +1,338 @@ +#!/bin/sh +# Copyright 2024 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# hack script for running a kind e2e +# must be run with a kubernetes checkout in $PWD (IE from the checkout) +# Usage: SKIP="ginkgo skip regex" FOCUS="ginkgo focus regex" kind-e2e.sh + +set -o errexit -o nounset -o xtrace + +# Settings: +# SKIP: ginkgo skip regex +# FOCUS: ginkgo focus regex +# LABEL_FILTER: ginkgo label query for selecting tests (see "Spec Labels" in https://onsi.github.io/ginkgo/#filtering-specs) +# +# The default is to focus on conformance tests. Serial tests get skipped when +# parallel testing is enabled. Using LABEL_FILTER instead of combining SKIP and +# FOCUS is recommended (more expressive, easier to read than regexp). +# +# GA_ONLY: true - limit to GA APIs/features as much as possible +# false - (default) APIs and features left at defaults +# FEATURE_GATES: +# JSON or YAML encoding of a string/bool map: {"FeatureGateA": true, "FeatureGateB": false} +# Enables or disables feature gates in the entire cluster. +# Cannot be used when GA_ONLY=true. +# RUNTIME_CONFIG: +# JSON or YAML encoding of a string/string (!) map: {"apia.example.com/v1alpha1": "true", "apib.example.com/v1beta1": "false"} +# Enables API groups in the apiserver via --runtime-config. +# Cannot be used when GA_ONLY=true. + +# cleanup logic for cleanup on exit +CLEANED_UP=false +cleanup() { + if [ "$CLEANED_UP" = "true" ]; then + return + fi + # KIND_CREATE_ATTEMPTED is true once we: kind create + if [ "${KIND_CREATE_ATTEMPTED:-}" = true ]; then + kind "export" logs "${ARTIFACTS}" || true + kind delete cluster || true + fi + rm -f _output/bin/e2e.test || true + # remove our tempdir, this needs to be last, or it will prevent kind delete + if [ -n "${TMP_DIR:-}" ]; then + rm -rf "${TMP_DIR:?}" + fi + CLEANED_UP=true +} + +# setup signal handlers +# shellcheck disable=SC2317 # this is not unreachable code +signal_handler() { + if [ -n "${GINKGO_PID:-}" ]; then + kill -TERM "$GINKGO_PID" || true + fi + cleanup +} +trap signal_handler INT TERM + +# build kubernetes / node image, e2e binaries +build() { + # build the node image w/ kubernetes + kind build node-image -v 1 + # Ginkgo v1 is used by Kubernetes 1.24 and earlier, fallback if v2 is not available. + GINKGO_SRC_DIR="vendor/github.com/onsi/ginkgo/v2/ginkgo" + if [ ! -d "$GINKGO_SRC_DIR" ]; then + GINKGO_SRC_DIR="vendor/github.com/onsi/ginkgo/ginkgo" + fi + # make sure we have e2e requirements + make all WHAT="cmd/kubectl test/e2e/e2e.test ${GINKGO_SRC_DIR}" + + # Ensure the built kubectl is used instead of system + export PATH="${PWD}/_output/bin:$PATH" +} + +check_structured_log_support() { + case "${KUBE_VERSION}" in + v1.1[0-8].*) + echo "$1 is only supported on versions >= v1.19, got ${KUBE_VERSION}" + exit 1 + ;; + esac +} + +# up a cluster with kind +create_cluster() { + # Grab the version of the cluster we're about to start + KUBE_VERSION="$(docker run --rm --entrypoint=cat "kindest/node:latest" /kind/version)" + + # Default Log level for all components in test clusters + KIND_CLUSTER_LOG_LEVEL=${KIND_CLUSTER_LOG_LEVEL:-4} + + EMULATED_VERSION=${EMULATED_VERSION:-} + + # potentially enable --logging-format + CLUSTER_LOG_FORMAT=${CLUSTER_LOG_FORMAT:-} + scheduler_extra_args=" \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\"" + controllerManager_extra_args=" \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\"" + apiServer_extra_args=" \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\"" + kubelet_extra_args=" \"v\": \"${KIND_CLUSTER_LOG_LEVEL}\"" + + if [ -n "$CLUSTER_LOG_FORMAT" ]; then + check_structured_log_support "CLUSTER_LOG_FORMAT" + scheduler_extra_args="${scheduler_extra_args} + \"logging-format\": \"${CLUSTER_LOG_FORMAT}\"" + controllerManager_extra_args="${controllerManager_extra_args} + \"logging-format\": \"${CLUSTER_LOG_FORMAT}\"" + apiServer_extra_args="${apiServer_extra_args} + \"logging-format\": \"${CLUSTER_LOG_FORMAT}\"" + fi + + KUBELET_LOG_FORMAT=${KUBELET_LOG_FORMAT:-$CLUSTER_LOG_FORMAT} + if [ -n "$KUBELET_LOG_FORMAT" ]; then + check_structured_log_support "KUBECTL_LOG_FORMAT" + kubelet_extra_args="${kubelet_extra_args} + \"logging-format\": \"${KUBELET_LOG_FORMAT}\"" + fi + + # JSON or YAML map injected into featureGates config + feature_gates="${FEATURE_GATES:-{\}}" + # --runtime-config argument value passed to the API server, again as a map + runtime_config="${RUNTIME_CONFIG:-{\}}" + + case "${GA_ONLY:-false}" in + false) + : + ;; + true) + if [ "${feature_gates}" != "{}" ]; then + echo "GA_ONLY=true and FEATURE_GATES=${feature_gates} are mutually exclusive." + exit 1 + fi + if [ "${runtime_config}" != "{}" ]; then + echo "GA_ONLY=true and RUNTIME_CONFIG=${runtime_config} are mutually exclusive." + exit 1 + fi + + echo "Limiting to GA APIs and features for ${KUBE_VERSION}" + feature_gates='{"AllAlpha":false,"AllBeta":false}' + runtime_config='{"api/alpha":"false", "api/beta":"false"}' + ;; + *) + echo "\$GA_ONLY set to '${GA_ONLY}'; supported values are true and false (default)" + exit 1 + ;; + esac + + # create the config file + cat < "${ARTIFACTS}/kind-config.yaml" +# config for 1 control plane node and 2 workers (necessary for conformance) +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +networking: + ipFamily: ${IP_FAMILY:-ipv4} + kubeProxyMode: ${KUBE_PROXY_MODE:-iptables} + # don't pass through host search paths + # TODO: possibly a reasonable default in the future for kind ... + dnsSearch: [] +nodes: +- role: control-plane +- role: worker +- role: worker +featureGates: ${feature_gates} +runtimeConfig: ${runtime_config} +kubeadmConfigPatches: +- | + kind: ClusterConfiguration + metadata: + name: config + apiServer: + extraArgs: +${apiServer_extra_args} + "emulated-version": "${EMULATED_VERSION}" + controllerManager: + extraArgs: +${controllerManager_extra_args} + "emulated-version": "${EMULATED_VERSION}" + scheduler: + extraArgs: +${scheduler_extra_args} + "emulated-version": "${EMULATED_VERSION}" + --- + kind: InitConfiguration + nodeRegistration: + kubeletExtraArgs: +${kubelet_extra_args} + --- + kind: JoinConfiguration + nodeRegistration: + kubeletExtraArgs: +${kubelet_extra_args} +EOF + # NOTE: must match the number of workers above + NUM_NODES=2 + # actually create the cluster + # TODO(BenTheElder): settle on verbosity for this script + KIND_CREATE_ATTEMPTED=true + kind create cluster \ + --image=kindest/node:latest \ + --retain \ + --wait=1m \ + -v=3 \ + "--config=${ARTIFACTS}/kind-config.yaml" + + # debug cluster version + kubectl version + + # Patch kube-proxy to set the verbosity level + kubectl patch -n kube-system daemonset/kube-proxy \ + --type='json' -p='[{"op": "add", "path": "/spec/template/spec/containers/0/command/-", "value": "--v='"${KIND_CLUSTER_LOG_LEVEL}"'" }]' +} + +# run e2es with ginkgo-e2e.sh +run_tests() { + # Change to the cloned Kubernetes repository + pushd ../kubernetes + + # IPv6 clusters need some CoreDNS changes in order to work in k8s CI: + # 1. k8s CI doesn´t offer IPv6 connectivity, so CoreDNS should be configured + # to work in an offline environment: + # https://github.com/coredns/coredns/issues/2494#issuecomment-457215452 + # 2. k8s CI adds following domains to resolv.conf search field: + # c.k8s-prow-builds.internal google.internal. + # CoreDNS should handle those domains and answer with NXDOMAIN instead of SERVFAIL + # otherwise pods stops trying to resolve the domain. + if [ "${IP_FAMILY:-ipv4}" = "ipv6" ]; then + # Get the current config + original_coredns=$(kubectl get -oyaml -n=kube-system configmap/coredns) + echo "Original CoreDNS config:" + echo "${original_coredns}" + # Patch it + fixed_coredns=$( + printf '%s' "${original_coredns}" | sed \ + -e 's/^.*kubernetes cluster\.local/& internal/' \ + -e '/^.*upstream$/d' \ + -e '/^.*fallthrough.*$/d' \ + -e '/^.*forward . \/etc\/resolv.conf$/d' \ + -e '/^.*loop$/d' \ + ) + echo "Patched CoreDNS config:" + echo "${fixed_coredns}" + printf '%s' "${fixed_coredns}" | kubectl apply -f - + fi + + # ginkgo regexes and label filter + SKIP="${SKIP:-}" + FOCUS="${FOCUS:-}" + LABEL_FILTER="${LABEL_FILTER:-}" + if [ -z "${FOCUS}" ] && [ -z "${LABEL_FILTER}" ]; then + FOCUS="\\[Conformance\\]" + fi + # if we set PARALLEL=true, skip serial tests set --ginkgo-parallel + if [ "${PARALLEL:-false}" = "true" ]; then + export GINKGO_PARALLEL=y + if [ -z "${SKIP}" ]; then + SKIP="\\[Serial\\]" + else + SKIP="\\[Serial\\]|${SKIP}" + fi + fi + + # setting this env prevents ginkgo e2e from trying to run provider setup + export KUBERNETES_CONFORMANCE_TEST='y' + # setting these is required to make RuntimeClass tests work ... :/ + export KUBE_CONTAINER_RUNTIME=remote + export KUBE_CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock + export KUBE_CONTAINER_RUNTIME_NAME=containerd + # ginkgo can take forever to exit, so we run it in the background and save the + # PID, bash will not run traps while waiting on a process, but it will while + # running a builtin like `wait`, saving the PID also allows us to forward the + # interrupt + ./hack/ginkgo-e2e.sh \ + '--provider=skeleton' "--num-nodes=${NUM_NODES}" \ + "--ginkgo.focus=${FOCUS}" "--ginkgo.skip=${SKIP}" "--ginkgo.label-filter=${LABEL_FILTER}" \ + "--report-dir=${ARTIFACTS}" '--disable-log-dump=true' & + GINKGO_PID=$! + wait "$GINKGO_PID" + + # Return to the original directory + popd +} + +# clone kubernetes repo for specific release branch +clone_kubernetes_release() { + # Clone the specific Kubernetes release branch + # Replace "release-1.31" with the desired branch + KUBE_RELEASE_BRANCH=${KUBE_RELEASE_BRANCH:-release-1.31} + git clone --single-branch --branch "${KUBE_RELEASE_BRANCH}" https://github.com/kubernetes/kubernetes.git +} + +main() { + # create temp dir and setup cleanup + TMP_DIR=$(mktemp -d) + + # ensure artifacts (results) directory exists when not in CI + export ARTIFACTS="${ARTIFACTS:-${PWD}/_artifacts}" + mkdir -p "${ARTIFACTS}" + + # export the KUBECONFIG to a unique path for testing + KUBECONFIG="${HOME}/.kube/kind-test-config" + export KUBECONFIG + echo "exported KUBECONFIG=${KUBECONFIG}" + + # debug kind version + kind version + + # build kubernetes + build + # in CI attempt to release some memory after building + if [ -n "${KUBETEST_IN_DOCKER:-}" ]; then + sync || true + echo 1 > /proc/sys/vm/drop_caches || true + fi + + # create the cluster and run tests + res=0 + create_cluster || res=$? + + # Clone the specific Kubernetes release branch + clone_kubernetes_release + + run_tests || res=$? + cleanup || res=$? + exit $res +} + +main