Skip to content

Commit

Permalink
Modularization & Status Enhancements (#188)
Browse files Browse the repository at this point in the history
* Add etcd status checks

* Add sync period to custodian controller

* Address minor comments from review

* Handle Unknown -> Ready members

* Remove status.podRef

Remove `status.podRef` which was introduced by an earlier, unreleased commit
and use `status.name` instead.

* Add ClusterSize

* Check ContainersReady condition
  • Loading branch information
timuthy authored Jun 15, 2021
1 parent c532e21 commit f02ff02
Show file tree
Hide file tree
Showing 30 changed files with 2,089 additions and 90 deletions.
2 changes: 1 addition & 1 deletion .ci/test
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ function test_with_coverage() {
fetch_envtest_tools "$kb_root_dir"
setup_envtest_env "$kb_root_dir"

TEST_PACKAGES="api api_tests controllers"
TEST_PACKAGES="api api_tests controllers pkg"
GINKGO_COMMON_FLAGS="-r -timeout=1h0m0s --randomizeAllSpecs --randomizeSuites --failOnPending --progress"
if [ -z $COVER ] || [ "$COVER" = false ] ; then
echo "[INFO] Test coverage is disabled."
Expand Down
5 changes: 4 additions & 1 deletion api/v1alpha1/etcd_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ const (

// EtcdMemberStatus holds information about a etcd cluster membership.
type EtcdMemberStatus struct {
// Name is the name of the etcd member.
// Name is the name of the etcd member. It is the name of the backing `Pod`.
Name string `json:"name"`
// ID is the ID of the etcd member.
ID string `json:"id"`
Expand Down Expand Up @@ -325,6 +325,9 @@ type EtcdStatus struct {
// LastError represents the last occurred error.
// +optional
LastError *string `json:"lastError,omitempty"`
// Cluster size is the size of the etcd cluster.
// +optional
ClusterSize *int32 `json:"clusterSize,omitempty"`
// CurrentReplicas is the current replica count for the etcd cluster.
// +optional
CurrentReplicas int32 `json:"currentReplicas,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion config/crd/bases/druid.gardener.cloud_etcds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,10 @@ spec:
status:
description: EtcdStatus defines the observed state of Etcd.
properties:
clusterSize:
description: Cluster size is the size of the etcd cluster.
format: int32
type: integer
conditions:
description: Conditions represents the latest available observations
of an etcd's current state.
Expand Down Expand Up @@ -548,7 +552,8 @@ spec:
format: date-time
type: string
name:
description: Name is the name of the etcd member.
description: Name is the name of the etcd member. It is the
name of the backing `Pod`.
type: string
reason:
description: The reason for the condition's last transition.
Expand Down
32 changes: 32 additions & 0 deletions controllers/config/custodian.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) 2021 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package config

import "time"

// EtcdCustodianController contains configuration for the etcd custodian controller.
type EtcdCustodianController struct {
// EtcdMember holds configuration related to etcd members.
EtcdMember EtcdMemberConfig
// SyncPeriod is the duration after which re-enqueuing happens.
SyncPeriod time.Duration
}

type EtcdMemberConfig struct {
// EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `Unknown`.
EtcdMemberUnknownThreshold time.Duration
// EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `NotReady`.
EtcdMemberNotReadyThreshold time.Duration
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@ import (
"testing"
"time"

"github.com/gardener/gardener/pkg/utils/test"
druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
controllersconfig "github.com/gardener/etcd-druid/controllers/config"

"github.com/gardener/gardener/pkg/utils/test"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -111,7 +110,12 @@ var _ = BeforeSuite(func(done Done) {
err = er.SetupWithManager(mgr, 1, true)
Expect(err).NotTo(HaveOccurred())

custodian := NewEtcdCustodian(mgr)
custodian := NewEtcdCustodian(mgr, controllersconfig.EtcdCustodianController{
EtcdMember: controllersconfig.EtcdMemberConfig{
EtcdMemberUnknownThreshold: 1 * time.Minute,
EtcdMemberNotReadyThreshold: 1 * time.Minute,
},
})

err = custodian.SetupWithManager(mgrCtx, mgr, 1)
Expect(err).NotTo(HaveOccurred())
Expand Down
89 changes: 57 additions & 32 deletions controllers/etcd_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ import (
"k8s.io/apimachinery/pkg/util/yaml"
"k8s.io/client-go/rest"
"k8s.io/client-go/util/retry"
"k8s.io/utils/pointer"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
Expand Down Expand Up @@ -209,7 +210,7 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
finalizers.Insert(FinalizerName)
etcd.Finalizers = finalizers.UnsortedList()
if err := r.Update(ctx, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, noOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -220,7 +221,7 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
}
}
if err := r.addFinalizersToDependantSecrets(ctx, logger, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, noOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -235,9 +236,9 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
Requeue: true,
}, err
}
svc, ss, err := r.reconcileEtcd(ctx, logger, etcd)
op, svc, sts, err := r.reconcileEtcd(ctx, logger, etcd)
if err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, ss, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, op, etcd, sts, err); err != nil {
logger.Error(err, "Error during reconciling ETCD")
return ctrl.Result{
Requeue: true,
Expand All @@ -247,8 +248,7 @@ func (r *EtcdReconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd
Requeue: true,
}, err
}

if err := r.updateEtcdStatus(ctx, etcd, svc, ss); err != nil {
if err := r.updateEtcdStatus(ctx, op, etcd, svc, sts); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -264,7 +264,7 @@ func (r *EtcdReconciler) delete(ctx context.Context, etcd *druidv1alpha1.Etcd) (
logger.Info("Starting operation")

if err := r.removeDependantStatefulset(ctx, logger, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, deleteOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand All @@ -275,7 +275,7 @@ func (r *EtcdReconciler) delete(ctx context.Context, etcd *druidv1alpha1.Etcd) (
}

if err := r.removeFinalizersToDependantSecrets(ctx, logger, etcd); err != nil {
if err := r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil {
if err := r.updateEtcdErrorStatus(ctx, deleteOp, etcd, nil, err); err != nil {
return ctrl.Result{
Requeue: true,
}, err
Expand Down Expand Up @@ -557,7 +557,16 @@ func (r *EtcdReconciler) getConfigMapFromEtcd(etcd *druidv1alpha1.Etcd, rendered
return decoded, nil
}

func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd, values map[string]interface{}) (*appsv1.StatefulSet, error) {
type operationResult string

const (
bootstrapOp operationResult = "bootstrap"
reconcileOp operationResult = "reconcile"
deleteOp operationResult = "delete"
noOp operationResult = "none"
)

func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd, values map[string]interface{}) (operationResult, *appsv1.StatefulSet, error) {
logger.Info("Reconciling etcd statefulset")

// If any adoptions are attempted, we should first recheck for deletion with
Expand All @@ -582,19 +591,19 @@ func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.L
selector, err := metav1.LabelSelectorAsSelector(etcd.Spec.Selector)
if err != nil {
logger.Error(err, "Error converting etcd selector to selector")
return nil, err
return noOp, nil, err
}
dm := NewEtcdDruidRefManager(r.Client, r.Scheme, etcd, selector, etcdGVK, canAdoptFunc)
statefulSets, err := dm.FetchStatefulSet(ctx, etcd)
if err != nil {
logger.Error(err, "Error while fetching StatefulSet")
return nil, err
return noOp, nil, err
}

logger.Info("Claiming existing etcd StatefulSet")
claimedStatefulSets, err := dm.ClaimStatefulsets(ctx, statefulSets)
if err != nil {
return nil, err
return noOp, nil, err
}

if len(claimedStatefulSets) > 0 {
Expand All @@ -612,41 +621,42 @@ func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.L
// TODO: (timuthy) Check if this is really needed.
sts := &appsv1.StatefulSet{}
if err := r.Get(ctx, types.NamespacedName{Name: claimedStatefulSets[0].Name, Namespace: claimedStatefulSets[0].Namespace}, sts); err != nil {
return nil, err
return noOp, nil, err
}

// Statefulset is claimed by for this etcd. Just sync the specs
if sts, err = r.syncStatefulSetSpec(ctx, logger, sts, etcd, values); err != nil {
return nil, err
return noOp, nil, err
}

// restart etcd pods in crashloop backoff
selector, err := metav1.LabelSelectorAsSelector(sts.Spec.Selector)
if err != nil {
logger.Error(err, "error converting StatefulSet selector to selector")
return nil, err
return noOp, nil, err
}
podList := &v1.PodList{}
if err := r.List(ctx, podList, client.InNamespace(etcd.Namespace), client.MatchingLabelsSelector{Selector: selector}); err != nil {
return nil, err
return noOp, nil, err
}

for _, pod := range podList.Items {
if utils.IsPodInCrashloopBackoff(pod.Status) {
if err := r.Delete(ctx, &pod); err != nil {
logger.Error(err, fmt.Sprintf("error deleting etcd pod in crashloop: %s/%s", pod.Namespace, pod.Name))
return nil, err
return noOp, nil, err
}
}
}

return r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
sts, err = r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
return reconcileOp, sts, err
}

// Required statefulset doesn't exist. Create new
sts, err := r.getStatefulSetFromEtcd(etcd, values)
if err != nil {
return nil, err
return noOp, nil, err
}

err = r.Create(ctx, sts)
Expand All @@ -658,10 +668,11 @@ func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.L
err = nil
}
if err != nil {
return nil, err
return noOp, nil, err
}

return r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
sts, err = r.waitUntilStatefulSetReady(ctx, logger, etcd, sts)
return bootstrapOp, sts, err
}

func getContainerMapFromPodTemplateSpec(spec v1.PodSpec) map[string]v1.Container {
Expand Down Expand Up @@ -758,40 +769,39 @@ func (r *EtcdReconciler) getStatefulSetFromEtcd(etcd *druidv1alpha1.Etcd, values
return decoded, nil
}

func (r *EtcdReconciler) reconcileEtcd(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd) (*corev1.Service, *appsv1.StatefulSet, error) {

func (r *EtcdReconciler) reconcileEtcd(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd) (operationResult, *corev1.Service, *appsv1.StatefulSet, error) {
values, err := r.getMapFromEtcd(etcd)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}

chartPath := getChartPath()
renderedChart, err := r.chartApplier.Render(chartPath, etcd.Name, etcd.Namespace, values)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}
svc, err := r.reconcileServices(ctx, logger, etcd, renderedChart)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}
if svc != nil {
values["serviceName"] = svc.Name
}

cm, err := r.reconcileConfigMaps(ctx, logger, etcd, renderedChart)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}
if cm != nil {
values["configMapName"] = cm.Name
}

ss, err := r.reconcileStatefulSet(ctx, logger, etcd, values)
op, sts, err := r.reconcileStatefulSet(ctx, logger, etcd, values)
if err != nil {
return nil, nil, err
return noOp, nil, nil, err
}

return svc, ss, nil
return op, svc, sts, nil
}

func checkEtcdOwnerReference(refs []metav1.OwnerReference, etcd *druidv1alpha1.Etcd) bool {
Expand Down Expand Up @@ -1118,14 +1128,24 @@ func canDeleteStatefulset(sts *appsv1.StatefulSet, etcd *druidv1alpha1.Etcd) boo

}

func (r *EtcdReconciler) updateEtcdErrorStatus(ctx context.Context, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet, lastError error) error {
func bootstrapReset(etcd *druidv1alpha1.Etcd) {
etcd.Status.Members = nil
etcd.Status.ClusterSize = pointer.Int32Ptr(int32(etcd.Spec.Replicas))
}

func (r *EtcdReconciler) updateEtcdErrorStatus(ctx context.Context, op operationResult, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet, lastError error) error {
err := kutil.TryUpdateStatus(ctx, retry.DefaultBackoff, r.Client, etcd, func() error {
lastErrStr := fmt.Sprintf("%v", lastError)
etcd.Status.LastError = &lastErrStr
etcd.Status.ObservedGeneration = &etcd.Generation
if sts != nil {
ready := CheckStatefulSet(etcd, sts) == nil
etcd.Status.Ready = &ready

if op == bootstrapOp {
// Reset members in bootstrap phase to ensure depending conditions can be calculated correctly.
bootstrapReset(etcd)
}
}
return nil
})
Expand All @@ -1136,14 +1156,19 @@ func (r *EtcdReconciler) updateEtcdErrorStatus(ctx context.Context, etcd *druidv
return r.removeOperationAnnotation(ctx, etcd)
}

func (r *EtcdReconciler) updateEtcdStatus(ctx context.Context, etcd *druidv1alpha1.Etcd, svc *corev1.Service, sts *appsv1.StatefulSet) error {
func (r *EtcdReconciler) updateEtcdStatus(ctx context.Context, op operationResult, etcd *druidv1alpha1.Etcd, svc *corev1.Service, sts *appsv1.StatefulSet) error {
err := kutil.TryUpdateStatus(ctx, retry.DefaultBackoff, r.Client, etcd, func() error {
ready := CheckStatefulSet(etcd, sts) == nil
etcd.Status.Ready = &ready
svcName := svc.Name
etcd.Status.ServiceName = &svcName
etcd.Status.LastError = nil
etcd.Status.ObservedGeneration = &etcd.Generation

if op == bootstrapOp {
// Reset members in bootstrap phase to ensure depending conditions can be calculated correctly.
bootstrapReset(etcd)
}
return nil
})

Expand Down
Loading

0 comments on commit f02ff02

Please sign in to comment.