-
Notifications
You must be signed in to change notification settings - Fork 50
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added logic for the quorum loss scenario.
- Loading branch information
1 parent
51b3cfe
commit 491ea3c
Showing
11 changed files
with
562 additions
and
108 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,196 @@ | ||
// Copyright (c) 2022 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package controllers | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"time" | ||
|
||
"sigs.k8s.io/controller-runtime/pkg/handler" | ||
"sigs.k8s.io/controller-runtime/pkg/source" | ||
|
||
"github.com/go-logr/logr" | ||
appsv1 "k8s.io/api/apps/v1" | ||
coordinationv1 "k8s.io/api/coordination/v1" | ||
corev1 "k8s.io/api/core/v1" | ||
"k8s.io/apimachinery/pkg/api/errors" | ||
"k8s.io/apimachinery/pkg/types" | ||
"k8s.io/client-go/util/retry" | ||
"k8s.io/utils/pointer" | ||
ctrl "sigs.k8s.io/controller-runtime" | ||
"sigs.k8s.io/controller-runtime/pkg/client" | ||
"sigs.k8s.io/controller-runtime/pkg/controller" | ||
"sigs.k8s.io/controller-runtime/pkg/log" | ||
"sigs.k8s.io/controller-runtime/pkg/manager" | ||
|
||
druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" | ||
controllersconfig "github.com/gardener/etcd-druid/controllers/config" | ||
"github.com/gardener/etcd-druid/pkg/health/etcdmember" | ||
druidpredicates "github.com/gardener/etcd-druid/pkg/predicate" | ||
"github.com/gardener/gardener/pkg/controllerutils" | ||
kutil "github.com/gardener/gardener/pkg/utils/kubernetes" | ||
) | ||
|
||
// clusterMgmtControllerName is the name under which the cluster management
// controller is registered with the manager.
const clusterMgmtControllerName = "cluster-mgmt-controller"
|
||
// ClusterMgmtController reconciles ETCD multinode cluster | ||
type ClusterMgmtController struct { | ||
client.Client | ||
logger logr.Logger | ||
config controllersconfig.ClusterMgmtConfig | ||
} | ||
|
||
// NewClusterMgmtController creates a new ClusterMgmtController object | ||
func NewClusterMgmtController(mgr manager.Manager, config controllersconfig.ClusterMgmtConfig) *ClusterMgmtController { | ||
return &ClusterMgmtController{ | ||
Client: mgr.GetClient(), | ||
logger: log.Log.WithName("cluster-mgmt-controller"), | ||
config: config, | ||
} | ||
} | ||
|
||
// SetupWithManager sets up manager with a new controller and cmc as the reconcile.Reconciler | ||
func (cmc *ClusterMgmtController) SetupWithManager(mgr ctrl.Manager, workers int) error { | ||
|
||
ctrl, err := controller.New(clusterMgmtControllerName, mgr, controller.Options{ | ||
Reconciler: cmc, | ||
MaxConcurrentReconciles: workers, | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
return ctrl.Watch( | ||
&source.Kind{Type: &coordinationv1.Lease{}}, | ||
&handler.EnqueueRequestForOwner{OwnerType: &druidv1alpha1.Etcd{}, IsController: true}, | ||
// druidpredicates.LeaseHolderIdentityChange(), | ||
druidpredicates.IsMemberLease(), | ||
) | ||
} | ||
|
||
// +kubebuilder:rbac:groups=druid.gardener.cloud,resources=etcds,verbs=get;list;watch | ||
// +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;patch;delete | ||
|
||
// Reconcile reconciles the multinode ETCD cluster. | ||
func (cmc *ClusterMgmtController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { | ||
cmc.logger.Info("Cluster management controller reconciliation started") | ||
etcd := &druidv1alpha1.Etcd{} | ||
if err := cmc.Get(ctx, req.NamespacedName, etcd); err != nil { | ||
if errors.IsNotFound(err) { | ||
// Object not found, return. Created objects are automatically garbage collected. | ||
// For additional cleanup logic use finalizers. | ||
return ctrl.Result{}, nil | ||
} | ||
// Error reading the object - requeue the request. | ||
return ctrl.Result{ | ||
RequeueAfter: 10 * time.Second, | ||
}, err | ||
} | ||
|
||
logger := cmc.logger.WithValues("etcd", kutil.Key(etcd.Namespace, etcd.Name).String()) | ||
|
||
// run a loop every 3 minutes that will monitor the cluster health and take action if members in the etcd cluster are down | ||
for { | ||
if !etcd.DeletionTimestamp.IsZero() { | ||
return ctrl.Result{Requeue: false}, nil | ||
} | ||
|
||
// Allow some time before the quorum loss check actually happens | ||
startTime := time.Now() | ||
if !startTime.After(etcd.CreationTimestamp.Add(cmc.config.WaitDuration)) { | ||
continue | ||
} | ||
|
||
unknownThreshold := 300 * time.Second | ||
notReadyThreshold := 60 * time.Second | ||
|
||
checker := etcdmember.ReadyCheck(cmc.Client, logger, controllersconfig.EtcdCustodianController{ | ||
EtcdMember: controllersconfig.EtcdMemberConfig{ | ||
EtcdMemberNotReadyThreshold: notReadyThreshold, | ||
EtcdMemberUnknownThreshold: unknownThreshold, | ||
}, | ||
}) | ||
|
||
results := checker.Check(context.Background(), *etcd) | ||
|
||
totalReadyMembers := 0 | ||
|
||
for _, result := range results { | ||
if result.Status() == "LeaseSucceeded" { | ||
totalReadyMembers = totalReadyMembers + 1 | ||
} | ||
} | ||
|
||
quorum := int(etcd.Spec.Replicas)/2 + 1 | ||
|
||
if totalReadyMembers < quorum { | ||
// scale down the statefulset to 0 | ||
sts := &appsv1.StatefulSet{} | ||
err := cmc.Get(ctx, types.NamespacedName{Name: etcd.Name, Namespace: etcd.Namespace}, sts) | ||
if err != nil { | ||
return ctrl.Result{ | ||
RequeueAfter: 10 * time.Second, | ||
}, fmt.Errorf("cound not fetch statefulset: %v", err) | ||
} | ||
|
||
if _, err := controllerutils.GetAndCreateOrStrategicMergePatch(ctx, cmc.Client, sts, func() error { | ||
sts.Spec.Replicas = pointer.Int32(0) | ||
return nil | ||
}); err != nil { | ||
return ctrl.Result{ | ||
RequeueAfter: 10 * time.Second, | ||
}, fmt.Errorf("cound not scale down statefulset to 0 : %v", err) | ||
} | ||
|
||
// delete the pvcs | ||
if err := cmc.DeleteAllOf(ctx, &corev1.PersistentVolumeClaim{}, | ||
client.InNamespace(sts.GetNamespace()), | ||
client.MatchingLabels(getMatchingLabels(sts))); err != nil { | ||
logger.Info("Retrying.. As cound not delete PVCs : ", err) | ||
continue | ||
} | ||
|
||
// scale up the statefulset to 1 | ||
if err := controllerutils.TryUpdate(ctx, retry.DefaultBackoff, cmc.Client, etcd, func() error { | ||
sts.Spec.Replicas = pointer.Int32(1) | ||
return nil | ||
}); err != nil { | ||
logger.Info("Retrying.. As cound not scale up statefulset to 1 : ", err) | ||
continue | ||
} | ||
|
||
// scale up the statefulset to ETCD replicas | ||
if err := controllerutils.TryUpdate(ctx, retry.DefaultBackoff, cmc.Client, etcd, func() error { | ||
sts.Spec.Replicas = &etcd.Spec.Replicas | ||
return nil | ||
}); err != nil { | ||
logger.Info("Retrying.. As cound not scale up statefulset to replica number : ", err) | ||
continue | ||
} | ||
|
||
continue | ||
} | ||
} | ||
} | ||
|
||
func getMatchingLabels(sts *appsv1.StatefulSet) map[string]string { | ||
labels := make(map[string]string) | ||
|
||
labels["name"] = sts.Labels["name"] | ||
labels["instance"] = sts.Labels["instance"] | ||
|
||
return labels | ||
} |
Oops, something went wrong.