diff --git a/controllers/compaction_lease_controller.go b/controllers/compaction_lease_controller.go index 66aa48039..686e7c763 100644 --- a/controllers/compaction_lease_controller.go +++ b/controllers/compaction_lease_controller.go @@ -40,7 +40,6 @@ import ( druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" controllersconfig "github.com/gardener/etcd-druid/controllers/config" "github.com/gardener/etcd-druid/pkg/common" - componentlease "github.com/gardener/etcd-druid/pkg/component/etcd/lease" druidpredicates "github.com/gardener/etcd-druid/pkg/predicate" "github.com/gardener/etcd-druid/pkg/utils" "github.com/gardener/gardener/pkg/utils/imagevector" @@ -119,7 +118,7 @@ func (lc *CompactionLeaseController) Reconcile(ctx context.Context, req ctrl.Req // Get full and delta snapshot lease to check the HolderIdentity value to take decision on compaction job fullLease := &coordinationv1.Lease{} - if err := lc.Get(ctx, kutil.Key(etcd.Namespace, componentlease.GetFullSnapshotLeaseName(etcd)), fullLease); err != nil { + if err := lc.Get(ctx, kutil.Key(etcd.Namespace, utils.GetFullSnapshotLeaseName(etcd)), fullLease); err != nil { logger.Info("Couldn't fetch full snap lease because: " + err.Error()) return ctrl.Result{ @@ -128,7 +127,7 @@ func (lc *CompactionLeaseController) Reconcile(ctx context.Context, req ctrl.Req } deltaLease := &coordinationv1.Lease{} - if err := lc.Get(ctx, kutil.Key(etcd.Namespace, componentlease.GetDeltaSnapshotLeaseName(etcd)), deltaLease); err != nil { + if err := lc.Get(ctx, kutil.Key(etcd.Namespace, utils.GetDeltaSnapshotLeaseName(etcd)), deltaLease); err != nil { logger.Info("Couldn't fetch delta snap lease because: " + err.Error()) return ctrl.Result{ @@ -491,8 +490,8 @@ func getCompactJobCommands(etcd *druidv1alpha1.Etcd) []string { command = append(command, "--data-dir=/var/etcd/data") command = append(command, "--snapstore-temp-directory=/var/etcd/data/tmp") command = append(command, "--enable-snapshot-lease-renewal=true") - command = append(command, "--full-snapshot-lease-name="+componentlease.GetFullSnapshotLeaseName(etcd)) - command = append(command, "--delta-snapshot-lease-name="+componentlease.GetDeltaSnapshotLeaseName(etcd)) + command = append(command, "--full-snapshot-lease-name="+utils.GetFullSnapshotLeaseName(etcd)) + command = append(command, "--delta-snapshot-lease-name="+utils.GetDeltaSnapshotLeaseName(etcd)) var quota int64 = DefaultETCDQuota if etcd.Spec.Etcd.Quota != nil { diff --git a/controllers/compaction_lease_controller_test.go b/controllers/compaction_lease_controller_test.go index d8b1c36ce..ac0be9f48 100644 --- a/controllers/compaction_lease_controller_test.go +++ b/controllers/compaction_lease_controller_test.go @@ -19,7 +19,6 @@ import ( "time" druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" - componentlease "github.com/gardener/etcd-druid/pkg/component/etcd/lease" "github.com/gardener/etcd-druid/pkg/utils" "github.com/gardener/gardener/pkg/controllerutils" "github.com/gardener/gardener/pkg/utils/test/matchers" @@ -406,8 +405,8 @@ func validateEtcdForCmpctJob(instance *druidv1alpha1.Etcd, j *batchv1.Job) { "--data-dir=/var/etcd/data": Equal("--data-dir=/var/etcd/data"), "--snapstore-temp-directory=/var/etcd/data/tmp": Equal("--snapstore-temp-directory=/var/etcd/data/tmp"), "--enable-snapshot-lease-renewal=true": Equal("--enable-snapshot-lease-renewal=true"), - fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", componentlease.GetFullSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", componentlease.GetFullSnapshotLeaseName(instance))), - fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", componentlease.GetDeltaSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", componentlease.GetDeltaSnapshotLeaseName(instance))), + fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", utils.GetFullSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", utils.GetFullSnapshotLeaseName(instance))), + fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", utils.GetDeltaSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", utils.GetDeltaSnapshotLeaseName(instance))), fmt.Sprintf("%s=%s", "--store-prefix", instance.Spec.Backup.Store.Prefix): Equal(fmt.Sprintf("%s=%s", "--store-prefix", instance.Spec.Backup.Store.Prefix)), fmt.Sprintf("%s=%s", "--storage-provider", store): Equal(fmt.Sprintf("%s=%s", "--storage-provider", store)), fmt.Sprintf("%s=%s", "--store-container", *instance.Spec.Backup.Store.Container): Equal(fmt.Sprintf("%s=%s", "--store-container", *instance.Spec.Backup.Store.Container)), @@ -757,7 +756,7 @@ func fullLeaseIsCorrectlyReconciled(c client.Client, instance *druidv1alpha1.Etc ctx, cancel := context.WithTimeout(context.TODO(), timeout) defer cancel() req := types.NamespacedName{ - Name: componentlease.GetFullSnapshotLeaseName(instance), + Name: utils.GetFullSnapshotLeaseName(instance), Namespace: instance.Namespace, } @@ -775,7 +774,7 @@ func deltaLeaseIsCorrectlyReconciled(c client.Client, instance *druidv1alpha1.Et ctx, cancel := context.WithTimeout(context.TODO(), timeout) defer cancel() req := types.NamespacedName{ - Name: componentlease.GetDeltaSnapshotLeaseName(instance), + Name: utils.GetDeltaSnapshotLeaseName(instance), Namespace: instance.Namespace, } diff --git a/controllers/controller_ref_manager.go b/controllers/controller_ref_manager.go index d00162d21..a51eb207d 100644 --- a/controllers/controller_ref_manager.go +++ b/controllers/controller_ref_manager.go @@ -210,56 +210,6 @@ func (m *EtcdDruidRefManager) FetchStatefulSet(ctx context.Context, etcd *druidv return statefulSets, err } -// ClaimStatefulsets tries to take ownership of a list of Statefulsets. -// -// It will reconcile the following: -// * Adopt orphans if the selector matches. -// * Release owned objects if the selector no longer matches. -// * Remove ownerReferences from the statefulsets and use annotations -// -// Optional: If one or more filters are specified, a Statefulset will only be claimed if -// all filters return true. -// -// A non-nil error is returned if some form of reconciliation was attempted and -// failed. Usually, controllers should try again later in case reconciliation -// is still needed. -// -// If the error is nil, either the reconciliation succeeded, or no -// reconciliation was necessary. The list of statefulsets that you now own is returned. -func (m *EtcdDruidRefManager) ClaimStatefulsets(ctx context.Context, statefulSetList *appsv1.StatefulSetList, filters ...func(*appsv1.StatefulSet) bool) ([]*appsv1.StatefulSet, error) { - var ( - claimed []*appsv1.StatefulSet - errlist []error - ) - - match := func(obj metav1.Object) bool { - ss := obj.(*appsv1.StatefulSet) - // Check selector first so filters only run on potentially matching statefulsets. - if !m.Selector.Matches(labels.Set(ss.Labels)) { - return false - } - for _, filter := range filters { - if !filter(ss) { - return false - } - } - return true - } - - for k := range statefulSetList.Items { - sts := &statefulSetList.Items[k] - ok, err := m.claimObject(ctx, sts, match, m.AdoptResource, m.ReleaseResource) - if err != nil { - errlist = append(errlist, err) - continue - } - if ok { - claimed = append(claimed, sts) - } - } - return claimed, utilerrors.NewAggregate(errlist) -} - func (m *EtcdDruidRefManager) ClaimPodDisruptionBudget(ctx context.Context, pdb *policyv1beta1.PodDisruptionBudget, filters ...func(*policyv1beta1.PodDisruptionBudget) bool) (*policyv1beta1.PodDisruptionBudget, error) { var errlist []error @@ -425,25 +375,3 @@ func RecheckDeletionTimestamp(getObject func() (metav1.Object, error)) func() er return nil } } - -// CheckStatefulSet checks whether the given StatefulSet is healthy. -// A StatefulSet is considered healthy if its controller observed its current revision, -// it is not in an update (i.e. UpdateRevision is empty) and if its current replicas are equal to -// desired replicas specified in ETCD specs. -func CheckStatefulSet(etcd *druidv1alpha1.Etcd, statefulSet *appsv1.StatefulSet) error { - if statefulSet.Status.ObservedGeneration < statefulSet.Generation { - return fmt.Errorf("observed generation outdated (%d/%d)", statefulSet.Status.ObservedGeneration, statefulSet.Generation) - } - - replicas := int32(1) - - if etcd != nil { - replicas = etcd.Spec.Replicas - } - - if statefulSet.Status.ReadyReplicas < replicas { - return fmt.Errorf("not enough ready replicas (%d/%d)", statefulSet.Status.ReadyReplicas, replicas) - } - - return nil -} diff --git a/controllers/controllers_suite_test.go b/controllers/controllers_suite_test.go index 666725bea..147c871a1 100644 --- a/controllers/controllers_suite_test.go +++ b/controllers/controllers_suite_test.go @@ -105,12 +105,12 @@ var _ = BeforeSuite(func(done Done) { er, err := NewEtcdReconcilerWithImageVector(mgr, false) Expect(err).NotTo(HaveOccurred()) - err = er.SetupWithManager(mgr, 1, true) + err = er.SetupWithManager(mgr, 5, true) Expect(err).NotTo(HaveOccurred()) secret := NewSecret(mgr) - err = secret.SetupWithManager(mgr, 1) + err = secret.SetupWithManager(mgr, 5) Expect(err).NotTo(HaveOccurred()) custodian := NewEtcdCustodian(mgr, controllersconfig.EtcdCustodianController{ @@ -119,13 +119,13 @@ var _ = BeforeSuite(func(done Done) { }, }) - err = custodian.SetupWithManager(mgrCtx, mgr, 1, true) + err = custodian.SetupWithManager(mgrCtx, mgr, 5, true) Expect(err).NotTo(HaveOccurred()) etcdCopyBackupsTaskReconciler, err := NewEtcdCopyBackupsTaskReconcilerWithImageVector(mgr) Expect(err).NotTo(HaveOccurred()) - err = etcdCopyBackupsTaskReconciler.SetupWithManager(mgr, 1) + err = etcdCopyBackupsTaskReconciler.SetupWithManager(mgr, 5) Expect(err).NotTo(HaveOccurred()) activeDeadlineDuration, err = time.ParseDuration("2m") diff --git a/controllers/etcd_controller.go b/controllers/etcd_controller.go index 41e2d5f52..fbdfc2312 100644 --- a/controllers/etcd_controller.go +++ b/controllers/etcd_controller.go @@ -29,6 +29,8 @@ import ( componentconfigmap "github.com/gardener/etcd-druid/pkg/component/etcd/configmap" componentlease "github.com/gardener/etcd-druid/pkg/component/etcd/lease" componentservice "github.com/gardener/etcd-druid/pkg/component/etcd/service" + "github.com/gardener/etcd-druid/pkg/component/etcd/statefulset" + componentsts "github.com/gardener/etcd-druid/pkg/component/etcd/statefulset" druidpredicates "github.com/gardener/etcd-druid/pkg/predicate" "github.com/gardener/etcd-druid/pkg/utils" @@ -37,9 +39,9 @@ import ( "github.com/gardener/gardener/pkg/chartrenderer" "github.com/gardener/gardener/pkg/client/kubernetes" "github.com/gardener/gardener/pkg/controllerutils" + gardenercomponent "github.com/gardener/gardener/pkg/operation/botanist/component" "github.com/gardener/gardener/pkg/utils/imagevector" kutil "github.com/gardener/gardener/pkg/utils/kubernetes" - gardenerretry "github.com/gardener/gardener/pkg/utils/retry" "github.com/go-logr/logr" appsv1 "k8s.io/api/apps/v1" batchv1beta1 "k8s.io/api/batch/v1beta1" @@ -53,7 +55,6 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - errorsutil "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/yaml" "k8s.io/client-go/rest" @@ -140,10 +141,6 @@ func getChartPath() string { return filepath.Join("charts", "etcd") } -func getChartPathForStatefulSet() string { - return filepath.Join("etcd", "templates", "etcd-statefulset.yaml") -} - func getChartPathForServiceAccount() string { return filepath.Join("etcd", "templates", "etcd-serviceaccount.yaml") } @@ -370,7 +367,8 @@ func (r *EtcdReconciler) delete(ctx context.Context, etcd *druidv1alpha1.Etcd) ( } } - if waitForStatefulSetCleanup, err := r.removeDependantStatefulset(ctx, logger, etcd); err != nil { + stsDeployer := gardenercomponent.OpDestroyAndWait(componentsts.New(r.Client, logger, componentsts.Values{Name: etcd.Name, Namespace: etcd.Namespace})) + if err := stsDeployer.Destroy(ctx); err != nil { if err = r.updateEtcdErrorStatus(ctx, etcd, nil, err); err != nil { return ctrl.Result{ Requeue: true, @@ -379,10 +377,6 @@ func (r *EtcdReconciler) delete(ctx context.Context, etcd *druidv1alpha1.Etcd) ( return ctrl.Result{ Requeue: true, }, err - } else if waitForStatefulSetCleanup { - return ctrl.Result{ - RequeueAfter: 30 * time.Second, - }, nil } leaseDeployer := componentlease.New(r.Client, etcd.Namespace, componentlease.GenerateValues(etcd)) @@ -468,228 +462,6 @@ func (r *EtcdReconciler) getPodDisruptionBudgetFromEtcd(etcd *druidv1alpha1.Etcd return nil, fmt.Errorf("missing podDisruptionBudget template file in the charts: %v", pdbPath) } -func (r *EtcdReconciler) reconcileStatefulSet(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd, values map[string]interface{}) (*appsv1.StatefulSet, error) { - logger.Info("Reconciling etcd statefulset") - - // If any adoptions are attempted, we should first recheck for deletion with - // an uncached quorum read sometime after listing Machines (see #42639). - canAdoptFunc := RecheckDeletionTimestamp(func() (metav1.Object, error) { - foundEtcd := &druidv1alpha1.Etcd{} - err := r.Get(context.TODO(), types.NamespacedName{Name: etcd.Name, Namespace: etcd.Namespace}, foundEtcd) - if err != nil { - return nil, err - } - - if foundEtcd.GetDeletionTimestamp() != nil { - return nil, fmt.Errorf("%v/%v etcd is marked for deletion", etcd.Namespace, etcd.Name) - } - - if foundEtcd.UID != etcd.UID { - return nil, fmt.Errorf("original %v/%v etcd gone: got uid %v, wanted %v", etcd.Namespace, etcd.Name, foundEtcd.UID, etcd.UID) - } - return foundEtcd, nil - }) - - selector, err := metav1.LabelSelectorAsSelector(etcd.Spec.Selector) - if err != nil { - logger.Error(err, "Error converting etcd selector to selector") - return nil, err - } - dm := NewEtcdDruidRefManager(r.Client, r.Scheme, etcd, selector, etcdGVK, canAdoptFunc) - statefulSets, err := dm.FetchStatefulSet(ctx, etcd) - if err != nil { - logger.Error(err, "Error while fetching StatefulSet") - return nil, err - } - - logger.Info("Claiming existing etcd StatefulSet") - claimedStatefulSets, err := dm.ClaimStatefulsets(ctx, statefulSets) - if err != nil { - return nil, err - } - - if len(claimedStatefulSets) > 0 { - // Keep only 1 statefulset. Delete the rest - for i := 1; i < len(claimedStatefulSets); i++ { - sts := claimedStatefulSets[i] - logger.Info("Found duplicate StatefulSet, deleting it", "statefulset", kutil.Key(sts.Namespace, sts.Name).String()) - if err := r.Delete(ctx, sts); err != nil { - logger.Error(err, "Error in deleting duplicate StatefulSet", "statefulset", kutil.Key(sts.Namespace, sts.Name).String()) - continue - } - } - - // Fetch the updated statefulset - // TODO: (timuthy) Check if this is really needed. - sts := &appsv1.StatefulSet{} - if err := r.Get(ctx, types.NamespacedName{Name: claimedStatefulSets[0].Name, Namespace: claimedStatefulSets[0].Namespace}, sts); err != nil { - return nil, err - } - - // Statefulset is claimed by for this etcd. Just sync the specs - if sts, err = r.syncStatefulSetSpec(ctx, logger, sts, etcd, values); err != nil { - return nil, err - } - - // restart etcd pods in crashloop backoff - selector, err := metav1.LabelSelectorAsSelector(sts.Spec.Selector) - if err != nil { - logger.Error(err, "error converting StatefulSet selector to selector") - return nil, err - } - podList := &corev1.PodList{} - if err := r.List(ctx, podList, client.InNamespace(etcd.Namespace), client.MatchingLabelsSelector{Selector: selector}); err != nil { - return nil, err - } - - for _, pod := range podList.Items { - if utils.IsPodInCrashloopBackoff(pod.Status) { - if err := r.Delete(ctx, &pod); err != nil { - logger.Error(err, fmt.Sprintf("error deleting etcd pod in crashloop: %s/%s", pod.Namespace, pod.Name)) - return nil, err - } - } - } - - sts, err = r.waitUntilStatefulSetReady(ctx, logger, etcd, sts) - return sts, err - } - - // Required statefulset doesn't exist. Create new - sts, err := r.getStatefulSetFromEtcd(etcd, values) - if err != nil { - return nil, err - } - - err = r.Create(ctx, sts) - - // Ignore the precondition violated error, this machine is already updated - // with the desired label. - if err == errorsutil.ErrPreconditionViolated { - logger.Info("StatefulSet %s precondition doesn't hold, skip updating it.", "statefulset", kutil.Key(sts.Namespace, sts.Name).String()) - err = nil - } - if err != nil { - return nil, err - } - - sts, err = r.waitUntilStatefulSetReady(ctx, logger, etcd, sts) - return sts, err -} - -func getContainerMapFromPodTemplateSpec(spec corev1.PodSpec) map[string]corev1.Container { - containers := map[string]corev1.Container{} - for _, c := range spec.Containers { - containers[c.Name] = c - } - return containers -} - -func clusterScaledUpToMultiNode(etcd *druidv1alpha1.Etcd) bool { - if etcd == nil { - return false - } - return etcd.Spec.Replicas != 1 && - // Also consider `0` here because this field was not maintained in earlier releases. - (etcd.Status.Replicas == 0 || - etcd.Status.Replicas == 1) -} - -func (r *EtcdReconciler) syncStatefulSetSpec(ctx context.Context, logger logr.Logger, ss *appsv1.StatefulSet, etcd *druidv1alpha1.Etcd, values map[string]interface{}) (*appsv1.StatefulSet, error) { - decoded, err := r.getStatefulSetFromEtcd(etcd, values) - if err != nil { - return nil, err - } - - if reflect.DeepEqual(ss.Spec, decoded.Spec) { - return ss, nil - } - - ssCopy := ss.DeepCopy() - ssCopy.Spec.Replicas = decoded.Spec.Replicas - ssCopy.Spec.UpdateStrategy = decoded.Spec.UpdateStrategy - - recreateSTS := false - if !reflect.DeepEqual(ssCopy.Spec.Selector, decoded.Spec.Selector) { - recreateSTS = true - } - - // We introduced a peer service for multi-node etcd which must be set - // when the previous single-node StatefulSet still has the client service configured. - if ssCopy.Spec.ServiceName != decoded.Spec.ServiceName { - if clusterScaledUpToMultiNode(etcd) { - recreateSTS = true - } - } - - // Applying suggestions from - containers := getContainerMapFromPodTemplateSpec(ssCopy.Spec.Template.Spec) - for i, c := range decoded.Spec.Template.Spec.Containers { - container, ok := containers[c.Name] - if !ok { - return nil, fmt.Errorf("container with name %s could not be fetched from statefulset %s", c.Name, decoded.Name) - } - // only copy requested resources from the existing stateful set to avoid copying already removed (from the etcd resource) resource limits - decoded.Spec.Template.Spec.Containers[i].Resources.Requests = container.Resources.Requests - } - - ssCopy.Spec.Template = decoded.Spec.Template - - if recreateSTS { - logger.Info("StatefulSet change requires recreation", "statefulset", kutil.Key(ssCopy.Namespace, ssCopy.Name).String()) - err = r.recreateStatefulset(ctx, decoded) - } else { - err = retry.RetryOnConflict(retry.DefaultBackoff, func() error { - return r.Patch(ctx, ssCopy, client.MergeFrom(ss)) - }) - } - - // Ignore the precondition violated error, this machine is already updated - // with the desired label. - if err == errorsutil.ErrPreconditionViolated { - logger.Info("StatefulSet precondition doesn't hold, skip updating it", "statefulset", kutil.Key(ss.Namespace, ss.Name).String()) - err = nil - } - if err != nil { - return nil, err - } - return ssCopy, err -} - -func (r *EtcdReconciler) recreateStatefulset(ctx context.Context, ss *appsv1.StatefulSet) error { - skipDelete := false - err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - if !skipDelete { - if err := r.Delete(ctx, ss); err != nil && !apierrors.IsNotFound(err) { - return err - } - } - skipDelete = true - return r.Create(ctx, ss) - }) - return err -} - -func (r *EtcdReconciler) getStatefulSetFromEtcd(etcd *druidv1alpha1.Etcd, values map[string]interface{}) (*appsv1.StatefulSet, error) { - var err error - decoded := &appsv1.StatefulSet{} - statefulSetPath := getChartPathForStatefulSet() - chartPath := getChartPath() - renderedChart, err := r.chartApplier.Render(chartPath, etcd.Name, etcd.Namespace, values) - if err != nil { - return nil, err - } - if _, ok := renderedChart.Files()[statefulSetPath]; !ok { - return nil, fmt.Errorf("missing configmap template file in the charts: %v", statefulSetPath) - } - - decoder := yaml.NewYAMLOrJSONDecoder(bytes.NewReader([]byte(renderedChart.Files()[statefulSetPath])), 1024) - if err = decoder.Decode(&decoded); err != nil { - return nil, err - } - return decoded, nil -} - func decodeObject(renderedChart *chartrenderer.RenderedChart, path string, object interface{}) error { if content, ok := renderedChart.Files()[path]; ok { decoder := yaml.NewYAMLOrJSONDecoder(bytes.NewReader([]byte(content)), 1024) @@ -840,31 +612,48 @@ func (r *EtcdReconciler) reconcileEtcd(ctx context.Context, logger logr.Logger, return nil, nil, fmt.Errorf("Spec.Replicas should not be even number: %d", etcd.Spec.Replicas) } - val := componentetcd.Values{ - ConfigMap: componentconfigmap.GenerateValues(etcd), - Lease: componentlease.GenerateValues(etcd), - Service: componentservice.GenerateValues(etcd), + etcdImage, etcdBackupImage, err := getEtcdImages(r.ImageVector, etcd) + if err != nil { + return nil, nil, err } - leaseDeployer := componentlease.New(r.Client, etcd.Namespace, val.Lease) + if etcd.Spec.Etcd.Image == nil { + if etcdImage == "" { + return nil, nil, fmt.Errorf("either etcd resource or image vector should have %s image while deploying statefulset", common.Etcd) + } + } else { + etcdImage = *etcd.Spec.Etcd.Image + } + + if etcd.Spec.Backup.Image == nil { + if etcdBackupImage == "" { + return nil, nil, fmt.Errorf("either etcd resource or image vector should have %s image while deploying statefulset", common.BackupRestore) + } + } else { + etcdBackupImage = *etcd.Spec.Backup.Image + } + + val := componentetcd.Values{} + val.Lease = componentlease.GenerateValues(etcd) + leaseDeployer := componentlease.New(r.Client, etcd.Namespace, val.Lease) if err := leaseDeployer.Deploy(ctx); err != nil { return nil, nil, err } + val.Service = componentservice.GenerateValues(etcd) serviceDeployer := componentservice.New(r.Client, etcd.Namespace, val.Service) - if err := serviceDeployer.Deploy(ctx); err != nil { return nil, nil, err } + val.ConfigMap = componentconfigmap.GenerateValues(etcd) cmDeployer := componentconfigmap.New(r.Client, etcd.Namespace, val.ConfigMap) if err := cmDeployer.Deploy(ctx); err != nil { return nil, nil, err } values, err := r.getMapFromEtcd(r.ImageVector, etcd, val, r.disableEtcdServiceAccountAutomount) - if err != nil { return nil, nil, err } @@ -889,12 +678,29 @@ func (r *EtcdReconciler) reconcileEtcd(ctx context.Context, logger logr.Logger, return nil, nil, err } - sts, err := r.reconcileStatefulSet(ctx, logger, etcd, values) - if err != nil { + val.StatefulSet = statefulset.GenerateValues(etcd, + &val.Service.ClientPort, + &val.Service.ServerPort, + &val.Service.BackupPort, + etcdImage, + etcdBackupImage, + map[string]string{ + "checksum/etcd-configmap": val.ConfigMap.ConfigMapChecksum, + }) + + // Create an OpWaiter because after the depoyment we want to wait until the StatefulSet is ready. + var ( + stsDeployer = componentsts.New(r.Client, logger, val.StatefulSet) + deployWaiter = gardenercomponent.OpWaiter(stsDeployer) + ) + + if err := deployWaiter.Deploy(ctx); err != nil { return nil, nil, err } - return &val.Service.ClientServiceName, sts, nil + sts, err := stsDeployer.Get(ctx) + + return &val.Service.ClientServiceName, sts, err } func checkEtcdOwnerReference(refs []metav1.OwnerReference, etcd *druidv1alpha1.Etcd) bool { @@ -1092,8 +898,6 @@ func (r *EtcdReconciler) getMapFromEtcd(im imagevector.ImageVector, etcd *druidv } } - annotations["checksum/etcd-configmap"] = val.ConfigMap.ConfigMapChecksum - pdbMinAvailable := 0 if etcd.Spec.Replicas > 1 { pdbMinAvailable = int(etcd.Spec.Replicas) @@ -1207,44 +1011,6 @@ func getEtcdImages(im imagevector.ImageVector, etcd *druidv1alpha1.Etcd) (string return etcdImage, etcdBackupImage, nil } -func (r *EtcdReconciler) removeDependantStatefulset(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd) (waitForStatefulSetCleanup bool, err error) { - selector, err := metav1.LabelSelectorAsSelector(etcd.Spec.Selector) - if err != nil { - return false, err - } - - statefulSets := &appsv1.StatefulSetList{} - if err = r.List(ctx, statefulSets, client.InNamespace(etcd.Namespace), client.MatchingLabelsSelector{Selector: selector}); err != nil { - return false, err - } - - waitForStatefulSetCleanup = false - - for _, sts := range statefulSets.Items { - if canDeleteStatefulset(&sts, etcd) { - var key = kutil.Key(sts.GetNamespace(), sts.GetName()).String() - logger.Info("Deleting statefulset", "statefulset", key) - if err := r.Delete(ctx, &sts, client.PropagationPolicy(metav1.DeletePropagationBackground)); err != nil { - return false, err - } - - // StatefultSet deletion succeeded. Now we need to wait for it to be cleaned up. - waitForStatefulSetCleanup = true - } - } - - return waitForStatefulSetCleanup, nil -} - -func canDeleteStatefulset(sts *appsv1.StatefulSet, etcd *druidv1alpha1.Etcd) bool { - // Adding check for ownerReference to have the same delete path for statefulset. - // The statefulset with ownerReference will be deleted automatically when etcd is - // delete but we would like to explicitly delete it to maintain uniformity in the - // delete path. - return checkEtcdOwnerReference(sts.GetOwnerReferences(), etcd) || - checkEtcdAnnotations(sts.GetAnnotations(), etcd) -} - func bootstrapReset(etcd *druidv1alpha1.Etcd) { etcd.Status.Members = nil etcd.Status.ClusterSize = pointer.Int32Ptr(etcd.Spec.Replicas) @@ -1266,7 +1032,7 @@ func (r *EtcdReconciler) updateEtcdErrorStatus(ctx context.Context, etcd *druidv bootstrapReset(etcd) } - ready := CheckStatefulSet(etcd, sts) == nil + ready := utils.CheckStatefulSet(etcd.Spec.Replicas, sts) == nil etcd.Status.Ready = &ready etcd.Status.Replicas = pointer.Int32PtrDerefOr(sts.Spec.Replicas, 0) } @@ -1281,7 +1047,7 @@ func (r *EtcdReconciler) updateEtcdStatus(ctx context.Context, etcd *druidv1alph bootstrapReset(etcd) } - ready := CheckStatefulSet(etcd, sts) == nil + ready := utils.CheckStatefulSet(etcd.Spec.Replicas, sts) == nil etcd.Status.Ready = &ready svcName := serviceName etcd.Status.ServiceName = &svcName @@ -1292,39 +1058,6 @@ func (r *EtcdReconciler) updateEtcdStatus(ctx context.Context, etcd *druidv1alph }) } -func (r *EtcdReconciler) waitUntilStatefulSetReady(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) (*appsv1.StatefulSet, error) { - var ( - ss = &appsv1.StatefulSet{} - ) - - err := gardenerretry.UntilTimeout(ctx, DefaultInterval, DefaultTimeout, func(ctx context.Context) (bool, error) { - if err := r.Get(ctx, types.NamespacedName{Name: sts.Name, Namespace: sts.Namespace}, ss); err != nil { - if apierrors.IsNotFound(err) { - return gardenerretry.MinorError(err) - } - return gardenerretry.SevereError(err) - } - if err := CheckStatefulSet(etcd, ss); err != nil { - return gardenerretry.MinorError(err) - } - return gardenerretry.Ok() - }) - if err != nil { - messages, err2 := r.fetchPVCEventsFor(ctx, ss) - if err2 != nil { - logger.Error(err2, "Error while fetching events for depending PVC") - // don't expose this error since fetching events is a best effort - // and shouldn't be confused with the actual error - return ss, err - } - if messages != "" { - return ss, fmt.Errorf("%w\n\n%s", err, messages) - } - } - - return ss, err -} - func (r *EtcdReconciler) fetchPVCEventsFor(ctx context.Context, ss *appsv1.StatefulSet) (string, error) { pvcs := &corev1.PersistentVolumeClaimList{} if err := r.List(ctx, pvcs, client.InNamespace(ss.GetNamespace())); err != nil { diff --git a/controllers/etcd_controller_test.go b/controllers/etcd_controller_test.go index f8a1372e3..2fd35547c 100644 --- a/controllers/etcd_controller_test.go +++ b/controllers/etcd_controller_test.go @@ -24,14 +24,12 @@ import ( druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" "github.com/gardener/etcd-druid/pkg/common" - componentlease "github.com/gardener/etcd-druid/pkg/component/etcd/lease" "github.com/gardener/etcd-druid/pkg/utils" v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants" "github.com/gardener/gardener/pkg/controllerutils" gardenerUtils "github.com/gardener/gardener/pkg/utils" "github.com/gardener/gardener/pkg/utils/imagevector" - "github.com/gardener/gardener/pkg/utils/kubernetes/health" "github.com/gardener/gardener/pkg/utils/test/matchers" "github.com/ghodss/yaml" . "github.com/onsi/ginkgo" @@ -44,7 +42,6 @@ import ( coordinationv1 "k8s.io/api/coordination/v1" corev1 "k8s.io/api/core/v1" rbac "k8s.io/api/rbac/v1" - "k8s.io/apimachinery/pkg/api/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -98,6 +95,7 @@ var ( quota = resource.MustParse("8Gi") provider = druidv1alpha1.StorageProvider("Local") prefix = "/tmp" + uid = "a9b8c7d6e5f4" volumeClaimTemplateName = "etcd-main" garbageCollectionPolicy = druidv1alpha1.GarbageCollectionPolicy(druidv1alpha1.GarbageCollectionPolicyExponential) metricsBasic = druidv1alpha1.Basic @@ -301,7 +299,7 @@ var _ = Describe("Druid", func() { }) Describe("Druid custodian controller", func() { - Context("when adding etcd resources with statefulset already present", func() { + Context("when statefulset status is updated", func() { var ( instance *druidv1alpha1.Etcd sts *appsv1.StatefulSet @@ -315,11 +313,27 @@ var _ = Describe("Druid", func() { instance = getEtcd("foo19", "default", false) c = mgr.GetClient() - // Create StatefulSet - sts = createStatefulset(instance.Name, instance.Namespace, instance.Spec.Labels) - Expect(c.Create(ctx, sts)).To(Succeed()) + ns := corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: instance.Namespace, + }, + } + _, err := controllerutil.CreateOrUpdate(ctx, c, &ns, func() error { return nil }) + Expect(err).To(Not(HaveOccurred())) - Eventually(func() error { return c.Get(ctx, client.ObjectKeyFromObject(instance), sts) }, timeout, pollingInterval).Should(Succeed()) + storeSecret := instance.Spec.Backup.Store.SecretRef.Name + errors := createSecrets(c, instance.Namespace, storeSecret) + Expect(len(errors)).Should(BeZero()) + Expect(c.Create(ctx, instance)).To(Succeed()) + + sts = &appsv1.StatefulSet{} + // Wait until StatefulSet has been created by controller + Eventually(func() error { + return c.Get(ctx, types.NamespacedName{ + Name: instance.Name, + Namespace: instance.Namespace, + }, sts) + }, timeout, pollingInterval).Should(BeNil()) sts.Status.Replicas = 1 sts.Status.ReadyReplicas = 1 @@ -336,12 +350,6 @@ var _ = Describe("Druid", func() { return nil }, timeout, pollingInterval).Should(Succeed()) - // Create ETCD instance - storeSecret := instance.Spec.Backup.Store.SecretRef.Name - errors := createSecrets(c, instance.Namespace, storeSecret) - Expect(len(errors)).Should(BeZero()) - Expect(c.Create(ctx, instance)).To(Succeed()) - Eventually(func() error { return statefulsetIsCorrectlyReconciled(c, instance, sts) }, timeout, pollingInterval).Should(BeNil()) // Check if ETCD has ready replicas more than zero @@ -407,190 +415,6 @@ var _ = Describe("Druid", func() { }) }) - DescribeTable("when adding etcd resources with statefulset already present", - func(name string, setupStatefulSet StatefulSetInitializer) { - var sts *appsv1.StatefulSet - instance := getEtcd(name, "default", false) - c := mgr.GetClient() - - switch setupStatefulSet { - case WithoutOwner: - sts = createStatefulset(name, "default", instance.Spec.Labels) - Expect(sts.OwnerReferences).Should(BeNil()) - case WithOwnerReference: - sts = createStatefulsetWithOwnerReference(instance) - Expect(len(sts.OwnerReferences)).Should(Equal(1)) - case WithOwnerAnnotation: - sts = createStatefulsetWithOwnerAnnotations(instance) - default: - Fail("StatefulSetInitializer invalid") - } - needsOwnerRefUpdate := len(sts.OwnerReferences) > 0 - storeSecret := instance.Spec.Backup.Store.SecretRef.Name - - // Create Secrets - errors := createSecrets(c, instance.Namespace, storeSecret) - Expect(len(errors)).Should(BeZero()) - - // Create StatefulSet - Expect(c.Create(context.TODO(), sts)).To(Succeed()) - - // Create StatefulSet - Expect(c.Create(context.TODO(), instance)).To(Succeed()) - - // Update OwnerRef with UID from just created `etcd` instance - if needsOwnerRefUpdate { - Eventually(func() error { - if err := c.Get(context.TODO(), client.ObjectKeyFromObject(instance), instance); err != nil { - return err - } - if instance.UID != "" { - instance.TypeMeta = metav1.TypeMeta{ - APIVersion: "druid.gardener.cloud/v1alpha1", - Kind: "etcd", - } - return nil - } - return fmt.Errorf("etcd object not yet created") - }, timeout, pollingInterval).Should(BeNil()) - sts.OwnerReferences[0].UID = instance.UID - Expect(c.Update(context.TODO(), sts)).To(Succeed()) - } - - sts = &appsv1.StatefulSet{} - Eventually(func() error { return statefulsetIsCorrectlyReconciled(c, instance, sts) }, timeout, pollingInterval).Should(BeNil()) - setStatefulSetReady(sts) - Expect(c.Status().Update(context.TODO(), sts)).To(Succeed()) - Expect(c.Delete(context.TODO(), instance)).To(Succeed()) - }, - Entry("when statefulset not owned by etcd, druid should adopt the statefulset", "foo20", WithoutOwner), - Entry("when statefulset owned by etcd with owner reference set, druid should remove ownerref and add annotations", "foo21", WithOwnerReference), - Entry("when statefulset owned by etcd with owner annotations set, druid should persist the annotations", "foo22", WithOwnerAnnotation), - Entry("when etcd has the spec changed, druid should reconcile statefulset", "foo23", WithoutOwner), - ) - - Describe("when adding etcd resources with statefulset already present", func() { - Context("when statefulset is in crashloopbackoff", func() { - var err error - var instance *druidv1alpha1.Etcd - var c client.Client - var p *corev1.Pod - var ss *appsv1.StatefulSet - BeforeEach(func() { - instance = getEtcd("foo24", "default", false) - Expect(err).NotTo(HaveOccurred()) - c = mgr.GetClient() - p = createPod(fmt.Sprintf("%s-0", instance.Name), "default", instance.Spec.Labels) - ss = createStatefulset(instance.Name, instance.Namespace, instance.Spec.Labels) - Expect(c.Create(context.TODO(), p)).To(Succeed()) - Expect(c.Create(context.TODO(), ss)).To(Succeed()) - p.Status.ContainerStatuses = []corev1.ContainerStatus{ - { - Name: "Container-0", - State: corev1.ContainerState{ - Waiting: &corev1.ContainerStateWaiting{ - Reason: "CrashLoopBackOff", - Message: "Container is in CrashLoopBackOff.", - }, - }, - }, - } - err = c.Status().Update(context.TODO(), p) - Expect(err).NotTo(HaveOccurred()) - storeSecret := instance.Spec.Backup.Store.SecretRef.Name - errors := createSecrets(c, instance.Namespace, storeSecret) - Expect(len(errors)).Should(BeZero()) - - }) - It("should restart pod", func() { - Expect(c.Create(context.TODO(), instance)).To(Succeed()) - Eventually(func() error { return podDeleted(c, instance) }, timeout, pollingInterval).Should(BeNil()) - }) - AfterEach(func() { - s := &appsv1.StatefulSet{} - Eventually(func() error { return statefulsetIsCorrectlyReconciled(c, instance, s) }, timeout, pollingInterval).Should(BeNil()) - setStatefulSetReady(s) - Expect(c.Status().Update(context.TODO(), s)).To(Succeed()) - Expect(c.Delete(context.TODO(), instance)).To(Succeed()) - }) - }) - }) - - DescribeTable("when deleting etcd resources", - func(name string, setupStatefulSet StatefulSetInitializer) { - var sts *appsv1.StatefulSet - instance := getEtcd(name, "default", false) - c := mgr.GetClient() - - switch setupStatefulSet { - case WithoutOwner: - sts = createStatefulset(name, "default", instance.Spec.Labels) - Expect(sts.OwnerReferences).Should(BeNil()) - case WithOwnerReference: - sts = createStatefulsetWithOwnerReference(instance) - Expect(len(sts.OwnerReferences)).ShouldNot(BeZero()) - case WithOwnerAnnotation: - sts = createStatefulsetWithOwnerAnnotations(instance) - default: - Fail("StatefulSetInitializer invalid") - } - stopCh := make(chan struct{}) - storeSecret := instance.Spec.Backup.Store.SecretRef.Name - needsOwnerRefUpdate := len(sts.OwnerReferences) > 0 - - // Create Secrets - errors := createSecrets(c, instance.Namespace, storeSecret) - Expect(len(errors)).Should(BeZero()) - - // Create StatefulSet - Expect(c.Create(context.TODO(), sts)).To(Succeed()) - - // Create StatefulSet - Expect(c.Create(context.TODO(), instance)).To(Succeed()) - - // Update OwnerRef with UID from just created `etcd` instance - if needsOwnerRefUpdate { - Eventually(func() error { - if err := c.Get(context.TODO(), client.ObjectKeyFromObject(instance), instance); err != nil { - return err - } - if instance.UID != "" { - instance.TypeMeta = metav1.TypeMeta{ - APIVersion: "druid.gardener.cloud/v1alpha1", - Kind: "etcd", - } - return nil - } - return fmt.Errorf("etcd object not yet created") - }, timeout, pollingInterval).Should(BeNil()) - sts.OwnerReferences[0].UID = instance.UID - Expect(c.Update(context.TODO(), sts)).To(Succeed()) - } - - sts = &appsv1.StatefulSet{} - Eventually(func() error { return statefulsetIsCorrectlyReconciled(c, instance, sts) }, timeout, pollingInterval).Should(BeNil()) - // This go-routine is to set that statefulset is ready manually as statefulset controller is absent for tests. - go func() { - for { - select { - case <-time.After(time.Second * 2): - Expect(setAndCheckStatefulSetReady(c, instance)).To(Succeed()) - case <-stopCh: - return - } - } - }() - Eventually(func() error { return isEtcdReady(c, instance) }, timeout, pollingInterval).Should(BeNil()) - close(stopCh) - Expect(c.Delete(context.TODO(), instance)).To(Succeed()) - Eventually(func() error { return statefulSetRemoved(c, sts) }, timeout, pollingInterval).Should(BeNil()) - Eventually(func() error { return etcdRemoved(c, instance) }, timeout, pollingInterval).Should(BeNil()) - }, - Entry("when statefulset with ownerReference and without owner annotations, druid should adopt and delete statefulset", "foo25", WithOwnerReference), - Entry("when statefulset without ownerReference and without owner annotations, druid should adopt and delete statefulset", "foo26", WithoutOwner), - Entry("when statefulset without ownerReference and with owner annotations, druid should adopt and delete statefulset", "foo27", WithOwnerAnnotation), - ) - DescribeTable("when etcd resource is created", func(name string, generateEtcd func(string, string) *druidv1alpha1.Etcd, validate func(*druidv1alpha1.Etcd, *appsv1.StatefulSet, *corev1.ConfigMap, *corev1.Service, *corev1.Service)) { var err error @@ -970,26 +794,6 @@ func validateRole(instance *druidv1alpha1.Etcd, role *rbac.Role) { })) } -func podDeleted(c client.Client, etcd *druidv1alpha1.Etcd) error { - ctx, cancel := context.WithTimeout(context.TODO(), timeout) - defer cancel() - pod := &corev1.Pod{} - req := types.NamespacedName{ - Name: fmt.Sprintf("%s-0", etcd.Name), - Namespace: etcd.Namespace, - } - if err := c.Get(ctx, req, pod); err != nil { - if errors.IsNotFound(err) { - // Object not found, return. Created objects are automatically garbage collected. - // For additional cleanup logic use finalizers - return nil - } - return err - } - return fmt.Errorf("pod not deleted") - -} - func validateEtcdWithDefaults(instance *druidv1alpha1.Etcd, s *appsv1.StatefulSet, cm *corev1.ConfigMap, clSvc *corev1.Service, prSvc *corev1.Service) { configYML := cm.Data[etcdConfig] config := map[string]interface{}{} @@ -1669,8 +1473,8 @@ func validateEtcd(instance *druidv1alpha1.Etcd, s *appsv1.StatefulSet, cm *corev fmt.Sprintf("%s=%s", "--owner-check-interval", instance.Spec.Backup.OwnerCheck.Interval.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--owner-check-interval", instance.Spec.Backup.OwnerCheck.Interval.Duration.String())), fmt.Sprintf("%s=%s", "--owner-check-timeout", instance.Spec.Backup.OwnerCheck.Timeout.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--owner-check-timeout", instance.Spec.Backup.OwnerCheck.Timeout.Duration.String())), fmt.Sprintf("%s=%s", "--owner-check-dns-cache-ttl", instance.Spec.Backup.OwnerCheck.DNSCacheTTL.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--owner-check-dns-cache-ttl", instance.Spec.Backup.OwnerCheck.DNSCacheTTL.Duration.String())), - fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", componentlease.GetDeltaSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", componentlease.GetDeltaSnapshotLeaseName(instance))), - fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", componentlease.GetFullSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", componentlease.GetFullSnapshotLeaseName(instance))), + fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", utils.GetDeltaSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", utils.GetDeltaSnapshotLeaseName(instance))), + fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", utils.GetFullSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", utils.GetFullSnapshotLeaseName(instance))), }), "Ports": ConsistOf([]corev1.ContainerPort{ { @@ -2121,7 +1925,7 @@ func etcdRemoved(c client.Client, etcd *druidv1alpha1.Etcd) error { Namespace: etcd.Namespace, } if err := c.Get(ctx, req, e); err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Object not found, return. Created objects are automatically garbage collected. // For additional cleanup logic use finalizers return nil @@ -2131,44 +1935,6 @@ func etcdRemoved(c client.Client, etcd *druidv1alpha1.Etcd) error { return fmt.Errorf("etcd not deleted") } -func isEtcdReady(c client.Client, etcd *druidv1alpha1.Etcd) error { - ctx, cancel := context.WithTimeout(context.TODO(), timeout) - defer cancel() - e := &druidv1alpha1.Etcd{} - req := types.NamespacedName{ - Name: etcd.Name, - Namespace: etcd.Namespace, - } - if err := c.Get(ctx, req, e); err != nil { - return err - } - if e.Status.Ready == nil || !*e.Status.Ready { - return fmt.Errorf("etcd not ready") - } - return nil -} - -func setAndCheckStatefulSetReady(c client.Client, etcd *druidv1alpha1.Etcd) error { - ctx, cancel := context.WithTimeout(context.TODO(), timeout) - defer cancel() - ss := &appsv1.StatefulSet{} - req := types.NamespacedName{ - Name: etcd.Name, - Namespace: etcd.Namespace, - } - if err := c.Get(ctx, req, ss); err != nil { - return err - } - setStatefulSetReady(ss) - if err := c.Status().Update(context.TODO(), ss); err != nil { - return err - } - if err := health.CheckStatefulSet(ss); err != nil { - return err - } - return nil -} - func statefulSetRemoved(c client.Client, ss *appsv1.StatefulSet) error { ctx, cancel := context.WithTimeout(context.TODO(), timeout) defer cancel() @@ -2178,7 +1944,7 @@ func statefulSetRemoved(c client.Client, ss *appsv1.StatefulSet) error { Namespace: ss.Namespace, } if err := c.Get(ctx, req, sts); err != nil { - if errors.IsNotFound(err) { + if apierrors.IsNotFound(err) { // Object not found, return. Created objects are automatically garbage collected. // For additional cleanup logic use finalizers return nil @@ -2199,11 +1965,8 @@ func statefulsetIsCorrectlyReconciled(c client.Client, instance *druidv1alpha1.E if err := c.Get(ctx, req, ss); err != nil { return err } - if !checkEtcdAnnotations(ss.GetAnnotations(), instance) { - return fmt.Errorf("no annotations") - } - if checkEtcdOwnerReference(ss.GetOwnerReferences(), instance) { - return fmt.Errorf("ownerReference exists") + if !checkEtcdOwnerReference(ss.GetOwnerReferences(), instance) { + return fmt.Errorf("ownerReference does not exist") } return nil } @@ -2304,93 +2067,6 @@ func roleBindingIsCorrectlyReconciled(c client.Client, instance *druidv1alpha1.E return nil } -func createStatefulset(name, namespace string, labels map[string]string) *appsv1.StatefulSet { - var replicas int32 = 0 - ss := appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: labels, - }, - Spec: appsv1.StatefulSetSpec{ - Replicas: &replicas, - Selector: &metav1.LabelSelector{ - MatchLabels: labels, - }, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-0", name), - Namespace: namespace, - Labels: labels, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "etcd", - Image: "eu.gcr.io/gardener-project/gardener/etcd:v3.4.13-bootstrap", - }, - { - Name: "backup-restore", - Image: "eu.gcr.io/gardener-project/gardener/etcdbrctl:v0.12.0", - }, - }, - }, - }, - VolumeClaimTemplates: []corev1.PersistentVolumeClaim{}, - ServiceName: "etcd-client", - UpdateStrategy: appsv1.StatefulSetUpdateStrategy{}, - }, - } - return &ss -} - -func createStatefulsetWithOwnerReference(etcd *druidv1alpha1.Etcd) *appsv1.StatefulSet { - ss := createStatefulset(etcd.Name, etcd.Namespace, etcd.Spec.Labels) - ss.SetOwnerReferences([]metav1.OwnerReference{ - { - APIVersion: "druid.gardener.cloud/v1alpha1", - Kind: "etcd", - Name: etcd.Name, - UID: "foo", - Controller: pointer.BoolPtr(true), - BlockOwnerDeletion: pointer.BoolPtr(true), - }, - }) - return ss -} - -func createStatefulsetWithOwnerAnnotations(etcd *druidv1alpha1.Etcd) *appsv1.StatefulSet { - ss := createStatefulset(etcd.Name, etcd.Namespace, etcd.Spec.Labels) - ss.SetAnnotations(map[string]string{ - "gardener.cloud/owned-by": fmt.Sprintf("%s/%s", etcd.Namespace, etcd.Name), - "gardener.cloud/owner-type": "etcd", - }) - return ss -} - -func createPod(name, namespace string, labels map[string]string) *corev1.Pod { - pod := corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - Labels: labels, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "etcd", - Image: "eu.gcr.io/gardener-project/gardener/etcd:v3.4.13-bootstrap", - }, - { - Name: "backup-restore", - Image: "eu.gcr.io/gardener-project/gardener/etcdbrctl:v0.12.0", - }, - }, - }, - } - return &pod -} - func getEtcdWithGCS(name, namespace string) *druidv1alpha1.Etcd { provider := druidv1alpha1.StorageProvider("gcp") etcd := getEtcdWithTLS(name, namespace) @@ -2503,6 +2179,7 @@ func getEtcd(name, namespace string, tlsEnabled bool) *druidv1alpha1.Etcd { ObjectMeta: metav1.ObjectMeta{ Name: name, Namespace: namespace, + UID: types.UID(uid), }, Spec: druidv1alpha1.EtcdSpec{ Annotations: map[string]string{ diff --git a/controllers/etcd_custodian_controller.go b/controllers/etcd_custodian_controller.go index 84c70169f..035d7b9ee 100644 --- a/controllers/etcd_custodian_controller.go +++ b/controllers/etcd_custodian_controller.go @@ -21,7 +21,6 @@ import ( "time" "github.com/gardener/gardener/pkg/controllerutils" - "github.com/gardener/gardener/pkg/controllerutils/mapper" kutil "github.com/gardener/gardener/pkg/utils/kubernetes" "github.com/go-logr/logr" @@ -50,6 +49,7 @@ import ( "github.com/gardener/etcd-druid/pkg/health/status" druidmapper "github.com/gardener/etcd-druid/pkg/mapper" druidpredicates "github.com/gardener/etcd-druid/pkg/predicate" + "github.com/gardener/etcd-druid/pkg/utils" ) // EtcdCustodian reconciles status of Etcd object @@ -160,7 +160,7 @@ func (ec *EtcdCustodian) updateEtcdStatus(ctx context.Context, logger logr.Logge Name: sts.Name, } - ready := CheckStatefulSet(etcd, sts) == nil + ready := utils.CheckStatefulSet(etcd.Spec.Replicas, sts) == nil // To be changed once we have multiple replicas. etcd.Status.CurrentReplicas = sts.Status.CurrentReplicas diff --git a/pkg/component/etcd/lease/values_helper.go b/pkg/component/etcd/lease/values_helper.go index 914e56540..31735763a 100644 --- a/pkg/component/etcd/lease/values_helper.go +++ b/pkg/component/etcd/lease/values_helper.go @@ -15,9 +15,8 @@ package lease import ( - "fmt" - druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + "github.com/gardener/etcd-druid/pkg/utils" ) // GenerateValues generates `lease.Values` for the lease component with the given parameters. @@ -26,18 +25,8 @@ func GenerateValues(etcd *druidv1alpha1.Etcd) Values { BackupEnabled: etcd.Spec.Backup.Store != nil, EtcdName: etcd.Name, EtcdUID: etcd.UID, - DeltaSnapshotLeaseName: GetDeltaSnapshotLeaseName(etcd), - FullSnapshotLeaseName: GetFullSnapshotLeaseName(etcd), + DeltaSnapshotLeaseName: utils.GetDeltaSnapshotLeaseName(etcd), + FullSnapshotLeaseName: utils.GetFullSnapshotLeaseName(etcd), Replicas: etcd.Spec.Replicas, } } - -// GetDeltaSnapshotLeaseName returns the name of the delta snapshot lease based on the given `etcd` object. -func GetDeltaSnapshotLeaseName(etcd *druidv1alpha1.Etcd) string { - return fmt.Sprintf("%s-delta-snap", etcd.Name) -} - -// GetFullSnapshotLeaseName returns the name of the full snapshot lease based on the given `etcd` object. -func GetFullSnapshotLeaseName(etcd *druidv1alpha1.Etcd) string { - return fmt.Sprintf("%s-full-snap", etcd.Name) -} diff --git a/pkg/component/etcd/statefulset/statefulset.go b/pkg/component/etcd/statefulset/statefulset.go new file mode 100644 index 000000000..fda4e74db --- /dev/null +++ b/pkg/component/etcd/statefulset/statefulset.go @@ -0,0 +1,715 @@ +// Copyright (c) 2022 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statefulset + +import ( + "context" + "fmt" + "strconv" + "strings" + "time" + + druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + "github.com/gardener/etcd-druid/pkg/utils" + + gardenercomponent "github.com/gardener/gardener/pkg/operation/botanist/component" + kutil "github.com/gardener/gardener/pkg/utils/kubernetes" + "github.com/gardener/gardener/pkg/utils/retry" + gardenerretry "github.com/gardener/gardener/pkg/utils/retry" + "github.com/go-logr/logr" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// Interface contains functions for a StatefulSet deployer. +type Interface interface { + gardenercomponent.DeployWaiter + // Get gets the etcd StatefulSet. + Get(context.Context) (*appsv1.StatefulSet, error) +} + +type component struct { + client client.Client + logger logr.Logger + + values Values +} + +func (c *component) Get(ctx context.Context) (*appsv1.StatefulSet, error) { + sts := c.emptyStatefulset() + + if err := c.client.Get(ctx, client.ObjectKeyFromObject(sts), sts); err != nil { + return nil, err + } + + return sts, nil +} + +func (c *component) Deploy(ctx context.Context) error { + sts, err := c.Get(ctx) + if err != nil { + if !apierrors.IsNotFound(err) { + return err + } + sts = c.emptyStatefulset() + } + + if sts.Generation > 1 && sts.Spec.ServiceName != c.values.ServiceName { + // Earlier clusters referred to the client service in `sts.Spec.ServiceName` which must be changed + // when a multi-node cluster is used, see https://github.com/gardener/etcd-druid/pull/293. + if clusterScaledUpToMultiNode(c.values) { + deleteAndWait := gardenercomponent.OpDestroyAndWait(c) + if err := deleteAndWait.Destroy(ctx); err != nil { + return err + } + sts = c.emptyStatefulset() + } + } + + return c.syncStatefulset(ctx, sts) +} + +func (c *component) Destroy(ctx context.Context) error { + sts := c.emptyStatefulset() + + if err := c.deleteStatefulset(ctx, sts); err != nil { + return err + } + return nil +} + +func clusterScaledUpToMultiNode(val Values) bool { + return val.Replicas > 1 && + // Also consider `0` here because this field was not maintained in earlier releases. + (val.StatusReplicas == 0 || + val.StatusReplicas == 1) +} + +const ( + // defaultInterval is the default interval for retry operations. + defaultInterval = 5 * time.Second + // defaultTimeout is the default timeout for retry operations. + defaultTimeout = 90 * time.Second +) + +func (c *component) Wait(ctx context.Context) error { + sts := c.emptyStatefulset() + + err := gardenerretry.UntilTimeout(ctx, defaultInterval, defaultTimeout, func(ctx context.Context) (bool, error) { + if err := c.client.Get(ctx, client.ObjectKeyFromObject(sts), sts); err != nil { + if apierrors.IsNotFound(err) { + return gardenerretry.MinorError(err) + } + return gardenerretry.SevereError(err) + } + if err := utils.CheckStatefulSet(c.values.Replicas, sts); err != nil { + return gardenerretry.MinorError(err) + } + return gardenerretry.Ok() + }) + if err != nil { + messages, err2 := c.fetchPVCEventsFor(ctx, sts) + if err2 != nil { + c.logger.Error(err2, "Error while fetching events for depending PVC") + // don't expose this error since fetching events is a best effort + // and shouldn't be confused with the actual error + return err + } + if messages != "" { + return fmt.Errorf("%w\n\n%s", err, messages) + } + } + + return err +} + +func (c *component) WaitCleanup(ctx context.Context) error { + return gardenerretry.UntilTimeout(ctx, defaultInterval, defaultTimeout, func(ctx context.Context) (done bool, err error) { + sts := c.emptyStatefulset() + err = c.client.Get(ctx, client.ObjectKeyFromObject(sts), sts) + switch { + case apierrors.IsNotFound(err): + return retry.Ok() + case err == nil: + // StatefulSet is still available, so we should retry. + return false, nil + default: + return retry.SevereError(err) + } + }) +} + +func (c *component) syncStatefulset(ctx context.Context, sts *appsv1.StatefulSet) error { + var ( + stsOriginal = sts.DeepCopy() + patch = client.StrategicMergeFrom(stsOriginal) + ) + + sts.ObjectMeta = getObjectMeta(&c.values) + sts.Spec = appsv1.StatefulSetSpec{ + PodManagementPolicy: appsv1.ParallelPodManagement, + UpdateStrategy: appsv1.StatefulSetUpdateStrategy{ + Type: appsv1.RollingUpdateStatefulSetStrategyType, + }, + Replicas: pointer.Int32(c.values.Replicas), + ServiceName: c.values.ServiceName, + Selector: &metav1.LabelSelector{ + MatchLabels: getCommonLabels(&c.values), + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: c.values.Annotations, + Labels: sts.GetLabels(), + }, + Spec: v1.PodSpec{ + HostAliases: []v1.HostAlias{ + { + IP: "127.0.0.1", + Hostnames: []string{c.values.Name + "-local"}, + }, + }, + ServiceAccountName: c.values.ServiceAccountName, + Affinity: c.values.Affinity, + TopologySpreadConstraints: c.values.TopologySpreadConstraints, + Containers: []v1.Container{ + { + Name: "etcd", + Image: c.values.EtcdImage, + ImagePullPolicy: v1.PullIfNotPresent, + Command: c.values.EtcdCommand, + ReadinessProbe: &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: c.values.ReadinessProbeCommand, + }, + }, + InitialDelaySeconds: 15, + PeriodSeconds: 5, + FailureThreshold: 5, + }, + LivenessProbe: &v1.Probe{ + Handler: v1.Handler{ + Exec: &v1.ExecAction{ + Command: c.values.LivenessProbCommand, + }, + }, + InitialDelaySeconds: 15, + PeriodSeconds: 5, + FailureThreshold: 5, + }, + Ports: getEtcdPorts(c.values), + Resources: getEtcdResources(c.values), + Env: getEtcdEnvVars(c.values), + VolumeMounts: getEtcdVolumeMounts(c.values), + }, + { + Name: "backup-restore", + Image: c.values.BackupImage, + ImagePullPolicy: v1.PullIfNotPresent, + Command: c.values.EtcdBackupCommand, + Ports: getBackupPorts(c.values), + Resources: getBackupResources(c.values), + Env: getBackupRestoreEnvVars(c.values), + VolumeMounts: getBackupRestoreVolumeMounts(c.values), + SecurityContext: &v1.SecurityContext{ + Capabilities: &v1.Capabilities{ + Add: []v1.Capability{ + "SYS_PTRACE", + }, + }, + }, + }, + }, + ShareProcessNamespace: pointer.Bool(true), + Volumes: getVolumes(c.values), + }, + }, + VolumeClaimTemplates: []v1.PersistentVolumeClaim{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: c.values.VolumeClaimTemplateName, + }, + Spec: v1.PersistentVolumeClaimSpec{ + AccessModes: []v1.PersistentVolumeAccessMode{ + v1.ReadWriteOnce, + }, + StorageClassName: c.values.StorageClass, + Resources: getStorageReq(c.values), + }, + }, + }, + } + if c.values.PriorityClassName != nil { + sts.Spec.Template.Spec.PriorityClassName = *c.values.PriorityClassName + } + + if stsOriginal.Generation > 0 { + return c.client.Patch(ctx, sts, patch) + } + + return c.client.Create(ctx, sts) +} + +func (c *component) deleteStatefulset(ctx context.Context, sts *appsv1.StatefulSet) error { + return client.IgnoreNotFound(c.client.Delete(ctx, sts)) +} + +func (c *component) fetchPVCEventsFor(ctx context.Context, ss *appsv1.StatefulSet) (string, error) { + pvcs := &corev1.PersistentVolumeClaimList{} + if err := c.client.List(ctx, pvcs, client.InNamespace(ss.GetNamespace())); err != nil { + return "", err + } + + var ( + pvcMessages string + volumeClaims = ss.Spec.VolumeClaimTemplates + ) + for _, volumeClaim := range volumeClaims { + for _, pvc := range pvcs.Items { + if !strings.HasPrefix(pvc.GetName(), fmt.Sprintf("%s-%s", volumeClaim.Name, ss.Name)) || pvc.Status.Phase == corev1.ClaimBound { + continue + } + messages, err := kutil.FetchEventMessages(ctx, c.client.Scheme(), c.client, &pvc, corev1.EventTypeWarning, 2) + if err != nil { + return "", err + } + if messages != "" { + pvcMessages += fmt.Sprintf("Warning for PVC %s:\n%s\n", pvc.Name, messages) + } + } + } + return pvcMessages, nil +} + +// New creates a new statefulset deployer instance. +func New(c client.Client, logger logr.Logger, values Values) Interface { + objectLogger := logger.WithValues("sts", client.ObjectKey{Name: values.Name, Namespace: values.Namespace}) + + return &component{ + client: c, + logger: objectLogger, + values: values, + } +} + +func (c *component) emptyStatefulset() *appsv1.StatefulSet { + return &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: c.values.Name, + Namespace: c.values.Namespace, + }, + } +} + +func getCommonLabels(val *Values) map[string]string { + return map[string]string{ + "name": "etcd", + "instance": val.Name, + } +} + +func getObjectMeta(val *Values) metav1.ObjectMeta { + labels := utils.MergeStringMaps(getCommonLabels(val), val.Labels) + + annotations := utils.MergeStringMaps( + map[string]string{ + "gardener.cloud/owned-by": fmt.Sprintf("%s/%s", val.Namespace, val.Name), + "gardener.cloud/owner-type": "etcd", + }, + val.Annotations, + ) + + ownerRefs := []metav1.OwnerReference{ + { + APIVersion: druidv1alpha1.GroupVersion.String(), + Kind: "Etcd", + Name: val.Name, + UID: val.EtcdUID, + Controller: pointer.BoolPtr(true), + BlockOwnerDeletion: pointer.BoolPtr(true), + }, + } + + return metav1.ObjectMeta{ + Name: val.Name, + Namespace: val.Namespace, + Labels: labels, + Annotations: annotations, + OwnerReferences: ownerRefs, + } +} + +func getEtcdPorts(val Values) []corev1.ContainerPort { + return []corev1.ContainerPort{ + { + Name: "server", + Protocol: "TCP", + ContainerPort: pointer.Int32Deref(val.ServerPort, defaultServerPort), + }, + { + Name: "client", + Protocol: "TCP", + ContainerPort: pointer.Int32Deref(val.ClientPort, defaultClientPort), + }, + } +} + +var defaultResourceRequirements = corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("50m"), + corev1.ResourceMemory: resource.MustParse("128Mi"), + }, +} + +func getEtcdResources(val Values) corev1.ResourceRequirements { + if val.EtcdResourceRequirements != nil { + return *val.EtcdResourceRequirements + } + + return defaultResourceRequirements +} + +func getEtcdEnvVars(val Values) []corev1.EnvVar { + var env []corev1.EnvVar + env = append(env, getEnvVarFromValue("ENABLE_TLS", strconv.FormatBool(val.BackupTLS != nil))) + + protocol := "http" + if val.BackupTLS != nil { + protocol = "https" + } + + endpoint := fmt.Sprintf("%s://%s-local:%d", protocol, val.Name, pointer.Int32Deref(val.BackupPort, defaultBackupPort)) + env = append(env, getEnvVarFromValue("BACKUP_ENDPOINT", endpoint)) + + // This env var has been unused for a long time but is kept to not unnecessarily restart etcds. + // Todo(timuthy): Remove this as part of a future release in which an etcd restart is acceptable. + env = append(env, getEnvVarFromValue("FAIL_BELOW_REVISION_PARAMETER", "")) + + return env +} + +func getEtcdVolumeMounts(val Values) []corev1.VolumeMount { + vms := []corev1.VolumeMount{ + { + Name: val.VolumeClaimTemplateName, + MountPath: "/var/etcd/data/", + }, + } + + vms = append(vms, getSecretVolumeMounts(val.ClientUrlTLS, val.PeerUrlTLS)...) + + return vms +} + +func getSecretVolumeMounts(clientUrlTLS, peerUrlTLS *druidv1alpha1.TLSConfig) []corev1.VolumeMount { + var vms []corev1.VolumeMount + + if clientUrlTLS != nil { + vms = append(vms, corev1.VolumeMount{ + Name: "client-url-ca-etcd", + MountPath: "/var/etcd/ssl/client/ca", + }, corev1.VolumeMount{ + Name: "client-url-etcd-server-tls", + MountPath: "/var/etcd/ssl/client/server", + }, corev1.VolumeMount{ + Name: "client-url-etcd-client-tls", + MountPath: "/var/etcd/ssl/client/client", + }) + } + + if peerUrlTLS != nil { + vms = append(vms, corev1.VolumeMount{ + Name: "peer-url-ca-etcd", + MountPath: "/var/etcd/ssl/peer/ca", + }, corev1.VolumeMount{ + Name: "peer-url-etcd-server-tls", + MountPath: "/var/etcd/ssl/peer/server", + }) + } + + return vms +} + +func getBackupRestoreVolumeMounts(val Values) []corev1.VolumeMount { + vms := []corev1.VolumeMount{ + { + Name: val.VolumeClaimTemplateName, + MountPath: "/var/etcd/data", + }, + { + Name: "etcd-config-file", + MountPath: "/var/etcd/config/", + }, + } + + vms = append(vms, getSecretVolumeMounts(val.ClientUrlTLS, val.PeerUrlTLS)...) + + if val.BackupStore == nil { + return vms + } + + provider, err := utils.StorageProviderFromInfraProvider(val.BackupStore.Provider) + if err != nil { + return vms + } + + switch provider { + case utils.Local: + if val.BackupStore.Container != nil { + vms = append(vms, corev1.VolumeMount{ + Name: "host-storage", + MountPath: *val.BackupStore.Container, + }) + } + case utils.GCS: + vms = append(vms, corev1.VolumeMount{ + Name: "etcd-backup", + MountPath: "/root/.gcp/", + }) + case utils.S3, utils.ABS, utils.OSS, utils.Swift, utils.OCS: + vms = append(vms, corev1.VolumeMount{ + Name: "etcd-backup", + MountPath: "/root/etcd-backup/", + }) + } + + return vms +} + +func getStorageReq(val Values) corev1.ResourceRequirements { + storageCapacity := defaultStorageCapacity + if val.StorageCapacity != nil { + storageCapacity = *val.StorageCapacity + } + + return corev1.ResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: storageCapacity, + }, + } +} + +func getBackupPorts(val Values) []corev1.ContainerPort { + return []corev1.ContainerPort{ + { + Name: "server", + Protocol: "TCP", + ContainerPort: pointer.Int32Deref(val.BackupPort, defaultBackupPort), + }, + } +} + +func getBackupResources(val Values) corev1.ResourceRequirements { + if val.BackupResourceRequirements != nil { + return *val.BackupResourceRequirements + } + return defaultResourceRequirements +} + +func getVolumes(val Values) []corev1.Volume { + vs := []corev1.Volume{ + { + Name: "etcd-config-file", + VolumeSource: corev1.VolumeSource{ + ConfigMap: &corev1.ConfigMapVolumeSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: val.ConfigMapName, + }, + Items: []corev1.KeyToPath{ + { + Key: "etcd.conf.yaml", + Path: "etcd.conf.yaml", + }, + }, + DefaultMode: pointer.Int32(0644), + }, + }, + }, + } + + if val.ClientUrlTLS != nil { + vs = append(vs, corev1.Volume{ + Name: "client-url-ca-etcd", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: val.ClientUrlTLS.TLSCASecretRef.Name, + }, + }, + }, + corev1.Volume{ + Name: "client-url-etcd-server-tls", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: val.ClientUrlTLS.ServerTLSSecretRef.Name, + }, + }, + }, + corev1.Volume{ + Name: "client-url-etcd-client-tls", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: val.ClientUrlTLS.ClientTLSSecretRef.Name, + }, + }, + }) + } + + if val.PeerUrlTLS != nil { + vs = append(vs, corev1.Volume{ + Name: "peer-url-ca-etcd", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: val.PeerUrlTLS.TLSCASecretRef.Name, + }, + }, + }, + corev1.Volume{ + Name: "peer-url-etcd-server-tls", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: val.PeerUrlTLS.ServerTLSSecretRef.Name, + }, + }, + }) + } + + if val.BackupStore == nil { + return vs + } + + storeValues := val.BackupStore + provider, err := utils.StorageProviderFromInfraProvider(storeValues.Provider) + if err != nil { + return vs + } + + switch provider { + case "Local": + hpt := corev1.HostPathDirectory + vs = append(vs, corev1.Volume{ + Name: "host-storage", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: defaultLocalPrefix + "/" + *storeValues.Container, + Type: &hpt, + }, + }, + }) + case utils.GCS, utils.S3, utils.OSS, utils.ABS, utils.Swift, utils.OCS: + vs = append(vs, corev1.Volume{ + Name: "etcd-backup", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: storeValues.SecretRef.Name, + }, + }, + }) + } + + return vs +} + +func getBackupRestoreEnvVars(val Values) []corev1.EnvVar { + var ( + env []corev1.EnvVar + storageContainer string + storeValues = val.BackupStore + ) + + if val.BackupStore != nil { + storageContainer = pointer.StringDeref(val.BackupStore.Container, "") + } + + // TODO(timuthy): Move STORAGE_CONTAINER a few lines below so that we can append and exit in one step. This should only be done in a release where a restart of etcd is acceptable. + env = append(env, getEnvVarFromValue("STORAGE_CONTAINER", storageContainer)) + env = append(env, getEnvVarFromField("POD_NAME", "metadata.name")) + env = append(env, getEnvVarFromField("POD_NAMESPACE", "metadata.namespace")) + + if storeValues == nil { + return env + } + + provider, err := utils.StorageProviderFromInfraProvider(val.BackupStore.Provider) + if err != nil { + return env + } + + // TODO(timuthy): move this to a non root path when we switch to a rootless distribution + const credentialsMountPath = "/root/etcd-backup" + switch provider { + case utils.S3: + env = append(env, getEnvVarFromValue("AWS_APPLICATION_CREDENTIALS", credentialsMountPath)) + + case utils.ABS: + env = append(env, getEnvVarFromValue("AZURE_APPLICATION_CREDENTIALS", credentialsMountPath)) + + case utils.GCS: + env = append(env, getEnvVarFromValue("GOOGLE_APPLICATION_CREDENTIALS", "/root/.gcp/serviceaccount.json")) + + case utils.Swift: + env = append(env, getEnvVarFromValue("OPENSTACK_APPLICATION_CREDENTIALS", credentialsMountPath)) + + case utils.OSS: + env = append(env, getEnvVarFromValue("ALICLOUD_APPLICATION_CREDENTIALS", credentialsMountPath)) + + case utils.ECS: + env = append(env, getEnvVarFromSecrets("ECS_ENDPOINT", storeValues.SecretRef.Name, "endpoint")) + env = append(env, getEnvVarFromSecrets("ECS_ACCESS_KEY_ID", storeValues.SecretRef.Name, "accessKeyID")) + env = append(env, getEnvVarFromSecrets("ECS_SECRET_ACCESS_KEY", storeValues.SecretRef.Name, "secretAccessKey")) + + case utils.OCS: + env = append(env, getEnvVarFromValue("OPENSHIFT_APPLICATION_CREDENTIALS", credentialsMountPath)) + } + + return env +} + +func getEnvVarFromValue(name, value string) corev1.EnvVar { + return corev1.EnvVar{ + Name: name, + Value: value, + } +} + +func getEnvVarFromField(name, fieldPath string) corev1.EnvVar { + return corev1.EnvVar{ + Name: name, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: fieldPath, + }, + }, + } +} + +func getEnvVarFromSecrets(name, secretName, secretKey string) corev1.EnvVar { + return corev1.EnvVar{ + Name: name, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: secretName, + }, + Key: secretKey, + }, + }, + } +} diff --git a/pkg/component/etcd/statefulset/statefulset_suite_test.go b/pkg/component/etcd/statefulset/statefulset_suite_test.go new file mode 100644 index 000000000..cf8632266 --- /dev/null +++ b/pkg/component/etcd/statefulset/statefulset_suite_test.go @@ -0,0 +1,27 @@ +// Copyright (c) 2022 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statefulset_test + +import ( + "testing" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +func TestService(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Statefulset Component Suite") +} diff --git a/pkg/component/etcd/statefulset/statefulset_test.go b/pkg/component/etcd/statefulset/statefulset_test.go new file mode 100644 index 000000000..c32445dcd --- /dev/null +++ b/pkg/component/etcd/statefulset/statefulset_test.go @@ -0,0 +1,885 @@ +// Copyright (c) 2022 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statefulset_test + +import ( + "context" + "fmt" + "time" + + druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + "github.com/gardener/etcd-druid/pkg/client/kubernetes" + "github.com/gardener/etcd-druid/pkg/common" + . "github.com/gardener/etcd-druid/pkg/component/etcd/statefulset" + druidutils "github.com/gardener/etcd-druid/pkg/utils" + + "github.com/gardener/gardener/pkg/operation/botanist/component" + kutil "github.com/gardener/gardener/pkg/utils/kubernetes" + . "github.com/gardener/gardener/pkg/utils/test/matchers" + "github.com/go-logr/logr" + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + . "github.com/onsi/gomega/gstruct" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" + fakeclient "sigs.k8s.io/controller-runtime/pkg/client/fake" +) + +var ( + backupRestore = "backup-restore" + deltaSnapshotPeriod = metav1.Duration{ + Duration: 300 * time.Second, + } + garbageCollectionPeriod = metav1.Duration{ + Duration: 43200 * time.Second, + } + checkSumAnnotations = map[string]string{ + "checksum/etcd-configmap": "abc123", + } + clientPort int32 = 2379 + serverPort int32 = 2380 + backupPort int32 = 8080 + uid = "a9b8c7d6e5f4" + imageEtcd = "eu.gcr.io/gardener-project/gardener/etcd:v3.4.13-bootstrap" + imageBR = "eu.gcr.io/gardener-project/gardener/etcdbrctl:v0.12.0" + snapshotSchedule = "0 */24 * * *" + defragSchedule = "0 */24 * * *" + container = "default.bkp" + storageCapacity = resource.MustParse("5Gi") + storageClass = "gardener.fast" + priorityClassName = "class_priority" + deltaSnapShotMemLimit = resource.MustParse("100Mi") + autoCompactionMode = druidv1alpha1.Periodic + autoCompactionRetention = "2m" + quota = resource.MustParse("8Gi") + prefix = "/tmp" + volumeClaimTemplateName = "etcd-main" + garbageCollectionPolicy = druidv1alpha1.GarbageCollectionPolicy(druidv1alpha1.GarbageCollectionPolicyExponential) + metricsBasic = druidv1alpha1.Basic + etcdSnapshotTimeout = metav1.Duration{ + Duration: 10 * time.Minute, + } + etcdDefragTimeout = metav1.Duration{ + Duration: 10 * time.Minute, + } + etcdLeaderElectionConnectionTimeout = metav1.Duration{ + Duration: 5 * time.Second, + } + + ownerName = "owner.foo.example.com" + ownerID = "bar" + ownerCheckInterval = metav1.Duration{ + Duration: 30 * time.Second, + } + ownerCheckTimeout = metav1.Duration{ + Duration: 2 * time.Minute, + } + ownerCheckDNSCacheTTL = metav1.Duration{ + Duration: 1 * time.Minute, + } + heartbeatDuration = metav1.Duration{ + Duration: 10 * time.Second, + } + backupRestoreResources = corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "cpu": parseQuantity("500m"), + "memory": parseQuantity("2Gi"), + }, + Requests: corev1.ResourceList{ + "cpu": parseQuantity("23m"), + "memory": parseQuantity("128Mi"), + }, + } + etcdResources = corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + "cpu": parseQuantity("2500m"), + "memory": parseQuantity("4Gi"), + }, + Requests: corev1.ResourceList{ + "cpu": parseQuantity("500m"), + "memory": parseQuantity("1000Mi"), + }, + } +) + +var _ = Describe("Statefulset", func() { + var ( + ctx context.Context + cl client.Client + + etcd *druidv1alpha1.Etcd + namespace string + name string + + replicas *int32 + sts *appsv1.StatefulSet + + values Values + stsDeployer component.Deployer + + storageProvider *string + ) + + JustBeforeEach(func() { + etcd = getEtcd(name, namespace, true, *replicas, storageProvider) + values = GenerateValues( + etcd, + pointer.Int32Ptr(clientPort), + pointer.Int32Ptr(serverPort), + pointer.Int32Ptr(backupPort), + imageEtcd, + imageBR, + checkSumAnnotations) + stsDeployer = New(cl, logr.Discard(), values) + + sts = &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + + Name: values.Name, + Namespace: values.Namespace, + }, + } + }) + + BeforeEach(func() { + ctx = context.Background() + cl = fakeclient.NewClientBuilder().WithScheme(kubernetes.Scheme).Build() + + name = "statefulset" + namespace = "default" + quota = resource.MustParse("8Gi") + + if replicas == nil { + replicas = pointer.Int32Ptr(1) + } + }) + + AfterEach(func() { + storageProvider = nil + }) + + Describe("#Deploy", func() { + Context("when statefulset does not exist", func() { + It("should create the statefulset successfully", func() { + Expect(stsDeployer.Deploy(ctx)).To(Succeed()) + + sts := &appsv1.StatefulSet{} + + Expect(cl.Get(ctx, kutil.Key(namespace, values.Name), sts)).To(Succeed()) + checkStatefulset(sts, values) + }) + }) + + Context("when statefulset exists", func() { + It("should update the statefulset successfully", func() { + // The generation is usually increased by the Kube-Apiserver but as we use a fake client here, we need to manually do it. + sts.Generation = 1 + Expect(cl.Create(ctx, sts)).To(Succeed()) + + Expect(stsDeployer.Deploy(ctx)).To(Succeed()) + + sts := &appsv1.StatefulSet{} + + Expect(cl.Get(ctx, kutil.Key(namespace, values.Name), sts)).To(Succeed()) + checkStatefulset(sts, values) + }) + + Context("when multi-node cluster is configured", func() { + BeforeEach(func() { + replicas = pointer.Int32(3) + }) + + It("should re-create statefulset because serviceName is changed", func() { + sts.Generation = 2 + sts.Spec.ServiceName = "foo" + sts.Spec.Replicas = pointer.Int32Ptr(3) + Expect(cl.Create(ctx, sts)).To(Succeed()) + + values.Replicas = 3 + Expect(stsDeployer.Deploy(ctx)).To(Succeed()) + + sts := &appsv1.StatefulSet{} + Expect(cl.Get(ctx, kutil.Key(namespace, values.Name), sts)).To(Succeed()) + checkStatefulset(sts, values) + }) + }) + }) + + Context("with backup", func() { + for _, p := range []string{ + druidutils.ABS, + druidutils.GCS, + druidutils.S3, + druidutils.Swift, + druidutils.OSS, + druidutils.OCS, + } { + provider := p + Context(fmt.Sprintf("with provider %s", provider), func() { + BeforeEach(func() { + storageProvider = &provider + }) + + It("should configure the correct provider values", func() { + Expect(stsDeployer.Deploy(ctx)).To(Succeed()) + sts := &appsv1.StatefulSet{} + Expect(cl.Get(ctx, kutil.Key(namespace, values.Name), sts)).To(Succeed()) + + checkBackup(etcd, sts) + }) + }) + } + + Context("with provider Local", func() { + BeforeEach(func() { + storageProvider = pointer.StringPtr(druidutils.Local) + }) + + It("should configure the correct provider values", func() { + Expect(stsDeployer.Deploy(ctx)).To(Succeed()) + sts := &appsv1.StatefulSet{} + Expect(cl.Get(ctx, kutil.Key(namespace, values.Name), sts)).To(Succeed()) + + hpt := corev1.HostPathDirectory + + // check volumes + Expect(sts.Spec.Template.Spec.Volumes).To(ContainElements(corev1.Volume{ + Name: "host-storage", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/etc/gardener/local-backupbuckets/" + container, + Type: &hpt, + }, + }, + })) + + backupRestoreContainer := sts.Spec.Template.Spec.Containers[1] + Expect(backupRestoreContainer.Name).To(Equal(backupRestore)) + + // Check command + Expect(backupRestoreContainer.Command).To(ContainElements( + "--storage-provider="+string(*etcd.Spec.Backup.Store.Provider), + "--store-prefix="+prefix, + )) + + // check volume mount + Expect(backupRestoreContainer.VolumeMounts).To(ContainElement(corev1.VolumeMount{ + Name: "host-storage", + MountPath: container, + })) + }) + }) + }) + }) + + Describe("#Destroy", func() { + Context("when statefulset does not exist", func() { + It("should destroy successfully", func() { + Expect(stsDeployer.Destroy(ctx)).To(Succeed()) + Expect(cl.Get(ctx, client.ObjectKeyFromObject(sts), &appsv1.StatefulSet{})).To(BeNotFoundError()) + }) + }) + + Context("when statefulset exists", func() { + It("should destroy successfully", func() { + Expect(cl.Create(ctx, sts)).To(Succeed()) + + Expect(stsDeployer.Destroy(ctx)).To(Succeed()) + + Expect(cl.Get(ctx, kutil.Key(namespace, sts.Name), &appsv1.StatefulSet{})).To(BeNotFoundError()) + }) + }) + }) +}) + +func checkBackup(etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) { + // Check secret volume mount + Expect(sts.Spec.Template.Spec.Volumes).To(ContainElement(corev1.Volume{ + Name: "etcd-backup", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: etcd.Spec.Backup.Store.SecretRef.Name, + }, + }, + })) + + backupRestoreContainer := sts.Spec.Template.Spec.Containers[1] + Expect(backupRestoreContainer.Name).To(Equal(backupRestore)) + + mountPath := "/root/etcd-backup/" + if *etcd.Spec.Backup.Store.Provider == druidutils.GCS { + mountPath = "/root/.gcp/" + } + + // Check volume mount + Expect(backupRestoreContainer.VolumeMounts).To(ContainElement(corev1.VolumeMount{ + Name: "etcd-backup", + MountPath: mountPath, + })) + + // Check command + Expect(backupRestoreContainer.Command).To(ContainElements( + "--storage-provider="+string(*etcd.Spec.Backup.Store.Provider), + "--store-prefix="+prefix, + )) + + var ( + envVarName string + envVarValue = "/root/etcd-backup" + ) + + switch *etcd.Spec.Backup.Store.Provider { + case druidutils.S3: + envVarName = "AWS_APPLICATION_CREDENTIALS" + + case druidutils.ABS: + envVarName = "AZURE_APPLICATION_CREDENTIALS" + + case druidutils.GCS: + envVarName = "GOOGLE_APPLICATION_CREDENTIALS" + envVarValue = "/root/.gcp/serviceaccount.json" + + case druidutils.Swift: + envVarName = "OPENSTACK_APPLICATION_CREDENTIALS" + + case druidutils.OSS: + envVarName = "ALICLOUD_APPLICATION_CREDENTIALS" + + case druidutils.OCS: + envVarName = "OPENSHIFT_APPLICATION_CREDENTIALS" + } + + // Check env var + Expect(backupRestoreContainer.Env).To(ContainElement(corev1.EnvVar{ + Name: envVarName, + Value: envVarValue, + })) +} + +func checkStatefulset(sts *appsv1.StatefulSet, values Values) { + checkStsOwnerRefs(sts.ObjectMeta.OwnerReferences, values) + + readinessProbeUrl := fmt.Sprintf("https://%s-local:%d/health", values.Name, clientPort) + if int(values.Replicas) == 1 { + readinessProbeUrl = fmt.Sprintf("https://%s-local:%d/healthz", values.Name, backupPort) + } + + store, err := druidutils.StorageProviderFromInfraProvider(values.BackupStore.Provider) + Expect(err).NotTo(HaveOccurred()) + Expect(*sts).To(MatchFields(IgnoreExtras, Fields{ + "ObjectMeta": MatchFields(IgnoreExtras, Fields{ + "Name": Equal(values.Name), + "Namespace": Equal(values.Namespace), + "Annotations": MatchAllKeys(Keys{ + "checksum/etcd-configmap": Equal("abc123"), + "gardener.cloud/owned-by": Equal(fmt.Sprintf("%s/%s", values.Namespace, values.Name)), + "gardener.cloud/owner-type": Equal("etcd"), + "app": Equal("etcd-statefulset"), + "role": Equal("test"), + "instance": Equal(values.Name), + }), + "Labels": MatchAllKeys(Keys{ + "name": Equal("etcd"), + "instance": Equal(values.Name), + "foo": Equal("bar"), + }), + }), + + "Spec": MatchFields(IgnoreExtras, Fields{ + "UpdateStrategy": MatchFields(IgnoreExtras, Fields{ + "Type": Equal(appsv1.RollingUpdateStatefulSetStrategyType), + }), + "Replicas": PointTo(Equal(values.Replicas)), + "Selector": PointTo(MatchFields(IgnoreExtras, Fields{ + "MatchLabels": MatchAllKeys(Keys{ + "name": Equal("etcd"), + "instance": Equal(values.Name), + }), + })), + "Template": MatchFields(IgnoreExtras, Fields{ + "ObjectMeta": MatchFields(IgnoreExtras, Fields{ + "Annotations": MatchKeys(IgnoreExtras, Keys{ + "app": Equal("etcd-statefulset"), + "role": Equal("test"), + "instance": Equal(values.Name), + }), + "Labels": MatchAllKeys(Keys{ + "name": Equal("etcd"), + "instance": Equal(values.Name), + "foo": Equal("bar"), + }), + }), + //s.Spec.Template.Spec.HostAliases + "Spec": MatchFields(IgnoreExtras, Fields{ + "HostAliases": MatchAllElements(hostAliasIterator, Elements{ + "127.0.0.1": MatchFields(IgnoreExtras, Fields{ + "IP": Equal("127.0.0.1"), + "Hostnames": MatchAllElements(cmdIterator, Elements{ + fmt.Sprintf("%s-local", values.Name): Equal(fmt.Sprintf("%s-local", values.Name)), + }), + }), + }), + "Containers": MatchAllElements(containerIterator, Elements{ + common.Etcd: MatchFields(IgnoreExtras, Fields{ + "Ports": ConsistOf([]corev1.ContainerPort{ + { + Name: "server", + Protocol: corev1.ProtocolTCP, + HostPort: 0, + ContainerPort: *values.ServerPort, + }, + { + Name: "client", + Protocol: corev1.ProtocolTCP, + HostPort: 0, + ContainerPort: *values.ClientPort, + }, + }), + "Command": MatchAllElements(cmdIterator, Elements{ + "/var/etcd/bin/bootstrap.sh": Equal("/var/etcd/bin/bootstrap.sh"), + }), + "ImagePullPolicy": Equal(corev1.PullIfNotPresent), + "Image": Equal(values.EtcdImage), + "ReadinessProbe": PointTo(MatchFields(IgnoreExtras, Fields{ + "Handler": MatchFields(IgnoreExtras, Fields{ + "Exec": PointTo(MatchFields(IgnoreExtras, Fields{ + "Command": MatchAllElements(cmdIterator, Elements{ + "/usr/bin/curl": Equal("/usr/bin/curl"), + "--cert": Equal("--cert"), + "/var/etcd/ssl/client/client/tls.crt": Equal("/var/etcd/ssl/client/client/tls.crt"), + "--key": Equal("--key"), + "/var/etcd/ssl/client/client/tls.key": Equal("/var/etcd/ssl/client/client/tls.key"), + "--cacert": Equal("--cacert"), + "/var/etcd/ssl/client/ca/ca.crt": Equal("/var/etcd/ssl/client/ca/ca.crt"), + readinessProbeUrl: Equal(readinessProbeUrl), + }), + })), + }), + "InitialDelaySeconds": Equal(int32(15)), + "PeriodSeconds": Equal(int32(5)), + })), + "LivenessProbe": PointTo(MatchFields(IgnoreExtras, Fields{ + "Handler": MatchFields(IgnoreExtras, Fields{ + "Exec": PointTo(MatchFields(IgnoreExtras, Fields{ + "Command": MatchAllElements(cmdIterator, Elements{ + "/bin/sh": Equal("/bin/sh"), + "-ec": Equal("-ec"), + "ETCDCTL_API=3": Equal("ETCDCTL_API=3"), + "etcdctl": Equal("etcdctl"), + "--cert=/var/etcd/ssl/client/client/tls.crt": Equal("--cert=/var/etcd/ssl/client/client/tls.crt"), + "--key=/var/etcd/ssl/client/client/tls.key": Equal("--key=/var/etcd/ssl/client/client/tls.key"), + "--cacert=/var/etcd/ssl/client/ca/ca.crt": Equal("--cacert=/var/etcd/ssl/client/ca/ca.crt"), + fmt.Sprintf("--endpoints=https://%s-local:%d", values.Name, clientPort): Equal(fmt.Sprintf("--endpoints=https://%s-local:%d", values.Name, clientPort)), + "get": Equal("get"), + "foo": Equal("foo"), + "--consistency=s": Equal("--consistency=s"), + }), + })), + }), + "InitialDelaySeconds": Equal(int32(15)), + "PeriodSeconds": Equal(int32(5)), + })), + "Resources": Equal(etcdResources), + "VolumeMounts": MatchAllElements(volumeMountIterator, Elements{ + values.VolumeClaimTemplateName: MatchFields(IgnoreExtras, Fields{ + "Name": Equal(values.VolumeClaimTemplateName), + "MountPath": Equal("/var/etcd/data/"), + }), + "client-url-ca-etcd": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("client-url-ca-etcd"), + "MountPath": Equal("/var/etcd/ssl/client/ca"), + }), + "client-url-etcd-server-tls": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("client-url-etcd-server-tls"), + "MountPath": Equal("/var/etcd/ssl/client/server"), + }), + "client-url-etcd-client-tls": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("client-url-etcd-client-tls"), + "MountPath": Equal("/var/etcd/ssl/client/client"), + }), + "peer-url-ca-etcd": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("peer-url-ca-etcd"), + "MountPath": Equal("/var/etcd/ssl/peer/ca"), + }), + "peer-url-etcd-server-tls": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("peer-url-etcd-server-tls"), + "MountPath": Equal("/var/etcd/ssl/peer/server"), + }), + }), + }), + + backupRestore: MatchFields(IgnoreExtras, Fields{ + "Command": MatchAllElements(cmdIterator, Elements{ + "etcdbrctl": Equal("etcdbrctl"), + "server": Equal("server"), + "--cert=/var/etcd/ssl/client/client/tls.crt": Equal("--cert=/var/etcd/ssl/client/client/tls.crt"), + "--key=/var/etcd/ssl/client/client/tls.key": Equal("--key=/var/etcd/ssl/client/client/tls.key"), + "--cacert=/var/etcd/ssl/client/ca/ca.crt": Equal("--cacert=/var/etcd/ssl/client/ca/ca.crt"), + "--server-cert=/var/etcd/ssl/client/server/tls.crt": Equal("--server-cert=/var/etcd/ssl/client/server/tls.crt"), + "--server-key=/var/etcd/ssl/client/server/tls.key": Equal("--server-key=/var/etcd/ssl/client/server/tls.key"), + "--data-dir=/var/etcd/data/new.etcd": Equal("--data-dir=/var/etcd/data/new.etcd"), + "--insecure-transport=false": Equal("--insecure-transport=false"), + "--insecure-skip-tls-verify=false": Equal("--insecure-skip-tls-verify=false"), + "--snapstore-temp-directory=/var/etcd/data/temp": Equal("--snapstore-temp-directory=/var/etcd/data/temp"), + "--etcd-process-name=etcd": Equal("--etcd-process-name=etcd"), + fmt.Sprintf("%s=%s", "--etcd-connection-timeout-leader-election", etcdLeaderElectionConnectionTimeout.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--etcd-connection-timeout-leader-election", values.LeaderElection.EtcdConnectionTimeout.Duration.String())), + "--etcd-connection-timeout=5m": Equal("--etcd-connection-timeout=5m"), + "--enable-snapshot-lease-renewal=true": Equal("--enable-snapshot-lease-renewal=true"), + "--enable-member-lease-renewal=true": Equal("--enable-member-lease-renewal=true"), + "--k8s-heartbeat-duration=10s": Equal("--k8s-heartbeat-duration=10s"), + fmt.Sprintf("--defragmentation-schedule=%s", *values.DefragmentationSchedule): Equal(fmt.Sprintf("--defragmentation-schedule=%s", *values.DefragmentationSchedule)), + fmt.Sprintf("--schedule=%s", *values.FullSnapshotSchedule): Equal(fmt.Sprintf("--schedule=%s", *values.FullSnapshotSchedule)), + fmt.Sprintf("%s=%s", "--garbage-collection-policy", *values.GarbageCollectionPolicy): Equal(fmt.Sprintf("%s=%s", "--garbage-collection-policy", *values.GarbageCollectionPolicy)), + fmt.Sprintf("%s=%s", "--storage-provider", store): Equal(fmt.Sprintf("%s=%s", "--storage-provider", store)), + fmt.Sprintf("%s=%s", "--store-prefix", values.BackupStore.Prefix): Equal(fmt.Sprintf("%s=%s", "--store-prefix", values.BackupStore.Prefix)), + fmt.Sprintf("--delta-snapshot-memory-limit=%d", values.DeltaSnapshotMemoryLimit.Value()): Equal(fmt.Sprintf("--delta-snapshot-memory-limit=%d", values.DeltaSnapshotMemoryLimit.Value())), + fmt.Sprintf("--garbage-collection-policy=%s", *values.GarbageCollectionPolicy): Equal(fmt.Sprintf("--garbage-collection-policy=%s", *values.GarbageCollectionPolicy)), + fmt.Sprintf("--endpoints=https://%s-local:%d", values.Name, clientPort): Equal(fmt.Sprintf("--endpoints=https://%s-local:%d", values.Name, clientPort)), + fmt.Sprintf("--embedded-etcd-quota-bytes=%d", int64(values.Quota.Value())): Equal(fmt.Sprintf("--embedded-etcd-quota-bytes=%d", int64(values.Quota.Value()))), + fmt.Sprintf("%s=%s", "--delta-snapshot-period", values.DeltaSnapshotPeriod.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-period", values.DeltaSnapshotPeriod.Duration.String())), + fmt.Sprintf("%s=%s", "--garbage-collection-period", values.GarbageCollectionPeriod.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--garbage-collection-period", values.GarbageCollectionPeriod.Duration.String())), + fmt.Sprintf("%s=%s", "--auto-compaction-mode", *values.AutoCompactionMode): Equal(fmt.Sprintf("%s=%s", "--auto-compaction-mode", *values.AutoCompactionMode)), + fmt.Sprintf("%s=%s", "--auto-compaction-retention", *values.AutoCompactionRetention): Equal(fmt.Sprintf("%s=%s", "--auto-compaction-retention", *values.AutoCompactionRetention)), + fmt.Sprintf("%s=%s", "--etcd-snapshot-timeout", values.EtcdSnapshotTimeout.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--etcd-snapshot-timeout", values.EtcdSnapshotTimeout.Duration.String())), + fmt.Sprintf("%s=%s", "--etcd-defrag-timeout", values.EtcdDefragTimeout.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--etcd-defrag-timeout", values.EtcdDefragTimeout.Duration.String())), + fmt.Sprintf("%s=%s", "--owner-name", values.OwnerCheck.Name): Equal(fmt.Sprintf("%s=%s", "--owner-name", values.OwnerCheck.Name)), + fmt.Sprintf("%s=%s", "--owner-id", values.OwnerCheck.ID): Equal(fmt.Sprintf("%s=%s", "--owner-id", values.OwnerCheck.ID)), + fmt.Sprintf("%s=%s", "--owner-check-interval", values.OwnerCheck.Interval.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--owner-check-interval", values.OwnerCheck.Interval.Duration.String())), + fmt.Sprintf("%s=%s", "--owner-check-timeout", values.OwnerCheck.Timeout.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--owner-check-timeout", values.OwnerCheck.Timeout.Duration.String())), + fmt.Sprintf("%s=%s", "--owner-check-dns-cache-ttl", values.OwnerCheck.DNSCacheTTL.Duration.String()): Equal(fmt.Sprintf("%s=%s", "--owner-check-dns-cache-ttl", values.OwnerCheck.DNSCacheTTL.Duration.String())), + fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", values.DeltaSnapLeaseName): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", values.DeltaSnapLeaseName)), + fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", values.FullSnapLeaseName): Equal(fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", values.FullSnapLeaseName)), + }), + "Ports": ConsistOf([]corev1.ContainerPort{ + { + Name: "server", + Protocol: corev1.ProtocolTCP, + HostPort: 0, + ContainerPort: *values.BackupPort, + }, + }), + "Image": Equal(values.BackupImage), + "ImagePullPolicy": Equal(corev1.PullIfNotPresent), + "VolumeMounts": MatchElements(volumeMountIterator, IgnoreExtras, Elements{ + values.VolumeClaimTemplateName: MatchFields(IgnoreExtras, Fields{ + "Name": Equal(values.VolumeClaimTemplateName), + "MountPath": Equal("/var/etcd/data"), + }), + "etcd-config-file": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("etcd-config-file"), + "MountPath": Equal("/var/etcd/config/"), + }), + "etcd-backup": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("etcd-backup"), + "MountPath": Equal("/root/etcd-backup/"), + }), + }), + "Env": MatchElements(envIterator, IgnoreExtras, Elements{ + "STORAGE_CONTAINER": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("STORAGE_CONTAINER"), + "Value": Equal(*values.BackupStore.Container), + }), + "POD_NAME": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("POD_NAME"), + "ValueFrom": PointTo(MatchFields(IgnoreExtras, Fields{ + "FieldRef": PointTo(MatchFields(IgnoreExtras, Fields{ + "FieldPath": Equal("metadata.name"), + })), + })), + }), + "POD_NAMESPACE": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("POD_NAMESPACE"), + "ValueFrom": PointTo(MatchFields(IgnoreExtras, Fields{ + "FieldRef": PointTo(MatchFields(IgnoreExtras, Fields{ + "FieldPath": Equal("metadata.namespace"), + })), + })), + }), + "AZURE_APPLICATION_CREDENTIALS": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("AZURE_APPLICATION_CREDENTIALS"), + "Value": Equal("/root/etcd-backup"), + }), + }), + "Resources": Equal(backupRestoreResources), + "SecurityContext": PointTo(MatchFields(IgnoreExtras, Fields{ + "Capabilities": PointTo(MatchFields(IgnoreExtras, Fields{ + "Add": ConsistOf([]corev1.Capability{ + "SYS_PTRACE", + }), + })), + })), + }), + }), + "ShareProcessNamespace": Equal(pointer.BoolPtr(true)), + "Volumes": MatchAllElements(volumeIterator, Elements{ + "etcd-config-file": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("etcd-config-file"), + "VolumeSource": MatchFields(IgnoreExtras, Fields{ + "ConfigMap": PointTo(MatchFields(IgnoreExtras, Fields{ + "LocalObjectReference": MatchFields(IgnoreExtras, Fields{ + "Name": Equal(fmt.Sprintf("etcd-bootstrap-%s", string(values.EtcdUID[:6]))), + }), + "DefaultMode": PointTo(Equal(int32(0644))), + "Items": MatchAllElements(keyIterator, Elements{ + "etcd.conf.yaml": MatchFields(IgnoreExtras, Fields{ + "Key": Equal("etcd.conf.yaml"), + "Path": Equal("etcd.conf.yaml"), + }), + }), + })), + }), + }), + "etcd-backup": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("etcd-backup"), + "VolumeSource": MatchFields(IgnoreExtras, Fields{ + "Secret": PointTo(MatchFields(IgnoreExtras, Fields{ + "SecretName": Equal(values.BackupStore.SecretRef.Name), + })), + }), + }), + "client-url-etcd-server-tls": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("client-url-etcd-server-tls"), + "VolumeSource": MatchFields(IgnoreExtras, Fields{ + "Secret": PointTo(MatchFields(IgnoreExtras, Fields{ + "SecretName": Equal(values.ClientUrlTLS.ServerTLSSecretRef.Name), + })), + }), + }), + "client-url-etcd-client-tls": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("client-url-etcd-client-tls"), + "VolumeSource": MatchFields(IgnoreExtras, Fields{ + "Secret": PointTo(MatchFields(IgnoreExtras, Fields{ + "SecretName": Equal(values.ClientUrlTLS.ClientTLSSecretRef.Name), + })), + }), + }), + "client-url-ca-etcd": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("client-url-ca-etcd"), + "VolumeSource": MatchFields(IgnoreExtras, Fields{ + "Secret": PointTo(MatchFields(IgnoreExtras, Fields{ + "SecretName": Equal(values.ClientUrlTLS.TLSCASecretRef.Name), + })), + }), + }), + "peer-url-etcd-server-tls": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("peer-url-etcd-server-tls"), + "VolumeSource": MatchFields(IgnoreExtras, Fields{ + "Secret": PointTo(MatchFields(IgnoreExtras, Fields{ + "SecretName": Equal(values.PeerUrlTLS.ServerTLSSecretRef.Name), + })), + }), + }), + "peer-url-ca-etcd": MatchFields(IgnoreExtras, Fields{ + "Name": Equal("peer-url-ca-etcd"), + "VolumeSource": MatchFields(IgnoreExtras, Fields{ + "Secret": PointTo(MatchFields(IgnoreExtras, Fields{ + "SecretName": Equal(values.PeerUrlTLS.TLSCASecretRef.Name), + })), + }), + }), + }), + }), + }), + "VolumeClaimTemplates": MatchAllElements(pvcIterator, Elements{ + values.VolumeClaimTemplateName: MatchFields(IgnoreExtras, Fields{ + "ObjectMeta": MatchFields(IgnoreExtras, Fields{ + "Name": Equal(values.VolumeClaimTemplateName), + }), + "Spec": MatchFields(IgnoreExtras, Fields{ + "StorageClassName": PointTo(Equal(*values.StorageClass)), + "AccessModes": MatchAllElements(accessModeIterator, Elements{ + "ReadWriteOnce": Equal(corev1.ReadWriteOnce), + }), + "Resources": MatchFields(IgnoreExtras, Fields{ + "Requests": MatchKeys(IgnoreExtras, Keys{ + corev1.ResourceStorage: Equal(*values.StorageCapacity), + }), + }), + }), + }), + }), + }), + })) +} + +func checkStsOwnerRefs(ors []metav1.OwnerReference, values Values) { + Expect(ors).To(ConsistOf(Equal(metav1.OwnerReference{ + APIVersion: druidv1alpha1.GroupVersion.String(), + Kind: "Etcd", + Name: values.Name, + UID: values.EtcdUID, + Controller: pointer.BoolPtr(true), + BlockOwnerDeletion: pointer.BoolPtr(true), + }))) +} + +func getEtcd(name, namespace string, tlsEnabled bool, replicas int32, storageProvider *string) *druidv1alpha1.Etcd { + instance := &druidv1alpha1.Etcd{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + UID: types.UID(uid), + }, + Spec: druidv1alpha1.EtcdSpec{ + Annotations: map[string]string{ + "app": "etcd-statefulset", + "role": "test", + "instance": name, + }, + Labels: map[string]string{ + "foo": "bar", + }, + Replicas: replicas, + StorageCapacity: &storageCapacity, + StorageClass: &storageClass, + PriorityClassName: &priorityClassName, + VolumeClaimTemplate: &volumeClaimTemplateName, + Backup: druidv1alpha1.BackupSpec{ + Image: &imageBR, + Port: pointer.Int32Ptr(backupPort), + Store: getEtcdBackup(storageProvider), + FullSnapshotSchedule: &snapshotSchedule, + GarbageCollectionPolicy: &garbageCollectionPolicy, + GarbageCollectionPeriod: &garbageCollectionPeriod, + DeltaSnapshotPeriod: &deltaSnapshotPeriod, + DeltaSnapshotMemoryLimit: &deltaSnapShotMemLimit, + EtcdSnapshotTimeout: &etcdSnapshotTimeout, + LeaderElection: &druidv1alpha1.LeaderElectionSpec{ + EtcdConnectionTimeout: &etcdLeaderElectionConnectionTimeout, + }, + + Resources: &backupRestoreResources, + OwnerCheck: &druidv1alpha1.OwnerCheckSpec{ + Name: ownerName, + ID: ownerID, + Interval: &ownerCheckInterval, + Timeout: &ownerCheckTimeout, + DNSCacheTTL: &ownerCheckDNSCacheTTL, + }, + }, + Etcd: druidv1alpha1.EtcdConfig{ + Quota: "a, + Metrics: &metricsBasic, + Image: &imageEtcd, + DefragmentationSchedule: &defragSchedule, + EtcdDefragTimeout: &etcdDefragTimeout, + HeartbeatDuration: &heartbeatDuration, + Resources: &etcdResources, + ClientPort: pointer.Int32Ptr(clientPort), + ServerPort: pointer.Int32Ptr(serverPort), + }, + Common: druidv1alpha1.SharedConfig{ + AutoCompactionMode: &autoCompactionMode, + AutoCompactionRetention: &autoCompactionRetention, + }, + }, + Status: druidv1alpha1.EtcdStatus{ + Replicas: 0, + }, + } + + if tlsEnabled { + clientTlsConfig := &druidv1alpha1.TLSConfig{ + TLSCASecretRef: druidv1alpha1.SecretReference{ + SecretReference: corev1.SecretReference{ + Name: "client-url-ca-etcd", + }, + DataKey: pointer.String("ca.crt"), + }, + ClientTLSSecretRef: corev1.SecretReference{ + Name: "client-url-etcd-client-tls", + }, + ServerTLSSecretRef: corev1.SecretReference{ + Name: "client-url-etcd-server-tls", + }, + } + + peerTlsConfig := &druidv1alpha1.TLSConfig{ + TLSCASecretRef: druidv1alpha1.SecretReference{ + SecretReference: corev1.SecretReference{ + Name: "peer-url-ca-etcd", + }, + DataKey: pointer.String("ca.crt"), + }, + ServerTLSSecretRef: corev1.SecretReference{ + Name: "peer-url-etcd-server-tls", + }, + } + + instance.Spec.Etcd.ClientUrlTLS = clientTlsConfig + instance.Spec.Etcd.PeerUrlTLS = peerTlsConfig + instance.Spec.Backup.TLS = clientTlsConfig + } + return instance +} + +func parseQuantity(q string) resource.Quantity { + val, _ := resource.ParseQuantity(q) + return val +} + +func getEtcdBackup(provider *string) *druidv1alpha1.StoreSpec { + storageProvider := pointer.StringDeref(provider, druidutils.ABS) + + return &druidv1alpha1.StoreSpec{ + Container: &container, + Prefix: prefix, + Provider: (*druidv1alpha1.StorageProvider)(&storageProvider), + SecretRef: &corev1.SecretReference{ + Name: "etcd-backup", + }, + } +} + +func volumeMountIterator(element interface{}) string { + return (element.(corev1.VolumeMount)).Name +} + +func volumeIterator(element interface{}) string { + return (element.(corev1.Volume)).Name +} + +func keyIterator(element interface{}) string { + return (element.(corev1.KeyToPath)).Key +} + +func envIterator(element interface{}) string { + return (element.(corev1.EnvVar)).Name +} + +func containerIterator(element interface{}) string { + return (element.(corev1.Container)).Name +} + +func hostAliasIterator(element interface{}) string { + return (element.(corev1.HostAlias)).IP +} + +func pvcIterator(element interface{}) string { + return (element.(corev1.PersistentVolumeClaim)).Name +} + +func accessModeIterator(element interface{}) string { + return string(element.(corev1.PersistentVolumeAccessMode)) +} + +func cmdIterator(element interface{}) string { + return element.(string) +} diff --git a/pkg/component/etcd/statefulset/values.go b/pkg/component/etcd/statefulset/values.go new file mode 100644 index 000000000..369c978ac --- /dev/null +++ b/pkg/component/etcd/statefulset/values.go @@ -0,0 +1,128 @@ +// Copyright (c) 2022 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statefulset + +import ( + druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/apimachinery/pkg/types" +) + +// Values contains the values necessary for creating ETCD statefulset. +type Values struct { + // Name is the name of the etcd resource. + Name string + // Namespace is the namespace of etcd resource. + Namespace string + // Name is the UID of the etcd resource. + EtcdUID types.UID + + // Replicas is the number of ETCD instance that the ETCD cluster will have. + Replicas int32 + // StatusReplicas is the number of replicas maintained in ETCD status. + StatusReplicas int32 + + // Annotations is the annotation provided in ETCD spec. + Annotations map[string]string + // Labels is the labels provided in ETCD spec. + Labels map[string]string + // BackupImage is the backup restore image. + BackupImage string + // EtcdImage is the etcd custom image. + EtcdImage string + // PriorityClassName is the Priority Class name. + PriorityClassName *string + // ServiceName is the name of the peer service. + ServiceName string + // ServiceAccountName is the service account name. + ServiceAccountName string + Affinity *corev1.Affinity + TopologySpreadConstraints []corev1.TopologySpreadConstraint + + EtcdResourceRequirements *corev1.ResourceRequirements + BackupResourceRequirements *corev1.ResourceRequirements + + EtcdCommand []string + ReadinessProbeCommand []string + LivenessProbCommand []string + EtcdBackupCommand []string + + EnableClientTLS string + EnablePeerTLS string + + FailBelowRevision string + VolumeClaimTemplateName string + + FullSnapLeaseName string + DeltaSnapLeaseName string + + StorageCapacity *resource.Quantity + StorageClass *string + + DefragmentationSchedule *string + FullSnapshotSchedule *string + + EtcdSnapshotTimeout *metav1.Duration + EtcdDefragTimeout *metav1.Duration + + DeltaSnapshotMemoryLimit *resource.Quantity + + GarbageCollectionPolicy *druidv1alpha1.GarbageCollectionPolicy + GarbageCollectionPeriod *metav1.Duration + + LeaderElection *druidv1alpha1.LeaderElectionSpec + BackupStore *druidv1alpha1.StoreSpec + + EnableProfiling *bool + + DeltaSnapshotPeriod *metav1.Duration + + SnapshotCompression *druidv1alpha1.CompressionSpec + HeartbeatDuration *metav1.Duration + + // MetricsLevel defines the level of detail for exported metrics of etcd, specify 'extensive' to include histogram metrics. + MetricsLevel *druidv1alpha1.MetricsLevel + // Quota defines the etcd DB quota. + Quota *resource.Quantity + + // ClientUrlTLS holds the TLS configuration details for client communication. + ClientUrlTLS *druidv1alpha1.TLSConfig + // PeerUrlTLS hold the TLS configuration details for peer communication. + PeerUrlTLS *druidv1alpha1.TLSConfig + // BackupTLS hold the TLS configuration for communication with Backup server. + BackupTLS *druidv1alpha1.TLSConfig + + //ClientServiceName is name of the etcd client service. + ClientServiceName string + // ClientPort holds the client port. + ClientPort *int32 + //PeerServiceName is name of the etcd peer service. + PeerServiceName string + // ServerPort is the peer port. + ServerPort *int32 + // ServerPort is the backup-restore side-car port. + BackupPort *int32 + + OwnerCheck *druidv1alpha1.OwnerCheckSpec + // AutoCompactionMode defines the auto-compaction-mode: 'periodic' or 'revision'. + AutoCompactionMode *druidv1alpha1.CompactionMode + //AutoCompactionRetention defines the auto-compaction-retention length for etcd as well as for embedded-Etcd of backup-restore sidecar. + AutoCompactionRetention *string + // ConfigMapName is the name of the configmap that holds the ETCD config. + ConfigMapName string +} diff --git a/pkg/component/etcd/statefulset/values_helper.go b/pkg/component/etcd/statefulset/values_helper.go new file mode 100644 index 000000000..dc004892b --- /dev/null +++ b/pkg/component/etcd/statefulset/values_helper.go @@ -0,0 +1,355 @@ +// Copyright (c) 2022 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v.2 except as noted otherwise in the LICENSE file +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package statefulset + +import ( + "fmt" + + druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" + "github.com/gardener/etcd-druid/pkg/utils" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/pointer" +) + +const ( + defaultBackupPort int32 = 8080 + defaultServerPort int32 = 2380 + defaultClientPort int32 = 2379 + defaultQuota int64 = 8 * 1024 * 1024 * 1024 // 8Gi + defaultSnapshotMemoryLimit int64 = 100 * 1024 * 1024 // 100Mi + defaultHeartbeatDuration = "10s" + defaultGbcPolicy = "LimitBased" + defaultAutoCompactionRetention = "30m" + defaultEtcdSnapshotTimeout = "15m" + defaultEtcdDefragTimeout = "15m" + defaultAutoCompactionMode = "periodic" + defaultEtcdConnectionTimeout = "5m" + defaultLocalPrefix = "/etc/gardener/local-backupbuckets" +) + +var defaultStorageCapacity = resource.MustParse("16Gi") + +// GenerateValues generates `statefulset.Values` for the statefulset component with the given parameters. +func GenerateValues( + etcd *druidv1alpha1.Etcd, + clientPort, serverPort, backupPort *int32, + etcdImage, backupImage string, + checksumAnnotations map[string]string, +) Values { + volumeClaimTemplateName := etcd.Name + if etcd.Spec.VolumeClaimTemplate != nil && len(*etcd.Spec.VolumeClaimTemplate) != 0 { + volumeClaimTemplateName = *etcd.Spec.VolumeClaimTemplate + } + + values := Values{ + Name: etcd.Name, + Namespace: etcd.Namespace, + EtcdUID: etcd.UID, + Replicas: etcd.Spec.Replicas, + StatusReplicas: etcd.Status.Replicas, + Annotations: utils.MergeStringMaps(checksumAnnotations, etcd.Spec.Annotations), + Labels: etcd.Spec.Labels, + EtcdImage: etcdImage, + BackupImage: backupImage, + PriorityClassName: etcd.Spec.PriorityClassName, + ServiceName: utils.GetPeerServiceName(etcd), + ServiceAccountName: utils.GetServiceAccountName(etcd), + Affinity: etcd.Spec.SchedulingConstraints.Affinity, + TopologySpreadConstraints: etcd.Spec.SchedulingConstraints.TopologySpreadConstraints, + + EtcdResourceRequirements: etcd.Spec.Etcd.Resources, + BackupResourceRequirements: etcd.Spec.Backup.Resources, + + VolumeClaimTemplateName: volumeClaimTemplateName, + + FullSnapLeaseName: utils.GetFullSnapshotLeaseName(etcd), + DeltaSnapLeaseName: utils.GetDeltaSnapshotLeaseName(etcd), + + StorageCapacity: etcd.Spec.StorageCapacity, + StorageClass: etcd.Spec.StorageClass, + + ClientUrlTLS: etcd.Spec.Etcd.ClientUrlTLS, + PeerUrlTLS: etcd.Spec.Etcd.PeerUrlTLS, + BackupTLS: etcd.Spec.Backup.TLS, + + LeaderElection: etcd.Spec.Backup.LeaderElection, + + BackupStore: etcd.Spec.Backup.Store, + EnableProfiling: etcd.Spec.Backup.EnableProfiling, + + DeltaSnapshotPeriod: etcd.Spec.Backup.DeltaSnapshotPeriod, + DeltaSnapshotMemoryLimit: etcd.Spec.Backup.DeltaSnapshotMemoryLimit, + + DefragmentationSchedule: etcd.Spec.Etcd.DefragmentationSchedule, + FullSnapshotSchedule: etcd.Spec.Backup.FullSnapshotSchedule, + + EtcdSnapshotTimeout: etcd.Spec.Backup.EtcdSnapshotTimeout, + EtcdDefragTimeout: etcd.Spec.Etcd.EtcdDefragTimeout, + + GarbageCollectionPolicy: etcd.Spec.Backup.GarbageCollectionPolicy, + GarbageCollectionPeriod: etcd.Spec.Backup.GarbageCollectionPeriod, + + SnapshotCompression: etcd.Spec.Backup.SnapshotCompression, + HeartbeatDuration: etcd.Spec.Etcd.HeartbeatDuration, + + MetricsLevel: etcd.Spec.Etcd.Metrics, + Quota: etcd.Spec.Etcd.Quota, + ClientServiceName: utils.GetClientServiceName(etcd), + ClientPort: clientPort, + PeerServiceName: utils.GetPeerServiceName(etcd), + ServerPort: serverPort, + BackupPort: backupPort, + + OwnerCheck: etcd.Spec.Backup.OwnerCheck, + + AutoCompactionMode: etcd.Spec.Common.AutoCompactionMode, + AutoCompactionRetention: etcd.Spec.Common.AutoCompactionRetention, + ConfigMapName: utils.GetConfigmapName(etcd), + } + + values.EtcdCommand = getEtcdCommand() + values.ReadinessProbeCommand = getReadinessProbeCommand(values) + values.LivenessProbCommand = getLivenessProbeCommand(values) + values.EtcdBackupCommand = getBackupRestoreCommand(values) + + return values +} + +func getEtcdCommand() []string { + return []string{"/var/etcd/bin/bootstrap.sh"} +} + +func getReadinessProbeCommand(val Values) []string { + command := []string{"/usr/bin/curl"} + + protocol := "http" + if tlsReadinessProvidedByBackupRestore(val) || tlsReadinessProvidedByEtcd(val) { + protocol = "https" + command = append(command, "--cert") + command = append(command, "/var/etcd/ssl/client/client/tls.crt") + command = append(command, "--key") + command = append(command, "/var/etcd/ssl/client/client/tls.key") + + dataKey := "ca.crt" + if val.ClientUrlTLS.TLSCASecretRef.DataKey != nil { + dataKey = *val.ClientUrlTLS.TLSCASecretRef.DataKey + } + + command = append(command, "--cacert") + command = append(command, "/var/etcd/ssl/client/ca/"+dataKey) + } + + var readinessEndpoint string + if val.Replicas == 1 { + // For single replica etcds we use the `healthz` endpoint which is provided by the etcd-backup-restore side-car. + // This is required because of the owner checks that are considered for readiness. + readinessEndpoint = fmt.Sprintf("%s://%s-local:%d/healthz", protocol, val.Name, pointer.Int32Deref(val.BackupPort, defaultBackupPort)) + } else if val.Replicas > 1 { + readinessEndpoint = fmt.Sprintf("%s://%s-local:%d/health", protocol, val.Name, pointer.Int32Deref(val.ClientPort, defaultClientPort)) + } + + command = append(command, readinessEndpoint) + + return command +} + +func tlsReadinessProvidedByEtcd(val Values) bool { + return val.Replicas != 1 && val.ClientUrlTLS != nil +} + +func tlsReadinessProvidedByBackupRestore(val Values) bool { + return val.Replicas == 1 && val.BackupTLS != nil +} + +func getLivenessProbeCommand(val Values) []string { + command := []string{"" + "/bin/sh"} + command = append(command, "-ec") + command = append(command, "ETCDCTL_API=3") + command = append(command, "etcdctl") + + if val.ClientUrlTLS != nil { + command = append(command, "--cert=/var/etcd/ssl/client/client/tls.crt") + command = append(command, "--key=/var/etcd/ssl/client/client/tls.key") + + dataKey := "ca.crt" + if val.ClientUrlTLS.TLSCASecretRef.DataKey != nil { + dataKey = *val.ClientUrlTLS.TLSCASecretRef.DataKey + } + + command = append(command, "--cacert=/var/etcd/ssl/client/ca/"+dataKey) + command = append(command, fmt.Sprintf("--endpoints=https://%s-local:%d", val.Name, pointer.Int32Deref(val.ClientPort, defaultClientPort))) + } else { + command = append(command, fmt.Sprintf("--endpoints=http://%s-local:%d", val.Name, pointer.Int32Deref(val.ClientPort, defaultClientPort))) + } + + command = append(command, "get") + command = append(command, "foo") + command = append(command, "--consistency=s") + + return command +} + +func getBackupRestoreCommand(val Values) []string { + command := []string{"" + "etcdbrctl"} + command = append(command, "server") + + if val.BackupStore != nil { + command = append(command, "--enable-snapshot-lease-renewal=true") + command = append(command, "--delta-snapshot-lease-name="+val.DeltaSnapLeaseName) + command = append(command, "--full-snapshot-lease-name="+val.FullSnapLeaseName) + } + + if val.DefragmentationSchedule != nil { + command = append(command, "--defragmentation-schedule="+*val.DefragmentationSchedule) + } + + if val.FullSnapshotSchedule != nil { + command = append(command, "--schedule="+*val.FullSnapshotSchedule) + } + + garbageCollectionPolicy := defaultGbcPolicy + if val.GarbageCollectionPolicy != nil { + garbageCollectionPolicy = string(*val.GarbageCollectionPolicy) + } + + command = append(command, "--garbage-collection-policy="+garbageCollectionPolicy) + if garbageCollectionPolicy == "LimitBased" { + command = append(command, "--max-backups=7") + } + + command = append(command, "--data-dir=/var/etcd/data/new.etcd") + + if val.BackupStore != nil { + store, _ := utils.StorageProviderFromInfraProvider(val.BackupStore.Provider) + command = append(command, "--storage-provider="+store) + command = append(command, "--store-prefix="+string(val.BackupStore.Prefix)) + } + + var quota = defaultQuota + if val.Quota != nil { + quota = val.Quota.Value() + } + + command = append(command, "--embedded-etcd-quota-bytes="+fmt.Sprint(quota)) + + if pointer.BoolDeref(val.EnableProfiling, false) { + command = append(command, "--enable-profiling=true") + } + + if val.ClientUrlTLS != nil { + command = append(command, "--cert=/var/etcd/ssl/client/client/tls.crt") + command = append(command, "--key=/var/etcd/ssl/client/client/tls.key") + command = append(command, "--cacert=/var/etcd/ssl/client/ca/"+pointer.StringPtrDerefOr(val.ClientUrlTLS.TLSCASecretRef.DataKey, "ca.crt")) + command = append(command, "--insecure-transport=false") + command = append(command, "--insecure-skip-tls-verify=false") + command = append(command, fmt.Sprintf("--endpoints=https://%s-local:%d", val.Name, pointer.Int32Deref(val.ClientPort, defaultClientPort))) + } else { + command = append(command, "--insecure-transport=true") + command = append(command, "--insecure-skip-tls-verify=true") + command = append(command, fmt.Sprintf("--endpoints=http://%s-local:%d", val.Name, pointer.Int32Deref(val.ClientPort, defaultClientPort))) + } + + if val.BackupTLS != nil { + command = append(command, "--server-cert=/var/etcd/ssl/client/server/tls.crt") + command = append(command, "--server-key=/var/etcd/ssl/client/server/tls.key") + } + + command = append(command, "--etcd-connection-timeout="+defaultEtcdConnectionTimeout) + + if val.DeltaSnapshotPeriod != nil { + command = append(command, "--delta-snapshot-period="+val.DeltaSnapshotPeriod.Duration.String()) + } + + var deltaSnapshotMemoryLimit = defaultSnapshotMemoryLimit + if val.DeltaSnapshotMemoryLimit != nil { + deltaSnapshotMemoryLimit = val.DeltaSnapshotMemoryLimit.Value() + } + + command = append(command, "--delta-snapshot-memory-limit="+fmt.Sprint(deltaSnapshotMemoryLimit)) + + if val.GarbageCollectionPeriod != nil { + command = append(command, "--garbage-collection-period="+val.GarbageCollectionPeriod.Duration.String()) + } + + if val.SnapshotCompression != nil { + if pointer.BoolPtrDerefOr(val.SnapshotCompression.Enabled, false) { + command = append(command, "--compress-snapshots="+fmt.Sprint(*val.SnapshotCompression.Enabled)) + } + if val.SnapshotCompression.Policy != nil { + command = append(command, "--compression-policy="+string(*val.SnapshotCompression.Policy)) + } + } + + if val.OwnerCheck != nil { + command = append(command, "--owner-name="+val.OwnerCheck.Name) + command = append(command, "--owner-id="+val.OwnerCheck.ID) + + if val.OwnerCheck.Interval != nil { + command = append(command, "--owner-check-interval="+val.OwnerCheck.Interval.Duration.String()) + } + if val.OwnerCheck.Timeout != nil { + command = append(command, "--owner-check-timeout="+val.OwnerCheck.Timeout.Duration.String()) + } + if val.OwnerCheck.DNSCacheTTL != nil { + command = append(command, "--owner-check-dns-cache-ttl="+val.OwnerCheck.DNSCacheTTL.Duration.String()) + } + } + + compactionMode := defaultAutoCompactionMode + if val.AutoCompactionMode != nil { + compactionMode = string(*val.AutoCompactionMode) + } + command = append(command, "--auto-compaction-mode="+compactionMode) + + compactionRetention := defaultAutoCompactionRetention + if val.AutoCompactionRetention != nil { + compactionRetention = string(*val.AutoCompactionRetention) + } + command = append(command, "--auto-compaction-retention="+compactionRetention) + + etcdSnapshotTimeout := defaultEtcdSnapshotTimeout + if val.EtcdSnapshotTimeout != nil { + etcdSnapshotTimeout = val.EtcdSnapshotTimeout.Duration.String() + } + command = append(command, "--etcd-snapshot-timeout="+etcdSnapshotTimeout) + + etcdDefragTimeout := defaultEtcdDefragTimeout + if val.EtcdDefragTimeout != nil { + etcdDefragTimeout = val.EtcdDefragTimeout.Duration.String() + } + command = append(command, "--etcd-defrag-timeout="+etcdDefragTimeout) + + command = append(command, "--snapstore-temp-directory=/var/etcd/data/temp") + command = append(command, "--etcd-process-name=etcd") + command = append(command, "--enable-member-lease-renewal=true") + + heartbeatDuration := defaultHeartbeatDuration + if val.HeartbeatDuration != nil { + heartbeatDuration = val.HeartbeatDuration.Duration.String() + } + command = append(command, "--k8s-heartbeat-duration="+heartbeatDuration) + + if val.LeaderElection != nil { + if val.LeaderElection.EtcdConnectionTimeout != nil { + command = append(command, "--etcd-connection-timeout-leader-election="+val.LeaderElection.EtcdConnectionTimeout.Duration.String()) + } + + if val.LeaderElection.ReelectionPeriod != nil { + command = append(command, "--reelection-period="+val.LeaderElection.ReelectionPeriod.Duration.String()) + } + } + + return command +} diff --git a/pkg/component/etcd/values.go b/pkg/component/etcd/values.go index cc95326ba..fbf9c83ad 100644 --- a/pkg/component/etcd/values.go +++ b/pkg/component/etcd/values.go @@ -18,11 +18,13 @@ import ( "github.com/gardener/etcd-druid/pkg/component/etcd/configmap" "github.com/gardener/etcd-druid/pkg/component/etcd/lease" "github.com/gardener/etcd-druid/pkg/component/etcd/service" + "github.com/gardener/etcd-druid/pkg/component/etcd/statefulset" ) // Values contains all values relevant for deploying etcd components. type Values struct { - ConfigMap *configmap.Values - Service service.Values - Lease lease.Values + ConfigMap *configmap.Values + Service service.Values + Lease lease.Values + StatefulSet statefulset.Values } diff --git a/pkg/utils/miscellaneous.go b/pkg/utils/miscellaneous.go index 5cc60a17b..7028136c6 100644 --- a/pkg/utils/miscellaneous.go +++ b/pkg/utils/miscellaneous.go @@ -50,27 +50,24 @@ const ( ) const ( - s3 = "S3" - abs = "ABS" - gcs = "GCS" - oss = "OSS" - swift = "Swift" + // S3 is a constant for the AWS and S3 compliant storage provider. + S3 = "S3" + // ABS is a constant for the Azure storage provider. + ABS = "ABS" + // GCS is a constant for the Google storage provider. + GCS = "GCS" + // OSS is a constant for the Alicloud storage provider. + OSS = "OSS" + // Swift is a constant for the OpenStack storage provider. + Swift = "Swift" + // Local is a constant for the Local storage provider. Local = "Local" - ecs = "ECS" - ocs = "OCS" + // ECS is a constant for the EMC storage provider. + ECS = "ECS" + // OSC is a constant for the OpenShift storage provider. + OCS = "OCS" ) -// ValueExists returns true or false, depending on whether the given string -// is part of the given []string list . -func ValueExists(value string, list []string) bool { - for _, v := range list { - if v == value { - return true - } - } - return false -} - // MergeMaps takes two maps , and merges them. If defines a value with a key // already existing in the map, the value for that key will be overwritten. func MergeMaps(a, b map[string]interface{}) map[string]interface{} { @@ -211,20 +208,20 @@ func StorageProviderFromInfraProvider(infra *druidv1alpha1.StorageProvider) (str } switch *infra { - case aws, s3: - return s3, nil - case azure, abs: - return abs, nil - case alicloud, oss: - return oss, nil - case openstack, swift: - return swift, nil - case gcp, gcs: - return gcs, nil - case dell, ecs: - return ecs, nil - case openshift, ocs: - return ocs, nil + case aws, S3: + return S3, nil + case azure, ABS: + return ABS, nil + case alicloud, OSS: + return OSS, nil + case openstack, Swift: + return Swift, nil + case gcp, GCS: + return GCS, nil + case dell, ECS: + return ECS, nil + case openshift, OCS: + return OCS, nil case Local, druidv1alpha1.StorageProvider(strings.ToLower(Local)): return Local, nil default: diff --git a/pkg/utils/names.go b/pkg/utils/names.go index 8ad48319a..575a5523d 100644 --- a/pkg/utils/names.go +++ b/pkg/utils/names.go @@ -54,3 +54,13 @@ func GetJobName(etcd *druidv1alpha1.Etcd) string { func GetOrdinalPodName(etcd *druidv1alpha1.Etcd, order int) string { return fmt.Sprintf("%s-%d", etcd.Name, order) } + +// GetDeltaSnapshotLeaseName returns the name of the delta snapshot lease based on the given `etcd` object. +func GetDeltaSnapshotLeaseName(etcd *druidv1alpha1.Etcd) string { + return fmt.Sprintf("%s-delta-snap", etcd.Name) +} + +// GetFullSnapshotLeaseName returns the name of the full snapshot lease based on the given `etcd` object. +func GetFullSnapshotLeaseName(etcd *druidv1alpha1.Etcd) string { + return fmt.Sprintf("%s-full-snap", etcd.Name) +} diff --git a/pkg/utils/statefulset.go b/pkg/utils/statefulset.go new file mode 100644 index 000000000..e4e32d805 --- /dev/null +++ b/pkg/utils/statefulset.go @@ -0,0 +1,37 @@ +// Copyright (c) 2022 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "fmt" + + appsv1 "k8s.io/api/apps/v1" +) + +// CheckStatefulSet checks whether the given StatefulSet is healthy. +// A StatefulSet is considered healthy if its controller observed its current revision, +// it is not in an update (i.e. UpdateRevision is empty) and if its current replicas are equal to +// desired replicas specified in ETCD specs. +func CheckStatefulSet(etcdReplicas int32, statefulSet *appsv1.StatefulSet) error { + if statefulSet.Status.ObservedGeneration < statefulSet.Generation { + return fmt.Errorf("observed generation outdated (%d/%d)", statefulSet.Status.ObservedGeneration, statefulSet.Generation) + } + + if statefulSet.Status.ReadyReplicas < etcdReplicas { + return fmt.Errorf("not enough ready replicas (%d/%d)", statefulSet.Status.ReadyReplicas, etcdReplicas) + } + + return nil +}