Skip to content

Commit

Permalink
Create member, delta/full-snapshot Leases (#262)
Browse files Browse the repository at this point in the history
* Add lease component

* Add HeartbeatDuration configuration

* Add label selector for member check

* Use etcd-member-unknown-threshold flag instead of LeaseDurationSeconds

* Address review feedback

* Address review feedback
  • Loading branch information
timuthy authored Dec 22, 2021
1 parent 953b2f1 commit 0469cc9
Show file tree
Hide file tree
Showing 34 changed files with 3,424 additions and 170 deletions.
11 changes: 7 additions & 4 deletions api/v1alpha1/etcd_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ type OwnerCheckSpec struct {
DNSCacheTTL *metav1.Duration `json:"dnsCacheTTL,omitempty"`
}

// BackupSpec defines parametes associated with the full and delta snapshots of etcd
// BackupSpec defines parameters associated with the full and delta snapshots of etcd.
type BackupSpec struct {
// Port define the port on which etcd-backup-restore server will exposed.
// Port defines the port on which etcd-backup-restore server will be exposed.
// +optional
Port *int32 `json:"port,omitempty"`
// +optional
Expand All @@ -134,11 +134,11 @@ type BackupSpec struct {
// Store defines the specification of object store provider for storing backups.
// +optional
Store *StoreSpec `json:"store,omitempty"`
// Resources defines the compute Resources required by backup-restore container.
// Resources defines compute Resources required by backup-restore container.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/
// +optional
Resources *corev1.ResourceRequirements `json:"resources,omitempty"`
// CompactionResources defines the compute Resources required by compaction job.
// CompactionResources defines compute Resources required by compaction job.
// More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/
// +optional
CompactionResources *corev1.ResourceRequirements `json:"compactionResources,omitempty"`
Expand Down Expand Up @@ -201,6 +201,9 @@ type EtcdConfig struct {
// EtcdDefragTimeout defines the timeout duration for etcd defrag call
// +optional
EtcdDefragTimeout *metav1.Duration `json:"etcdDefragTimeout,omitempty"`
// HeartbeatDuration defines the duration for members to send heartbeats. The default value is 10s.
// +optional
HeartbeatDuration *metav1.Duration `json:"heartbeatDuration,omitempty"`
}

// SharedConfig defines parameters shared and used by Etcd as well as backup-restore sidecar.
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions charts/etcd/templates/etcd-statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ spec:
{{- end }}
- --snapstore-temp-directory={{ .Values.backup.snapstoreTempDir }}
- --etcd-process-name=etcd
{{- if .Values.etcd.heartbeatDuration }}
- --enable-member-lease-renewal=true
- --k8s-heartbeat-duration={{ .Values.etcd.heartbeatDuration }}
{{- end }}
image: {{ .Values.backup.image }}
imagePullPolicy: {{ .Values.backup.pullPolicy }}
ports:
Expand Down
1 change: 1 addition & 0 deletions charts/etcd/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ etcd:
memory: 128Mi
#username: username
#password: password
heartbeatDuration: 10s

backup:
port: 8080
Expand Down
18 changes: 11 additions & 7 deletions config/crd/bases/10-crd-druid.gardener.cloud_etcds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,12 @@ spec:
type: string
type: object
backup:
description: BackupSpec defines parametes associated with the full
and delta snapshots of etcd
description: BackupSpec defines parameters associated with the full
and delta snapshots of etcd.
properties:
compactionResources:
description: 'CompactionResources defines the compute Resources
required by compaction job. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
description: 'CompactionResources defines compute Resources required
by compaction job. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
properties:
limits:
additionalProperties:
Expand Down Expand Up @@ -158,12 +158,12 @@ spec:
type: object
port:
description: Port defines the port on which etcd-backup-restore
server will exposed.
server will be exposed.
format: int32
type: integer
resources:
description: 'Resources defines the compute Resources required
by backup-restore container. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
description: 'Resources defines compute Resources required by
backup-restore container. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/'
properties:
limits:
additionalProperties:
Expand Down Expand Up @@ -293,6 +293,10 @@ spec:
description: EtcdDefragTimeout defines the timeout duration for
etcd defrag call
type: string
heartbeatDuration:
description: HeartbeatDuration defines the duration for members
to send heartbeats. The default value is 10s.
type: string
image:
description: Image defines the etcd container image and tag
type: string
Expand Down
1 change: 1 addition & 0 deletions config/samples/druid_v1alpha1_etcd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ spec:
clientPort: 2379
serverPort: 2380
quota: 8Gi
# heartbeatDuration: 10s
backup:
port: 8080
fullSnapshotSchedule: "0 */24 * * *"
Expand Down
9 changes: 5 additions & 4 deletions controllers/compaction_lease_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
controllersconfig "github.com/gardener/etcd-druid/controllers/config"
"github.com/gardener/etcd-druid/pkg/common"
componentlease "github.com/gardener/etcd-druid/pkg/component/etcd/lease"
druidpredicates "github.com/gardener/etcd-druid/pkg/predicate"
"github.com/gardener/etcd-druid/pkg/utils"
"github.com/gardener/gardener/pkg/utils/imagevector"
Expand Down Expand Up @@ -116,7 +117,7 @@ func (lc *CompactionLeaseController) Reconcile(ctx context.Context, req ctrl.Req

// Get full and delta snapshot lease to check the HolderIdentity value to take decision on compaction job
fullLease := &coordinationv1.Lease{}
if err := lc.Get(ctx, kutil.Key(etcd.Namespace, getFullSnapshotLeaseName(etcd)), fullLease); err != nil {
if err := lc.Get(ctx, kutil.Key(etcd.Namespace, componentlease.GetFullSnapshotLeaseName(etcd)), fullLease); err != nil {
logger.Info("Couldn't fetch full snap lease because: " + err.Error())

return ctrl.Result{
Expand All @@ -125,7 +126,7 @@ func (lc *CompactionLeaseController) Reconcile(ctx context.Context, req ctrl.Req
}

deltaLease := &coordinationv1.Lease{}
if err := lc.Get(ctx, kutil.Key(etcd.Namespace, getDeltaSnapshotLeaseName(etcd)), deltaLease); err != nil {
if err := lc.Get(ctx, kutil.Key(etcd.Namespace, componentlease.GetDeltaSnapshotLeaseName(etcd)), deltaLease); err != nil {
logger.Info("Couldn't fetch delta snap lease because: " + err.Error())

return ctrl.Result{
Expand Down Expand Up @@ -494,8 +495,8 @@ func getCompactJobCommands(etcd *druidv1alpha1.Etcd) []string {
command = append(command, "--data-dir=/var/etcd/data")
command = append(command, "--snapstore-temp-directory=/var/etcd/data/tmp")
command = append(command, "--enable-snapshot-lease-renewal=true")
command = append(command, "--full-snapshot-lease-name="+getFullSnapshotLeaseName(etcd))
command = append(command, "--delta-snapshot-lease-name="+getDeltaSnapshotLeaseName(etcd))
command = append(command, "--full-snapshot-lease-name="+componentlease.GetFullSnapshotLeaseName(etcd))
command = append(command, "--delta-snapshot-lease-name="+componentlease.GetDeltaSnapshotLeaseName(etcd))

var quota int64 = DefaultETCDQuota
if etcd.Spec.Etcd.Quota != nil {
Expand Down
15 changes: 8 additions & 7 deletions controllers/compaction_lease_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"time"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
componentlease "github.com/gardener/etcd-druid/pkg/component/etcd/lease"
"github.com/gardener/etcd-druid/pkg/utils"
"github.com/gardener/gardener/pkg/controllerutils"
"github.com/gardener/gardener/pkg/utils/test/matchers"
Expand Down Expand Up @@ -402,11 +403,11 @@ func validateEtcdForCmpctJob(instance *druidv1alpha1.Etcd, j *batchv1.Job) {
"Containers": MatchElements(containerIterator, IgnoreExtras, Elements{
"compact-backup": MatchFields(IgnoreExtras, Fields{
"Command": MatchElements(cmdIterator, IgnoreExtras, Elements{
"--data-dir=/var/etcd/data": Equal("--data-dir=/var/etcd/data"),
"--snapstore-temp-directory=/var/etcd/data/tmp": Equal("--snapstore-temp-directory=/var/etcd/data/tmp"),
"--enable-snapshot-lease-renewal=true": Equal("--enable-snapshot-lease-renewal=true"),
fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", getFullSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", getFullSnapshotLeaseName(instance))),
fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", getDeltaSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", getDeltaSnapshotLeaseName(instance))),
"--data-dir=/var/etcd/data": Equal("--data-dir=/var/etcd/data"),
"--snapstore-temp-directory=/var/etcd/data/tmp": Equal("--snapstore-temp-directory=/var/etcd/data/tmp"),
"--enable-snapshot-lease-renewal=true": Equal("--enable-snapshot-lease-renewal=true"),
fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", componentlease.GetFullSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--full-snapshot-lease-name", componentlease.GetFullSnapshotLeaseName(instance))),
fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", componentlease.GetDeltaSnapshotLeaseName(instance)): Equal(fmt.Sprintf("%s=%s", "--delta-snapshot-lease-name", componentlease.GetDeltaSnapshotLeaseName(instance))),
fmt.Sprintf("%s=%s", "--store-prefix", instance.Spec.Backup.Store.Prefix): Equal(fmt.Sprintf("%s=%s", "--store-prefix", instance.Spec.Backup.Store.Prefix)),
fmt.Sprintf("%s=%s", "--storage-provider", store): Equal(fmt.Sprintf("%s=%s", "--storage-provider", store)),
fmt.Sprintf("%s=%s", "--store-container", *instance.Spec.Backup.Store.Container): Equal(fmt.Sprintf("%s=%s", "--store-container", *instance.Spec.Backup.Store.Container)),
Expand Down Expand Up @@ -843,7 +844,7 @@ func fullLeaseIsCorrectlyReconciled(c client.Client, instance *druidv1alpha1.Etc
ctx, cancel := context.WithTimeout(context.TODO(), timeout)
defer cancel()
req := types.NamespacedName{
Name: getFullSnapshotLeaseName(instance),
Name: componentlease.GetFullSnapshotLeaseName(instance),
Namespace: instance.Namespace,
}

Expand All @@ -861,7 +862,7 @@ func deltaLeaseIsCorrectlyReconciled(c client.Client, instance *druidv1alpha1.Et
ctx, cancel := context.WithTimeout(context.TODO(), timeout)
defer cancel()
req := types.NamespacedName{
Name: getDeltaSnapshotLeaseName(instance),
Name: componentlease.GetDeltaSnapshotLeaseName(instance),
Namespace: instance.Namespace,
}

Expand Down
4 changes: 3 additions & 1 deletion controllers/config/custodian.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ type EtcdCustodianController struct {
}

// EtcdMemberConfig holds the threshold durations used to classify the state of an etcd member.
type EtcdMemberConfig struct {
// EtcdMemberNotReadyThreshold is the duration after which an etcd member's state is considered `NotReady`.
EtcdMemberNotReadyThreshold time.Duration
// EtcdMemberUnknownThreshold is the duration after which an etcd member's state is considered `Unknown`.
EtcdMemberUnknownThreshold time.Duration
}
Loading

0 comments on commit 0469cc9

Please sign in to comment.