Skip to content

Commit

Permalink
Use etcd-member-unknown-threshold flag instead of LeaseDurationSeconds
Browse files Browse the repository at this point in the history
  • Loading branch information
timuthy committed Nov 5, 2021
1 parent 721f412 commit 83258d9
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 24 deletions.
4 changes: 3 additions & 1 deletion controllers/config/custodian.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ type EtcdCustodianController struct {
}

type EtcdMemberConfig struct {
// EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `NotReady`.
// EtcdMemberNotReadyThreshold is the duration after which an etcd member's state is considered `NotReady`.
EtcdMemberNotReadyThreshold time.Duration
// EtcdMemberUnknownThreshold is the duration after which an etcd member's state is considered `Unknown`.
EtcdMemberUnknownThreshold time.Duration
}
3 changes: 3 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func main() {
enableBackupCompactionJobTempFS bool

etcdMemberNotReadyThreshold time.Duration
etcdMemberUnknownThreshold time.Duration

// TODO: migrate default to `leases` in one of the next releases
defaultLeaderElectionResourceLock = resourcelock.ConfigMapsLeasesResourceLock
Expand All @@ -68,6 +69,7 @@ func main() {
flag.BoolVar(&disableLeaseCache, "disable-lease-cache", false, "Disable cache for lease.coordination.k8s.io resources.")
flag.BoolVar(&ignoreOperationAnnotation, "ignore-operation-annotation", true, "Ignore the operation annotation or not.")
flag.DurationVar(&etcdMemberNotReadyThreshold, "etcd-member-notready-threshold", 5*time.Minute, "Threshold after which an etcd member is considered not ready if the status was unknown before.")
flag.DurationVar(&etcdMemberUnknownThreshold, "etcd-member-unknown-threshold", 1*time.Minute, "Threshold after which an etcd member is considered unknown.")
flag.BoolVar(&enableBackupCompactionJobTempFS, "enable-backup-compaction-job-tempfs", false, "Enable the backup compaction job to use tempfs as its volume mount")

flag.Parse()
Expand Down Expand Up @@ -109,6 +111,7 @@ func main() {
custodian := controllers.NewEtcdCustodian(mgr, controllersconfig.EtcdCustodianController{
EtcdMember: controllersconfig.EtcdMemberConfig{
EtcdMemberNotReadyThreshold: etcdMemberNotReadyThreshold,
EtcdMemberUnknownThreshold: etcdMemberUnknownThreshold,
},
SyncPeriod: custodianSyncPeriod,
})
Expand Down
11 changes: 2 additions & 9 deletions pkg/health/etcdmember/check_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package etcdmember

import (
"context"
"fmt"
"strings"
"time"

Expand Down Expand Up @@ -54,12 +53,6 @@ func (r *readyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Resul
}

for _, lease := range leases.Items {
leaseDurationSeconds := lease.Spec.LeaseDurationSeconds
if leaseDurationSeconds == nil {
r.logger.Error(fmt.Errorf("leaseDurationSeconds not set for lease object %s/%s", lease.Namespace, lease.Name), "Failed to perform member readiness check")
continue
}

var (
id, role = separateIdFromRole(lease.Spec.HolderIdentity)
res = &result{
Expand All @@ -79,15 +72,15 @@ func (r *readyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Resul
}

// Check if member state must be considered as not ready
if renew.Add(time.Duration(*leaseDurationSeconds) * time.Second).Add(r.memberConfig.EtcdMemberNotReadyThreshold).Before(checkTime) {
if renew.Add(r.memberConfig.EtcdMemberUnknownThreshold).Add(r.memberConfig.EtcdMemberNotReadyThreshold).Before(checkTime) {
res.status = druidv1alpha1.EtcdMemberStatusNotReady
res.reason = "UnknownGracePeriodExceeded"
results = append(results, res)
continue
}

// Check if member state must be considered as unknown
if renew.Add(time.Duration(*leaseDurationSeconds) * time.Second).Before(checkTime) {
if renew.Add(r.memberConfig.EtcdMemberUnknownThreshold).Before(checkTime) {
// If pod is not running or cannot be found then we deduce that the status is NotReady.
ready, err := r.checkContainersAreReady(ctx, lease.Namespace, lease.Name)
if (err == nil && !ready) || apierrors.IsNotFound(err) {
Expand Down
28 changes: 14 additions & 14 deletions pkg/health/etcdmember/check_ready_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ import (
var _ = Describe("ReadyCheck", func() {
Describe("#Check", func() {
var (
ctx context.Context
mockCtrl *gomock.Controller
cl *mockclient.MockClient
leaseDurationSeconds *int32
leaseDuration, notReadyThreshold time.Duration
now time.Time
check Checker
ctx context.Context
mockCtrl *gomock.Controller
cl *mockclient.MockClient
leaseDurationSeconds *int32
unknownThreshold, notReadyThreshold time.Duration
now time.Time
check Checker

member1Name string
member1ID *string
Expand All @@ -62,13 +62,13 @@ var _ = Describe("ReadyCheck", func() {
ctx = context.Background()
mockCtrl = gomock.NewController(GinkgoT())
cl = mockclient.NewMockClient(mockCtrl)
leaseDurationSeconds = pointer.Int32Ptr(300)
leaseDuration = time.Duration(*leaseDurationSeconds) * time.Second
unknownThreshold = 300 * time.Second
notReadyThreshold = 60 * time.Second
now, _ = time.Parse(time.RFC3339, "2021-06-01T00:00:00Z")
check = ReadyCheck(cl, log.NullLogger{}, controllersconfig.EtcdCustodianController{
EtcdMember: controllersconfig.EtcdMemberConfig{
EtcdMemberNotReadyThreshold: notReadyThreshold,
EtcdMemberUnknownThreshold: unknownThreshold,
},
})

Expand Down Expand Up @@ -97,7 +97,7 @@ var _ = Describe("ReadyCheck", func() {

Context("when just expired", func() {
BeforeEach(func() {
renewTime := metav1.NewMicroTime(now.Add(-1 * leaseDuration).Add(-1 * time.Second))
renewTime := metav1.NewMicroTime(now.Add(-1 * unknownThreshold).Add(-1 * time.Second))
leasesList = &coordinationv1.LeaseList{
Items: []coordinationv1.Lease{
{
Expand Down Expand Up @@ -223,8 +223,8 @@ var _ = Describe("ReadyCheck", func() {
member2ID = pointer.StringPtr("2")

var (
shortExpirationTime = metav1.NewMicroTime(now.Add(-1 * leaseDuration).Add(-1 * time.Second))
longExpirationTime = metav1.NewMicroTime(now.Add(-1 * leaseDuration).Add(-1 * time.Second).Add(-1 * notReadyThreshold))
shortExpirationTime = metav1.NewMicroTime(now.Add(-1 * unknownThreshold).Add(-1 * time.Second))
longExpirationTime = metav1.NewMicroTime(now.Add(-1 * unknownThreshold).Add(-1 * time.Second).Add(-1 * notReadyThreshold))
)

leasesList = &coordinationv1.LeaseList{
Expand Down Expand Up @@ -289,7 +289,7 @@ var _ = Describe("ReadyCheck", func() {
member2ID = pointer.StringPtr("2")
member3Name = "member3"
member3ID = pointer.StringPtr("3")
renewTime := metav1.NewMicroTime(now.Add(-1 * leaseDuration))
renewTime := metav1.NewMicroTime(now.Add(-1 * unknownThreshold))
leasesList = &coordinationv1.LeaseList{
Items: []coordinationv1.Lease{
{
Expand Down Expand Up @@ -356,7 +356,7 @@ var _ = Describe("ReadyCheck", func() {

BeforeEach(func() {
member2Name = "member2"
renewTime := metav1.NewMicroTime(now.Add(-1 * leaseDuration))
renewTime := metav1.NewMicroTime(now.Add(-1 * unknownThreshold))
leasesList = &coordinationv1.LeaseList{
Items: []coordinationv1.Lease{
{
Expand Down

0 comments on commit 83258d9

Please sign in to comment.