diff --git a/controllers/config/custodian.go b/controllers/config/custodian.go index 62134a832..f964fdd41 100644 --- a/controllers/config/custodian.go +++ b/controllers/config/custodian.go @@ -25,6 +25,8 @@ type EtcdCustodianController struct { } type EtcdMemberConfig struct { - // EtcdMemberUnknownThreshold is the duration after which a etcd member's state is considered `NotReady`. + // EtcdMemberNotReadyThreshold is the duration after which an etcd member's state is considered `NotReady`. EtcdMemberNotReadyThreshold time.Duration + // EtcdMemberUnknownThreshold is the duration after which an etcd member's state is considered `Unknown`. + EtcdMemberUnknownThreshold time.Duration } diff --git a/main.go b/main.go index 9b9878fd2..4b84759d0 100644 --- a/main.go +++ b/main.go @@ -53,6 +53,7 @@ func main() { ignoreOperationAnnotation bool etcdMemberNotReadyThreshold time.Duration + etcdMemberUnknownThreshold time.Duration // TODO: migrate default to `leases` in one of the next releases defaultLeaderElectionResourceLock = resourcelock.ConfigMapsLeasesResourceLock @@ -78,6 +79,7 @@ func main() { flag.BoolVar(&disableLeaseCache, "disable-lease-cache", false, "Disable cache for lease.coordination.k8s.io resources.") flag.BoolVar(&ignoreOperationAnnotation, "ignore-operation-annotation", true, "Ignore the operation annotation or not.") flag.DurationVar(&etcdMemberNotReadyThreshold, "etcd-member-notready-threshold", 5*time.Minute, "Threshold after which an etcd member is considered not ready if the status was unknown before.") + flag.DurationVar(&etcdMemberUnknownThreshold, "etcd-member-unknown-threshold", 1*time.Minute, "Threshold after which an etcd member is considered unknown.") flag.Parse() @@ -118,6 +120,7 @@ func main() { custodian := controllers.NewEtcdCustodian(mgr, controllersconfig.EtcdCustodianController{ EtcdMember: controllersconfig.EtcdMemberConfig{ EtcdMemberNotReadyThreshold: etcdMemberNotReadyThreshold, + EtcdMemberUnknownThreshold: etcdMemberUnknownThreshold, }, SyncPeriod: custodianSyncPeriod, }) diff --git a/pkg/health/etcdmember/check_ready.go b/pkg/health/etcdmember/check_ready.go index b273d7549..b917f7ed8 100644 --- a/pkg/health/etcdmember/check_ready.go +++ b/pkg/health/etcdmember/check_ready.go @@ -16,7 +16,6 @@ package etcdmember import ( "context" - "fmt" "strings" "time" @@ -59,12 +58,6 @@ func (r *readyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Resul } for _, lease := range leases.Items { - leaseDurationSeconds := lease.Spec.LeaseDurationSeconds - if leaseDurationSeconds == nil { - r.logger.Error(fmt.Errorf("leaseDurationSeconds not set for lease object %s/%s", lease.Namespace, lease.Name), "Failed to perform member readiness check") - continue - } - var ( id, role = separateIdFromRole(lease.Spec.HolderIdentity) res = &result{ @@ -84,7 +77,7 @@ func (r *readyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Resul } // Check if member state must be considered as not ready - if renew.Add(time.Duration(*leaseDurationSeconds) * time.Second).Add(r.memberConfig.EtcdMemberNotReadyThreshold).Before(checkTime) { + if renew.Add(r.memberConfig.EtcdMemberUnknownThreshold).Add(r.memberConfig.EtcdMemberNotReadyThreshold).Before(checkTime) { res.status = druidv1alpha1.EtcdMemberStatusNotReady res.reason = "UnknownGracePeriodExceeded" results = append(results, res) @@ -92,7 +85,7 @@ func (r *readyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Resul } // Check if member state must be considered as unknown - if renew.Add(time.Duration(*leaseDurationSeconds) * time.Second).Before(checkTime) { + if renew.Add(r.memberConfig.EtcdMemberUnknownThreshold).Before(checkTime) { // If pod is not running or cannot be found then we deduce that the status is NotReady. ready, err := r.checkContainersAreReady(ctx, lease.Namespace, lease.Name) if (err == nil && !ready) || apierrors.IsNotFound(err) { diff --git a/pkg/health/etcdmember/check_ready_test.go b/pkg/health/etcdmember/check_ready_test.go index 7353622a1..9b4b1a23e 100644 --- a/pkg/health/etcdmember/check_ready_test.go +++ b/pkg/health/etcdmember/check_ready_test.go @@ -47,13 +47,13 @@ import ( var _ = Describe("ReadyCheck", func() { Describe("#Check", func() { var ( - ctx context.Context - mockCtrl *gomock.Controller - cl *mockclient.MockClient - leaseDurationSeconds *int32 - leaseDuration, notReadyThreshold time.Duration - now time.Time - check Checker + ctx context.Context + mockCtrl *gomock.Controller + cl *mockclient.MockClient + leaseDurationSeconds *int32 + unknownThreshold, notReadyThreshold time.Duration + now time.Time + check Checker member1Name string member1ID *string @@ -65,13 +65,13 @@ var _ = Describe("ReadyCheck", func() { ctx = context.Background() mockCtrl = gomock.NewController(GinkgoT()) cl = mockclient.NewMockClient(mockCtrl) - leaseDurationSeconds = pointer.Int32Ptr(300) - leaseDuration = time.Duration(*leaseDurationSeconds) * time.Second + unknownThreshold = 300 * time.Second notReadyThreshold = 60 * time.Second now, _ = time.Parse(time.RFC3339, "2021-06-01T00:00:00Z") check = ReadyCheck(cl, log.NullLogger{}, controllersconfig.EtcdCustodianController{ EtcdMember: controllersconfig.EtcdMemberConfig{ EtcdMemberNotReadyThreshold: notReadyThreshold, + EtcdMemberUnknownThreshold: unknownThreshold, }, }) @@ -102,7 +102,7 @@ var _ = Describe("ReadyCheck", func() { Context("when just expired", func() { BeforeEach(func() { - renewTime := metav1.NewMicroTime(now.Add(-1 * leaseDuration).Add(-1 * time.Second)) + renewTime := metav1.NewMicroTime(now.Add(-1 * unknownThreshold).Add(-1 * time.Second)) leasesList = &coordinationv1.LeaseList{ Items: []coordinationv1.Lease{ { @@ -228,8 +228,8 @@ var _ = Describe("ReadyCheck", func() { member2ID = pointer.StringPtr("2") var ( - shortExpirationTime = metav1.NewMicroTime(now.Add(-1 * leaseDuration).Add(-1 * time.Second)) - longExpirationTime = metav1.NewMicroTime(now.Add(-1 * leaseDuration).Add(-1 * time.Second).Add(-1 * notReadyThreshold)) + shortExpirationTime = metav1.NewMicroTime(now.Add(-1 * unknownThreshold).Add(-1 * time.Second)) + longExpirationTime = metav1.NewMicroTime(now.Add(-1 * unknownThreshold).Add(-1 * time.Second).Add(-1 * notReadyThreshold)) ) leasesList = &coordinationv1.LeaseList{ @@ -294,7 +294,7 @@ var _ = Describe("ReadyCheck", func() { member2ID = pointer.StringPtr("2") member3Name = "member3" member3ID = pointer.StringPtr("3") - renewTime := metav1.NewMicroTime(now.Add(-1 * leaseDuration)) + renewTime := metav1.NewMicroTime(now.Add(-1 * unknownThreshold)) leasesList = &coordinationv1.LeaseList{ Items: []coordinationv1.Lease{ { @@ -361,7 +361,7 @@ var _ = Describe("ReadyCheck", func() { BeforeEach(func() { member2Name = "member2" - renewTime := metav1.NewMicroTime(now.Add(-1 * leaseDuration)) + renewTime := metav1.NewMicroTime(now.Add(-1 * unknownThreshold)) leasesList = &coordinationv1.LeaseList{ Items: []coordinationv1.Lease{ {