Skip to content
This repository is currently being migrated. It's locked while the migration is in progress.

Commit

Permalink
deploy resource quota & watch pdbs (#48)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mojachieee authored Jun 24, 2022
1 parent 7ebdd73 commit 7cfdf19
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 1 deletion.
9 changes: 9 additions & 0 deletions config/rbac/role.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

91 changes: 90 additions & 1 deletion controllers/etcdcluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
monitorv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
merrors "k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"k8s.io/apimachinery/pkg/types"
Expand All @@ -46,6 +47,7 @@ const (
const (
getTLSConfigTimeout = time.Second * 24
defragTimeout = time.Minute * 10
resourceQuotaName = "etcd-critical-pods"
)

// EtcdClusterReconciler reconciles a EtcdCluster object
Expand Down Expand Up @@ -413,6 +415,71 @@ func (r *EtcdClusterReconciler) createService(ctx context.Context, cluster *etcd
return &reconcilerevent.ServiceCreatedEvent{Object: cluster, ServiceName: service.Name}, nil
}

// resourceQuotaNameKey returns the namespaced name of the ResourceQuota used
// for cluster. The name is the fixed resourceQuotaName rather than anything
// derived from the cluster's own name, so every cluster in a given namespace
// resolves to the same key: only one such quota is wanted per namespace.
func resourceQuotaNameKey(cluster *etcdv1alpha1.EtcdCluster) types.NamespacedName {
	var key types.NamespacedName
	key.Namespace = cluster.Namespace
	key.Name = resourceQuotaName
	return key
}

// hasResourceQuota reports whether the ResourceQuota identified by
// resourceQuotaNameKey(cluster) can be fetched.
//
// It returns (true, nil) when the Get succeeds, (false, nil) when the API
// reports the object as not found, and (false, err) for any other error.
func (r *EtcdClusterReconciler) hasResourceQuota(ctx context.Context, cluster *etcdv1alpha1.EtcdCluster) (bool, error) {
	err := r.Get(ctx, resourceQuotaNameKey(cluster), &v1.ResourceQuota{})
	switch {
	case err == nil:
		// The lookup succeeded, so the quota exists.
		return true, nil
	case apierrors.IsNotFound(err):
		// Not found is the expected answer when the quota has not been created yet.
		return false, nil
	default:
		// Anything else is a genuine failure the caller has to deal with.
		return false, err
	}
}

// createResourceQuota builds the ResourceQuota for cluster's namespace and
// submits it to the Kubernetes API. On success it returns an event naming the
// created quota; on failure it returns the error from Create unchanged.
func (r *EtcdClusterReconciler) createResourceQuota(ctx context.Context, cluster *etcdv1alpha1.EtcdCluster) (*reconcilerevent.ResourceQuotaCreatedEvent, error) {
	desired := resourceQuotaForCluster(cluster)
	err := r.Create(ctx, desired)
	if err != nil {
		return nil, err
	}
	event := &reconcilerevent.ResourceQuotaCreatedEvent{
		Object:            cluster,
		ResourceQuotaName: desired.Name,
	}
	return event, nil
}

// resourceQuotaForCluster returns the ResourceQuota object to create in
// cluster's namespace. The quota is scoped to pods whose priority class is
// "system-cluster-critical" or "system-node-critical" and caps their count at
// 10000.
//
// NOTE(review): the quota's name is shared by the whole namespace, yet it
// carries a controller owner reference and a cluster label for this one
// cluster. Presumably deleting that cluster garbage-collects the quota for
// every other cluster in the namespace too; confirm this is intended.
func resourceQuotaForCluster(cluster *etcdv1alpha1.EtcdCluster) *v1.ResourceQuota {
	key := resourceQuotaNameKey(cluster)

	meta := metav1.ObjectMeta{
		Name:      key.Name,
		Namespace: key.Namespace,
		OwnerReferences: []metav1.OwnerReference{
			*metav1.NewControllerRef(cluster, etcdv1alpha1.GroupVersion.WithKind("EtcdCluster")),
		},
		Labels: map[string]string{
			appLabel:     appName,
			clusterLabel: cluster.Name,
		},
	}

	// Only pods running with one of the critical priority classes count
	// against this quota.
	criticalPriorityClasses := v1.ScopedResourceSelectorRequirement{
		ScopeName: v1.ResourceQuotaScopePriorityClass,
		Operator:  v1.ScopeSelectorOpIn,
		Values:    []string{"system-cluster-critical", "system-node-critical"},
	}

	spec := v1.ResourceQuotaSpec{
		Hard: v1.ResourceList{
			// The limit is arbitrary: GKE requires a resource quota to exist,
			// but we don't actually want to hit the limit.
			v1.ResourcePods: resource.MustParse("10000"),
		},
		ScopeSelector: &v1.ScopeSelector{
			MatchExpressions: []v1.ScopedResourceSelectorRequirement{criticalPriorityClasses},
		},
	}

	return &v1.ResourceQuota{
		ObjectMeta: meta,
		Spec:       spec,
	}
}

// minAvailableForETCDQuorum will calculate the minimum amount of pods required to maintain quorum
func minAvailableForETCDQuorum(cluster *etcdv1alpha1.EtcdCluster) int {
// defined in https://etcd.io/docs/v3.3/faq/
Expand Down Expand Up @@ -749,6 +816,23 @@ func (r *EtcdClusterReconciler) reconcile(
return result, pdbEvent, nil
}

// GKE requires a resource quota to exist if we're creating pods that use
// the "system-cluster-critical" or "system-node-critical" priority class.
// We use both of these priority classes, so we create one resource quota
// per namespace to make sure GKE will schedule our etcd pods
quotaExists, err := r.hasResourceQuota(ctx, cluster)
if err != nil {
return result, nil, fmt.Errorf("unable to fetch resource quota from Kubernetes API: %w", err)
}
if !quotaExists {
quotaCreatedEvent, err := r.createResourceQuota(ctx, cluster)
if err != nil {
return result, nil, fmt.Errorf("unable to create resource quota: %w", err)
}
log.V(1).Info("Created ResourceQuota", "quota", quotaCreatedEvent.ResourceQuotaName)
return result, quotaCreatedEvent, nil
}

serviceMonitorCRDInstalled := true
// Assuming the service monitor CRD is installed
// Check if a service monitor CR exists and create one if it doesn't
Expand All @@ -767,7 +851,9 @@ func (r *EtcdClusterReconciler) reconcile(
return result, nil, fmt.Errorf("unable to create service monitor: %w", err)
}
log.Info("Created ServiceMonitor", "service_monitor", smCreatedEvent.ServiceMonitorName)
return result, smCreatedEvent, nil
// As we can't be sure if this CRD exists, we can't watch events for it
// so we force a reconcile requeue
return ctrl.Result{RequeueAfter: time.Millisecond * 500}, smCreatedEvent, nil
}

withThresholdSchedule, found := r.Schedules.Read(scheduleMapKeyFor(cluster))
Expand Down Expand Up @@ -1100,6 +1186,7 @@ func peerNameForMember(member etcd.Member) (string, error) {
// +kubebuilder:rbac:groups=etcd.improbable.io,resources=etcdclusters,verbs=get;list;watch
// +kubebuilder:rbac:groups=etcd.improbable.io,resources=etcdclusters/status;etcdclusters/finalizers,verbs=get;update;patch
// +kubebuilder:rbac:groups=core,resources=services,verbs=get;list;watch;create
// +kubebuilder:rbac:groups=core,resources=resourcequotas,verbs=get;list;watch;create
// +kubebuilder:rbac:groups=etcd.improbable.io,resources=etcdpeers,verbs=get;list;watch;create;delete;patch
// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=*
Expand Down Expand Up @@ -1521,6 +1608,8 @@ func (r *EtcdClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
For(&etcdv1alpha1.EtcdCluster{}).
Owns(&v1.Service{}).
Owns(&v1.ResourceQuota{}).
Owns(&policyv1beta1.PodDisruptionBudget{}).
Owns(&etcdv1alpha1.EtcdPeer{}).
Complete(r)
}
14 changes: 14 additions & 0 deletions controllers/etcdcluster_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,20 @@ func (s *controllerSuite) testClusterController(t *testing.T) {
}, time.Second*20, time.Millisecond*500)
})

t.Run("CreateResourceQuota", func(t *testing.T) {
	// Poll until the operator has created the namespace-wide resource quota.
	// The closure reports a missing quota by returning the Get error so that
	// try.Eventually can retry; asserting with require inside the closure
	// would fail the test on the very first unsuccessful poll.
	var quota v1.ResourceQuota
	err = try.Eventually(func() error {
		return s.k8sClient.Get(s.ctx, client.ObjectKey{
			Namespace: namespace,
			Name:      resourceQuotaName,
		}, &quota)
	}, time.Second*15, time.Millisecond*500)

	// Assert once polling has finished, on the last object that was fetched.
	require.NoError(t, err, "failed to find resource quota")
	require.NotNil(t, quota.Spec.ScopeSelector, "no scope selector set for resource quota")
})

t.Run("CreatesCronJob", func(t *testing.T) {
err = try.Eventually(func() error {
_, ok := s.clusterControllerSchedules.Read(string(etcdCluster.UID) + "-defrag")
Expand Down
12 changes: 12 additions & 0 deletions internal/reconcilerevent/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ func (s *ServiceCreatedEvent) Record(recorder record.EventRecorder) {
fmt.Sprintf("Created service with name %q", s.ServiceName))
}

// ResourceQuotaCreatedEvent describes the creation of a ResourceQuota, to be
// recorded against Object.
type ResourceQuotaCreatedEvent struct {
	// Object is the resource the event is recorded against.
	Object runtime.Object
	// ResourceQuotaName is the name of the ResourceQuota that was created.
	ResourceQuotaName string
}

// Record emits a "ResourceQuotaCreated" event of type K8sEventTypeNormal for
// s.Object through recorder, with a message naming the created quota.
func (s *ResourceQuotaCreatedEvent) Record(recorder record.EventRecorder) {
	message := fmt.Sprintf("Created ResourceQuota with name %q", s.ResourceQuotaName)
	recorder.Event(s.Object, K8sEventTypeNormal, "ResourceQuotaCreated", message)
}

type ServiceMonitorCreatedEvent struct {
Object runtime.Object
ServiceMonitorName string
Expand Down
9 changes: 9 additions & 0 deletions kuttl/e2e/deployment-test/00-deploy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3708,6 +3708,15 @@ rules:
- list
- patch
- watch
- apiGroups:
- ""
resources:
- resourcequotas
verbs:
- create
- get
- list
- watch
- apiGroups:
- etcd.improbable.io
resources:
Expand Down

0 comments on commit 7cfdf19

Please sign in to comment.