From 0c8e235b0c68119b0420cb605f41565709d575c7 Mon Sep 17 00:00:00 2001 From: Jason Deal Date: Sat, 23 Nov 2024 18:30:03 -0800 Subject: [PATCH] hydration controller --- .../nodeclaim/lifecycle/controller.go | 3 + .../nodeclaim/lifecycle/hydration.go | 69 +++++++++++++++++ .../nodeclaim/lifecycle/hydration_test.go | 76 +++++++++++++++++++ pkg/test/nodeclaim.go | 4 +- 4 files changed, 149 insertions(+), 3 deletions(-) create mode 100644 pkg/controllers/nodeclaim/lifecycle/hydration.go create mode 100644 pkg/controllers/nodeclaim/lifecycle/hydration_test.go diff --git a/pkg/controllers/nodeclaim/lifecycle/controller.go b/pkg/controllers/nodeclaim/lifecycle/controller.go index 6293af6b9..89401dfda 100644 --- a/pkg/controllers/nodeclaim/lifecycle/controller.go +++ b/pkg/controllers/nodeclaim/lifecycle/controller.go @@ -69,6 +69,7 @@ type Controller struct { registration *Registration initialization *Initialization liveness *Liveness + hydration *Hydration } func NewController(clk clock.Clock, kubeClient client.Client, cloudProvider cloudprovider.CloudProvider, recorder events.Recorder) *Controller { @@ -81,6 +82,7 @@ func NewController(clk clock.Clock, kubeClient client.Client, cloudProvider clou registration: &Registration{kubeClient: kubeClient}, initialization: &Initialization{kubeClient: kubeClient}, liveness: &Liveness{clock: clk, kubeClient: kubeClient}, + hydration: &Hydration{kubeClient: kubeClient}, } } @@ -142,6 +144,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (re c.registration, c.initialization, c.liveness, + c.hydration, } { res, err := reconciler.Reconcile(ctx, nodeClaim) errs = multierr.Append(errs, err) diff --git a/pkg/controllers/nodeclaim/lifecycle/hydration.go b/pkg/controllers/nodeclaim/lifecycle/hydration.go new file mode 100644 index 000000000..832f8480f --- /dev/null +++ b/pkg/controllers/nodeclaim/lifecycle/hydration.go @@ -0,0 +1,69 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package lifecycle + +import ( + "context" + "fmt" + + "github.com/samber/lo" + "k8s.io/apimachinery/pkg/api/equality" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + v1 "sigs.k8s.io/karpenter/pkg/apis/v1" + nodeclaimutils "sigs.k8s.io/karpenter/pkg/utils/nodeclaim" +) + +// The Hydration sub-reconciler is used to hydrate Nodes / NodeClaims with metadata added in new versions of Karpenter. +// TODO: Remove after a sufficiently long time from the v1.1 release +type Hydration struct { + kubeClient client.Client +} + +func (h *Hydration) Reconcile(ctx context.Context, nodeClaim *v1.NodeClaim) (reconcile.Result, error) { + nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{ + v1.NodeClassLabelKey(nodeClaim.Spec.NodeClassRef.GroupKind()): nodeClaim.Spec.NodeClassRef.Name, + }) + if err := h.hydrateNode(ctx, nodeClaim); err != nil { + return reconcile.Result{}, client.IgnoreNotFound(fmt.Errorf("hydrating node, %w", err)) + } + return reconcile.Result{}, nil +} + +func (h *Hydration) hydrateNode(ctx context.Context, nodeClaim *v1.NodeClaim) error { + node, err := nodeclaimutils.NodeForNodeClaim(ctx, h.kubeClient, nodeClaim) + if err != nil { + if nodeclaimutils.IsNodeNotFoundError(err) { + return nil + } + return err + } + stored := node.DeepCopy() + node.Labels = lo.Assign(node.Labels, map[string]string{ + v1.NodeClassLabelKey(nodeClaim.Spec.NodeClassRef.GroupKind()): nodeClaim.Spec.NodeClassRef.Name, + }) + if !equality.Semantic.DeepEqual(stored, node) { + // We use client.MergeFromWithOptimisticLock because patching a list with a JSON merge patch + // can cause races due to the fact that it fully replaces the list on a change + // Here, we are updating the taint list + if err := h.kubeClient.Patch(ctx, node, client.MergeFrom(stored)); err != nil { + return err + } + } + return nil +} diff --git a/pkg/controllers/nodeclaim/lifecycle/hydration_test.go b/pkg/controllers/nodeclaim/lifecycle/hydration_test.go new file mode 100644 index 000000000..9ada851a3 --- /dev/null +++ b/pkg/controllers/nodeclaim/lifecycle/hydration_test.go @@ -0,0 +1,76 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package lifecycle_test + +import ( + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + + v1 "sigs.k8s.io/karpenter/pkg/apis/v1" + "sigs.k8s.io/karpenter/pkg/test" + . "sigs.k8s.io/karpenter/pkg/test/expectations" +) + +var _ = Describe("Hydration", func() { + DescribeTable( + "Hydration", + func(isNodeClaimManaged bool) { + nodeClassRef := lo.Ternary(isNodeClaimManaged, &v1.NodeClassReference{ + Group: "karpenter.test.sh", + Kind: "TestNodeClass", + Name: "default", + }, &v1.NodeClassReference{ + Group: "karpenter.test.sh", + Kind: "UnmanagedNodeClass", + Name: "default", + }) + nodeClaim, node := test.NodeClaimAndNode(v1.NodeClaim{ + Spec: v1.NodeClaimSpec{ + NodeClassRef: nodeClassRef, + }, + }) + delete(nodeClaim.Labels, v1.NodeClassLabelKey(nodeClassRef.GroupKind())) + delete(node.Labels, v1.NodeClassLabelKey(nodeClassRef.GroupKind())) + // Launch the NodeClaim to ensure the lifecycle controller doesn't override the provider-id and break the + // link between the Node and NodeClaim. + nodeClaim.StatusConditions().SetTrue(v1.ConditionTypeLaunched) + ExpectApplied(ctx, env.Client, nodeClaim, node) + ExpectObjectReconciled(ctx, env.Client, nodeClaimController, nodeClaim) + + // The missing NodeClass label should have been propagated to both the Node and NodeClaim + node = ExpectExists(ctx, env.Client, node) + fmt.Printf("provider id: %s\n", node.Spec.ProviderID) + value, ok := node.Labels[v1.NodeClassLabelKey(nodeClassRef.GroupKind())] + Expect(ok).To(Equal(isNodeClaimManaged)) + if isNodeClaimManaged { + Expect(value).To(Equal(nodeClassRef.Name)) + } + + nodeClaim = ExpectExists(ctx, env.Client, nodeClaim) + value, ok = nodeClaim.Labels[v1.NodeClassLabelKey(nodeClassRef.GroupKind())] + Expect(ok).To(Equal(isNodeClaimManaged)) + if isNodeClaimManaged { + Expect(value).To(Equal(nodeClassRef.Name)) + } + }, + Entry("should hydrate missing metadata onto the NodeClaim and Node", true), + Entry("should ignore NodeClaims which aren't managed by this Karpenter instance", false), + ) +}) diff --git a/pkg/test/nodeclaim.go b/pkg/test/nodeclaim.go index 797f087e5..467297b66 100644 --- a/pkg/test/nodeclaim.go +++ b/pkg/test/nodeclaim.go @@ -64,9 +64,7 @@ func NodeClaim(overrides ...v1.NodeClaim) *v1.NodeClaim { func NodeClaimAndNode(overrides ...v1.NodeClaim) (*v1.NodeClaim, *corev1.Node) { nc := NodeClaim(overrides...) - node := NodeClaimLinkedNode(nc) - nc.Status.NodeName = node.Name - return nc, node + return nc, NodeClaimLinkedNode(nc) } // NodeClaimsAndNodes creates homogeneous groups of NodeClaims and Nodes based on the passed in options, evenly divided by the total nodeclaims requested