Introduce noRetry Parameter for checkcapacity ProvisioningRequest #7496

Merged · 4 commits · Nov 25, 2024

@@ -44,6 +44,10 @@ import (
schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
)

const (
NoRetryParameter = "noRetry"
)

type checkCapacityProvClass struct {
context *context.AutoscalingContext
client *provreqclient.ProvisioningRequestClient
@@ -139,7 +143,13 @@ func (o *checkCapacityProvClass) checkcapacity(unschedulablePods []*apiv1.Pod, p
err, cleanupErr := clustersnapshot.WithForkedSnapshot(o.context.ClusterSnapshot, func() (bool, error) {
st, _, err := o.schedulingSimulator.TrySchedulePods(o.context.ClusterSnapshot, unschedulablePods, scheduling.ScheduleAnywhere, true)
if len(st) < len(unschedulablePods) || err != nil {
conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionFalse, conditions.CapacityIsNotFoundReason, "Capacity is not found, CA will try to find it later.", metav1.Now())
if noRetry, ok := provReq.Spec.Parameters[NoRetryParameter]; ok && noRetry == "true" {
// Failed=true condition triggers a retry in Kueue. Otherwise a ProvisioningRequest with the Provisioned=False
// condition blocks capacity in Kueue even while it is in the middle of the backoff waiting time.
conditions.AddOrUpdateCondition(provReq, v1.Failed, metav1.ConditionTrue, conditions.CapacityIsNotFoundReason, "CA could not find requested capacity", metav1.Now())
} else {
conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionFalse, conditions.CapacityIsNotFoundReason, "Capacity is not found, CA will try to find it later.", metav1.Now())
}
capacityAvailable = false
} else {
conditions.AddOrUpdateCondition(provReq, v1.Provisioned, metav1.ConditionTrue, conditions.CapacityIsFoundReason, conditions.CapacityIsFoundMsg, metav1.Now())
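For context, here is a minimal sketch (not part of this diff) of how a client such as Kueue could opt a check-capacity ProvisioningRequest out of retries. The import path, the ProvisioningRequestSpec field names, and the check-capacity class name are assumptions inferred from the identifiers used above (v1.Parameter, provReq.Spec.Parameters); only the "noRetry" key itself is defined by this PR.

package example

import (
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

    v1 "k8s.io/autoscaler/cluster-autoscaler/apis/provisioningrequest/autoscaling.x-k8s.io/v1"
)

// newNoRetryCheckCapacityRequest builds a check-capacity ProvisioningRequest that
// asks CA to report Failed=True instead of Provisioned=False when capacity is missing,
// so the request is handed back to the caller rather than retried by CA.
func newNoRetryCheckCapacityRequest(name, namespace string) *v1.ProvisioningRequest {
    return &v1.ProvisioningRequest{
        ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: namespace},
        Spec: v1.ProvisioningRequestSpec{
            // Class name and field layout are assumptions; only Parameters appears in this PR.
            ProvisioningClassName: "check-capacity.autoscaling.x-k8s.io",
            Parameters:            map[string]v1.Parameter{"noRetry": "true"},
        },
    }
}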
@@ -206,6 +206,7 @@ func TestScaleUp(t *testing.T) {
batchTimebox time.Duration
numProvisionedTrue int
numProvisionedFalse int
numFailedTrue int
}{
{
name: "no ProvisioningRequests",
@@ -242,6 +243,15 @@
provReqToScaleUp: newCheckCapacityCpuProvReq,
scaleUpResult: status.ScaleUpSuccessful,
},
{
name: "impossible check-capacity, with noRetry parameter",
provReqs: []*provreqwrapper.ProvisioningRequest{
impossibleCheckCapacityReq.Parameters(map[string]v1.Parameter{"noRetry": "true"}),
},
provReqToScaleUp: impossibleCheckCapacityReq,
scaleUpResult: status.ScaleUpNoOptionsAvailable,
numFailedTrue: 1,
},
{
name: "some capacity is pre-booked, atomic scale-up not needed",
provReqs: []*provreqwrapper.ProvisioningRequest{bookedCapacityProvReq, atomicScaleUpProvReq},
@@ -438,6 +448,7 @@ func TestScaleUp(t *testing.T) {
provReqsAfterScaleUp, err := client.ProvisioningRequestsNoCache()
assert.NoError(t, err)
assert.Equal(t, len(tc.provReqs), len(provReqsAfterScaleUp))
assert.Equal(t, tc.numFailedTrue, NumProvisioningRequestsWithCondition(provReqsAfterScaleUp, v1.Failed, metav1.ConditionTrue))

if tc.batchProcessing {
// Since batch processing returns an aggregated result, we need to check the number of requests that carry the Provisioned condition.
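The NumProvisioningRequestsWithCondition helper used in the new assertion is defined elsewhere in the test package; the signature is inferred from the call above, and the body below is only an assumed sketch of such a counter:

// NumProvisioningRequestsWithCondition counts the requests whose status contains
// the given condition type with the given status (each request counted at most once).
func NumProvisioningRequestsWithCondition(provReqs []*provreqwrapper.ProvisioningRequest, conditionType string, status metav1.ConditionStatus) int {
    count := 0
    for _, pr := range provReqs {
        for _, cond := range pr.Status.Conditions {
            if cond.Type == conditionType && cond.Status == status {
                count++
                break
            }
        }
    }
    return count
}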
12 changes: 12 additions & 0 deletions cluster-autoscaler/provisioningrequest/provreqwrapper/wrapper.go
@@ -81,3 +81,15 @@ func errMissingPodTemplates(podSets []v1.PodSet, podTemplates []*apiv1.PodTempla
}
return fmt.Errorf("missing pod templates, %d pod templates were referenced, %d templates were missing: %s", len(podSets), len(missingTemplates), strings.Join(missingTemplates, ","))
}

// Parameters makes a deep copy of the embedded ProvisioningRequest and sets its Parameters.
func (pr *ProvisioningRequest) Parameters(params map[string]v1.Parameter) *ProvisioningRequest {
prCopy := pr.DeepCopy()
if prCopy.Spec.Parameters == nil {
prCopy.Spec.Parameters = make(map[string]v1.Parameter, len(params))
}
for key, val := range params {
prCopy.Spec.Parameters[key] = val
}
return &ProvisioningRequest{prCopy, pr.PodTemplates}
}
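Usage sketch, mirroring the test change above: because the helper deep-copies the wrapped object, a shared fixture can be specialized per test case without mutating the original.

// impossibleCheckCapacityReq is the shared test fixture; Parameters returns a copy,
// so the original request stays unchanged for the other test cases.
noRetryReq := impossibleCheckCapacityReq.Parameters(map[string]v1.Parameter{"noRetry": "true"})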