Skip to content

Commit 3761abf

Browse files
authored
[jobframework] Handle all errors when job is suspended (#1907)
1 parent 0b5b97c commit 3761abf

File tree

3 files changed

+9
-2
lines changed

3 files changed

+9
-2
lines changed

‎.golangci.yaml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@ linters:
2525
- gocritic
2626
- goimports
2727
- govet
28+
- loggercheck
2829
- misspell
2930
- unconvert

‎pkg/controller/jobframework/reconciler.go‎

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,12 +389,14 @@ func (r *JobReconciler) ReconcileGenericJob(ctx context.Context, req ctrl.Reques
389389
if err != nil {
390390
log.Error(err, "Unsuspending job")
391391
if podset.IsPermanent(err) {
392+
allErrs := err
392393
// Mark the workload as finished with failure since there is no point in retrying.
393394
errUpdateStatus := workload.UpdateStatus(ctx, r.client, wl, kueue.WorkloadFinished, metav1.ConditionTrue, FailedToStartFinishedReason, err.Error(), constants.JobControllerName)
394395
if errUpdateStatus != nil {
395-
log.Error(errUpdateStatus, "Updating workload status, on start failure %s", err.Error())
396+
allErrs = errors.Join(err, errUpdateStatus)
397+
log.Error(allErrs, "Could not mark Workload as finished after start failure")
396398
}
397-
return ctrl.Result{}, errUpdateStatus
399+
return ctrl.Result{}, allErrs
398400
}
399401
}
400402
return ctrl.Result{}, err

‎pkg/controller/jobs/job/job_controller_test.go‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,7 @@ func TestReconciler(t *testing.T) {
768768
}).
769769
Obj(),
770770
},
771+
wantErr: podset.ErrInvalidPodSetUpdate,
771772
},
772773
"when workload is admitted and PodSetUpdates conflict between admission checks on annotations, the workload is finished with failure": {
773774
job: *baseJobWrapper.Clone().
@@ -839,6 +840,7 @@ func TestReconciler(t *testing.T) {
839840
}).
840841
Obj(),
841842
},
843+
wantErr: podset.ErrInvalidPodSetUpdate,
842844
},
843845
"when workload is admitted and PodSetUpdates conflict between admission checks on nodeSelector, the workload is finished with failure": {
844846
job: *baseJobWrapper.Clone().
@@ -910,6 +912,7 @@ func TestReconciler(t *testing.T) {
910912
}).
911913
Obj(),
912914
},
915+
wantErr: podset.ErrInvalidPodSetUpdate,
913916
},
914917
"when workload is admitted and PodSetUpdates conflict between admission check nodeSelector and current node selector, the workload is finished with failure": {
915918
job: *baseJobWrapper.Clone().
@@ -959,6 +962,7 @@ func TestReconciler(t *testing.T) {
959962
}).
960963
Obj(),
961964
},
965+
wantErr: podset.ErrInvalidPodSetUpdate,
962966
},
963967
"when workload is admitted the PodSetUpdates values matching for key": {
964968
job: *baseJobWrapper.Clone().

0 commit comments

Comments
 (0)