diff --git a/clusterloader2/pkg/measurement/common/wait_for_jobs.go b/clusterloader2/pkg/measurement/common/wait_for_jobs.go index b195a29a43..b53e865a32 100644 --- a/clusterloader2/pkg/measurement/common/wait_for_jobs.go +++ b/clusterloader2/pkg/measurement/common/wait_for_jobs.go @@ -217,13 +217,20 @@ func (w *waitForFinishedJobsMeasurement) handleObject(oldObj, newObj interface{} klog.Errorf("Failed obtaining meta key for Job: %v", err) return } - completed, condition := finishedJobCondition(newJob) + // Use handleJob (not newJob) to check completion status. + // When a job is deleted (newJob is nil), we need to check the old state + // to preserve completion status for jobs deleted via ttlSecondsAfterFinished. + completed, condition := finishedJobCondition(handleJob) w.lock.Lock() defer w.lock.Unlock() + // Once a job is marked complete/failed, preserve that state even if deleted. + // Only remove from tracking if job exists but is not yet complete. if completed { w.finishedJobs[key] = condition - } else { + } else if _, alreadyFinished := w.finishedJobs[key]; !alreadyFinished { + // Only delete if the job was never completed. If it was previously + // marked complete, keep that status even if the job is deleted. delete(w.finishedJobs, key) } } diff --git a/clusterloader2/testing/dra/config.yaml b/clusterloader2/testing/dra/config.yaml index 367fae86fd..c88877809a 100644 --- a/clusterloader2/testing/dra/config.yaml +++ b/clusterloader2/testing/dra/config.yaml @@ -210,7 +210,7 @@ steps: Params: action: gather labelSelector: job-type = short-lived - timeout: 15m + timeout: 30m - name: Measure scheduler metrics measurements: - Identifier: ChurnSchedulingMetrics